# Environmental variable correlations

This script extracts soil nitrogen, soil phosphorus, woody vegetation (tree canopy) cover, and mean annual precipitation from Google Earth Engine at random points across Australia so that their correlations can be analyzed.

The purpose is to show how the locust outbreak model disentangled the positive correlation between nitrogen and these variables

Read the manuscript for further discussion.

# Import libraries and initialize Google Earth Engine

In [1]:
import ee
import pandas as pd
import os 
import time

# Initialize Earth Engine
# This has to run before any other ee.* call in the notebook.
# NOTE(review): assumes Earth Engine credentials are already stored in this
# environment; if not, ee.Authenticate() is presumably needed first -- confirm.
ee.Initialize()


# set relative work directory

In [3]:
# Step one level up from the current working directory so that the relative
# paths used later in the notebook resolve against the parent directory.
# NOTE: this is not idempotent -- re-running the cell climbs one more level.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
os.chdir(parent_dir)

# Last expression of the cell: shows the directory we ended up in.
os.getcwd()

'/home/datascience/herbivore_nutrient_interactions'

# Load Australia shape

In [4]:

# Country boundary polygons (the "USDOS/LSIB_SIMPLE/2017" feature collection)
countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")

# Keep only the feature(s) whose 'country_na' property equals 'Australia';
# the result is used below as the region for random point generation.
is_australia = ee.Filter.eq('country_na', 'Australia')
australia = countries.filter(is_australia)



# Load in rasters

In [5]:

# Load the four environmental rasters that are stacked and sampled below.

# raster1: SLGA images with attribute_code 'NTO', reduced to a single image.
# Presumably total soil nitrogen (the notebook intro lists nitrogen); the two
# selected bands look like the 0-5 cm and 5-15 cm layers -- confirm against
# the SLGA catalog entry.
raster1 = (
    ee.ImageCollection('CSIRO/SLGA')
    .filter(ee.Filter.eq('attribute_code', 'NTO'))
    .select(['NTO_000_005_EV', 'NTO_005_015_EV'])
    .mean()
)

# raster2: per-pixel mean of the "tree_canopy_cover" band over the images
# that fall in calendar years 2000-2017.
raster2 = (
    ee.ImageCollection("NASA/MEASURES/GFCC/TC/v3")
    .select("tree_canopy_cover")
    .filter(ee.Filter.calendarRange(2000, 2017, 'year'))
    .mean()
)

# raster3: the 'bio12' band of WorldClim BIO (presumably mean annual
# precipitation, as named in the notebook intro).
raster3 = ee.Image("WORLDCLIM/V1/BIO").select('bio12')

# raster4: same recipe as raster1 but for attribute_code 'PTO'
# (presumably total soil phosphorus).
raster4 = (
    ee.ImageCollection('CSIRO/SLGA')
    .filter(ee.Filter.eq('attribute_code', 'PTO'))
    .select(['PTO_000_005_EV', 'PTO_005_015_EV'])
    .mean()
)

# Stack the bands of all four rasters into one multi-band image.
# NOTE: despite its name, `temp_bands` holds no temperature data; the name is
# kept because later cells refer to it.
temp_bands = raster1
for layer in (raster2, raster3, raster4):
    temp_bands = temp_bands.addBands(layer)



# Sample random points within the Australia boundary

In [6]:

# Draw 100,000 random points inside the Australia boundary. The fixed seed
# makes the point set reproducible between runs.
sample = ee.FeatureCollection.randomPoints(
    region=australia,
    points=100000,
    seed=420,
    maxError=1,
)

# Sample the stacked bands at every random point (scale=30).
# This only builds a server-side request; nothing is downloaded here.
imgSamp = temp_bands.sampleRegions(collection=sample, scale=30)


# For loop to extract data from google earth engine

In [8]:

# Download the sampled band values from Earth Engine in small batches.
#
# Each batch is requested separately with getInfo() so that no single request
# carries all points at once. Batches are collected in a list and concatenated
# once at the end (concatenating inside the loop re-copies the growing frame
# on every iteration). A batch that fails is retried a few times; batches
# that never succeed are recorded in `failed_batches` so missing rows are
# visible instead of silently absent from `df`.

# Set batch size
batch_size = 500
total_points = 100000
max_attempts = 3  # tries per batch before it is recorded as failed

batch_frames = []    # one DataFrame per successfully downloaded batch
failed_batches = []  # (start, end) offsets of batches that never succeeded

# Process in batches of `batch_size`
for start in range(0, total_points, batch_size):
    # Define the end of the batch
    end = min(start + batch_size, total_points)

    # Get the batch of points. Asking for `end - start` items (rather than a
    # fixed batch_size) keeps the final batch from running past total_points
    # when total_points is not a multiple of batch_size.
    batch_sample = sample.toList(end - start, start)
    batch_sample_fc = ee.FeatureCollection(batch_sample)

    # Sample the raster data (still a lazy, server-side description)
    img_samp_batch = temp_bands.sampleRegions(
        collection=batch_sample_fc,
        scale=30
    )

    for attempt in range(1, max_attempts + 1):
        try:
            # getInfo() is the call that actually talks to the server
            sample_dict = img_samp_batch.getInfo()['features']
        except Exception as e:
            # Broad on purpose: both Earth Engine and network errors land
            # here, and one bad batch should not abort the whole download.
            print(f"Error processing batch {start + 1} to {end}: {e}")
            if attempt == max_attempts:
                failed_batches.append((start, end))
            else:
                # Back off a little longer before each retry
                time.sleep(2 ** attempt)
        else:
            rows = [feature['properties'] for feature in sample_dict]
            if rows:
                # Skip empty batches so pd.concat never sees an empty frame
                batch_frames.append(pd.DataFrame(rows))

            print(f"Processed batch {start + 1} to {end}")

            # Add a delay to avoid overwhelming the API
            time.sleep(1)
            break

# Build the final DataFrame in a single concat.
# NOTE: if the loop is interrupted, the batches fetched so far are still
# available in `batch_frames`.
df = pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()

if failed_batches:
    print(f"{len(failed_batches)} batch(es) failed and are missing from df: {failed_batches}")

# Display the DataFrame
print(df.head())

Processed batch 1 to 500
Processed batch 501 to 1000
Processed batch 1001 to 1500
Processed batch 1501 to 2000
Processed batch 2001 to 2500
Processed batch 2501 to 3000
Processed batch 3001 to 3500
Processed batch 3501 to 4000
Processed batch 4001 to 4500
Processed batch 4501 to 5000
Processed batch 5001 to 5500
Processed batch 5501 to 6000
Processed batch 6001 to 6500
Processed batch 6501 to 7000
Processed batch 7001 to 7500
Processed batch 7501 to 8000
Processed batch 8001 to 8500
Processed batch 8501 to 9000
Processed batch 9001 to 9500
Processed batch 9501 to 10000
Processed batch 10001 to 10500
Processed batch 10501 to 11000
Processed batch 11001 to 11500
Processed batch 11501 to 12000
Processed batch 12001 to 12500
Processed batch 12501 to 13000
Processed batch 13001 to 13500
Processed batch 13501 to 14000
Processed batch 14001 to 14500
Processed batch 14501 to 15000
Processed batch 15001 to 15500
Processed batch 15501 to 16000
Processed batch 16001 to 16500
Processed batch 16501

In [10]:
# Write the sampled values to disk without the pandas row index. The path is
# relative, so it resolves against the current working directory.
output_csv = 'data/processed/spatial_modeling/environmental_correlation_data.csv'
df.to_csv(output_csv, index=False)
