In [1]:
# Import the necessary modules for the protocol
import ee as ee
ee.Initialize()
import pandas as pd
from scipy.spatial import ConvexHull
from sklearn.decomposition import PCA
import numpy as np
from itertools import combinations

In [2]:
def assessExtrapolation(importedData, compositeImage, propOfVariance):
    """Score composite pixels against 2-D convex hulls of the samples in PC space.

    The sampled covariates are standardized (z-scores) and a PCA is fitted on
    them. The same standardization and PC loadings are then applied to the
    composite image, giving one band per principal component. For every pair
    of retained principal components, the convex hull of the sample scores is
    computed and passed to ``ee.Classifier.spectralRegion`` to classify the
    matching pair of PC bands. The classified images are summed and divided by
    the number of pairs.

    Parameters
    ----------
    importedData : pandas.DataFrame
        Point samples, one column per covariate. The 'system:index' and '.geo'
        columns are dropped when present; every remaining column is treated as
        a covariate and is used as a band name of ``compositeImage``.
    compositeImage : ee.Image
        Image the points were sampled from. Only the bands named after the
        covariate columns are used.
    propOfVariance : int or float
        Cumulative explained variance, in percent (e.g. 90), that the retained
        principal components should reach.

    Returns
    -------
    ee.Image
        Sum of the per-pair classified images divided by the number of pairs.
        Presumably the share of hulls that contain each pixel, assuming
        spectralRegion yields 1 inside / 0 outside a hull -- TODO confirm.

    Raises
    ------
    ValueError
        If a covariate has zero or undefined standard deviation, or if fewer
        than two principal components are retained (no PC pair can be formed).
    """

    # Excise the columns of interest from the data frame; missing bookkeeping
    # columns are tolerated so pre-cleaned tables can be passed as well
    variablesOfInterest = importedData.drop(['system:index', '.geo'], axis=1, errors='ignore')

    # Compute the mean and standard deviation of each band
    meanVector = variablesOfInterest.mean()
    stdVector = variablesOfInterest.std()

    # A constant (or single-row) covariate cannot be standardized: dividing by
    # its standard deviation would inject NaN into the PCA and into the image
    degenerateBands = stdVector.index[~(stdVector > 0)].tolist()
    if degenerateBands:
        raise ValueError('Cannot standardize covariates with zero or undefined '
                         'standard deviation: ' + str(degenerateBands))

    # Standardize the point data
    standardizedData = (variablesOfInterest-meanVector)/stdVector

    # Then standardize the composite from which the points were sampled.
    # Selecting the bands by name keeps the image aligned with the sampled
    # columns even if the composite carries extra bands.
    meanList = meanVector.tolist()
    stdList = stdVector.tolist()
    bandNames = list(meanVector.index)
    meanImage = ee.Image(meanList).rename(bandNames)
    stdImage = ee.Image(stdList).rename(bandNames)
    standardizedImage = compositeImage.select(bandNames).subtract(meanImage).divide(stdImage)

    # Run a PCA on the point samples. A single fit_transform supplies both the
    # loadings (components_) and the sample scores, so they are guaranteed to
    # come from the same fit.
    pcaOutput = PCA()
    sampleScores = pcaOutput.fit_transform(standardizedData)

    # Save the cumulative variance (in percent) represented by each PC
    cumulativeVariance = np.cumsum(np.round(pcaOutput.explained_variance_ratio_, decimals=4)*100)

    # Make a list of PC names for future organizational purposes; sized from
    # the fitted components, which can be fewer than the number of covariates
    # when there are fewer samples than covariates
    pcNames = ['PC'+str(x) for x in range(1, pcaOutput.components_.shape[0]+1)]

    # Get the PC loadings as a data frame
    loadingsDF = pd.DataFrame(pcaOutput.components_,columns=[str(x)+'_Loads' for x in bandNames],index=pcNames)

    # Get the original data transformed into PC space
    transformedData = pd.DataFrame(sampleScores,columns=pcNames)

    # Make principal components images, multiplying the standardized image by each of the eigenvectors
    # Collect each one of the images in a single image collection;

    # First step: make an image collection wherein each image is a PC loadings image
    listOfLoadings = ee.List(loadingsDF.values.tolist())
    eePCNames = ee.List(pcNames)
    zippedList = eePCNames.zip(listOfLoadings)
    def makeLoadingsImage(zippedValue):
        # zippedValue is a [PC name, loadings list] pair
        nameAndLoadings = ee.List(zippedValue)
        return ee.Image.constant(nameAndLoadings.get(1)).rename(bandNames).set('PC',nameAndLoadings.get(0))
    loadingsImageCollection = ee.ImageCollection(zippedList.map(makeLoadingsImage))

    # Second step: multiply each of the loadings image by the standardized image and reduce it using a "sum"
    # to finalize the matrix multiplication
    def finalizePCImages(loadingsImage):
        loadingsImage = ee.Image(loadingsImage)
        pcName = ee.String(loadingsImage.get('PC'))
        return loadingsImage.multiply(standardizedImage).reduce('sum').rename([pcName]).set('PC',pcName)
    principalComponentsImages = loadingsImageCollection.map(finalizePCImages)

    # Choose how many principal components are of interest in this analysis based on amount of
    # variance explained: the first PC whose cumulative variance reaches the target is included.
    # The count is capped at the number of available PCs (rounding can leave the last cumulative
    # value marginally below a target of 100).
    numberOfComponents = min(int(np.sum(cumulativeVariance < propOfVariance))+1, len(pcNames))
    if numberOfComponents < 2:
        # With a single PC there are no pairs, hence no hulls and a division by zero below
        raise ValueError('At least two principal components are needed to build 2-D convex hulls, '
                         'but only ' + str(numberOfComponents) + ' would be used; '
                         'increase propOfVariance or supply more covariates.')
    print('Number of Principal Components being used:',numberOfComponents)

    # Compute the combinations of the principal components being used to compute the 2-D convex hulls
    tupleCombinations = list(combinations(pcNames[0:numberOfComponents],2))
    print('Number of Combinations being used:',len(tupleCombinations))

    # Generate the convex hull of the sample scores for every pair of principal components,
    # stored as a flat [x1, y1, x2, y2, ...] list of hull vertices
    cHullCoordsList = list()
    for firstPC, secondPC in tupleCombinations:
        pairScores = transformedData[[firstPC,secondPC]].values
        outputCHull = ConvexHull(pairScores)
        listOfCoordinates = pairScores[outputCHull.vertices].tolist()
        flattenedList = [val for sublist in listOfCoordinates for val in sublist]
        cHullCoordsList.append(flattenedList)

    # Reformat the image collection to an image with band names that can be selected programmatically
    pcImage = principalComponentsImages.toBands().rename(pcNames)

    # Generate an image collection with each PC pair selected alongside its matching convex hull
    listOfPCs = ee.List(tupleCombinations)
    listOfCHullCoords = ee.List(cHullCoordsList)
    zippedListPCsAndCHulls = listOfPCs.zip(listOfCHullCoords)

    def makeToClassifyImages(pcPairAndCHull):
        # pcPairAndCHull is a [[PC name, PC name], hull coordinates] pair
        pcPairAndCHull = ee.List(pcPairAndCHull)
        imageToClassify = pcImage.select(pcPairAndCHull.get(0)).set('CHullCoords',pcPairAndCHull.get(1))
        classifiedImage = imageToClassify.rename('u','v').classify(ee.Classifier.spectralRegion([imageToClassify.get('CHullCoords')]))
        return classifiedImage
    classifiedImages = ee.ImageCollection(zippedListPCsAndCHulls.map(makeToClassifyImages))

    # Average the per-pair classifications over the number of PC pairs
    finalImageToExport = classifiedImages.sum().divide(ee.Image.constant(len(tupleCombinations)))

    return finalImageToExport



In [3]:
# Instantiate the composite that was used to sample the points
compositeImage_Richness = ee.Image("users/devinrouth/Earthworm_PCA_IntExt_2020/earthworm_richness_comp_masked")

# Bands of the composite that take part in the analysis
bandNames = [
    "Aridity_RichnessScaled",
    "CECSOL_RichnessCutScaled",
    "CHELSA_bio10_15_RichnessCutScaled",
    "CHELSA_bio10_7_RichnessCutScaled",
    "CLYPPT_RichnessCutScaled",
    "ORCDRC_RichnessCutScaled",
    "PETyr_RichnessScaled",
    "PHIHOX_RichnessCutScaled",
    "SLTPPT_RichnessCutScaled",
    "Snow_newValues_WGS84",
    "elevation_RichnessScaled"
]
print('Composite Bands',bandNames)

# Import the sampled point data and view a summary of it.
# DataFrame.info() writes the summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
importedData_Richness = pd.read_csv('20200115_Earthworm_RichnessPointsSampled.csv')
importedData_Richness.info()

# Blank lines separating the data summary from the band listing
# (emits eight newline characters in total)
print('\n' * 7)

# Restrict the composite to the bands listed above and confirm the selection
cleanedImage_Richness = compositeImage_Richness.select(bandNames)
print('Bands being used',cleanedImage_Richness.bandNames().getInfo())

# Input the cumulative variance, as a percentage, that the retained
# principal components should cover when running the script
propOfVariance = 90

Composite Bands ['Aridity_RichnessScaled', 'CECSOL_RichnessCutScaled', 'CHELSA_bio10_15_RichnessCutScaled', 'CHELSA_bio10_7_RichnessCutScaled', 'CLYPPT_RichnessCutScaled', 'ORCDRC_RichnessCutScaled', 'PETyr_RichnessScaled', 'PHIHOX_RichnessCutScaled', 'SLTPPT_RichnessCutScaled', 'Snow_newValues_WGS84', 'elevation_RichnessScaled']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6187 entries, 0 to 6186
Data columns (total 13 columns):
system:index                         6187 non-null object
Aridity_RichnessScaled               6187 non-null float64
CECSOL_RichnessCutScaled             6187 non-null float64
CHELSA_bio10_15_RichnessCutScaled    6187 non-null float64
CHELSA_bio10_7_RichnessCutScaled     6187 non-null float64
CLYPPT_RichnessCutScaled             6187 non-null float64
ORCDRC_RichnessCutScaled             6187 non-null float64
PETyr_RichnessScaled                 6187 non-null float64
PHIHOX_RichnessCutScaled             6187 non-null float64
SLTPPT_RichnessCutScaled       

In [4]:
# Run the extrapolation assessment on the richness samples and their composite
finalImageToExport_Richness = assessExtrapolation(
    importedData=importedData_Richness,
    compositeImage=cleanedImage_Richness,
    propOfVariance=propOfVariance,
)

Number of Principal Components being used: 7
Number of Combinations being used: 21


In [5]:
# Export the image to test it.
# Export region: a polygon spanning all longitudes between 88 N and 88 S.
unboundedGeo = ee.Geometry.Polygon(
    [-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88],
    None,
    False
)

# Gather the export settings first, then hand them to the asset export.
# The crsTransform values correspond to a 1/120-degree grid anchored at
# (-180, 90) -- presumably [xScale, xShear, xTranslate, yShear, yScale,
# yTranslate]; confirm against the Export documentation.
exportSettings_Richness = {
    'image': finalImageToExport_Richness,
    'description': 'Earthworm_Richness_PCA_CHull_IntExt',
    'assetId': 'users/devinrouth/Earthworm_PCA_IntExt_2020/Earthworm_Richness_PCA_CHull_IntExt_20200115',
    'region': unboundedGeo.getInfo()['coordinates'],
    'maxPixels': 1e13,
    'crs': 'EPSG:4326',
    'crsTransform': '[0.008333333333333333,0,-180,0,-0.008333333333333333,90]',
}
task = ee.batch.Export.image.toAsset(**exportSettings_Richness)

# Submit the export task to Earth Engine
task.start()

In [6]:
# Instantiate the composite that was used to sample the points
compositeImage_Abundance = ee.Image("users/devinrouth/Earthworm_PCA_IntExt_2020/earthworm_abundance_comp_masked")

# Bands of the composite that take part in the analysis
bandNames = [
    "Aridity_AbundanceScaled",
    "CECSOL_AbundanceCutScaled",
    "CHELSA_bio10_15_AbundanceCutScaled",
    "CHELSA_bio10_7_AbundanceCutScaled",
    "CLYPPT_AbundanceCutScaled",
    "ORCDRC_AbundanceCutScaled",
    "PETyr_AbundanceScaled",
    "PHIHOX_AbundanceCutScaled",
    "SLTPPT_AbundanceCutScaled",
    "Snow_newValues_WGS84",
    "elevation_AbundanceScaled"
]
print('Composite Bands',bandNames)

# Import the sampled point data and view a summary of it.
# DataFrame.info() writes the summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
importedData_Abundance = pd.read_csv('20200115_Earthworm_AbundancePointsSampled.csv')
importedData_Abundance.info()

# Blank lines separating the data summary from the band listing
# (emits eight newline characters in total)
print('\n' * 7)

# Restrict the composite to the bands listed above and confirm the selection
cleanedImage_Abundance = compositeImage_Abundance.select(bandNames)
print('Bands being used',cleanedImage_Abundance.bandNames().getInfo())

# Input the cumulative variance, as a percentage, that the retained
# principal components should cover when running the script
propOfVariance = 90

Composite Bands ['Aridity_AbundanceScaled', 'CECSOL_AbundanceCutScaled', 'CHELSA_bio10_15_AbundanceCutScaled', 'CHELSA_bio10_7_AbundanceCutScaled', 'CLYPPT_AbundanceCutScaled', 'ORCDRC_AbundanceCutScaled', 'PETyr_AbundanceScaled', 'PHIHOX_AbundanceCutScaled', 'SLTPPT_AbundanceCutScaled', 'Snow_newValues_WGS84', 'elevation_AbundanceScaled']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7628 entries, 0 to 7627
Data columns (total 13 columns):
system:index                          7628 non-null object
Aridity_AbundanceScaled               7628 non-null float64
CECSOL_AbundanceCutScaled             7628 non-null float64
CHELSA_bio10_15_AbundanceCutScaled    7628 non-null float64
CHELSA_bio10_7_AbundanceCutScaled     7628 non-null float64
CLYPPT_AbundanceCutScaled             7628 non-null float64
ORCDRC_AbundanceCutScaled             7628 non-null float64
PETyr_AbundanceScaled                 7628 non-null float64
PHIHOX_AbundanceCutScaled             7628 non-null float64
SLTPPT_Abund

In [7]:
# Run the extrapolation assessment on the abundance samples and their composite
finalImageToExport_Abundance = assessExtrapolation(
    importedData=importedData_Abundance,
    compositeImage=cleanedImage_Abundance,
    propOfVariance=propOfVariance,
)

Number of Principal Components being used: 7
Number of Combinations being used: 21


In [8]:
# Export the image to test it.
# Export region: a polygon spanning all longitudes between 88 N and 88 S.
unboundedGeo = ee.Geometry.Polygon(
    [-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88],
    None,
    False
)

# Gather the export settings first, then hand them to the asset export.
# The crsTransform values correspond to a 1/120-degree grid anchored at
# (-180, 90) -- presumably [xScale, xShear, xTranslate, yShear, yScale,
# yTranslate]; confirm against the Export documentation.
exportSettings_Abundance = {
    'image': finalImageToExport_Abundance,
    'description': 'Earthworm_Abundance_PCA_CHull_IntExt',
    'assetId': 'users/devinrouth/Earthworm_PCA_IntExt_2020/Earthworm_Abundance_PCA_CHull_IntExt_20200115',
    'region': unboundedGeo.getInfo()['coordinates'],
    'maxPixels': 1e13,
    'crs': 'EPSG:4326',
    'crsTransform': '[0.008333333333333333,0,-180,0,-0.008333333333333333,90]',
}
task = ee.batch.Export.image.toAsset(**exportSettings_Abundance)

# Submit the export task to Earth Engine
task.start()

In [9]:
# Instantiate the composite that was used to sample the points
compositeImage_Biomass = ee.Image("users/devinrouth/Earthworm_PCA_IntExt_2020/earthworm_biomass_comp_masked")

# Bands of the composite that take part in the analysis
bandNames = [
    "CECSOL_BiomassCutScaled",
    "CHELSA_bio10_12_BiomassCutScaled",
    "CHELSA_bio10_15_BiomassCutScaled",
    "CHELSA_bio10_7_BiomassCutScaled",
    "CLYPPT_BiomassCutScaled",
    "ORCDRC_BiomassCutScaled",
    "PETyr_BiomassScaled",
    "PHIHOX_BiomassCutScaled",
    "SLTPPT_BiomassCutScaled",
    "Snow_newValues_WGS84",
    "elevation_BiomassScaled"
]
print('Composite Bands',bandNames)

# Import the sampled point data and view a summary of it.
# DataFrame.info() writes the summary itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
importedData_Biomass = pd.read_csv('20200115_Earthworm_BiomassPointsSampled.csv')
importedData_Biomass.info()

# Blank lines separating the data summary from the band listing
# (emits eight newline characters in total)
print('\n' * 7)

# Restrict the composite to the bands listed above and confirm the selection
cleanedImage_Biomass = compositeImage_Biomass.select(bandNames)
print('Bands being used',cleanedImage_Biomass.bandNames().getInfo())

# Input the cumulative variance, as a percentage, that the retained
# principal components should cover when running the script
propOfVariance = 90

Composite Bands ['CECSOL_BiomassCutScaled', 'CHELSA_bio10_12_BiomassCutScaled', 'CHELSA_bio10_15_BiomassCutScaled', 'CHELSA_bio10_7_BiomassCutScaled', 'CLYPPT_BiomassCutScaled', 'ORCDRC_BiomassCutScaled', 'PETyr_BiomassScaled', 'PHIHOX_BiomassCutScaled', 'SLTPPT_BiomassCutScaled', 'Snow_newValues_WGS84', 'elevation_BiomassScaled']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3650 entries, 0 to 3649
Data columns (total 13 columns):
system:index                        3650 non-null object
CECSOL_BiomassCutScaled             3650 non-null float64
CHELSA_bio10_12_BiomassCutScaled    3650 non-null float64
CHELSA_bio10_15_BiomassCutScaled    3650 non-null float64
CHELSA_bio10_7_BiomassCutScaled     3650 non-null float64
CLYPPT_BiomassCutScaled             3650 non-null float64
ORCDRC_BiomassCutScaled             3650 non-null float64
PETyr_BiomassScaled                 3650 non-null float64
PHIHOX_BiomassCutScaled             3650 non-null float64
SLTPPT_BiomassCutScaled             365

In [10]:
# Run the extrapolation assessment on the biomass samples and their composite
finalImageToExport_Biomass = assessExtrapolation(
    importedData=importedData_Biomass,
    compositeImage=cleanedImage_Biomass,
    propOfVariance=propOfVariance,
)

Number of Principal Components being used: 7
Number of Combinations being used: 21


In [11]:
# Export the image to test it.
# Export region: a polygon spanning all longitudes between 88 N and 88 S.
unboundedGeo = ee.Geometry.Polygon(
    [-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88],
    None,
    False
)

# Gather the export settings first, then hand them to the asset export.
# The crsTransform values correspond to a 1/120-degree grid anchored at
# (-180, 90) -- presumably [xScale, xShear, xTranslate, yShear, yScale,
# yTranslate]; confirm against the Export documentation.
exportSettings_Biomass = {
    'image': finalImageToExport_Biomass,
    'description': 'Earthworm_Biomass_PCA_CHull_IntExt',
    'assetId': 'users/devinrouth/Earthworm_PCA_IntExt_2020/Earthworm_Biomass_PCA_CHull_IntExt_20200115',
    'region': unboundedGeo.getInfo()['coordinates'],
    'maxPixels': 1e13,
    'crs': 'EPSG:4326',
    'crsTransform': '[0.008333333333333333,0,-180,0,-0.008333333333333333,90]',
}
task = ee.batch.Export.image.toAsset(**exportSettings_Biomass)

# Submit the export task to Earth Engine
task.start()