In [2]:
import ee
import numpy as np
import pandas as pd
import geopandas as gpd
from earthshot import mon_stats
from earthshot import water_viz as vis
from earthshot import normalize as norm
from statistics import mean

from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score

In [3]:
# Initialize the Earth Engine client library for this kernel session.
# NOTE(review): presumably credentials were already stored via a prior
# `ee.Authenticate()` run on this machine — confirm for fresh environments.
ee.Initialize()

In [4]:
#Function to convert FeatureCollection into DataFrame
def fc2df(fc):
    """Convert an Earth Engine FeatureCollection into a GeoDataFrame.

    The collection is fetched client-side with ``getInfo()``. Every feature
    becomes one row: its properties become columns and its GeoJSON geometry
    is parsed into a geometry object stored in the ``geometry`` column.

    Parameters
    ----------
    fc : object exposing ``getInfo()`` (e.g. ``ee.FeatureCollection``)
        ``getInfo()`` must return a dict with a ``'features'`` list of
        GeoJSON-style features (``'properties'`` and ``'geometry'`` keys).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature. A completely empty GeoDataFrame is returned
        when the collection holds no features.
    """
    # getInfo() transfers the whole collection to the client in a single
    # blocking request.
    features = fc.getInfo()['features']

    # Nothing to parse: hand back an empty frame instead of relying on how
    # the parser treats an empty list.
    if not features:
        return gpd.GeoDataFrame()

    # Parse the GeoJSON with geopandas itself. Calling np.shape on a GeoJSON
    # dict yields an empty tuple, not a geometry, and assigning a lazy `map`
    # object to a column does not produce one value per row, so the geometry
    # column has to be built by a GeoJSON-aware constructor.
    return gpd.GeoDataFrame.from_features(features)

In [5]:
#Function to convert an image into featurecollection into Dataframe for ML processing
def extract_point_values(img_name, pts, scale=30, reducer=None):
    """Reduce an image over a set of features and return the result as a DataFrame.

    Parameters
    ----------
    img_name : ee.Image
        The image to sample. Despite the parameter name this is the image
        object itself (``reduceRegions`` is called on it), not an asset id.
    pts : ee.FeatureCollection
        Features over which the image is reduced.
    scale : number, optional
        Value forwarded to ``reduceRegions`` as ``scale``. Defaults to 30,
        the value previously hard-coded here.
    reducer : ee.Reducer, optional
        Reducer applied within each feature. Defaults to
        ``ee.Reducer.mean()``, the reducer previously hard-coded here.
        Callers in this notebook read the result from a column named
        ``'mean'``, so they rely on the default.

    Returns
    -------
    DataFrame as produced by ``fc2df``: one row per feature in the reduced
    collection.
    """
    # Build the default lazily so no Earth Engine object is created at
    # function-definition time.
    if reducer is None:
        reducer = ee.Reducer.mean()

    reduced_fc = img_name.reduceRegions(collection=pts,
                                        reducer=reducer,
                                        scale=scale)

    # Pull the reduced collection to the client as a DataFrame.
    return fc2df(reduced_fc)

In [6]:
# Import the Global MAR Inventory (a user-hosted Earth Engine asset) as a
# FeatureCollection. It is used below both as the clip region for the
# predictor images and as the set of locations at which they are sampled.
MAR_swales = ee.FeatureCollection('users/amgadellaboudy/Global_MAR_Inventory')

In [7]:
# Build one Earth Engine image per predictor (slope, porosity, runoff,
# precipitation, soil texture, water). All but the water image are clipped
# to the MAR inventory features.

# --- Terrain slope -----------------------------------------------------------
slope_img = ee.Image('users/jamesmcc/merit_slope/merit_terrain_slope').clip(MAR_swales)

# --- Available porosity: sum of the images tagged 'avail_porosity_mm' --------
smap_usda_clim = ee.ImageCollection('users/jamesmcc/smap_usda_climatology')
avail_porosity = smap_usda_clim.filter(ee.Filter.eq('band', 'avail_porosity_mm'))
avail_porosity_img = avail_porosity.sum().clip(MAR_swales)

# --- Surface runoff: sum of the per-month averages, scaled by 720 ------------
# NOTE(review): 720 is presumably hours in a 30-day month — confirm against
# the units of ERA5-Land `surface_runoff` and of mon_stats.bands_avgs.
runoff_clim = ee.ImageCollection("ECMWF/ERA5_LAND/MONTHLY").select('surface_runoff')
runoff_clim_m = mon_stats.bands_avgs(['surface_runoff'], runoff_clim)
runoff_img = (
    ee.ImageCollection(runoff_clim_m['avgs'].get('surface_runoff'))
    .sum()
    .multiply(720)
    .clip(MAR_swales)
)

# --- Precipitation: sum across the bands of the SM2RAIN image ----------------
precip_clim = ee.Image("OpenLandMap/CLM/CLM_PRECIPITATION_SM2RAIN_M/v01")
precip_img = precip_clim.reduce('sum').clip(MAR_swales)

# --- Soil texture class, summed over shallow and deep depth bands ------------
soil_types = ee.Image("OpenLandMap/SOL/SOL_TEXTURE-CLASS_USDA-TT_M/v02")


def _sum_soil_depths(depths):
    """Add together the `b<depth>` bands of `soil_types` for the given depths."""
    names = ['soil_{}'.format(d) for d in depths]
    bands = {name: soil_types.select('b{}'.format(d))
             for name, d in zip(names, depths)}
    # Yields e.g. 'soil_0 + soil_10 + soil_30'
    return soil_types.expression(' + '.join(names), bands)


top_soils = _sum_soil_depths((0, 10, 30))
bottom_soils = _sum_soil_depths((60, 100, 200))

top_soil_img = top_soils.clip(MAR_swales)
bottom_soil_img = bottom_soils.clip(MAR_swales)

# --- Water: per-pixel minimum of the 'water' band across the collection ------
# Unlike the images above, this one is not clipped to MAR_swales.
water_clim = ee.ImageCollection('GLCF/GLS_WATER').select('water')
water_img = water_clim.reduce(reducer=ee.Reducer.min())

In [None]:
# Sample every predictor image at the MAR inventory features and assemble the
# results side by side into a single DataFrame for the ML steps.

def _sample_as(img, label):
    """Sample `img` at the MAR features and rename the 'mean' column to `label`."""
    sampled = extract_point_values(img, MAR_swales)
    return sampled.rename(columns={'mean': label})


# The slope frame is kept whole: it supplies the feature properties
# (coordinates, target label, geometry, ...) for the combined table.
df_slope = _sample_as(slope_img, 'Slope')

# For the remaining predictors only the sampled value column is kept.
porosity = _sample_as(avail_porosity_img, 'Porosity')['Porosity']
runoff = _sample_as(runoff_img, 'Runoff')['Runoff']
top_soil = _sample_as(top_soil_img, 'Top Soils')['Top Soils']
bottom_soil = _sample_as(bottom_soil_img, 'Bottom Soils')['Bottom Soils']
precip = _sample_as(precip_img, 'Precip')['Precip']
water = _sample_as(water_img, 'Water')['Water']

# Column-wise concat aligns on the row index of each sampled frame.
df_all = pd.concat(
    [df_slope, porosity, runoff, top_soil, bottom_soil, precip, water],
    axis=1,
)

df_all.head()



In [None]:
# Keep only the coordinates, the predictors and the target label (main_mar_t).
df_new = df_all[['latitude','longitude','Slope','Porosity','Runoff','Top Soils','Bottom Soils', 'Precip', 'Water','main_mar_t']]

# One-hot encode the sampled water class; one indicator column per distinct value.
dummies_df = pd.get_dummies(df_new['Water'])

# NOTE(review): the surviving indicator columns keep their numeric labels, so
# the frame mixes string and numeric column names; recent scikit-learn
# releases may reject that when fitting — confirm against the installed version.
df_dummies = (
    pd.concat([df_new, dummies_df], axis=1)
    # Discard the indicator columns for classes 1, 4, 200 and 201.
    # errors='ignore': a class that never occurs in this sample has no
    # indicator column, and a plain drop would raise KeyError for it.
    .drop(labels=[1, 4, 200, 201], axis=1, errors='ignore')
    # Remove rows where any predictor or the label is missing.
    .dropna()
)
df_dummies.head()

In [None]:
# Prepare inputs for the Random Forest classifier.
from sklearn.model_selection import train_test_split

# Target: the 'main_mar_t' column; features: every other column of df_dummies.
y = df_dummies['main_mar_t']
X = df_dummies.drop(columns=['main_mar_t'])

In [None]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split (and therefore every metric reported below) repeatable across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 1000-tree random forest. random_state pins the bootstrap sampling and the
# per-split feature sampling so that re-running the cell yields the same model.
rfc = RandomForestClassifier(n_estimators=1000, random_state=42)
rfc.fit(X_train,y_train)

In [None]:
# Predict class labels for the held-out test rows with the fitted forest;
# the predictions are scored against y_test in the following cell.
rfc_pred = rfc.predict(X_test)

In [None]:
# Report per-class precision, recall and f1-score on the held-out test rows.
# NOTE(review): confusion_matrix is imported here but not used in this cell.
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test,rfc_pred))