In [0]:
import pandas as pd
import geopandas
import itertools
import pickle
from sklearn import preprocessing
from sklearn import kernel_ridge
import numpy as np
from google.colab import drive
from google.colab import files

import psutil
import humanize
import os
import GPUtil as GPU

In [0]:
# Install the third-party packages this notebook imports (geopandas, GPUtil,
# psutil, humanize) into the runtime.
# NOTE(review): this cell comes after the import cell, so on a fresh runtime
# the imports presumably fail until these installs have run - consider moving
# this cell to the top and pinning versions.
!pip install geopandas
# Symlink nvidia-smi into /usr/bin (presumably so GPUtil can find it - confirm).
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize



In [0]:
# memory footprint support libraries/code
GPUs = GPU.getGPUs()
# A GPU is not guaranteed on Colab, so fall back to None instead of raising
# IndexError on a CPU-only runtime.
gpu = GPUs[0] if GPUs else None


def printm():
  """Print free host RAM, this process's resident size and GPU memory usage.

  The GPU list is queried on every call so the figures describe the moment of
  the call rather than the moment this cell first ran. Only the first GPU is
  reported; when none is found a placeholder line is printed instead.
  """
  process = psutil.Process(os.getpid())
  # print() is given two arguments here, so its default separator inserts a
  # space before " | Proc size".
  print(
      "Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available),
      " | Proc size: " + humanize.naturalsize(process.memory_info().rss))

  current_gpus = GPU.getGPUs()
  if not current_gpus:
    print("GPU RAM Free: n/a (no GPU detected)")
    return
  first_gpu = current_gpus[0]
  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(
      first_gpu.memoryFree, first_gpu.memoryUsed, first_gpu.memoryUtil*100, first_gpu.memoryTotal))


printm()

Gen RAM Free: 12.9 GB  | Proc size: 227.8 MB
GPU RAM Free: 15079MB | Used: 0MB | Util   0% | Total 15079MB


In [0]:
# Mount Google Drive at /content/drive (prompts for an authorization code).
# NOTE(review): later cells use the relative prefix 'drive/My Drive/...', which
# presumably relies on the working directory being /content - confirm.
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Read the point shapefile from the mounted Drive into a GeoDataFrame
points_file = 'drive/My Drive/10k_Points_S2_VI_S1_BP.shp'
fw_pts = geopandas.read_file(points_file)

# Show the first rows as a quick sanity check of the loaded columns
fw_pts.head()

Unnamed: 0,B2,B3,B4,B5,B6,B7,B8,B8A,B11,B12,arvi,dvi,gemi,gndvi,ipvi,ireci,mcari,msavi,msavi2,mtci,ndi45,pssra,pvi,reip,rvi,s2rep,savi,tndvi,tsavi,wdvi,S0_VH_N,S0_VV_N,lai,lai_cab,lai_cw,fapar,fcover,geometry
0,1017.0,1355.0,1884.0,2150.0,2365.0,2598.0,2718.0,3009.0,4248.0,3156.0,-0.006034,0.0834,0.474812,0.314445,0.590613,0.07854,0.012211,0.114926,0.616914,0.80827,0.06594,1.378981,0.058973,716.930237,1.442675,719.813965,0.130285,0.825364,-0.924842,0.1776,0.014561,0.053904,0.293115,9.517295,0.005263,0.031921,0.018619,POINT (340065 395145)
1,1057.0,1372.0,1896.0,2150.0,2365.0,2598.0,2770.0,3009.0,4248.0,3156.0,0.006358,0.0874,0.472472,0.308816,0.593656,0.07722,0.011158,0.119945,0.622073,0.846456,0.062778,1.370253,0.061801,718.046509,1.460971,720.79071,0.13563,0.829043,-0.892195,0.1822,0.014947,0.052304,0.292749,9.420005,0.00525,0.030687,0.017791,POINT (340075 395145)
2,1072.0,1397.0,1914.0,2203.0,2415.0,2681.0,2776.0,3108.0,4254.0,3111.0,0.003615,0.0862,0.4825,0.31486,0.591898,0.084081,0.01471,0.118084,0.620134,0.733564,0.070197,1.400731,0.060953,717.8302,1.450366,720.60144,0.133437,0.826919,-0.88263,0.1819,0.014947,0.052304,0.306748,9.406707,0.005524,0.036503,0.023166,POINT (340085 395145)
3,1072.0,1421.0,1936.0,2203.0,2415.0,2681.0,2791.0,3108.0,4254.0,3111.0,-0.00161,0.0855,0.478173,0.307167,0.590438,0.081669,0.012585,0.116814,0.618799,0.794007,0.064508,1.384814,0.060458,719.90564,1.441632,722.417419,0.131849,0.825152,-0.867322,0.1823,0.01683,0.051905,0.306178,9.276816,0.00551,0.034058,0.021415,POINT (340095 395145)
4,1123.0,1459.0,2048.0,2264.0,2585.0,2856.0,2957.0,3258.0,4284.0,3098.0,-0.002698,0.0909,0.475939,0.323754,0.590809,0.092256,0.00608,0.121885,0.623883,1.486111,0.050093,1.394531,0.064276,723.426819,1.443848,725.498413,0.136282,0.825602,-0.756723,0.1933,0.01683,0.051905,0.330857,9.463584,0.005839,0.048685,0.030191,POINT (340105 395145)


In [0]:
#S2 Ratios
# Names of the S2 band columns that are combined into pairwise ratios.
bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']

# Every ordered pair of two *different* bands: (B2, B3), (B2, B4), ...,
# (B3, B2), ... . permutations() yields exactly the Cartesian product minus
# the band/itself pairs, in the same order, and adapts to any number of bands.
band_pairs = list(itertools.permutations(bands, 2))

# Parallel lists: numerator[k] / denominator[k] defines the k-th ratio.
numerator = [pair[0] for pair in band_pairs]
denominator = [pair[1] for pair in band_pairs]

In [0]:
# Column name for each ratio: '<numerator>v<denominator>', e.g. 'B2vB3'
rat_names = [
    str(num_band) + 'v' + str(den_band)
    for num_band, den_band in zip(numerator, denominator)
]

In [0]:
# Add one column per band pair holding numerator / denominator, stored under
# the matching name from rat_names
for ratio_col, num_band, den_band in zip(rat_names, numerator, denominator):
  ratio_values = fw_pts.loc[:, num_band] / fw_pts.loc[:, den_band]
  fw_pts.loc[:, ratio_col] = ratio_values

In [0]:
# Ratios between the VV and VH columns, in both directions.
# A separate name is used so the band-ratio list in rat_names is not
# overwritten; otherwise re-running the band-ratio cell would write band
# ratios into these two columns.
sar_rat_names = ['VVvVH', 'VHvVV']
fw_pts.loc[:, sar_rat_names[0]] = fw_pts.loc[:, 'S0_VV_N'] / fw_pts.loc[:, 'S0_VH_N']
fw_pts.loc[:, sar_rat_names[1]] = fw_pts.loc[:, 'S0_VH_N'] / fw_pts.loc[:, 'S0_VV_N']

In [0]:
# Number of columns now in fw_pts (original columns plus the added ratios)
len(fw_pts.columns)

130

In [0]:
# Names of the dependent (response) variables that will be predicted
y_col_names = ['Dry_wgh', 'Frsh_wg', 'FDN', 'FDA', 'CP', 'DIVMS']
# NOTE(review): only three display names for six variables, and this list is
# not referenced by any later visible cell - complete or remove it.
y_col_full_names = ['Dry Weight Biomass', 'Fresh Weight Biomass', 'Crude Protein']

# Candidate predictor columns: everything in fw_pts except the geometry
X_col_names = [col for col in fw_pts.columns if col != 'geometry']

In [0]:
# Predictor table: all non-geometry columns of fw_pts (see X_col_names)
X = fw_pts.loc[:, X_col_names]

In [0]:
# Table of selected predictors per response variable; the prediction loop
# reads its 'variable' and 'feature_name' columns.
optimize_names = pd.read_csv('drive/My Drive/Optimized_Variable_Names_RF.csv')

In [0]:
def _load_pickle(file_path):
  """Return the object unpickled from file_path, closing the file afterwards."""
  # NOTE: unpickling can execute arbitrary code - only load files you created
  # or otherwise trust.
  with open(file_path, 'rb') as handle:
    return pickle.load(handle)


# Work on a copy so the prediction columns added below cannot leak back into
# fw_pts.
point_preds = geopandas.GeoDataFrame(fw_pts.copy(), geometry='geometry')
prediction_col_names = ['geometry']

# Folder holding '<variable>_model.p' and '<variable>_scaler.p' for each target.
path = 'drive/My Drive/Saved_Models/'

for var in y_col_names:
  print('Predicting ' + var + ' values.')
  X_col_names_opt = list(optimize_names.loc[optimize_names['variable'] == var, 'feature_name'])
  if not X_col_names_opt:
    raise ValueError('No optimized feature names listed for ' + var)

  # Only rows where every selected feature is present can be predicted.
  X_opt = X.loc[:, X_col_names_opt].dropna(axis=0, how='any')

  # Standardize every feature column (zero mean, unit variance) in one call.
  # NOTE(review): the mean/std come from the prediction points themselves, not
  # from the training data - confirm the models were trained on features
  # scaled in a compatible way.
  X_scaled = preprocessing.scale(X_opt.to_numpy())

  reg = _load_pickle(path + var + '_model.p')
  pred = reg.predict(X_scaled)

  scalery = _load_pickle(path + var + '_scaler.p')
  # inverse_transform is documented for 2-D input, so present the predictions
  # as a single column and flatten the result again.
  # Assumes scalery was fitted on a single target column - TODO confirm.
  pred_unscaled = scalery.inverse_transform(np.asarray(pred).reshape(-1, 1)).ravel()

  c_name = var + '_Pred'
  prediction_col_names.append(c_name)
  # Rows dropped above keep NaN in this column.
  point_preds.loc[X_opt.index, c_name] = pred_unscaled

# Drop every row with a missing value in ANY column, then keep only the
# geometry and the prediction columns.
# NOTE(review): because dropna runs before the column selection, a NaN in a
# feature that no model uses also removes the row - confirm this is intended.
point_preds = point_preds.dropna(axis=0, how='any')
point_preds = point_preds.loc[:, prediction_col_names]
  


Predicting Dry_wgh values.




Predicting Frsh_wg values.




Predicting FDN values.




Predicting FDA values.




Predicting CP values.




Predicting DIVMS values.




In [0]:
# Preview the first rows of the prediction table (geometry + *_Pred columns)
point_preds.head()

Unnamed: 0,geometry,Dry_wgh_Pred,Frsh_wg_Pred,FDN_Pred,FDA_Pred,CP_Pred,DIVMS_Pred
0,POINT (340065 395145),0.143872,0.32864,471.886958,231.938724,24.178748,570.111061
1,POINT (340075 395145),0.143872,0.333161,471.886958,231.938724,23.878102,576.99023
2,POINT (340085 395145),0.143872,0.331428,471.886958,233.191053,24.097653,575.374728
3,POINT (340095 395145),0.143872,0.315735,471.94952,233.191053,24.097653,561.927268
4,POINT (340105 395145),0.1477,0.314092,471.94952,231.174684,24.736525,575.294485


In [0]:
# Write the predictions to Drive as an ESRI Shapefile.
# NOTE: Shapefile field names are limited to 10 characters, so columns such as
# 'Dry_wgh_Pred' and 'Frsh_wg_Pred' will be truncated in the written file.
point_preds.to_file("drive/My Drive/Point_Predictions_2.shp", driver='ESRI Shapefile')