In [2]:
import math

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

from SpatialTransform import SpatialTransformer

# Read the raw table from disk.
df = pd.read_csv('Grace_Variables.csv')

# Columns retained for modelling; every other column in the CSV is discarded.
columns_to_keep = [
    "2014 Yield",
    "2016 Yield",
    "Deep EC",
    "Shallow EC",
    "Elevation",
    "Slope",
    "(aSM-wp)/(fc-wp)",
    "Log10_Yld16",
    "2017 Spring SM",
    'X',
    'Y',
]

# Identifier-friendly names; "2017 Spring SM" becomes the response column "y".
new_column_names = {
    "2014 Yield": "Yld2014",
    "2016 Yield": "Yld2016",
    "Deep EC": "DeepEC",
    "Shallow EC": "ShallowEC",
    "(aSM-wp)/(fc-wp)": "WaterPotential",
    "2017 Spring SM": "y",
}

# Everything present in the file that is not on the keep-list.
columns_to_drop = [name for name in df.columns if name not in columns_to_keep]

# Trim to the modelling columns and apply the short names in one pass.
df = df.drop(columns=columns_to_drop).rename(columns=new_column_names)

# 80/20 train/test split; the fixed seed keeps the partition repeatable.
trainData = df.sample(frac=0.8, random_state=200)
testData = df.drop(index=trainData.index)

# Coordinates are handed to the spatial transformer separately from the
# covariates, so pull them out as plain arrays ...
coord_cols = ['X', 'Y']
trainLocs = trainData[coord_cols].values
testLocs = testData[coord_cols].values

# ... and then remove them from the modelling frames.
trainData = trainData.drop(columns=coord_cols)
testData = testData.drop(columns=coord_cols)

In [7]:
transformer = SpatialTransformer()

# Arguments for the spatial -> independent transform, collected in one place.
# NOTE(review): smoothness / range_param / nugget look like spatial covariance
# settings and M like a neighbourhood size -- confirm against SpatialTransform.
transform_kwargs = dict(
    trainData=trainData,
    testData=testData,
    testLocs=testLocs,
    trainLocs=trainLocs,
    smoothness=0.5,
    range_param=51.9685,
    nugget=0.01,
    M=30,
    ncores=1,
)

# "y" names the response column; later cells read the 'trainData' and
# 'testData' entries of the returned object.
IndData = transformer.transform_to_ind("y", **transform_kwargs)

In [8]:
# Separate the transformed training frame into predictors and response.
train_frame = IndData['trainData']
y_train_spatial = train_frame['y']
X_train_spatial = train_frame.drop(columns='y')

# Base random forest; the seed keeps the tuning runs repeatable.
rf_spatial = RandomForestRegressor(random_state=200)

# Hyperparameter candidates: 4 x 4 = 16 combinations.
param_grid = dict(
    max_features=[6, 7, 8, 9],
    min_samples_leaf=[5, 10, 15, 20],
)

# Exhaustive search with 5-fold cross-validation, run on 5 parallel workers.
grid_search = GridSearchCV(
    estimator=rf_spatial,
    param_grid=param_grid,
    cv=5,
    n_jobs=5,
)

grid_search.fit(X_train_spatial, y_train_spatial)

In [9]:
# Refit a random forest on the full training set with the hyperparameters
# chosen by the grid search. random_state=200 matches the estimator that was
# tuned; without it the final forest is seeded differently on every run and
# the RMSE reported below is not reproducible.
rf_spatial_fit = RandomForestRegressor(
    random_state=200, **grid_search.best_params_
).fit(X_train_spatial, y_train_spatial)

# Predict on the transformed test data.
X_test = IndData['testData']
y_pred = rf_spatial_fit.predict(X_test)

# Convert the predicted values back to the original (spatial) scale.
y_pred_spatial = transformer.back_transform_to_spatial(y_pred, IndData)

# Root mean squared error against the held-out observations of y.
MSE = np.square(np.subtract(testData['y'], y_pred_spatial)).mean()
RMSE = math.sqrt(MSE)
print("Root Mean Square Error:\n")
print(RMSE)

Root Mean Square Error:

0.07099065007691685
