In [25]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import matplotlib.pyplot as plt

# Reading Data


In [26]:
df_2021 = pd.read_csv('data/2021.csv')
# df_2021.dropna(inplace=True)
# df_2021.tail(20)

df_2020 = pd.read_csv('data/2020.csv')
# df_2020.dropna(inplace=True)
# df_2020.tail(20)

### Setting up data for models & testing

In [27]:
df_2021

Unnamed: 0,Plot,Treatment,BD 0-10cm,BD 10-20cm,BD 20-30cm,BD Moisture 0-10cm,BD Moisture 10-20 cm,BD Moisture 20-30 cm,Emergence,Chlorophyll 28th May,...,Cone P NT 25cm,Cone P NT 27.5cm,Cone P NT 30cm,Cone P NT 32.5cm,Cone P NT 35cm,Cone P NT 37.5cm,Cone P NT 40cm,Cone P NT 42.5cm,Cone P NT 45cm,Combine Yield (t/ac)
0,1,PLPHMNCLA,1.11,1.29,1.61,21.2,31.0,34.3,7.4,39.3,...,3084.0,3873.0,4034.0,5142.0,4635.0,4213.0,4694.0,3991.0,4273.0,
1,2,PHPLMCCHA,1.16,1.42,1.58,19.8,25.9,26.9,7.4,38.0,...,2587.0,3043.0,4824.0,4818.0,4206.0,3841.0,4767.0,5181.0,4156.0,1.79
2,3,PLPHMNCHA,1.15,1.55,1.66,20.8,30.2,30.5,5.6,45.9,...,2503.0,3612.0,3149.0,4213.0,4295.0,3388.0,4061.0,3899.0,4489.0,1.99
3,4,PHPLMNCLA,1.05,1.47,1.47,18.2,28.8,27.1,4.8,44.7,...,1785.0,1935.0,1997.0,2740.0,3087.0,3621.0,4296.0,4070.0,3935.0,2.09
4,5,PLPLMNCHA,1.14,1.21,1.51,17.7,31.9,29.4,5.0,36.7,...,1730.0,1597.0,1418.0,1561.0,3060.0,3712.0,3663.0,4107.0,4455.0,1.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,92,MLPLMNCLA,,,,,,,,,...,,,,,,,,,,
92,93,MLPHMNCLA,,,,,,,5.2,41.2,...,4071.0,4262.0,4174.0,4486.0,1769.0,,,,,
93,94,MHPHMCCHA,,,,,,,6.6,39.4,...,,,,,,,,,,
94,95,MHPHMCCLA,,,,,,,5.2,39.5,...,3647.0,4649.0,3723.0,4000.0,3455.0,4862.0,3900.0,3593.0,5428.0,


In [28]:
# Setting targets and removing them from dataframe
y = df_2021['Combine Yield (t/ac)']
y = y.fillna(0)
del df_2021['Combine Yield (t/ac)']

emergenceY = df_2021['Emergence']
emergenceY = emergenceY.fillna(0)
del df_2021['Emergence']

# Features
X = df_2021[['BD 0-10cm', 'BD 10-20cm', 'BD 20-30cm', 'BD Moisture 0-10cm', 'BD Moisture 10-20 cm', 'BD Moisture 20-30 cm']]
X = X.fillna(0)
X.head()

Unnamed: 0,BD 0-10cm,BD 10-20cm,BD 20-30cm,BD Moisture 0-10cm,BD Moisture 10-20 cm,BD Moisture 20-30 cm
0,1.11,1.29,1.61,21.2,31.0,34.3
1,1.16,1.42,1.58,19.8,25.9,26.9
2,1.15,1.55,1.66,20.8,30.2,30.5
3,1.05,1.47,1.47,18.2,28.8,27.1
4,1.14,1.21,1.51,17.7,31.9,29.4


In [29]:
# Splitting data for training & testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

X_train_emergence, X_test_emergence, y_train_emergence, y_test_emergence = train_test_split(X, emergenceY, test_size=0.33, random_state=42) # Emergence

## Linear Regression Model

In [30]:
# Linear Regression Model
reg = LinearRegression().fit(X_train, y_train)
reg.score(X, y) # score

0.26399832760541597

In [31]:
# Linear Regression Model
regEmergence = LinearRegression().fit(X_train_emergence, y_train_emergence)
regEmergence.score(X, emergenceY) # score

0.2572444854045888

## SVR Model

In [32]:
# SVR Model
svr = SVR().fit(X_train, y_train)
svr.score(X, y) # score

0.23396969763998132

In [33]:
# SVR Model
svrEmergence = SVR().fit(X_train_emergence, y_train_emergence)
svrEmergence.score(X, emergenceY) # score

0.13439462928742496

In [34]:
# evaluate the model
yhat = reg.predict(X_test)
# evaluate predictions
mae = mean_absolute_error(y_test, yhat)
print('MAE: %.3f' % mae)

MAE: 0.620


In [35]:
print(np.sqrt(mean_squared_error(y_test, yhat)))

0.8517103213648807


## Testing Predictions

In [36]:
# # Plot 36
# yieldPredict = reg.predict(np.array([[1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]])) # Actual === 2.8
# emergencePredict = regEmergence.predict(np.array([[1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]])) # Actual === 6.0
# yieldPredict

In [37]:
# # Plot 55
# yieldPredict = reg.predict(np.array([[0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]])) # Actual === 0.24
# emergencePredict = regEmergence.predict(np.array([[0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]])) # Actual === 8.2
# yieldPredict

### SVR Predictions

In [38]:
# # Plot 36
# yieldPredict = svr.predict(np.array([[1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]])) # Actual === 2.8
# yieldPredict

# Model Evaluation

### Linear Regression Model

In [39]:
# Linear Regression
yhat = reg.predict(X_test)
mae = mean_absolute_error(y_test, yhat)
print('MAE: %.5f' % mae)

mse = mean_squared_error(y_test, yhat)
print('MSE: %.5f' % mse)

r2_error = r2_score(y_test, yhat)
print('R^2: %.5f' % r2_error)


MAE: 0.62004
MSE: 0.72541
R^2: 0.10789


### SVR Model

In [40]:
# SVR
yhat = svr.predict(X_test)
mae = mean_absolute_error(y_test, yhat)
print('MAE: %.5f' % mae)

mse = mean_squared_error(y_test, yhat)
print('MSE: %.5f' % mse)

r2_error = r2_score(y_test, yhat)
print('R^2: %.5f' % r2_error)

MAE: 0.57095
MSE: 0.62840
R^2: 0.22720
