In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

import warnings
warnings.simplefilter('ignore')

In [2]:
df = pd.read_csv("../working_data/dataset.csv", index_col=0)

## Creating working data frame

In [3]:
crops = ['Wheat', 'Barley', 'Maize', 'Sugar beet', 'Potatoes', 'Rye', 'Rape', 'Apples', 'Grapes', 'Cabbages']
mask = df["Item"].str.contains("|".join(crops))
df = df[mask]
df.reset_index(drop=True, inplace=True)
crop_names = df["Item"].unique()

## Creating the metrics data frame

In [4]:
def calculate_metrics(df, crop_name, y_true, y_pred):
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = metrics.mean_absolute_percentage_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    df.loc[crop_name] = [mae, mse, rmse, mape, r2, y_true.values[0][0], y_pred[0]]

In [5]:
quality_metrics = ['RF_MAE', 'RF_MSE', 'RF_RMSE',"RF_MAPE", "RF_R2", "Actual Yield", "Predicted Yield"]
error_df = pd.DataFrame(columns = quality_metrics, index = crop_names)
#display(error_df)

## Random Forest Regression

In [6]:
for crop in crop_names:
   crop_df = df[df['Item'] == crop]
   x = crop_df[[
      'Year', 
      'Change (C)', 'Annual Mean Precipitation',
      'Air Temp Mean', 'Frost Days', 'Hot Days', 
      'Ice Days', 'Summer Days','Sunshine Duration', 
      'Tropical Nights', 'Mean CO2 (ppm)', "Devil",	
      'Gustnado',	'Hail',	'Ice',	'Precip',	'Snow',	
      'Tornado',	'Wind',	'Total']]
   
   y = crop_df[['Yield (tonnes/ha)']]

   scaler = MinMaxScaler()
   regressor = RandomForestRegressor(n_estimators=200, random_state=15)

   x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, shuffle=True)
   x_train = scaler.fit_transform(x_train)
   x_test = scaler.transform(x_test)
   
   regressor.fit(x_train, y_train)

   #prediction of new value
   y_pred = regressor.predict(x_test)
   crop_c = crop
   calculate_metrics(error_df, crop_c , y_test, y_pred)

## Result

In [7]:
error_df

Unnamed: 0,RF_MAE,RF_MSE,RF_RMSE,RF_MAPE,RF_R2,Actual Yield,Predicted Yield
Barley,0.239906,0.092436,0.304034,0.050422,0.804653,4.9292,4.780217
Cabbages,2.226492,13.105293,3.620123,0.065279,0.852515,54.1249,54.868058
Maize (corn),0.823133,0.875587,0.935728,0.094179,0.22653,8.1561,8.867046
Potatoes,3.26743,12.304692,3.507805,0.115011,0.787443,39.8258,41.27215
Rape or colza seed,0.460376,0.290423,0.538909,0.144822,-0.459227,2.7379,3.011215
Rye,0.565815,0.443882,0.666245,0.100818,0.34345,6.132,5.019002
Sugar beet,5.362695,42.415538,6.512721,0.088843,0.584547,55.2381,57.725626
Wheat,0.402649,0.256388,0.506347,0.070909,0.837092,8.0873,7.552508
Apples,5.238494,31.207966,5.586409,0.270147,-0.556607,15.8809,23.834753
Grapes,0.477701,0.259739,0.509646,0.036949,0.471205,13.4028,14.0581
