In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor,BaggingRegressor,ExtraTreesRegressor,GradientBoostingRegressor,RandomForestRegressor,\
StackingRegressor,VotingRegressor,HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
import sys
sys.path.append("../../scripts/")
sys.path.append("../../config/")
from utils.load_save import *
from utils.utils import *
from models.FloweringModel import FloweringModel
from definitions import ROOT_DIR
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [2]:
#import train and test data
train = load_data('train_test','train', sorghum = True)
test  = load_data('train_test', 'test', sorghum = True)

In [3]:
## Initialize Models with default parameters
estimators = 100
models = [("LinearModel", LinearRegression()),("AdaBoost", AdaBoostRegressor()),('Random Forest', RandomForestRegressor(estimators)),
          ('Gradient Boosting', GradientBoostingRegressor()),('Bagging', BaggingRegressor()),
          ('ExtraTrees', ExtraTreesRegressor(estimators)),
           ('HistGradient Boosting', HistGradientBoostingRegressor())]

stack_models  = models[0:4]
stack = StackingRegressor(stack_models,stack_models[-3][1])
voting = VotingRegressor(models)
models.append(('Stacking Regressor',stack))
#models.append(('Voting Regressor',voting))
# models.append((('Stacking', StackingRegressor(models))))
# models.append(('Voting', VotingRegressor(models)))

In [4]:
#run models
results_test = []
results_train= []
for model in models:
    fm = FloweringModel(model, train,test)
    fm.fit()
    results = fm.evaluate()
    results_test.append(results[0])
    results_train.append(results[1])
    print(f"Done: {model[1]}")
df_results_test = pd.concat(results_test)
df_results_train= pd.concat(results_train)

Done: LinearRegression()
Done: AdaBoostRegressor()
Done: RandomForestRegressor()
Done: GradientBoostingRegressor()
Done: BaggingRegressor()
Done: ExtraTreesRegressor()
Done: HistGradientBoostingRegressor()
Done: StackingRegressor(estimators=[('LinearModel', LinearRegression()),
                              ('AdaBoost', AdaBoostRegressor()),
                              ('Random Forest', RandomForestRegressor()),
                              ('Gradient Boosting',
                               GradientBoostingRegressor())],
                  final_estimator=AdaBoostRegressor())


In [5]:
df_results_test, df_results_train

(                   Model       R^2       MAE      RMSE
 0            LinearModel  0.270029  4.684165  6.093586
 0               AdaBoost  0.211718  5.008313  6.332292
 0          Random Forest  0.235580  4.918599  6.235710
 0      Gradient Boosting  0.185854  4.991334  6.435336
 0                Bagging  0.201142  5.001911  6.374626
 0             ExtraTrees  0.215655  5.042484  6.316457
 0  HistGradient Boosting  0.196168  5.019212  6.394441
 0     Stacking Regressor  0.251189  4.867151  6.171718,
                    Model       R^2       MAE      RMSE
 0            LinearModel  0.209644  4.632941  5.892568
 0               AdaBoost  0.396147  4.357685  5.150609
 0          Random Forest  0.882367  1.790467  2.273305
 0      Gradient Boosting  0.736888  2.707184  3.399877
 0                Bagging  0.830970  2.106593  2.725057
 0             ExtraTrees  1.000000  0.000110  0.001482
 0  HistGradient Boosting  0.876614  1.776581  2.328230
 0     Stacking Regressor  0.304468  4.409239  

In [6]:
save_table(df_results_test,f"{ROOT_DIR}/figures/initial_sorghum_models_test_results.tex")

  tab1 = df.to_latex(**kwargs)
