In [1]:
!pip install lightgbm catboost xgboost tensorflow scikit-learn




In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import StackingRegressor, HistGradientBoostingRegressor
import xgboost as xgb

import lightgbm as lgb
from catboost import CatBoostRegressor

import warnings
warnings.filterwarnings("ignore")

In [4]:
try:
    import lightgbm as lgb
    use_lgb = True
except ImportError:
    use_lgb = False

try:
    from catboost import CatBoostRegressor
    use_cat = True
except ImportError:
    use_cat = False

In [5]:
df = pd.read_csv("corn_growth_dataset.csv")


In [6]:
X = df.drop("PlantHeight(cm)", axis=1)
y = df["PlantHeight(cm)"]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Extra Trees": ExtraTreesRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "AdaBoost": AdaBoostRegressor(),
    "KNN": KNeighborsRegressor(),
    "SVR": SVR(),
    "XGBoost": xgb.XGBRegressor(),
    "MLP Regressor": MLPRegressor(hidden_layer_sizes=(64, 64), max_iter=1000),
    "HistGradientBoosting": HistGradientBoostingRegressor()
}

In [10]:
if use_lgb:
    models["LightGBM"] = lgb.LGBMRegressor()

if use_cat:
    models["CatBoost"] = CatBoostRegressor(verbose=0)

stack = StackingRegressor(
    estimators=[
        ('ridge', Ridge()),
        ('svr', SVR()),
        ('gb', GradientBoostingRegressor())
    ],
    final_estimator=LinearRegression()
)
models["Stacking"] = stack

In [11]:
results = []
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results.append((name, mae, mse, r2))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000301 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 20000, number of used features: 3
[LightGBM] [Info] Start training from score 54.814664


In [12]:
results_df = pd.DataFrame(results, columns=["Model", "MAE", "MSE", "R2_Score"]).sort_values(by="R2_Score", ascending=False)
results_df.reset_index(drop=True, inplace=True)
print(results_df)

results_df.to_csv("advanced_model_comparison.csv", index=False)  

                   Model       MAE        MSE  R2_Score
0               Stacking  3.979351  24.850671  0.827382
1                  Ridge  3.980430  24.855255  0.827350
2      Linear Regression  3.980430  24.855278  0.827350
3                    SVR  4.001863  25.047680  0.826014
4          MLP Regressor  3.995898  25.063412  0.825905
5      Gradient Boosting  4.042325  25.626026  0.821997
6               CatBoost  4.057266  25.763342  0.821043
7   HistGradientBoosting  4.058432  25.786600  0.820881
8               LightGBM  4.075725  25.890195  0.820162
9                  Lasso  4.224323  27.717050  0.807472
10               XGBoost  4.220143  27.808212  0.806839
11         Random Forest  4.294717  28.847765  0.799618
12              AdaBoost  4.335272  29.242544  0.796875
13           Extra Trees  4.369467  29.994905  0.791649
14                   KNN  4.406605  30.405299  0.788799
15            ElasticNet  5.250887  42.531638  0.704567
16         Decision Tree  5.843520  53.629137  0