# Non-Linear Models

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from source.preprocessing import create_SIE_df, train_test_split, create_lagged_features, create_temp_df, merge_temperature_data, merge_co2_data
from source.saving_results import save_results_to_csv
from source.models import build_tree_regressor, predict_tree_regressor

## Model 1: Decision Tree

In [4]:
# Tunable inputs for the feature table, collected in one place.
SIE_LAGS = [1, 2, 3, 6, 12, 24]
CO2_CSV_PATH = "../data/co2_mm_gl.csv"
CO2_CSV_SKIPROWS = 38  # presumably the file's leading comment/header lines — TODO confirm

# Monthly SIE frame, then the same frame with lag features added.
monthly_df = create_SIE_df("monthly")
monthly_df_lagged = create_lagged_features(monthly_df, lags=SIE_LAGS)

# Merge in temperature data, requesting only the "Temp Anomaly" column.
temp_df = create_temp_df()
monthly_SIE = merge_temperature_data(
    monthly_df_lagged,
    temp_df,
    ["Temp Anomaly"],
)

# Merge in the CO2 table read from CSV; this is the frame the models are fit on.
monthly_co2_df = pd.read_csv(CO2_CSV_PATH, skiprows=CO2_CSV_SKIPROWS)
monthly_data_with_co2 = merge_co2_data(monthly_SIE, monthly_co2_df)

In [5]:
# Hyper-parameters for this experiment.
SPLIT_YEAR = 2011    # NOTE(review): the period labels below imply 2011 is the last training year — confirm against train_test_split
TREE_MAX_DEPTH = 5

# This is the project's own train_test_split (source.preprocessing), not
# scikit-learn's: it returns (X_train, y_train, X_test, y_test) in that order.
X_train, y_train, X_test, y_test = train_test_split(
    monthly_data_with_co2,
    SPLIT_YEAR,
)

# Fit the tree on the training split and predict on the held-out split.
tree = build_tree_regressor(X_train, y_train, max_depth=TREE_MAX_DEPTH)
y_pred = predict_tree_regressor(tree, X_test)

# Descriptive metadata stored alongside the predictions.
run_metadata = {
    "model_name": "Decision Tree Regressor",
    "features": "Year, Month, Lags, Northern Hemisphere Temperature Anomaly, Global CO2 Concentration",
    "target": "Monthly averaged SIE for each year",
    "train_period": "1980-2011",
    "test_period": "2012-2025",
}

save_results_to_csv(y_test=y_test, y_pred=y_pred, **run_metadata)

mae = 0.3908680042783078, rmse=0.531368890460391, r2= 0.9767891363261428
