In [1]:
# 03_Modeling.ipynb
import joblib
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline

# Restore the persisted artifacts: the pickle is unpacked into exactly five
# objects (train/test features, train/test targets, and the preprocessing step
# that is later placed in front of each model).
# NOTE(review): presumably written by an earlier preprocessing notebook — confirm.
# joblib.load unpickles arbitrary Python objects, so only point it at trusted files.
(X_train, X_test, y_train, y_test, preprocessor) = joblib.load(
    '/workspaces/temperature-prediction-project/data/preprocessed_data.pkl'
)

In [2]:
# Train, evaluate and persist one preprocessing+regressor pipeline per candidate.
#
# For every entry in `models` this cell:
#   1. builds a Pipeline of (preprocessor -> regressor),
#   2. fits it on the training split,
#   3. prints MAE and R² on the test split,
#   4. dumps the fitted pipeline to MODEL_DIR as "<name>_model.pkl"
#      (name lower-cased, spaces replaced by underscores).

# Single place to change where fitted pipelines are written.
MODEL_DIR = '/workspaces/temperature-prediction-project/data'

models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42)
}

for name, model in models.items():
    # Pipeline does not copy its steps. Handing the same `preprocessor` object
    # to every pipeline would make them all share (and refit) one transformer
    # and would mutate the object loaded in the first cell. An unfitted clone
    # gives each pipeline its own independent preprocessing step and keeps
    # this cell idempotent on re-run.
    pipe = Pipeline([('pre', clone(preprocessor)), ('model', model)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    print(f"\n{name}")
    print("MAE:", mean_absolute_error(y_test, y_pred))
    print("R²:", r2_score(y_test, y_pred))
    # NOTE(review): the recorded Random Forest R² (~0.99996) is unusually high;
    # worth checking the feature set for target leakage — not verifiable from here.

    joblib.dump(pipe, f'{MODEL_DIR}/{name.replace(" ", "_").lower()}_model.pkl')



Linear Regression
MAE: 0.7361419767448404
R²: 0.9903127264263686

Random Forest
MAE: 0.013685990086403319
R²: 0.9999639040310524
