# Set up


In [None]:
import sqlite3
import os
import joblib
from datetime import datetime
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
os.chdir("..")
from model.preprocess import preprocess_data
from database.db import get_weather_records
from utils import date_to_data_frame

/mnt/d/jenova-0/Code/Projects/data_science/Cloud-Oracle


# Hyperparameter Tuning for Your Model
We will perform **hyperparameter tuning** for our dataset using the following models:

- **Linear Regression**
- **Random Forest Regressor**

In [5]:
def model_tuning_for_random_forest_regressor(df):
    param_grid = {
        "n_estimators": [100, 200, 500, 1000],
        "max_depth": [None, 10, 20, 30],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "bootstrap": [True, False],
    }
    x =df[
        [
            "year",
            "month",
            "day_of_year",
            "hour_of_day"
        ]
    ]
    y = df[["low_temperature", "high_temperature"]]
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(max_depth=3, n_estimators=50)
    random_search = RandomizedSearchCV(
        model, param_grid, cv=5, n_iter=5, n_jobs=1, random_state=42
    )
    random_search.fit(x_train, y_train)
    return random_search.best_estimator_, random_search.best_params_

def model_tuning_for_linear_regression(df):
    param_space = {
        "copy_X": [True, False],
        "fit_intercept": [True, False],
        "n_jobs": [1, 5, 10, 15, None],
        "positive": [True, False],
    }
    x =df[
        [
            "year",
            "month",
            "day_of_year",
            "hour_of_day"
        ]
    ]
    y = df[["low_temperature", "high_temperature"]]
    x_train, _, y_train, _ = train_test_split(x, y)
    model = LinearRegression()
    random_search = RandomizedSearchCV(model, param_space, cv=5)
    random_search.fit(x_train, y_train)
    return random_search.best_estimator_, random_search.best_params_


sqlite_connection = sqlite3.connect("weather_forecast")
weather_data = get_weather_records(sqlite_connection)
df = preprocess_data(weather_data)
print("Linear regression tuning: ", model_tuning_for_linear_regression(df))
print("Random Forest tuning: ", model_tuning_for_random_forest_regressor(df))

Linear regression tuning:  (LinearRegression(n_jobs=1), {'positive': False, 'n_jobs': 1, 'fit_intercept': True, 'copy_X': True})
Random Forest tuning:  (RandomForestRegressor(bootstrap=False, max_depth=30, min_samples_split=5), {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 30, 'bootstrap': False})


# Linear Regression model prediction
### this prediction is based on linear regression model that gives more accurate result.

In [4]:
linear_regression_model = joblib.load("linear_regression.joblib")
today = date_to_data_frame(datetime.today().strftime("%Y-%m-%d %H"))
predict_result_of_linear_regression_model = linear_regression_model.predict(today)
print(
    f"linear regression low temperature: {round(predict_result_of_linear_regression_model[0][0])}, high temperature: {round(predict_result_of_linear_regression_model[0][1])}"
)

linear regression low temperature: 21, high temperature: 25


# Random Forest Regressor model prediction
### This prediction is generated using a Random Forest Regressor, an ensemble method that aggregates multiple decision trees for stable and reliable forecasts.
### While it performs well overall, the predicted temperature values may deviate by approximately ±3°C from the actual measurements.
### Compared to the Linear Regression model, it tends to be slightly less precise but more robust against data noise and irregularities.

In [3]:
random_forest_regressor_model = joblib.load("random_forest_regressor.joblib")
today = date_to_data_frame(datetime.today().strftime("%Y-%m-%d %H"))
predict_result_of_random_forest_regressor_model = random_forest_regressor_model.predict(
    today
)
print(
    f"random forest low temperature: {round(predict_result_of_random_forest_regressor_model[0][0])}, high temperature: {round(predict_result_of_random_forest_regressor_model[0][1])}"
)

random forest low temperature: 15, high temperature: 19
