In [8]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

# Read the train and validation CSVs (in that order) from the processed-data folder.
train, valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/onion_maharashtra_train.csv",
        "../data/processed/onion_maharashtra_valid.csv",
    )
)

# Show (rows, columns) for each split as a quick sanity check.
print("Train:", train.shape)
print("Valid:", valid.shape)

Train: (193, 14)
Valid: (49, 14)


In [9]:
# Column the models are asked to predict.
target_col = 'Avg_Modal_Price'

# Predictor columns, grouped by naming pattern; order is kept as
# calendar fields, then lag_* columns, then roll_* columns.
feature_cols = (
    ['day_of_week', 'month', 'weekofyear']
    + ['lag_1', 'lag_3', 'lag_7']
    + ['roll_mean_7', 'roll_std_7', 'roll_mean_14']
)

# Split each frame into its feature matrix (X) and target vector (y).
X_train, y_train = train[feature_cols], train[target_col]
X_valid, y_valid = valid[feature_cols], valid[target_col]

In [11]:
# Naive baseline: use each validation row's lag_1 value as its prediction.
y_pred_naive = valid['lag_1'].values

# Squared-error scores first; RMSE is the square root of the MSE.
mse_naive = mean_squared_error(y_valid, y_pred_naive)
rmse_naive = np.sqrt(mse_naive)

# Mean absolute error in the target's own units.
mae_naive = mean_absolute_error(y_valid, y_pred_naive)

# MAPE: mean of |actual - predicted| / |actual|, expressed as a percentage.
mape_naive = 100 * np.mean(np.abs(y_valid - y_pred_naive) / np.abs(y_valid))

print("Naive baseline:")
print(" MAE :", mae_naive)
print(" RMSE:", rmse_naive)
print(" MAPE:", mape_naive)

Naive baseline:
 MAE : 53.44642830171092
 RMSE: 74.58954605091652
 MAPE: 3.71121588197005


In [12]:
# Fit a random-forest regressor on the training split and score it on the
# validation split with MAE, RMSE and MAPE (the same three metrics printed
# for the naive lag_1 baseline).
# RandomForestRegressor is already imported in the notebook's top imports
# cell, so it is not imported a second time here.
#
# NOTE(review): in the output recorded with this notebook the forest scores
# worse than the naive baseline on all three metrics (e.g. MAE ~134 vs ~53).
# Investigate before relying on this model.

# ------------------------------
# 1) Define the random forest model
# ------------------------------
rf = RandomForestRegressor(
    n_estimators=400,   # number of trees in the ensemble
    max_depth=12,       # upper bound on the depth of each tree
    random_state=42,    # fixed seed so repeated runs build the same forest
    n_jobs=-1           # use all available CPU cores
)

# ------------------------------
# 2) Train the model
# ------------------------------
rf.fit(X_train, y_train)

# ------------------------------
# 3) Predict on validation set
# ------------------------------
y_pred_rf = rf.predict(X_valid)

# ------------------------------
# 4) Evaluate the model
# ------------------------------
mae_rf = mean_absolute_error(y_valid, y_pred_rf)

# RMSE is derived as the square root of the MSE.
mse_rf = mean_squared_error(y_valid, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)

# MAPE: mean absolute error relative to the actual value, in percent.
mape_rf = np.mean(np.abs((y_valid - y_pred_rf) / y_valid)) * 100

print("RandomForest model performance:")
print(" MAE  :", mae_rf)
print(" RMSE :", rmse_rf)
print(" MAPE :", mape_rf)

RandomForest model performance:
 MAE  : 134.21107074311314
 RMSE : 168.02964255898365
 MAPE : 11.213297372266029
