In [4]:
import numpy as np
import pandas as pd
df=pd.read_csv('processed_dataset.csv')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

features = [
    'demand', 'demand_pos_RRP', 'RRP_positive', 'demand_neg_RRP', 
    'RRP_negative', 'frac_at_neg_RRP', 'min_temperature', 'max_temperature', 
    'solar_exposure', 'rainfall', 'school_day_N', 'school_day_Y', 
    'holiday_N', 'holiday_Y', 'year', 'month', 'day'
]

X = df[features]
y = df['RRP']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

mse, r2


(93.69773385871261, 0.9429684196397307)

In [5]:
importances = rf_model.feature_importances_

feature_importance_df = pd.DataFrame({'Feature': features, 'Importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

feature_importance_df

Unnamed: 0,Feature,Importance
2,RRP_positive,0.8862455
7,max_temperature,0.04362196
0,demand,0.02269742
1,demand_pos_RRP,0.01786104
8,solar_exposure,0.01353381
16,day,0.00665042
10,school_day_N,0.003864697
14,year,0.002561539
6,min_temperature,0.001913777
9,rainfall,0.000342664


Model Accuracy
An R² score of 0.943 means the model explains 94.3% of the variability in RRP (price). This indicates that the selected features (demand, weather, school days, holidays, etc.) have a strong influence on price fluctuations.

R² Score: 0.943 (closer to 1 means better accuracy)

# Key Insights from Feature Importance:
Most Influential Factor:

RRP_positive (88.6% importance) is the dominant factor affecting the predicted price. This suggests that the price in positive-demand conditions heavily influences future prices.

Weather Conditions Play a Role:

max_temperature (4.36%) and solar_exposure (1.35%) impact price variations.

min_temperature and rainfall have minimal influence, indicating that extreme weather conditions might affect price more than moderate weather changes.

Demand Affects Pricing:

demand (2.27%) and demand_pos_RRP (1.79%) suggest that demand patterns influence pricing but not as strongly as past RRP values.
