In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import pickle

# Load dataset
df = pd.read_csv('./cleaned_dataset_2.csv')

df['date'] = pd.to_datetime(df['date'], errors='coerce')

df['demand_lag_1'] = df['demand'].shift(1)
df['demand_lag_7'] = df['demand'].shift(7)
df['rrp_lag_1'] = df['rrp'].shift(1)
df['rrp_lag_7'] = df['rrp'].shift(7)
df['temp_rain_interaction'] = df['min_temperature'] * df['rainfall']
df['weekday'] = df['date'].dt.weekday
df['is_weekend'] = (df['weekday'] >= 5).astype(int)
df['extreme_weather'] = ((df['max_temperature'] > df['max_temperature'].quantile(0.95)) | 
                         (df['rainfall'] > df['rainfall'].quantile(0.95))).astype(int)

df['ds'] = df['date'].copy()
df = df.dropna(subset=['ds'])

features_to_scale = ['demand', 'demand_lag_1', 'demand_lag_7', 'rrp', 'rrp_lag_1', 'rrp_lag_7',
                     'min_temperature', 'max_temperature', 'solar_exposure',
                     'rainfall', 'temp_rain_interaction']

for col in features_to_scale:
    df[col].fillna(df[col].mean(), inplace=True)
    df[col] = df[col].rolling(window=7, min_periods=1).mean()

df[['demand_lag_1', 'demand_lag_7', 'rrp_lag_1', 'rrp_lag_7']].fillna(method='bfill', inplace=True)
df.fillna(method='ffill', inplace=True)
df.fillna(method='bfill', inplace=True)

df = df.dropna(subset=features_to_scale)

# Split Data
train_size = int(len(df) * 0.8)
train_df = df.iloc[:train_size]
test_df = df.iloc[train_size:]

# Scale only on training set to prevent data leakage
scaler = StandardScaler()
train_df[features_to_scale] = scaler.fit_transform(train_df[features_to_scale])
test_df[features_to_scale] = scaler.transform(test_df[features_to_scale])

# Save scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Prepare data for Prophet
regressor_cols = ['demand_lag_1', 'demand_lag_7', 'rrp_lag_1', 'rrp_lag_7',
                  'min_temperature', 'max_temperature', 'solar_exposure',
                  'rainfall', 'temp_rain_interaction', 'is_weekend', 'extreme_weather']

train_df_demand = train_df[['ds', 'demand'] + regressor_cols].rename(columns={'demand': 'y'})
train_df_rrp = train_df[['ds', 'rrp'] + regressor_cols].rename(columns={'rrp': 'y'})

# Train Prophet Models
model_demand = Prophet()
model_rrp = Prophet()
for col in regressor_cols:
    model_demand.add_regressor(col)
    model_rrp.add_regressor(col)

model_demand.add_country_holidays(country_name='Australia')
model_rrp.add_country_holidays(country_name='Australia')

model_demand.fit(train_df_demand)
model_rrp.fit(train_df_rrp)

# Save models
with open('model_demand.pkl', 'wb') as f:
    pickle.dump(model_demand, f)

with open('model_rrp.pkl', 'wb') as f:
    pickle.dump(model_rrp, f)

# Cross-validation
df_cv = cross_validation(model_demand, horizon='30 days', period='15 days', initial='365 days')
df_p = performance_metrics(df_cv)
print(df_p[['rmse', 'mae', 'mape']])


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
  df[['demand_lag_1', 'demand_lag_7', 'rrp_lag_1', 'rrp_lag_7']].fillna(method='bfill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['demand_lag_1', 'demand_lag_7', 'rrp_lag_1', 'rrp_lag_7']].fillna(method='bfill', inplace=True)
  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using 

  0%|          | 0/86 [00:00<?, ?it/s]

18:11:48 - cmdstanpy - INFO - Chain [1] start processing
18:11:48 - cmdstanpy - INFO - Chain [1] done processing
18:11:48 - cmdstanpy - INFO - Chain [1] start processing
18:11:48 - cmdstanpy - INFO - Chain [1] done processing
18:11:49 - cmdstanpy - INFO - Chain [1] start processing
18:11:49 - cmdstanpy - INFO - Chain [1] done processing
18:11:49 - cmdstanpy - INFO - Chain [1] start processing
18:11:49 - cmdstanpy - INFO - Chain [1] done processing
18:11:49 - cmdstanpy - INFO - Chain [1] start processing
18:11:50 - cmdstanpy - INFO - Chain [1] done processing
18:11:50 - cmdstanpy - INFO - Chain [1] start processing
18:11:50 - cmdstanpy - INFO - Chain [1] done processing
18:11:50 - cmdstanpy - INFO - Chain [1] start processing
18:11:51 - cmdstanpy - INFO - Chain [1] done processing
18:11:51 - cmdstanpy - INFO - Chain [1] start processing
18:11:51 - cmdstanpy - INFO - Chain [1] done processing
18:11:51 - cmdstanpy - INFO - Chain [1] start processing
18:11:51 - cmdstanpy - INFO - Chain [1]

        rmse       mae      mape
0   0.172469  0.132882  1.192636
1   0.174212  0.131161  1.093114
2   0.174838  0.127396  1.052037
3   0.173644  0.120953  0.500268
4   0.172612  0.122549  0.529253
5   0.176699  0.130019  0.563962
6   0.167882  0.127328  0.623741
7   0.173680  0.131694  0.641967
8   0.172802  0.127740  0.662701
9   0.176647  0.131419  0.761197
10  0.164719  0.123712  0.650280
11  0.158926  0.123172  0.568208
12  0.156576  0.121486  0.370218
13  0.166723  0.128484  0.597907
14  0.168270  0.129336  0.658589
15  0.174244  0.135981  1.582618
16  0.171053  0.130725  1.404575
17  0.171600  0.127286  1.393822
18  0.173175  0.122341  0.559800
19  0.174228  0.124628  0.594543
20  0.180376  0.133088  0.639654
21  0.172662  0.131419  0.731600
22  0.178786  0.136960  0.759125
23  0.179553  0.134234  0.737736
24  0.184686  0.138495  0.788528
25  0.172854  0.129943  0.668454
26  0.163025  0.126674  0.567995
