In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Prepare weekly data
# Anchor every observation to the Monday that starts its week (dayofweek: Monday == 0).
# The timestamp is normalized to midnight first: subtracting whole days from a value
# that still carries a time-of-day would produce a different week_start per distinct
# time, splitting a single week across several groups.
normalized_dates = combined_df['date'].dt.normalize()
combined_df['week_start'] = normalized_dates - pd.to_timedelta(normalized_dates.dt.dayofweek, unit='d')

# Every column other than the target and the date keys is carried through using the
# first value seen in each week; the target itself is averaged over the week.
feature_cols = [col for col in combined_df.columns if col not in ('value', 'date', 'week_start')]
weekly_agg = {'value': 'mean', **{col: 'first' for col in feature_cols}}
weekly_df = combined_df.groupby('week_start').agg(weekly_agg).reset_index()

In [None]:
# Train-test split
# Chronological split: weeks starting before the cutoff are used for training,
# weeks starting on or after it are held out for evaluation.
cutoff_date = '2023-01-01'

# Take explicit copies so that columns added to these frames later (e.g. predictions
# on eval_df) modify independent objects instead of slices of weekly_df, which would
# raise SettingWithCopyWarning.
train_df = weekly_df[weekly_df['week_start'] < cutoff_date].copy()
eval_df = weekly_df[weekly_df['week_start'] >= cutoff_date].copy()

# The target ('value') and the week key are not model inputs.
non_feature_cols = ['value', 'week_start']
X_train = train_df.drop(columns=non_feature_cols)
y_train = train_df['value']
X_eval = eval_df.drop(columns=non_feature_cols)
y_eval = eval_df['value']

In [None]:
# Train the model
# 100-tree random forest; the fixed random_state keeps the fit reproducible across runs.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
rf_model.fit(X=X_train, y=y_train)

In [None]:
# Make predictions
# Point forecasts from the fitted forest for every held-out week.
predictions = rf_model.predict(X=X_eval)

In [None]:
# Calculate prediction intervals
# Uncertainty is approximated by the spread of the individual trees' predictions
# around the forest's prediction. This is a heuristic band based on a normal
# approximation, not a calibrated prediction interval.

# The sub-estimators are fitted by the forest on a plain array rather than on the
# DataFrame, so hand them an array too; passing the DataFrame makes each tree warn
# that it was fitted without feature names.
X_eval_array = X_eval.to_numpy()
per_tree_predictions = np.stack([tree.predict(X_eval_array) for tree in rf_model.estimators_])

# Standard deviation across trees, one value per evaluation row.
pred_std = per_tree_predictions.std(axis=0)

# 1.96 is the two-sided 95% z-score of the standard normal distribution.
conf_interval = 1.96 * pred_std  # half-width of the approximate 95% band
lower_bound = predictions - conf_interval
upper_bound = predictions + conf_interval


In [None]:
# Save data for evaluation
# Attach the forecasts, their bounds and the observed target to the evaluation frame.
# assign() builds a new DataFrame instead of writing column-by-column into eval_df,
# which is a filtered selection of weekly_df; this avoids SettingWithCopyWarning and
# keeps the cell safe to re-run (the same four columns are simply recomputed).
eval_df = eval_df.assign(
    predictions=predictions,
    lower_bound=lower_bound,
    upper_bound=upper_bound,
    actual=y_eval.values,
)
eval_df.to_csv('predictions_with_actuals.csv', index=False)

print("Weekly predictions saved for evaluation.")