# In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import matplotlib.dates as mdates

# Generic input file location
input_file = './data/input/revised_synthetic_data.csv'

# Generic output folder locations
plot_folder = './output/input_data_characterization/plots/'

# Root folder for everything this script writes (model files, predictions,
# plots, metrics). It ends with a separator so that file names can be
# appended to it directly.
# NOTE(review): this name was previously read without ever being assigned,
# which raised NameError on the first os.path.join below. The location used
# here is a placeholder -- confirm the intended output path.
output_folder = './output/prophet/'

# Sub-folders of the output root; each keeps its trailing separator.
model_files_folder = os.path.join(output_folder, 'model_files/')
predictions_folder = os.path.join(output_folder, 'predictions/')
plots_folder = os.path.join(output_folder, 'plots/')

# Ensure folders exist or create them (parents, including the output root,
# are created as needed; existing folders are left alone).
os.makedirs(model_files_folder, exist_ok=True)
os.makedirs(predictions_folder, exist_ok=True)
os.makedirs(plots_folder, exist_ok=True)

# Load the CSV, parse its 'Month' column as datetimes, and use it as the index.
# asfreq('MS') then conforms the index to month-start frequency: it spans only
# the first to the last date present in the file, and any month-start that is
# missing from the file shows up as a row of NaN values.
data = (
    pd.read_csv(input_file)
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
    .set_index('Month')
    .asfreq('MS')
)

# Prophet expects a two-column frame: 'ds' (date) and 'y' (value).
# Demand series in Prophet layout
demand_data = data['Demand'].rename_axis('ds').reset_index(name='y')

# Supply series in Prophet layout
supply_data = data['Supply'].rename_axis('ds').reset_index(name='y')

# Fit one default-configured Prophet model per series
# (Prophet.fit returns the fitted model itself).
demand_model = Prophet().fit(demand_data)
supply_model = Prophet().fit(supply_data)

# In-sample predictions: each model is evaluated on the very dates it was
# fitted on, so no future periods are forecast here.
demand_forecast = demand_model.predict(demand_data)
supply_forecast = supply_model.predict(supply_data)

# Keep only the date and the point forecast, then write them out as CSV.
demand_predictions = demand_forecast.loc[:, ['ds', 'yhat']]
supply_predictions = supply_forecast.loc[:, ['ds', 'yhat']]
demand_predictions.to_csv(f'{predictions_folder}Prophet_demand_predictions.csv', index=False)
supply_predictions.to_csv(f'{predictions_folder}Prophet_supply_predictions.csv', index=False)

# Evaluate performance metrics
def calculate_metrics(y_true, y_pred):
    """Score predictions against observed values.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, in the same order as ``y_true``.

    Returns:
        A 4-tuple ``(mae, mse, rmse, r2)``: mean absolute error, mean
        squared error, the square root of the mean squared error, and the
        R² score.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        mean_absolute_error(y_true, y_pred),
        squared_error,
        np.sqrt(squared_error),
        r2_score(y_true, y_pred),
    )

# Score each model on the months that actually have an observation.
# Re-indexing the data to month-start frequency can leave NaN rows for months
# missing from the input; Prophet skips those when fitting, but the sklearn
# metric functions raise on NaN, so they are masked out here. With no missing
# months the masks are all True and the scores are unchanged.
# These are in-sample scores: the predictions were made on the training dates.
demand_observed = demand_data['y'].notna()
supply_observed = supply_data['y'].notna()

demand_metrics = calculate_metrics(
    demand_data.loc[demand_observed, 'y'],
    demand_predictions.loc[demand_observed, 'yhat'],
)
supply_metrics = calculate_metrics(
    supply_data.loc[supply_observed, 'y'],
    supply_predictions.loc[supply_observed, 'yhat'],
)

# One row per metric, one column per series; the row order matches the
# (mae, mse, rmse, r2) tuple returned by calculate_metrics.
performance = pd.DataFrame({
    'Metric': ['MAE', 'MSE', 'RMSE', 'R²'],
    'Demand': demand_metrics,
    'Supply': supply_metrics,
})

# os.path.join rather than string concatenation, so the file lands inside
# output_folder whether or not that path ends with a separator.
performance.to_csv(
    os.path.join(output_folder, 'Prophet_performance_metrics.csv'),
    index=False,
)

# Year-start tick positions (2024-01-01 through 2029-01-01) shared by every
# figure; the intent is to force the full date range to appear on the x-axis.
date_range = pd.date_range(start='2024-01-01', end='2029-12-01', freq='YS')


def _finish_figure(title, y_label, file_name, *, legend=False):
    """Style the current figure, save it into ``plots_folder``, and close it.

    Reads the module-level ``date_range`` (tick positions) and
    ``plots_folder`` (destination folder, expected to end with a separator).

    Args:
        title: Figure title.
        y_label: Label for the y-axis.
        file_name: Name of the output file inside ``plots_folder``.
        legend: Whether to draw a legend for the plotted lines.
    """
    plt.title(title, fontsize=25)
    plt.xlabel('Date', fontsize=32)
    plt.ylabel(y_label, fontsize=32)
    # Explicit tick positions and labels install their own locator and
    # formatter, so no separate YearLocator/DateFormatter setup is needed.
    plt.xticks(date_range, date_range.year, fontsize=20)
    plt.yticks(fontsize=20)
    plt.grid(True)
    if legend:
        plt.legend(fontsize=20)
    plt.tight_layout()
    plt.savefig(f'{plots_folder}{file_name}')
    plt.close()


def _plot_actual_vs_predicted(actual, predicted, quantity):
    """Plot observed ('ds', 'y') against predicted ('ds', 'yhat') values.

    Args:
        actual: Frame with 'ds' and 'y' columns.
        predicted: Frame with 'ds' and 'yhat' columns.
        quantity: Series name used in the labels, title and file name
            (for example 'Demand').
    """
    plt.figure(figsize=(11, 8))
    plt.plot(actual['ds'], actual['y'], label=f'Actual {quantity}', color='blue', linewidth=2)
    plt.plot(predicted['ds'], predicted['yhat'], label=f'Predicted {quantity}', color='green', linestyle='--', linewidth=2)
    _finish_figure(
        f'Prophet: Actual vs Predicted {quantity}',
        quantity,
        f'Prophet_Actual_vs_Predicted_{quantity}.pdf',
        legend=True,
    )


def _plot_residuals(dates, residuals, quantity):
    """Plot residuals (observed minus predicted) over time.

    Args:
        dates: x-values, one per residual.
        residuals: Residual values to plot.
        quantity: Series name used in the title and file name.
    """
    plt.figure(figsize=(11, 8))
    plt.plot(dates, residuals, color='red')
    _finish_figure(
        f'Prophet: Residual Plot for {quantity}',
        'Residuals',
        f'Prophet_{quantity}_Residuals.pdf',
    )


# Plot Actual vs Predicted for Demand and Supply
_plot_actual_vs_predicted(demand_data, demand_predictions, 'Demand')
_plot_actual_vs_predicted(supply_data, supply_predictions, 'Supply')

# Plot residuals for Demand
demand_residuals = demand_data['y'] - demand_predictions['yhat']
_plot_residuals(demand_data['ds'], demand_residuals, 'Demand')

# Plot residuals for Supply
supply_residuals = supply_data['y'] - supply_predictions['yhat']
_plot_residuals(supply_data['ds'], supply_residuals, 'Supply')

print("Prophet model, predictions, performance metrics, and plots saved successfully!")
