# In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# CSV file the script reads its time series from.
input_file = './data/input/revised_synthetic_data.csv'

# Root directory for every artifact written by this script.
output_folder = './output/input_data_characterization/plots/'

# One sub-directory per artifact type. The trailing slash on each name is
# intentional: later code builds file paths by plain string concatenation.
# NOTE(review): output_folder already ends in 'plots/', so plots_folder
# resolves to '.../plots/plots/' -- confirm this nesting is intended.
model_files_folder, predictions_folder, plots_folder = (
    os.path.join(output_folder, subfolder_name)
    for subfolder_name in ('model_files/', 'predictions/', 'plots/')
)

# Create the directory tree up front; already-existing folders are fine.
for _artifact_folder in (model_files_folder, predictions_folder, plots_folder):
    os.makedirs(_artifact_folder, exist_ok=True)

# Read the CSV, index it by the parsed 'Month' column, and pin the index to
# an explicit month-start ('MS') frequency so the time-series models do not
# have to infer it (avoids frequency warnings).
# NOTE(review): asfreq re-indexes onto a regular month-start calendar, so a
# month missing from the file would show up as a NaN row -- confirm the
# input has no gaps.
raw_table = pd.read_csv(input_file)
data = (
    raw_table
    .assign(Month=pd.to_datetime(raw_table['Month']))
    .set_index('Month')
    .asfreq('MS')
)

# The two series that get modelled.
demand_data, supply_data = data['Demand'], data['Supply']

# Both series are modelled with the same ARIMA (p, d, q) order.
arima_order = (5, 1, 0)

# Fit Demand first, then Supply.
demand_model = ARIMA(demand_data, order=arima_order)
demand_fit = demand_model.fit()

supply_model = ARIMA(supply_data, order=arima_order)
supply_fit = supply_model.fit()

# Persist each fitted result into the model-files folder.
for _fitted_result, _pickle_name in (
    (demand_fit, 'ARIMA_demand_model.pkl'),
    (supply_fit, 'ARIMA_supply_model.pkl'),
):
    _fitted_result.save(f'{model_files_folder}{_pickle_name}')

# Make in-sample predictions for positions 1 .. n-1 of each series.
#
# Position 0 is skipped so the predictions line up with the `[1:]` slice of
# the actuals used by the evaluation and plotting code further down
# (presumably because the first prediction of a once-differenced model is
# not meaningful).
#
# Changes versus the previous version of this step:
#   * `typ='levels'` was dropped. It is not a parameter of
#     `statsmodels.tsa.arima.model.ARIMAResults.predict` (it belonged to the
#     legacy `arima_model` API); this model class already predicts on the
#     original scale of the data, and recent statsmodels releases warn about
#     unknown keyword arguments.
#   * `end` now points at the last observation directly instead of asking
#     for one extra out-of-sample step and slicing it off with `[:-1]`.
demand_predictions = demand_fit.predict(start=1, end=len(demand_data) - 1)
supply_predictions = supply_fit.predict(start=1, end=len(supply_data) - 1)

# Save predictions.
# `pd.DataFrame(series, columns=[...])` SELECTS columns by name; because the
# prediction Series carries its own name, asking for a different column name
# produced a column full of NaN. `to_frame(name=...)` renames instead, so the
# predicted values actually reach the CSV.
demand_predictions.to_frame(name='Predicted_Demand').to_csv(
    os.path.join(predictions_folder, 'ARIMA_demand_predictions.csv')
)
supply_predictions.to_frame(name='Predicted_Supply').to_csv(
    os.path.join(predictions_folder, 'ARIMA_supply_predictions.csv')
)

def _regression_scores(actual, predicted):
    """Return (MAE, MSE, RMSE, R2) for *predicted* measured against *actual*."""
    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    r2 = r2_score(actual, predicted)
    return mae, mse, rmse, r2


# Score each series. The first observation is dropped from the actuals so
# they cover the same rows as the predictions.
demand_mae, demand_mse, demand_rmse, demand_r2 = _regression_scores(
    demand_data[1:], demand_predictions
)
supply_mae, supply_mse, supply_rmse, supply_r2 = _regression_scores(
    supply_data[1:], supply_predictions
)

# One row per metric, one column per series; written without the row index.
performance = pd.DataFrame({
    'Metric': ['MAE', 'MSE', 'RMSE', 'R²'],
    'Demand': [demand_mae, demand_mse, demand_rmse, demand_r2],
    'Supply': [supply_mae, supply_mse, supply_rmse, supply_r2],
})
performance.to_csv(f'{output_folder}ARIMA_performance_metrics.csv', index=False)

def _save_residual_plot(residuals, variable, filename):
    """Draw *residuals* as a line plot and save it as a PDF in plots_folder.

    Args:
        residuals: Series of (actual - predicted) values to plot.
        variable: Display name of the series, used in the plot title.
        filename: File name of the PDF written inside ``plots_folder``.
    """
    plt.figure(figsize=(11, 8))
    plt.plot(residuals)
    plt.title(f'ARIMA: Residuals Plot for {variable}', fontsize=25)
    # Axis labels use 32 pt for every series. The Demand plot previously
    # used 36 pt, which was inconsistent with the Supply residual plot and
    # with both actual-vs-predicted plots.
    plt.xlabel('Date', fontsize=32)
    plt.ylabel('Residuals', fontsize=32)
    plt.grid(True)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.savefig(os.path.join(plots_folder, filename), format='pdf')
    # Close the figure so open figures do not accumulate across plots.
    plt.close()


# Residual = actual - predicted; the first observation is dropped from the
# actuals so both sides cover the same rows.
demand_residuals = demand_data[1:] - demand_predictions
supply_residuals = supply_data[1:] - supply_predictions

_save_residual_plot(demand_residuals, 'Demand', 'ARIMA_demand_residuals.pdf')
_save_residual_plot(supply_residuals, 'Supply', 'ARIMA_supply_residuals.pdf')

def _save_actual_vs_predicted_plot(actual, predicted, variable):
    """Overlay *actual* and *predicted* for one series and save the PDF.

    *variable* is the display name of the series ('Demand' or 'Supply'); it
    is used for the legend, title, y-axis label and, lower-cased, for the
    output file name inside ``plots_folder``.
    """
    plt.figure(figsize=(11, 8))
    plt.plot(actual, label=f'Actual {variable}', color='blue')
    plt.plot(predicted, label=f'Predicted {variable}', color='orange')
    plt.title(f'ARIMA: Actual vs Predicted {variable}', fontsize=25)
    plt.xlabel('Date', fontsize=32)
    plt.ylabel(variable, fontsize=32)
    plt.legend(fontsize=20)
    plt.grid(True)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.savefig(
        f'{plots_folder}ARIMA_{variable.lower()}_actual_vs_predicted.pdf',
        format='pdf',
    )
    plt.close()


# The first observation is dropped from the actuals so both curves cover
# the same rows.
_save_actual_vs_predicted_plot(demand_data[1:], demand_predictions, 'Demand')
_save_actual_vs_predicted_plot(supply_data[1:], supply_predictions, 'Supply')

print("Model, predictions, performance metrics, and plots saved successfully!")
