# In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# Paths and data loading
# ---------------------------------------------------------------------------

# Generic input file location
input_file = './data/input/revised_synthetic_data.csv'
# The loading code refers to the CSV as `input_data_path`; bind that name to
# the same file so the read below does not fail with a NameError.
input_data_path = input_file

# Generic output folder locations
plot_folder = './output/input_data_characterization/plots/'
# `output_folder` is the parent of the plot folder.  It is derived from
# `plot_folder` (rather than hard-coded a second time) so the two cannot drift
# apart; it was previously referenced without ever being defined.
output_folder = os.path.dirname(os.path.normpath(plot_folder))
plots_folder = os.path.join(output_folder, 'plots')

# Ensure folders exist
os.makedirs(plots_folder, exist_ok=True)

# Load data
data = pd.read_csv(input_data_path)

# Convert 'Month' column to datetime and set as index
data['Month'] = pd.to_datetime(data['Month'])
data.set_index('Month', inplace=True)

# Re-index to month-start ('MS') frequency.
# NOTE(review): asfreq does not fill gaps -- any month missing from the CSV
# becomes a row of NaN, which would propagate into scaling and training.
# The input is presumably complete for 2024-2029; confirm against the file.
data = data.asfreq('MS')

# Independent variables fed to both networks.
feature_columns = [
    'Waste_Generation',
    'Pricing',
    'Economic_Growth_Rate',
    'Resource_Availability',
    'Energy_Prices',
]

# Min-max scale the features.
# NOTE(review): every scaler in this section is fitted on the full dataset,
# i.e. before the train/test split below, so the held-out rows influence the
# scaling parameters -- confirm this is acceptable for the analysis.
scaler_features = MinMaxScaler()
scaled_features = scaler_features.fit(data[feature_columns]).transform(data[feature_columns])

# Each target gets its own scaler so predictions can be mapped back to the
# original units independently.
scaler_demand = MinMaxScaler()
scaler_supply = MinMaxScaler()
scaled_demand = scaler_demand.fit_transform(data[['Demand']])
scaled_supply = scaler_supply.fit_transform(data[['Supply']])

# Model inputs / targets
X, y_demand, y_supply = scaled_features, scaled_demand, scaled_supply

# Chronological 80/20 split.  With shuffle=False the cut point is the same for
# every array, so features and both targets are split in a single call.
(
    X_train, X_test,
    y_train_demand, y_test_demand,
    y_train_supply, y_test_supply,
) = train_test_split(X, y_demand, y_supply, test_size=0.2, shuffle=False)

# Full datetime index, used as the x-axis of every plot
time_index = data.index

def _build_regressor(n_features):
    """Return a compiled feed-forward regressor.

    The network has two 64-unit ReLU hidden layers followed by a single
    output unit, and is compiled with the Adam optimizer and a
    mean-squared-error loss.

    Parameters
    ----------
    n_features : int
        Number of input columns the first layer expects.
    """
    network = Sequential([
        Dense(64, activation='relu', input_shape=(n_features,)),
        Dense(64, activation='relu'),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


# Both targets use the same architecture and training schedule.
n_input_features = X_train.shape[1]
fit_settings = {'epochs': 100, 'batch_size': 32}

# Demand model: built and trained first
model_demand = _build_regressor(n_input_features)
model_demand.fit(
    X_train, y_train_demand,
    validation_data=(X_test, y_test_demand),
    **fit_settings,
)

# Supply model
model_supply = _build_regressor(n_input_features)
model_supply.fit(
    X_train, y_train_supply,
    validation_data=(X_test, y_test_supply),
    **fit_settings,
)

# Demand: predict over every row (train and test alike, so the plots span the
# whole period), then map predictions and actuals back to original units.
predicted_demand = model_demand.predict(X)
predicted_demand_original = scaler_demand.inverse_transform(predicted_demand)
y_demand_original = scaler_demand.inverse_transform(y_demand)

# Supply: same procedure with the supply model and its own scaler.
predicted_supply = model_supply.predict(X)
predicted_supply_original = scaler_supply.inverse_transform(predicted_supply)
y_supply_original = scaler_supply.inverse_transform(y_supply)

# Year-start dates between 2024-01-01 and 2029-12-01, used as x-tick positions
date_range = pd.date_range('2024-01-01', '2029-12-01', freq='YS')

def plot_actual_vs_predicted(time_index, actual, predicted, title, y_label, file_name):
    """Save an "actual vs predicted" line chart as a PDF.

    Reads the module-level ``date_range`` (x-tick positions) and
    ``plots_folder`` (output directory).

    Parameters
    ----------
    time_index : array-like of datetimes
        X-axis values shared by both series.
    actual, predicted : array-like
        Observed and model-predicted values in original (unscaled) units.
    title : str
        Series name; used in the chart title and the legend entries.
    y_label : str
        Label for the y-axis.
    file_name : str
        File name of the PDF written inside ``plots_folder``.
    """
    fig, ax = plt.subplots(figsize=(11, 8))
    try:
        ax.plot(time_index, actual, label=f'Actual {title}', color='blue')
        ax.plot(time_index, predicted, label=f'Predicted {title}',
                color='green', linestyle='--')
        ax.set_title(f'Neural Network: Actual vs Predicted {title}', fontsize=25)
        ax.set_xlabel('Date', fontsize=32)
        ax.set_ylabel(y_label, fontsize=32)
        ax.legend(fontsize=20)
        ax.grid(True)

        # One tick per year, labelled with the year number.  Explicit tick
        # positions and labels replace any locator/formatter on the axis, so
        # no YearLocator/DateFormatter is installed beforehand.
        ax.set_xticks(date_range)
        ax.set_xticklabels(date_range.year, fontsize=20)
        ax.tick_params(axis='y', labelsize=20)

        fig.savefig(os.path.join(plots_folder, file_name), format='pdf')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)


# Plot Actual vs Predicted for Supply (2024-2029)
plot_actual_vs_predicted(time_index, y_supply_original, predicted_supply_original,
                         'Supply', 'Supply', 'NN_supply_actual_vs_predicted_full.pdf')

# Plot Actual vs Predicted for Demand (2024-2029)
plot_actual_vs_predicted(time_index, y_demand_original, predicted_demand_original,
                         'Demand', 'Demand', 'NN_demand_actual_vs_predicted_full.pdf')

# Plot residuals for Demand and Supply
def plot_residuals(time_index, actual, predicted, title, y_label, file_name):
    """Save a line chart of the residuals (``actual - predicted``) as a PDF.

    Reads the module-level ``date_range`` (x-tick positions) and
    ``plots_folder`` (output directory).

    Parameters
    ----------
    time_index : array-like of datetimes
        X-axis values.
    actual, predicted : array-like
        Observed and predicted values; they are subtracted element-wise, so
        they are expected to have matching shapes.
    title : str
        Series name appended to the chart title.
    y_label : str
        Label for the y-axis.
    file_name : str
        File name of the PDF written inside ``plots_folder``.
    """
    errors = actual - predicted

    fig, ax = plt.subplots(figsize=(11, 8))
    ax.plot(time_index, errors, label='Residuals', color='purple')
    ax.set_title(f'Neural Network: Residual Plot for {title}', fontsize=25)
    ax.set_xlabel('Date', fontsize=32)
    ax.set_ylabel(y_label, fontsize=32)
    ax.grid(True)

    # Yearly major ticks formatted as four-digit years ...
    x_axis = ax.xaxis
    x_axis.set_major_locator(mdates.YearLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%Y'))
    # ... then pin the tick positions/labels explicitly to the yearly dates.
    ax.set_xticks(date_range)
    ax.set_xticklabels(date_range.year, fontsize=20)
    plt.setp(ax.get_yticklabels(), fontsize=20)

    # Write the PDF into the plots folder and release the figure
    target_path = os.path.join(plots_folder, file_name)
    fig.savefig(target_path, format='pdf')
    plt.close(fig)

# Residual plots (actual minus predicted, in original units) for each target.
# For Demand
plot_residuals(time_index, y_demand_original, predicted_demand_original, 'Demand', 'Residuals', 'NN_Demand_Residuals.pdf')

# For Supply
plot_residuals(time_index, y_supply_original, predicted_supply_original, 'Supply', 'Residuals', 'NN_Supply_Residuals.pdf')

# Only plots are written by this script; no metrics are computed or saved,
# so the completion message reports plots alone.
print("Neural Network plots saved successfully!")
