In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.dates as mdates

# Prepare the output directory tree: one sub-folder per artifact type
base_folder = './output/input_data_characterization/'

folders = ['model_files', 'predictions', 'plots']
for folder in folders:
    output_dir = os.path.join(base_folder, folder)
    # exist_ok=True makes re-running the script harmless
    os.makedirs(output_dir, exist_ok=True)

# Read the input dataset into a DataFrame
input_csv = './data/input/revised_synthetic_data.csv'
data = pd.read_csv(input_csv)

# Normalize the data for Q-Learning
def normalize_data(df):
    """Min-max scale *df* to the range [0, 1].

    Args:
        df: A pandas Series (scaled as a whole) or DataFrame (scaled
            column by column).

    Returns:
        An object of the same shape as *df* where the minimum maps to 0.0
        and the maximum maps to 1.0. A constant input (max == min) maps to
        0.0 everywhere instead of NaN.
    """
    lowest = df.min()
    span = df.max() - lowest

    # A zero span would turn the division into 0/0 (NaN for the whole
    # column); dividing by 1 instead yields 0.0 for constant data.
    if isinstance(span, pd.Series):
        span = span.replace(0, 1)
    elif span == 0:
        span = 1

    return (df - lowest) / span

# Normalized column -> raw source column; the key order also defines the
# order of the state dimensions below.
normalized_sources = {
    'Normalized_Demand': 'Demand',
    'Normalized_Supply': 'Supply',
    'Normalized_Economic_Growth_Rate': 'Economic_Growth_Rate',
    'Normalized_Waste_Generation': 'Waste_Generation',
    'Normalized_Energy_Prices': 'Energy_Prices',
}
for normalized_name, source_name in normalized_sources.items():
    data[normalized_name] = normalize_data(data[source_name])

# Q-Learning state / action definition
state_columns = list(normalized_sources)
action_column = 'Pricing'

# 10 bins per state feature and 10 discrete pricing actions
bins_per_feature = 10
state_space_size = (bins_per_feature,) * len(state_columns)
action_space_size = 10

# Q-table: one axis per state feature plus a trailing axis for the action
Q_table = np.zeros((*state_space_size, action_space_size))

# Hyperparameters
learning_rate = 0.05        # step size of each Q-value update
discount_factor = 0.9       # weight given to the best next-state value
exploration_rate = 1.0      # initial epsilon for epsilon-greedy selection
exploration_decay = 0.995   # multiplicative epsilon decay applied per episode
min_exploration_rate = 0.1  # epsilon never drops below this floor
episodes = 1000             # number of passes over the dataset

# Discretize the state space into equal-width bins over [0, 1]
def discretize_state(row, state_columns, state_space_size):
    """Map the normalized features of *row* to a tuple of bin indices.

    Args:
        row: Mapping-like object (e.g. a pandas Series or dict) indexable by
            column name.
        state_columns: Names of the features to read from *row*, in state
            order.
        state_space_size: Number of bins per feature. Entry ``i`` is used for
            ``state_columns[i]``; if the sequence is shorter than
            *state_columns*, its first entry is used for the remaining
            features.

    Returns:
        A tuple of Python ints, one per feature, each in ``[0, n_bins - 1]``.
        Values at or above 1 fall in the last bin and values below 0 fall in
        the first bin.
    """
    edges_by_size = {}  # bin edges are reused across features of equal size
    state = []
    for position, col in enumerate(state_columns):
        if position < len(state_space_size):
            n_bins = state_space_size[position]
        else:
            n_bins = state_space_size[0]

        if n_bins not in edges_by_size:
            edges_by_size[n_bins] = np.linspace(0, 1, n_bins + 1)

        bin_index = int(np.digitize(row[col], edges_by_size[n_bins])) - 1
        # Clamp both ends: without the lower clamp a value below 0 yields -1,
        # which would silently index the *last* slot of the Q-table axis.
        state.append(max(0, min(bin_index, n_bins - 1)))
    return tuple(state)

# Epsilon-greedy action selection
def choose_action(state):
    """Pick an action index for *state* using the epsilon-greedy rule.

    With probability ``exploration_rate`` a uniformly random action is
    returned; otherwise the action with the largest Q-value for *state*.
    Reads the module-level ``exploration_rate``, ``action_space_size`` and
    ``Q_table``.
    """
    exploring = random.uniform(0, 1) < exploration_rate
    if not exploring:
        # Exploit: greedy action for this state
        return np.argmax(Q_table[state])
    # Explore: any action, both ends inclusive
    return random.randint(0, action_space_size - 1)

# Reward: negative squared total gap between price and demand/supply
def calculate_reward(row):
    """Return the reward for *row*.

    The reward is ``-(|Demand - Pricing| + |Supply - Pricing|) ** 2``, so it
    is 0 when the price equals both demand and supply and grows more negative
    quadratically as the price moves away from them.
    """
    price = row['Pricing']
    total_gap = abs(row['Demand'] - price) + abs(row['Supply'] - price)
    # Squaring penalizes large deviations disproportionately
    return -(total_gap ** 2)

# One-step Q-learning update
def update_q_table(state, action, reward, next_state):
    """Apply a single temporal-difference update to the module-level Q-table.

    Args:
        state: Tuple of bin indices for the current state.
        action: Index of the action taken in *state*.
        reward: Reward observed for the transition.
        next_state: Tuple of bin indices for the successor state.

    The entry ``Q_table[state + (action,)]`` is moved towards
    ``reward + discount_factor * max(Q_table[next_state])`` by a fraction
    ``learning_rate``. Nothing is returned; ``Q_table`` is modified in place.
    """
    entry = state + (action,)
    best_next_value = np.max(Q_table[next_state])
    td_error = reward + discount_factor * best_next_value - Q_table[entry]
    Q_table[entry] += learning_rate * td_error

# Training the Q-learning model
#
# The discretized state and the reward of a row depend only on the data, not
# on the episode, so they are computed once here instead of once per row per
# episode. Rows are addressed by position, so the loop does not rely on the
# DataFrame having a default 0..n-1 index.
# NOTE(review): the reward is computed from the row alone and never sees the
# chosen action, so every action taken in a given row is credited with the
# same reward -- confirm this is intended.
transitions = [
    (discretize_state(row, state_columns, state_space_size), calculate_reward(row))
    for _, row in data.iterrows()
]
last_position = len(transitions) - 1

for episode in range(episodes):
    print(f"Episode {episode+1}/{episodes}")
    for position, (state, reward) in enumerate(transitions):
        # An action is drawn for every row, including the last one, so the
        # sequence of random draws is the same as in a plain row-by-row loop.
        action = choose_action(state)

        # The final row has no successor, hence no transition to learn from.
        if position < last_position:
            next_state = transitions[position + 1][0]
            update_q_table(state, action, reward, next_state)

    # Reduce exploration rate once per episode, never below the floor
    exploration_rate = max(min_exploration_rate, exploration_rate * exploration_decay)

# Save the Q-table
np.save(os.path.join(base_folder, 'model_files', 'Q_table.npy'), Q_table)

# Greedy read-out of the learned policy: for every row, the index of the
# action with the largest Q-value in that row's discretized state.
def _greedy_action(row):
    row_state = discretize_state(row, state_columns, state_space_size)
    return np.argmax(Q_table[row_state])


data['Optimized_Pricing'] = data.apply(_greedy_action, axis=1)

# Write the original price next to the selected action for each month
predictions_path = os.path.join(base_folder, 'predictions', 'Q_Learning_Pricing_Optimization.csv')
data[['Month', 'Pricing', 'Optimized_Pricing']].to_csv(predictions_path, index=False)

# Error metrics between the 'Pricing' column and the selected actions
actual_pricing = data['Pricing']
optimized_pricing = data['Optimized_Pricing']
mae = mean_absolute_error(actual_pricing, optimized_pricing)
mse = mean_squared_error(actual_pricing, optimized_pricing)
rmse = np.sqrt(mse)
r2 = r2_score(actual_pricing, optimized_pricing)

# Single-row table holding all four metrics
performance_df = pd.DataFrame([{'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'R²': r2}])

# Persist the metrics alongside the predictions
metrics_path = os.path.join(base_folder, 'predictions', 'Q_Learning_Performance_Metrics.csv')
performance_df.to_csv(metrics_path, index=False)

# Index the data by date and average it over 3-month windows for plotting
data['Month'] = pd.to_datetime(data['Month'])
data.set_index('Month', inplace=True)
data_resampled = data.resample('3M').mean()


def _save_time_series_figure(lines, title, y_label, file_name):
    """Draw *lines* against the resampled dates and save the figure as a PDF.

    *lines* is a sequence of ``(values, plot_kwargs)`` pairs; every line is
    drawn with ``linewidth=2``. The x-axis gets one tick per year. The file is
    written into the ``plots`` sub-folder of ``base_folder`` and the figure is
    closed afterwards.
    """
    plt.figure(figsize=(11, 8))
    for values, plot_kwargs in lines:
        plt.plot(data_resampled.index, values, linewidth=2, **plot_kwargs)
    plt.title(title, fontsize=25)
    plt.xlabel("Date", fontsize=32)
    plt.ylabel(y_label, fontsize=32)
    plt.legend(fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.grid(True)

    # Yearly major ticks labelled with the four-digit year
    x_axis = plt.gca().xaxis
    x_axis.set_major_locator(mdates.YearLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    plt.savefig(os.path.join(base_folder, 'plots', file_name), format='pdf')
    plt.close()


# Plot Pricing Optimization: original price vs. selected action over time
_save_time_series_figure(
    [
        (data_resampled['Pricing'], dict(label='Original Pricing', color='blue')),
        (data_resampled['Optimized_Pricing'], dict(label='Optimized Pricing', color='green', linestyle='--')),
    ],
    "Q-Learning: Actual vs Optimized Pricing",
    "Pricing",
    'Q_Learning_Actual_vs_Optimized_Pricing.pdf',
)

# Plot Pricing Residuals: original minus optimized, per resampled period
residuals = data_resampled['Pricing'] - data_resampled['Optimized_Pricing']
_save_time_series_figure(
    [(residuals, dict(label='Residuals', color='red'))],
    "Q-Learning: Pricing Residuals",
    "Residuals",
    'Q_Learning_Pricing_Residuals.pdf',
)

# Final console summary
print(f"Performance Metrics:\nMAE: {mae}\nMSE: {mse}\nRMSE: {rmse}\nR²: {r2}")
print("Q-Learning Pricing Optimization and Performance Evaluation Completed Successfully!")
