# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import random
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.dates as mdates

# Output layout: every artifact of this run is written below base_folder,
# in one sub-directory per artifact type.
base_folder = './output/input_data_characterization/'

folders = ['model_files', 'predictions', 'plots']
for folder in folders:
    output_dir = os.path.join(base_folder, folder)
    # exist_ok=True lets the script be re-run when the directories already exist
    os.makedirs(output_dir, exist_ok=True)

# Read the input dataset into a DataFrame
input_csv_path = './data/input/revised_synthetic_data.csv'
data = pd.read_csv(input_csv_path)

# Normalize the data for DQN
def normalize_data(df):
    """Min-max scale a pandas Series or DataFrame into the range [0, 1].

    Args:
        df: Series (scaled as a whole) or DataFrame (scaled column by column).

    Returns:
        An object of the same shape as ``df`` holding
        ``(df - min) / (max - min)``. A constant Series/column has a zero
        range; it is mapped to 0.0 instead of producing NaN from 0 / 0.
    """
    lowest = df.min()
    span = df.max() - lowest
    if isinstance(span, pd.Series):
        # DataFrame input: one range per column; neutralize the zero ranges
        span = span.replace(0, 1)
    elif span == 0:
        # Series input with a single repeated value
        span = 1
    return (df - lowest) / span

# Add a min-max scaled 'Normalized_<name>' column for each raw feature
for raw_column in ('Demand', 'Supply', 'Economic_Growth_Rate', 'Waste_Generation', 'Energy_Prices'):
    data[f'Normalized_{raw_column}'] = normalize_data(data[raw_column])

# Columns that make up the agent's state, and the column named as the action
state_columns = [
    'Normalized_Demand',
    'Normalized_Supply',
    'Normalized_Economic_Growth_Rate',
    'Normalized_Waste_Generation',
    'Normalized_Energy_Prices',
]
action_column = 'Pricing'

# Q-learning / DQN hyperparameters
state_space_size = (3,) * len(state_columns)  # 3 bins per state feature
action_space_size = 3  # number of discrete actions (network outputs)
learning_rate = 0.001  # Adam step size
discount_factor = 0.95  # weight of the bootstrapped next-state value
episodes = 10  # passes over the training rows

# Function to discretize the state space
def discretize_state(row, state_columns, state_space_size):
    """Map the normalized features of one row onto integer bin indices.

    Each feature is expected to lie in [0, 1] and is assigned to one of ``n``
    equal-width bins, where ``n`` is that feature's entry in
    ``state_space_size``.

    Args:
        row: Object indexable by column name (e.g. a pandas Series or dict).
        state_columns: Names of the features to read from ``row``, in the
            order they should appear in the state.
        state_space_size: Number of bins per feature. If it has fewer entries
            than ``state_columns``, the first entry is reused for the
            remaining features.

    Returns:
        Tuple with one bin index per feature, each in ``[0, n - 1]``.
    """
    state = []
    for position, col in enumerate(state_columns):
        if position < len(state_space_size):
            n_bins = state_space_size[position]
        else:
            n_bins = state_space_size[0]
        edges = np.linspace(0, 1, n_bins + 1)
        bin_index = int(np.digitize(row[col], edges)) - 1
        # Clamp on both sides: a value of exactly 1.0 (or above) lands one
        # past the last bin, and a value below 0 lands at -1.
        state.append(min(max(bin_index, 0), n_bins - 1))
    return tuple(state)

# Build DQN model with reduced layers and neurons
def build_dqn_model():
    """Create and compile the feed-forward Q-network.

    The network takes one input per entry of ``state_columns``, passes it
    through two ReLU hidden layers (64 and 32 units) and emits
    ``action_space_size`` linear outputs, one Q-value per action. It is
    compiled with MSE loss and Adam using the module-level ``learning_rate``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    n_inputs = len(state_columns)
    network = tf.keras.Sequential([
        layers.Input(shape=(n_inputs,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(action_space_size, activation='linear'),
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    network.compile(loss='mse', optimizer=optimizer)
    return network

# Experience replay settings
buffer_capacity = 100  # maximum number of stored transitions
batch_size = 16  # transitions sampled per training call
buffer = []  # stored transitions, oldest first

# Online network (trained) and target network (provides bootstrap targets)
model = build_dqn_model()
target_model = build_dqn_model()
# Start both networks from identical weights
target_model.set_weights(model.get_weights())

# Function to choose action using epsilon-greedy strategy
def choose_action(state, exploration_rate):
    """Pick an action index with an epsilon-greedy policy.

    Args:
        state: Sequence of state feature values for a single observation.
        exploration_rate: Probability of taking a uniformly random action.

    Returns:
        A random action index in ``[0, action_space_size - 1]`` when
        exploring; otherwise the index of the largest Q-value predicted by
        ``model`` for ``state``.
    """
    explore = random.uniform(0, 1) < exploration_rate
    if explore:
        return random.randint(0, action_space_size - 1)

    # The network expects a batch dimension, so wrap the single state
    single_state_batch = np.expand_dims(state, axis=0)
    q_values = model.predict(single_state_batch, verbose=0)
    return np.argmax(q_values)

# Function to update the model using experience replay
def train_model():
    """Run one DQN update on a mini-batch sampled from the replay buffer.

    Does nothing until ``buffer`` holds at least ``batch_size`` transitions.
    For each sampled transition the target is ``reward`` when ``done`` is
    set, otherwise ``reward + discount_factor * max_a Q_target(next_state, a)``.
    Only the Q-value of the action that was taken is replaced by the target;
    the other outputs keep the online network's own predictions, so they
    contribute no error to the MSE loss.

    The mini-batch is processed with one target-network prediction, one
    online-network prediction and one ``fit`` call (a single gradient step
    over the whole mini-batch), rather than three Keras calls per sampled
    transition.
    """
    if len(buffer) < batch_size:
        return
    mini_batch = random.sample(buffer, batch_size)
    states, actions, rewards, next_states, dones = zip(*mini_batch)

    state_batch = np.array(states)
    action_batch = np.array(actions)
    reward_batch = np.array(rewards, dtype=float)
    done_flags = np.array(dones, dtype=bool)
    # Terminal transitions store next_state=None. Feed their own state as a
    # placeholder so the batch stays rectangular; the Q-values predicted for
    # these rows are discarded by the np.where below.
    next_state_batch = np.array([
        state if done else next_state
        for state, next_state, done in zip(states, next_states, dones)
    ])

    next_q = target_model.predict(next_state_batch, verbose=0)
    bootstrapped = reward_batch + discount_factor * np.max(next_q, axis=1)
    targets = np.where(done_flags, reward_batch, bootstrapped)

    q_values = model.predict(state_batch, verbose=0)
    q_values[np.arange(batch_size), action_batch] = targets
    model.fit(state_batch, q_values, batch_size=batch_size, epochs=1, verbose=0)

# Function to update the target network
def update_target_model():
    """Copy the online network's current weights into the target network."""
    online_weights = model.get_weights()
    target_model.set_weights(online_weights)

# Training the DQN model
exploration_rate = 1.0  # start fully exploratory
exploration_decay = 0.9  # multiplicative decay applied after every episode
min_exploration_rate = 0.05  # floor for the exploration rate
sync_steps = 2  # Sync target model every 2 episodes
max_steps_per_episode = 51  # cap on rows visited per episode to bound runtime

for episode in range(episodes):
    print(f"Episode {episode+1}/{episodes}")
    # Use a positional counter rather than the DataFrame's index labels, so
    # the step cap and the iloc lookup of the next row stay correct even if
    # the index is not the default 0..n-1 range.
    for step, (_, row) in enumerate(data.iterrows()):
        if step >= max_steps_per_episode:
            break

        state = discretize_state(row, state_columns, state_space_size)
        action = choose_action(state, exploration_rate)
        # NOTE(review): the reward is computed from the row's recorded values
        # only and does not depend on `action` - confirm this is intended.
        reward = -abs(row[action_column] - row['Demand'])

        done = step + 1 >= len(data)  # Episode ends at last data point
        if not done:
            next_row = data.iloc[step + 1]
            next_state = discretize_state(next_row, state_columns, state_space_size)
        else:
            next_state = None

        # Store experience in buffer, evicting the oldest entry when full
        buffer.append((state, action, reward, next_state, done))
        if len(buffer) > buffer_capacity:
            buffer.pop(0)

        # Train the model
        train_model()

    # Update exploration rate
    exploration_rate = max(min_exploration_rate, exploration_rate * exploration_decay)

    # Sync target network
    if episode % sync_steps == 0:
        update_target_model()

# Save model and predictions
model.save(os.path.join(base_folder, 'model_files', 'DQN_model.h5'))

# Extract optimized pricing actions.
# All rows are discretized first and scored with a single batched predict
# call instead of one predict call per row.
all_states = np.array([
    discretize_state(row, state_columns, state_space_size)
    for _, row in data.iterrows()
])
all_q_values = model.predict(all_states, verbose=0)
# Each entry is the index of the highest-valued action (0 .. action_space_size - 1)
data['Optimized_Pricing'] = np.argmax(all_q_values, axis=1)

# Save predictions
data[['Month', 'Pricing', 'Optimized_Pricing']].to_csv(os.path.join(base_folder, 'predictions', 'DQN_Pricing_Optimization.csv'), index=False)

# Calculate performance metrics between the recorded and the model-selected column.
# NOTE(review): 'Optimized_Pricing' appears to hold argmax action indices while
# 'Pricing' holds the recorded prices - confirm the two are on a comparable scale.
actual_values = data['Pricing']
selected_values = data['Optimized_Pricing']

mae = mean_absolute_error(actual_values, selected_values)
mse = mean_squared_error(actual_values, selected_values)
rmse = np.sqrt(mse)
r2 = r2_score(actual_values, selected_values)

# Save performance metrics as a single-row CSV
metrics_path = os.path.join(base_folder, 'predictions', 'DQN_performance_metrics.csv')
performance_metrics = pd.DataFrame({'MAE': [mae], 'MSE': [mse], 'RMSE': [rmse], 'R²': [r2]})
performance_metrics.to_csv(metrics_path, index=False)

# Resampling needs a DatetimeIndex, so promote 'Month' to the index if it is still a column
if 'Month' in data.columns:
    data['Month'] = pd.to_datetime(data['Month'])
    data.set_index('Month', inplace=True)

# Average over 3-month windows to reduce granularity for plotting
data_resampled = data.resample('3M').mean()


def _finish_and_save_plot(title, y_label, file_name):
    """Apply the shared styling to the current figure, save it as PDF and close it."""
    plt.title(title, fontsize=25)
    plt.xlabel("Date", fontsize=32)
    plt.ylabel(y_label, fontsize=32)
    plt.legend(fontsize=20)
    plt.grid(True)

    # One x-axis tick per year, labelled with the four-digit year
    x_axis = plt.gca().xaxis
    x_axis.set_major_locator(mdates.YearLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    plt.savefig(os.path.join(base_folder, 'plots', file_name), format='pdf')
    plt.close()


# Plot recorded pricing against the model-selected values
plt.figure(figsize=(11, 8))
plt.plot(data_resampled.index, data_resampled['Pricing'], label='Original Pricing', color='blue', linewidth=2)
plt.plot(data_resampled.index, data_resampled['Optimized_Pricing'], label='Optimized Pricing', color='green', linestyle='--', linewidth=2)
_finish_and_save_plot("DQN: Actual vs Optimized Pricing", "Pricing", 'DQN_Actual_vs_Optimized_Pricing.pdf')

# Plot the difference between the two resampled series
residuals = data_resampled['Pricing'] - data_resampled['Optimized_Pricing']
plt.figure(figsize=(11, 8))
plt.plot(data_resampled.index, residuals, label='Residuals', color='red', linewidth=2)
_finish_and_save_plot("DQN: Pricing Residuals", "Residuals", 'DQN_Pricing_Residuals.pdf')

# Console summary of the metrics computed earlier
print(f"Performance Metrics:\nMAE: {mae}\nMSE: {mse}\nRMSE: {rmse}\nR²: {r2}")
print("DQN Pricing Optimization and Performance Evaluation Completed Successfully!")
