In [163]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import pandas as pd
from collections import deque

In [132]:
# Load dataset
df = pd.read_csv("charging_data.csv")

# Encode categorical features
df["Weather"] = df["Weather"].astype("category").cat.codes  # Convert Weather to numeric
# Encode categorical features
df["Weekend"] = df["Weekend"].astype("category").cat.codes  # Convert Weather to numeric

# Convert DataFrame to NumPy array
data = df.to_numpy()

In [154]:
df.head()

Unnamed: 0,Hour,Charging_Count,Weather,Weekend,Charging_Duration
0,9,77,0,0,34
1,2,30,0,1,71
2,12,45,3,0,21
3,1,38,2,0,60
4,2,46,3,0,51


In [164]:

# Hyperparameters
EPISODES = 2000
GAMMA = 0.9
EPSILON = 0.9
EPSILON_DECAY = 0.995
MIN_EPSILON = 0.01
BATCH_SIZE = 64
LEARNING_RATE = 0.001
MEMORY = deque(maxlen=10000)  # Fixed-size memory buffer


In [147]:
# Define state and action sizes
STATE_SIZE = data.shape[1]  # Number of features (Hour, Charging_Count, Weather, Weekend, Charging_Duration)
ACTION_SIZE = 2  # 0: Standard, 1: Increase

In [165]:
# Define Neural Network
class QNetwork(nn.Module):
    def __init__(self):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(STATE_SIZE, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, ACTION_SIZE)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [166]:
# Initialize Model
device = torch.device("cuda" if to
                      
                      
                      
                      
                      rch.cuda.is_available() else "cpu")
model = QNetwork().to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.MSELoss()

In [199]:
import requests

url = "http://localhost:5000/daily-count/hourly-data"
params = {"date": "2025-03-04"}

response = requests.get(url, params=params)

if response.status_code == 200:
    data = response.json()
    
    # Sort the data by charging_count in descending order and extract only hours
    top_hours = [entry["hour"] for entry in sorted(data["data"], key=lambda x: x["charging_count"], reverse=True)[:5]]

    print(top_hours)  # Output: [14, 14, 15]
else:
    print(f"Error: {response.status_code}, {response.text}")


[17, 18, 19, 17, 18]


In [225]:
def get_reward(action, hour):
    if 6 <= hour <= 10 or 17 <= hour <= 21:  # Peak hours
        return 2 if action == 1 else -1  # Reward for increasing charge
    else:
        return 1 if action == 0 else -0.5  # Reward for standard charge


In [226]:
# Training Loop
for episode in range(EPISODES):
    # Select random state
    state = df.sample(1).values[0]  # Get one random sample
    state = torch.tensor(state, dtype=torch.float32).to(device)  # Convert to tensor

    # Epsilon-greedy policy
    if random.random() < EPSILON:
        action = random.randint(0, ACTION_SIZE - 1)  # Explore
    else:
        with torch.no_grad():
            action = torch.argmax(model(state)).item()  # Exploit

    # Compute reward
    reward = get_reward(action, state[0])  # Pass only hour

    # Select next state
    next_state = df.sample(1).values[0]  
    next_state = torch.tensor(next_state, dtype=torch.float32).to(device)

    # Store experience in replay memory
    MEMORY.append((state, action, reward, next_state))

    # Training
    if len(MEMORY) > BATCH_SIZE:
        batch = random.sample(MEMORY, BATCH_SIZE)
        states, actions, rewards, next_states = zip(*batch)

        states = torch.stack(states)
        actions = torch.tensor(actions, dtype=torch.long).to(device)
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        next_states = torch.stack(next_states)

        # Compute target Q-values
        target_q = rewards + GAMMA * torch.max(model(next_states), dim=1)[0]

        # Compute current Q-values
        current_q = model(states).gather(1, actions.unsqueeze(1)).squeeze()

        # Compute loss and update model
        loss = loss_fn(current_q, target_q)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Decay epsilon
    EPSILON = max(MIN_EPSILON, EPSILON * EPSILON_DECAY)

    if episode % 100 == 0:
        print(f"Episode {episode}, Loss: {loss:.4f}, Epsilon: {EPSILON:.3f}")

print("✅ RL Model Trained Successfully!")


Episode 0, Loss: 0.6469, Epsilon: 0.010
Episode 100, Loss: 0.7164, Epsilon: 0.010
Episode 200, Loss: 0.8871, Epsilon: 0.010
Episode 300, Loss: 0.6312, Epsilon: 0.010
Episode 400, Loss: 0.7405, Epsilon: 0.010
Episode 500, Loss: 0.4490, Epsilon: 0.010
Episode 600, Loss: 0.6128, Epsilon: 0.010
Episode 700, Loss: 0.6724, Epsilon: 0.010
Episode 800, Loss: 0.6135, Epsilon: 0.010
Episode 900, Loss: 0.6687, Epsilon: 0.010
Episode 1000, Loss: 0.6179, Epsilon: 0.010
Episode 1100, Loss: 0.8249, Epsilon: 0.010
Episode 1200, Loss: 0.5907, Epsilon: 0.010
Episode 1300, Loss: 0.8451, Epsilon: 0.010
Episode 1400, Loss: 0.7730, Epsilon: 0.010
Episode 1500, Loss: 0.6977, Epsilon: 0.010
Episode 1600, Loss: 0.6246, Epsilon: 0.010
Episode 1700, Loss: 0.8651, Epsilon: 0.010
Episode 1800, Loss: 0.6305, Epsilon: 0.010
Episode 1900, Loss: 0.7762, Epsilon: 0.010
✅ RL Model Trained Successfully!


In [227]:
print(df.sample(1).values[0])  # Debugging Step

[ 18 106   1   0  52]


In [228]:
df.head()

Unnamed: 0,Hour,Charging_Count,Weather,Weekend,Charging_Duration
0,9,77,0,0,34
1,2,30,0,1,71
2,12,45,3,0,21
3,1,38,2,0,60
4,2,46,3,0,51


In [229]:
test_state = (23,80,2,0,56)  # hour, minute, weather, charging_duration, peak_hour

In [230]:
reward = get_reward(0,test_state[0])  # Pass hour directly
print(f"Final Reward: {reward}")

Final Reward: 1


In [232]:
for i in range(0,23):
    test_state = torch.tensor([i,80,2,0,56], dtype=torch.float32).to(device)
    test_state = test_state.cpu().numpy() 
    reward = get_reward(1, test_state[0])  # Pass hour directly
    print(f"Final Reward: {reward}")

Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: -0.5
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: 2
Final Reward: -0.5


In [236]:
sample = df.head()
print(sample)  # ✅ Shows one random row

   Hour  Charging_Count  Weather  Weekend  Charging_Duration
0     9              77        0        0                 34
1     2              30        0        1                 71
2    12              45        3        0                 21
3     1              38        2        0                 60
4     2              46        3        0                 51


In [244]:
test_state = torch.tensor([10, 42,2, 1, 32], dtype=torch.float32).to(device)
test_state = test_state.cpu().numpy()  

for action in range(ACTION_SIZE):
    reward = get_reward(action, test_state[0])
    print(f"Action {action} -> Reward: {reward}")  # ✅ See reward for each action


Action 0 -> Reward: -1
Action 1 -> Reward: 2


In [270]:
test_state = torch.tensor([1, 42,  weather_encoding["Rainy"], boolean_encoding[True], 32], dtype=torch.float32).to(device)
test_state = test_state.cpu().numpy()
reward = get_reward(1, test_state[0])

In [271]:
reward

-0.5

In [272]:
if(reward>0):
    print("Increase by 1.2%")
else:
    print("Normal price")


Normal price
