# Environment Setup

Goal: Use DQN model to setup optimal temperature for a room <br>

Factors affecting initial temperature: <br>
<ol>
    <li> Room booking status </li>
    <li> Outside Temp</li>
    <li> Day </li>
    <li> Time </li>
</ol>

Action taken by DQN: <br>
<ol>
    <li> Increase Temperature </li>
    <li> Decrease Temperature   </li>
    <li> Maintain Temperature </li>
</ol>

Rewards for DQN : <br>
<ol>
    <li> Energy Consumption Levels</li>
    <li> User Comfort</li>

In [30]:
import gym
import math
import numpy as np
import matplotlib.pyplot as plt

In [31]:
# Machine Learning to predict energy consumption

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
training_data = pd.read_excel('output.xlsx')
# for day data change mon to 1, tue to 2 etc

training_data['day_'] = training_data['day_'].replace(['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],[1,2,3,4,5,6,7])
features = ['day_', 'time_', 'outside_temp', 'inside_temp', 'booking_status']
X = training_data[features]
y = training_data['energy_consumption']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Random forest model to predict energy consumption

rf_model = RandomForestRegressor(n_estimators=200, random_state= 50) # 200 trees, random_state for reproducibility
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# percentage error
percentage_error = (np.mean(np.abs((y_test - y_pred) / y_test)) * 100)
print(f"Percentage Error: {percentage_error}")

Mean Squared Error: 591.7480183967483
Percentage Error: 0.5739767899029191


In [32]:
# Equation to calculate energy consumption
def energy_consumption(day, time, outside_temp, inside_temp, booking_status, change_in_temp):
    day = day
    time = time
    outside_temp = outside_temp
    inside_temp = inside_temp
    booking_status = booking_status
    change_in_temp = change_in_temp
    energy_consumption = rf_model.predict([[day, time, outside_temp, inside_temp, booking_status]])
    return energy_consumption



In [33]:
import gym
import numpy as np
from gym import spaces
class ThermostatEnvironment(gym.Env):
    def __init__(self):
        super(ThermostatEnvironment, self).__init__()
        self.day = 1
        self.time = 1
        self.booking_status = 0
        self.outside_temp = 30
        self.inside_temp = 18 # what room temperature should be
        self.action_space = spaces.Discrete(3) #increase, decrease, maintain
        self.observation_space = spaces.Box(low=np.array([1,1,0,23,18]), high=np.array([5,24,1,40,25]), dtype=np.float32)
        self.temperature = 18 # what room temperature is
        self.max_temp = 25
        self.min_temp = 18
        self.reward = 0
        self.done = False
    def step(self, action):
        # action logic
        if action == 0: # decrease, too hot
            self.temperature -= 1
        elif action == 2: #increase, too cold
            self.temperature += 1
        else: #maintain
            pass

        # ensure temperature is within bounds
        self.temperature = np.clip(self.temperature, self.min_temp, self.max_temp)

        # calculate energy consumption
        energy = energy_consumption(self.day, self.time, self.outside_temp, self.inside_temp, self.booking_status, self.temperature - self.inside_temp)

        # calculate reward
        # reward = 0.6* energy reward + 0.4* comfort reward
        # energy reward
        energy_reward = - (energy - 3200)/3200 # 3200 is the average energy consumption
        print(f"Energy Reward: {energy_reward}")

        # comfort reward
        # reward if user does not change temperature
        change_in_temp = self.temperature - self.inside_temp
        if change_in_temp == 0:
            comfort_reward = 1
        if change_in_temp > 0:
            comfort_reward = -0
        if change_in_temp < 0:
            comfort_reward = -1
        print(f"Comfort Reward: {comfort_reward}")

        reward = 0.6* energy_reward + 0.4* comfort_reward
        print(f"Total Reward: {reward}")
        self.reward += reward
        self.done = True

        return np.array([self.day, self.time, self.booking_status, self.outside_temp, self.inside_temp]), reward, self.done, {}
    
    def reset(self):
        self.day = 1
        self.time = 1
        self.booking_status = 0
        self.outside_temp = 30
        self.inside_temp = 18
        self.temperature = 18
        self.reward = 0
        self.done = False
        return np.array([self.day, self.time, self.booking_status, self.outside_temp, self.inside_temp])
    
    def render(self):
        pass
    
        


# Q Network

In [34]:
# Neural Network
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import copy

class DQN(nn.Module):
    def __init__(self, input_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 25)  # 5 input features, 1 output features
        self.fc2 = nn.Linear(25,25)
        self.fc3 = nn.Linear(25, action_size) # 10 input features, 2 output features
    
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x

In [35]:
# memory
class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
    
    def push(self, state, action, next_state, reward):
        if len(self.memory) < self.capacity:
            self.memory.append((state, action, next_state, reward))
        else:
            self.memory.pop(0)
            self.memory.append((state, action, next_state, reward))
    
    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)
    
    def __len__(self):
        return len(self.memory)

# Training

In [36]:
# Train the model

# Hyperparameters
learning_rate = 0.001
gamma = 0.9
buffer_limit = 50000
batch_size = 32
tau = 0.01
input_size = 5
action_size = 4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the environment
env = ThermostatEnvironment()
env.reset()

# Initialize the model
model = DQN(input_size, action_size).to(device)
target_model = copy.deepcopy(model)
target_model.load_state_dict(model.state_dict())
target_model.eval()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initialize the replay buffer
replay_buffer = []
replay_buffer_size = 0

# Initialize the training parameters
if torch.cuda.is_available():
    print('Using GPU')
    num_episodes = 500
else:
    print('Using CPU')
    num_episodes = 100
epsilon = 0.1
epsilon_decay = 0.99
epsilon_min = 0.01
update_every = 10
update_count = 0
loss_fn = nn.MSELoss()


# Get number of actions from gym action space
n_actions = env.action_space.n

# Get number of states from gym observation space
state,info = env.reset(),{}
n_states = len(state)

policy_net = DQN(n_states, n_actions).to(device)
target_net = DQN(n_states, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())

optimizer = optim.Adam(policy_net.parameters(), lr=0.01)
memory = ReplayMemory(10000)

def select_action(state, epsilon):
    global steps_done
    sample = random.random()
    eps_threshold = epsilon
    if sample > eps_threshold:
        with torch.no_grad():
            return policy_net(state).max(1)[1].view(1, 1)
    else:
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)

episodes_duration = []

def optimize_model():
    if len(memory) < batch_size:
        return
    transitions = memory.sample(batch_size)
    batch = memory.sample(batch_size)

    state_batch, action_batch, next_state_batch, reward_batch = zip(*batch)
    state_batch = torch.cat(state_batch)
    next_state_batch = torch.cat(next_state_batch)
    reward_batch = torch.cat(reward_batch)
    action_batch = torch.tensor(action_batch).to(device)

    state_action_values = policy_net(state_batch).gather(1, action_batch)
    next_state_values = target_net(next_state_batch).max(1)[0].detach()
    expected_state_action_values = (next_state_values * gamma) + reward_batch

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))


    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss




Using CPU


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [37]:
# Training loop

for episode in range(num_episodes):
    state = env.reset()
    state = torch.tensor([state], device=device, dtype=torch.float32)
    done = False
    total_reward = 0
    steps = 0
    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, _ = env.step(action.item())
        next_state = torch.tensor([next_state], device=device, dtype=torch.float32)
        reward = torch.tensor([reward], device=device, dtype=torch.float32)
        memory.push(state, action, next_state, reward)
        state = next_state
        total_reward += reward.item()
        loss = optimize_model()
        steps += 1
    episodes_duration.append(steps)
    print(f"Episode {episode}, Total Reward: {total_reward}, Loss: {loss}")
    if episode % update_every == 0:
        target_net.load_state_dict(policy_net.state_dict())
    epsilon = max(epsilon_min, epsilon*epsilon_decay)

# Plot the duration of each episode
plt.plot(episodes_duration)
plt.xlabel('Episode')
plt.ylabel('Duration')
plt.show()






    
    

Episode 0, Total Reward: -0.14783568680286407, Loss: None
Episode 1, Total Reward: 0.2521643042564392, Loss: None
Episode 2, Total Reward: 0.2521643042564392, Loss: None
Episode 3, Total Reward: 0.2521643042564392, Loss: None
Episode 4, Total Reward: 0.2521643042564392, Loss: None


Episode 5, Total Reward: 0.2521643042564392, Loss: None
Episode 6, Total Reward: 0.2521643042564392, Loss: None
Episode 7, Total Reward: 0.2521643042564392, Loss: None
Episode 8, Total Reward: 0.2521643042564392, Loss: None
Episode 9, Total Reward: 0.2521643042564392, Loss: None
Episode 10, Total Reward: 0.2521643042564392, Loss: None
Episode 11, Total Reward: -0.14783568680286407, Loss: None
Episode 12, Total Reward: 0.2521643042564392, Loss: None
Episode 13, Total Reward: 0.2521643042564392, Loss: None
Episode 14, Total Reward: 0.2521643042564392, Loss: None
Episode 15, Total Reward: 0.2521643042564392, Loss: None
Episode 16, Total Reward: 0.2521643042564392, Loss: None




Episode 17, Total Reward: 0.2521643042564392, Loss: None
Episode 18, Total Reward: 0.2521643042564392, Loss: None
Episode 19, Total Reward: 0.2521643042564392, Loss: None
Episode 20, Total Reward: 0.2521643042564392, Loss: None




Episode 21, Total Reward: 0.2521643042564392, Loss: None
Episode 22, Total Reward: 0.2521643042564392, Loss: None
Episode 23, Total Reward: 0.2521643042564392, Loss: None
Episode 24, Total Reward: 0.2521643042564392, Loss: None
Episode 25, Total Reward: 0.2521643042564392, Loss: None
Episode 26, Total Reward: 0.2521643042564392, Loss: None
Episode 27, Total Reward: 0.2521643042564392, Loss: None
Episode 28, Total Reward: 0.2521643042564392, Loss: None
Episode 29, Total Reward: 0.2521643042564392, Loss: None
Episode 30, Total Reward: 0.2521643042564392, Loss: None


RuntimeError: Index tensor must have the same number of dimensions as input tensor