# Environment Setup

Goal: Use DQN model to setup optimal temperature for a room <br>

Factors affecting initial temperature: <br>
<ol>
    <li> Room booking status </li>
    <li> Outside Temp</li>
    <li> Day </li>
    <li> Time </li>
</ol>

Action taken by DQN: <br>
<ol>
    <li> Increase Temperature </li>
    <li> Decrease Temperature   </li>
    <li> Maintain Temperature </li>
</ol>

Rewards for DQN : <br>
<ol>
    <li> Energy Consumption Levels</li>
    <li> User Comfort</li>

In [2]:
import gym
import math
import numpy as np
import matplotlib.pyplot as plt

In [16]:
# Machine Learning to predict energy consumption

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
training_data = pd.read_excel('output.xlsx')
# for day data change mon to 1, tue to 2 etc

training_data['day_'] = training_data['day_'].replace(['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],[1,2,3,4,5,6,7])
features = ['day_', 'time_', 'outside_temp', 'inside_temp', 'booking_status']
X = training_data[features]
y = training_data['energy_consumption']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Random forest model to predict energy consumption

rf_model = RandomForestRegressor(n_estimators=200, random_state= 50) # 200 trees, random_state for reproducibility
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 591.7480183967483


In [17]:
# Equation to calculate energy consumption
def energy_consumption(day, time, outside_temp, inside_temp, booking_status, change_in_temp):
    day = day
    time = time
    outside_temp = outside_temp
    inside_temp = inside_temp
    booking_status = booking_status
    change_in_temp = change_in_temp
    energy_consumption = rf_model.predict([[day, time, outside_temp, inside_temp, booking_status]])
    return energy_consumption



In [20]:
import gym
import numpy as np
from gym import spaces
class ThermostatEnvironment(gym.Env):
    def __init__(self):
        super(ThermostatEnvironment, self).__init__()
        self.day = 1
        self.time = 1
        self.booking_status = 0
        self.outside_temp = 30
        self.inside_temp = 18 # what room temperature should be
        self.action_space = spaces.Discrete(3) #increase, decrease, maintain
        self.observation_space = spaces.Box(low=np.array([1,1,0,23,18]), high=np.array([5,24,1,40,25]), dtype=np.float32)
        self.temperature = 18 # what room temperature is
        self.max_temp = 25
        self.min_temp = 18
        self.reward = 0
        self.done = False
    def step(self, action):
        # action logic
        if action == 0: # decrease, too hot
            self.temperature -= 1
        elif action == 2: #increase, too cold
            self.temperature += 1
        else: #maintain
            pass

        # ensure temperature is within bounds
        self.temperature = np.clip(self.temperature, self.min_temp, self.max_temp)

        # calculate energy consumption
        energy = energy_consumption(self.day, self.time, self.outside_temp, self.inside_temp, self.booking_status, self.temperature - self.inside_temp)

        # calculate reward
        # reward = 0.6* energy reward + 0.4* comfort reward
        # energy reward
        energy_reward = - (energy - 3200)/3200 # 3200 is the average energy consumption

        # comfort reward
        # reward if user does not change temperature
        change_in_temp = self.temperature - self.inside_temp
        if change_in_temp == 0:
            comfort_reward = 1
        if change_in_temp > 0:
            comfort_reward = -0
        if change_in_temp < 0:
            comfort_reward = -1

        reward = 0.6* energy_reward + 0.4* comfort_reward
        self.reward += reward
        self.done = True

        return np.array([self.day, self.time, self.booking_status, self.outside_temp, self.inside_temp]), reward, self.done, {}
    
    def reset(self):
        self.day = 1
        self.time = 1
        self.booking_status = 0
        self.outside_temp = 30
        self.inside_temp = 18
        self.temperature = 18
        self.reward = 0
        self.done = False
        return np.array([self.day, self.time, self.booking_status, self.outside_temp, self.inside_temp])
    
    def render(self):
        pass
    
        


# Q Network

In [21]:
# Neural Network
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import copy

class DQN(nn.Module):
    def __init__(self, input_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 25)  # 5 input features, 1 output features
        self.fc2 = nn.Linear(25,25)
        self.fc3 = nn.Linear(25, action_size) # 10 input features, 2 output features
    
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x

# Import training data

# Training

In [22]:
# Train the model

# Hyperparameters
learning_rate = 0.001
gamma = 0.9
buffer_limit = 50000
batch_size = 32
tau = 0.01
input_size = 5
action_size = 4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the environment
env = ThermostatEnvironment()
env.reset()

# Initialize the model
model = DQN(input_size, action_size).to(device)
target_model = copy.deepcopy(model)
target_model.load_state_dict(model.state_dict())
target_model.eval()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initialize the replay buffer
replay_buffer = []
replay_buffer_size = 0

# Initialize the training parameters
num_episodes = 100
epsilon = 0.1
epsilon_decay = 0.99
epsilon_min = 0.01
update_every = 10
update_count = 0
loss_fn = nn.MSELoss()

# Training loop



  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [None]:
# Training loop

for episode in range(num_episodes):
    