In [73]:
import sys
!{sys.executable} -m pip install gym





[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


## Environment Setup

In [74]:
import gym
import numpy as np
from gym import spaces

class ThermostatEnvironment(gym.Env):
    
    def __init__(self):
        super(ThermostatEnvironment, self).__init__()
        self.room_number = 1
        self.booking_status = 0
        self.outside_temp = 30
        self.inside_temp = 18
        self.action_space = spaces.Discrete(3) #increase, decrease, maintain
        self.observation_space = spaces.Box(low=np.array([1,0,23,18]), high=np.array([2000, 1, 40, 25]), dtype=np.float32)
        self.set_point = 18
        self.temperature = 18
        self.max_temp = 25
        self.min_temp = 18
        self.reward = 0
        self.done = False
    
    def step(self, action):
        #action logic
        if action == "0": #decrease
            self.temperature -=1
        elif action == "2": #increase
            self.temperature +=1
        else: #maintain
            self.temperature = self.temperature

        # ensure within range
        if self.temperature < self.min_temp:
            self.temperature = self.min_temp
        elif self.temperature > self.max_temp:
            self.temperature = self.max_temp

        # reward logic
        if self.temperature == self.set_point:
            self.reward = 1
        else:
            self.reward = 0
        return self.temperature, self.reward, self.done, {}

    def reset(self):
        self.temperature = 18
        self.room_number = 1
        self.booking_status = 0
        self.outside_temperature = 18
        return self.temperature
    
    def action_space(self):
        return self.action_space

## Neural Network

In [75]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque

class DQN(nn.Module):
    def __init__(self, input_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 10)  # 4 input features, 10 output features
        self.fc2 = nn.Linear(10, action_size) # 10 input features, 2 output features
    
    def forward(self, x):
        print("Input size: ", x.shape) 
        x = torch.relu(self.fc1(x))
        print("After fc1: ", x.shape )
        x = self.fc2(x)
        print("After fc2: ", x.shape)
        return x

## DQN Agent

In [76]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self.build_model()


    #return action based on state
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            act_values = self.model(state)
        return np.argmax(act_values[0])
    
    # remember experience
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state = torch.FloatTensor(state).unsqueeze(0)
            next_state = torch.FloatTensor(next_state).unsqueeze(0)
            target = reward
            if not done:
                with torch.no_grad():
                    target = (reward + self.gamma * torch.max(self.model(next_state)[0])).item()
            target_f = self.model(state)
            target_f[0][action] = target
            self.optimizer.zero_grad()
            loss = nn.MSELoss()(target_f, self.model(state))
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def build_model(self):
        self.model = DQN(self.state_size, self.action_size)  # Ensure this matches your DQN class
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        return self.model
    
    def train_agent(self, episodes, env, batch_size):
        for e in range(episodes):
            state = env.reset()
            state = torch.FloatTensor(state).unsqueeze(0)  # Adjust for PyTorch
            for time in range(500):
                action = self.act(state)
                next_state, reward, done, _ = env.step(action)
                reward = reward if not done else -10
                next_state = torch.FloatTensor(next_state).unsqueeze(0)  # Adjust for PyTorch
                self.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print(f"episode: {e}/{episodes}, score: {time}, e: {self.epsilon:.2}")
                    break
                if len(self.memory) > batch_size:
                    self.replay(batch_size)

    def test_agent(self, env):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0)  # Adjust for PyTorch
        done = False
        while not done:
            with torch.no_grad():
                action_values = self.model(state)
            action = torch.argmax(action_values, dim=1).item()  # Updated for PyTorch
            next_state, reward, done, _ = env.step(action)
            state = torch.FloatTensor(next_state).unsqueeze(0)  # Adjust for PyTorch
            print(f"action: {action}, reward: {reward}, done: {done}")

## Training Agent

In [77]:

# Create the environment
env = ThermostatEnvironment()
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
print("State size: ", state_size)
print("Action size: ", action_size)
# Create the agent
agent = DQNAgent(state_size, action_size)

# Train the agent
agent.train_agent(100, env, 32) #100 episodes, batch size of 32



    



State size:  4
Action size:  3
Input size:  torch.Size([1, 1, 18])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x18 and 4x10)