In [3]:
!pip install gym


Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m81.8 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting gym_notices>=0.0.4 (from gym)
  Downloading gym_notices-0.0.8-py3-none-any.whl.metadata (1.0 kB)
Downloading gym_notices-0.0.8-py3-none-any.whl (3.0 kB)
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827728 sha256=23ed9276335a749bc3a38224acc9df8177634509490f1c0a99dffc8062550a4a
  Stored in directory: /Users/guruprasad/Library/Caches/pip/wheels/95/51/6c/9bb05ebbe7c5cb8171dfaa3611f32622ca4658d53f31c79077
Successfully built gym
Installing collected packages: gym_notices, gym
Successfully instal

In [5]:
import gym
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from collections import deque

# Define climate conditions for different countries
country_climate = {
    'USA': {'indoor': (20, 25), 'outdoor': (-10, 35)},
    'India': {'indoor': (22, 28), 'outdoor': (10, 40)},
    'Canada': {'indoor': (18, 24), 'outdoor': (-30, 30)},
    'UK': {'indoor': (20, 24), 'outdoor': (-5, 30)},
}

# Custom Environment for Thermostat
class SmartThermostatEnv(gym.Env):
    def __init__(self, country='USA', manual_mode=False):
        super(SmartThermostatEnv, self).__init__()
        
        self.observation_space = gym.spaces.Box(low=np.array([10, -10, 0, 0]), 
                                                high=np.array([35, 40, 2, 1]), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(3)
        
        self.country_defaults = {'USA': 22, 'India': 24, 'Canada': 20, 'UK': 21}
        self.country = country
        self.target_temp = self.country_defaults.get(country, 22)
        
        self.manual_mode = manual_mode
        self.max_steps = 100
        self.current_step = 0
    
    def step(self, action):
        if self.manual_mode:
            return self._get_state(), 0, False, {}
        
        # Apply action: 0 = Decrease, 1 = Maintain, 2 = Increase
        if action == 0:
            self.indoor_temp -= 1
        elif action == 1:
            self.indoor_temp += 0
        elif action == 2:
            self.indoor_temp += 1
        
        # Bound temperature
        self.indoor_temp = max(10, min(35, self.indoor_temp))
        
        # Calculate reward (always positive)
        comfort_reward = 10 - min(10, abs(self.target_temp - self.indoor_temp))
        energy_reward = 5 if action == 1 else 4.5
        base_reward = 5
        reward = comfort_reward + energy_reward + base_reward  # 9.5 to 20
        
        self.current_step += 1
        done = self.current_step >= self.max_steps
        return self._get_state(), reward, done, {}
    
    def reset(self):
        indoor_range = country_climate[self.country]['indoor']
        outdoor_range = country_climate[self.country]['outdoor']
        self.indoor_temp = random.randint(indoor_range[0], indoor_range[1])
        self.outdoor_temp = random.randint(outdoor_range[0], outdoor_range[1])
        self.time_of_day = random.choice([0, 1, 2])
        self.occupied = random.choice([0, 1])
        self.current_step = 0
        return self._get_state()
    
    def _get_state(self):
        return np.array([self.indoor_temp, self.outdoor_temp, self.time_of_day, self.occupied], dtype=np.float32)

# DQN Agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.0005
        self.model = self._build_model()
    
    def _build_model(self):
        model = Sequential([
            Dense(64, activation='relu', input_dim=self.state_size),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def act(self, state, train=True):
        if train and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(np.array([state]), verbose=0)
        return np.argmax(q_values[0])
    
    def replay(self, batch_size=32):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(np.array([next_state]), verbose=0)[0])
            target_f = self.model.predict(np.array([state]), verbose=0)
            target_f[0][action] = target
            self.model.fit(np.array([state]), target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Function to select a country
def select_country():
    print("Select a country:")
    for country in country_climate.keys():
        print(f"- {country}")
    
    selected_country = input("Enter the country name: ").strip()
    if selected_country in country_climate:
        return selected_country
    else:
        print("Invalid country selected. Defaulting to USA.")
        return 'USA'

# Function to generate climate conditions
def generate_climate_conditions(country):
    indoor_range = country_climate[country]['indoor']
    outdoor_range = country_climate[country]['outdoor']
    indoor_temp = random.randint(indoor_range[0], indoor_range[1])
    outdoor_temp = random.randint(outdoor_range[0], outdoor_range[1])
    return indoor_temp, outdoor_temp

# Function to get user feedback
def get_user_feedback(indoor_temp, outdoor_temp):
    print(f"Suggested Indoor Temperature: {indoor_temp}°C")
    print(f"Outdoor Temperature: {outdoor_temp}°C")
    feedback = input("Do you like this temperature? (yes/no): ").strip().lower()
    return feedback == 'yes'

# Function for manual temperature setting
def manual_temperature_setting(outdoor_temp):
    while True:
        try:
            indoor_temp = float(input("Enter your preferred indoor temperature (10-35°C): "))
            if 10 <= indoor_temp <= 35:
                break
            else:
                print("Temperature must be between 10°C and 35°C.")
        except ValueError:
            print("Invalid input. Please enter a number.")
    return indoor_temp, outdoor_temp

# Train the Agent and Post-Training Interaction
if __name__ == "__main__":
    # Training Phase
    env = SmartThermostatEnv(country='USA')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)

    EPISODES = 500
    print("Training the agent...")
    for e in range(EPISODES):
        state = env.reset()
        total_reward = 0
        done = False
        while not done:
            action = agent.act(state, train=True)
            next_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
        agent.replay()
        if e % 50 == 0:
            print(f"Episode {e}/{EPISODES}, Reward: {total_reward:.2f}, Epsilon: {agent.epsilon:.2f}")
    print("Training complete!")

    # Post-Training Interaction
    while True:
        print("\nPost-training options:")
        selected_country = select_country()
        env = SmartThermostatEnv(country=selected_country)
        
        # Generate initial climate conditions
        indoor_temp, outdoor_temp = generate_climate_conditions(selected_country)
        state = np.array([indoor_temp, outdoor_temp, random.choice([0, 1, 2]), random.choice([0, 1])], dtype=np.float32)
        
        # Use trained agent to suggest temperature
        action = agent.act(state, train=False)
        if action == 0:
            suggested_temp = indoor_temp - 1
        elif action == 1:
            suggested_temp = indoor_temp
        else:
            suggested_temp = indoor_temp + 1
        
        # Bound the suggested temperature
        suggested_temp = max(10, min(35, suggested_temp))
        
        print(f"\nCountry: {selected_country}")
        print(f"Initial Indoor Temp: {indoor_temp}°C, Outdoor Temp: {outdoor_temp}°C")
        
        # Get user feedback
        if get_user_feedback(suggested_temp, outdoor_temp):
            print("Great! Using the suggested temperature.")
            final_temp = suggested_temp
        else:
            print("Switching to manual mode...")
            final_temp, outdoor_temp = manual_temperature_setting(outdoor_temp)
            print(f"Manually set temperature: {final_temp}°C")
        
        # Display final choice
        print(f"Final Indoor Temperature: {final_temp}°C for {selected_country}")
        
        # Ask if user wants to continue
        cont = input("Would you like to select another country? (yes/no): ").strip().lower()
        if cont != 'yes':
            break
    
    print("Program ended.")

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training the agent...
Episode 0/500, Reward: 1342.00, Epsilon: 0.99
Episode 50/500, Reward: 1031.00, Epsilon: 0.77
Episode 100/500, Reward: 1076.00, Epsilon: 0.60
Episode 150/500, Reward: 1185.00, Epsilon: 0.47
Episode 200/500, Reward: 987.50, Epsilon: 0.37
Episode 250/500, Reward: 984.00, Epsilon: 0.28
Episode 300/500, Reward: 1052.00, Epsilon: 0.22
Episode 350/500, Reward: 1848.50, Epsilon: 0.17
Episode 400/500, Reward: 996.50, Epsilon: 0.13
Episode 450/500, Reward: 1007.50, Epsilon: 0.10
Training complete!

Post-training options:
Select a country:
- USA
- India
- Canada
- UK


Enter the country name:  India



Country: India
Initial Indoor Temp: 28°C, Outdoor Temp: 39°C
Suggested Indoor Temperature: 28°C
Outdoor Temperature: 39°C


Do you like this temperature? (yes/no):  yes


Great! Using the suggested temperature.
Final Indoor Temperature: 28°C for India


Would you like to select another country? (yes/no):  uk


Program ended.
