In [None]:
# Importation of necessary packages

import time # Necessary to evaluate frugality
import json # Necessary to export your results
import DiscreteEnvironment as DiscreteEnvironment # Imposed Discrete Environment
import numpy as np
import pickle

## Other packages
import matplotlib.pyplot as plt

## Setup for environment and agents

In [None]:
# Environment
# The buildings mentioned below are specific to the hackathon and are not
# available in this repo. You can replace them with any MicroGrid object
# generated from pymgrid.

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load building files that come from a trusted source.
BUILDING_FILES = ['building_1.pkl', 'building_2.pkl', 'building_3.pkl']

buildings = []
for building_file in BUILDING_FILES:
    with open(building_file, 'rb') as f:
        buildings.append(pickle.load(f))

# Keep the individual names available for any cell that refers to them directly.
building_1, building_2, building_3 = buildings

# One discrete environment per building, in the same order as `buildings`.
building_environments = [
    DiscreteEnvironment.Environment(env_config={'building': building})
    for building in buildings
]


In [None]:
# Q-table initialisation

def init_qtable(env, nb_action):
    """Build a Q-table with every entry set to zero.

    States are ``(i, soc)`` tuples where ``i`` spans the integers from
    ``-int(PV_rated_power - 1)`` up to ``int(load + 2) - 1`` and ``soc`` spans
    the battery state-of-charge grid from ``soc_min`` to ``soc_max`` in steps
    of 0.1 (rounded to one decimal).
    NOTE(review): ``i`` presumably represents the net load (load minus PV
    production) -- confirm against DiscreteEnvironment.

    Args:
        env: environment exposing ``env.mg.parameters`` (indexable by
            ``'PV_rated_power'`` and ``'load'``) and ``env.mg.battery``
            (with ``soc_min`` / ``soc_max``).
        nb_action: number of discrete actions; actions are keyed ``0..nb_action-1``.

    Returns:
        dict mapping each state tuple to a dict ``{action: 0}``.
    """
    params = env.mg.parameters
    battery = env.mg.battery

    first_axis = range(-int(params['PV_rated_power'] - 1), int(params['load'] + 2))

    # Round every grid point to one decimal so keys are clean 0.1 multiples
    # despite the floating-point step used by np.arange.
    soc_axis = [
        round(soc, 1)
        for soc in np.arange(round(battery.soc_min, 1), round(battery.soc_max + 0.1, 1), 0.1)
    ]

    # Each state gets its own fresh {action: 0} dict (no sharing between states).
    return {
        (first, soc): {action: 0 for action in range(nb_action)}
        for first in first_axis
        for soc in soc_axis
    }

In [None]:
# Agent 

class QAgent:
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy.

    Attributes:
        actions: list of available action identifiers (e.g. ``[0, 1, 2, 3, 4]``).
        env: environment the Q-table is built for.
        alpha: learning rate used in the Q-value update.
        gamma: discount factor applied to the bootstrapped next-state value.
        epsilon: probability of taking a random action in ``choose_action``.
        q_table: ``{state: {action: q_value}}`` mapping built by ``init_qtable``.
    """

    def __init__(self, actions, env, epsilon):
        # actions = [0, 1, 2, 3, 4]
        self.actions = actions
        self.env = env
        self.alpha = 0.1
        self.gamma = 0.99
        self.epsilon = epsilon
        self.q_table = init_qtable(self.env, len(self.actions))

    def choose_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest Q-value for ``state``.
        """
        if np.random.rand() < self.epsilon:
            # Explore: take a random action.
            return np.random.choice(self.actions)
        # Exploit: take the best action according to the Q-table.
        return self.max_dict(self.q_table[state])[0]

    def updateQ(self, state, action, reward, next_state):
        """Apply one Q-learning update for the sample ``<s, a, r, s'>``.

        Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        """
        current_q = self.q_table[state][action]
        # The max must run over the Q-VALUES of the next state. Iterating the
        # dict directly would yield its keys (the action ids) instead.
        best_next_q = max(self.q_table[next_state].values())
        td_target = reward + self.gamma * best_next_q
        self.q_table[state][action] += self.alpha * (td_target - current_q)

    def max_dict(self, d):
        """Return ``(key, value)`` of the largest value in ``d``.

        Ties are resolved in favour of the first key encountered. For an empty
        dict the result is ``(None, -inf)``.
        """
        max_key = None
        max_val = float('-inf')

        for k, v in d.items():
            if v > max_val:
                max_val = v
                max_key = k

        return max_key, max_val

    def update_epsilon(self, ep):
        """Decay ``ep`` by 5% (floored at 0.01) and store it as the new epsilon."""
        ep = ep - ep * 0.05
        if ep < 0.01:
            ep = 0.01
        self.epsilon = ep

## Training of the agent

In [None]:

# Train one Q-learning agent per building and record the cost reached at the
# end of every training episode. The CPU time of the whole training phase is
# measured between `train_start` and `train_end`.

N_TRAIN_EPISODES = 100
AGENT_ACTIONS = [0, 1, 2, 3, 4]
INITIAL_EPSILON = 0.99


def train_agent(env, n_episodes=N_TRAIN_EPISODES, actions=AGENT_ACTIONS,
                epsilon=INITIAL_EPSILON):
    """Train a fresh QAgent on ``env`` and return it with its per-episode costs.

    Args:
        env: environment providing ``reset()``, ``step(action)`` and ``get_cost()``.
        n_episodes: number of training episodes to run.
        actions: action identifiers handed to the agent.
        epsilon: initial exploration rate of the agent.

    Returns:
        ``(agent, cost_list)`` where ``cost_list[k]`` is ``env.get_cost()`` at
        the end of episode ``k``.
    """
    agent = QAgent(actions=list(actions), env=env, epsilon=epsilon)
    cost_list = []

    for _episode in range(n_episodes):
        state = env.reset()
        done = False

        while not done:
            # Take an action and proceed one step in the environment.
            action = agent.choose_action(state)
            next_state, reward, done, _info = env.step(action)

            # With sample <s, a, r, s'>, the agent learns a new Q-value.
            agent.updateQ(state, action, reward, next_state)
            # Decay exploration after every step. This was missing for the
            # first building, whose agent therefore kept acting ~99% randomly.
            agent.update_epsilon(agent.epsilon)

            state = next_state

        cost_list.append(env.get_cost())

    return agent, cost_list


train_start = time.process_time()

env0 = building_environments[0]
agent0, cost_list0 = train_agent(env0)

env1 = building_environments[1]
agent1, cost_list1 = train_agent(env1)

env2 = building_environments[2]
agent2, cost_list2 = train_agent(env2)

train_end = time.process_time()



In [None]:
# CPU time (in seconds, from time.process_time()) spent between the
# `train_start` and `train_end` readings taken around the training phase.
train_frugality = train_end - train_start

## Testing the agent

In [None]:
# Evaluate each trained agent greedily (no exploration) on its own building
# and accumulate the rewards collected over the whole test run.
test_start = time.process_time()

agent_list = [agent0, agent1, agent2]
total_cost = [0, 0, 0]

for i, building_env in enumerate(building_environments):
    agent = agent_list[i]
    obs = building_env.reset(testing=True)

    episode_reward = 0
    done = False
    while not done:
        # Always pick the action with the highest learned Q-value.
        action, _best_q = agent.max_dict(agent.q_table[obs])
        obs, reward, done, info = building_env.step(action)
        episode_reward += reward

    total_cost[i] += episode_reward

test_end = time.process_time()

In [None]:
# CPU time (in seconds, from time.process_time()) spent between the
# `test_start` and `test_end` readings taken around the testing phase.
test_frugality = test_end - test_start


In [None]:
# Summarise the run: per-building performance is the negated accumulated
# reward, and frugality is the total CPU time of training plus testing.
final_results = {
    f"building_{number}_performance": -1 * accumulated_reward
    for number, accumulated_reward in enumerate(total_cost[:3], start=1)
}
final_results["frugality"] = train_frugality + test_frugality
print(final_results)