In [None]:
!pip install git+https://github.com/Total-RD/pymgrid/

In [None]:
import pickle

"""
The buildings mentioned below are specific to the hackathon and are not available in this repo.
You can replace them with any MicroGrid object generated from pymgrid.
"""

def _load_building(path):
    """Unpickle the building stored at ``path`` and split it into train/test.

    Parameters
    ----------
    path : str
        Path of the pickle file to read.

    Returns
    -------
    The unpickled object, after its ``train_test_split()`` method was called.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point this at files that come from a trusted source.
    with open(path, 'rb') as f:
        building = pickle.load(f)
    building.train_test_split()
    return building


building_1 = _load_building('building_1.pkl')
building_2 = _load_building('building_2.pkl')
building_3 = _load_building('building_3.pkl')

buildings = [building_1, building_2, building_3]

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import time
import json
import sys

# Running total of CPU seconds (measured with time.process_time) spent in the
# timed training and testing cells further down.
frugality = 0

In [None]:
# Display the `architecture` attribute of every loaded building.
for building in buildings:
    print(building.architecture)

In [None]:
# Call print_info() on the third building (index 2).
buildings[2].print_info()

In [None]:
### Import the Gym environment with finite states & actions
import DiscreteEnvironment as DiscreteEnvironment 

In [None]:
### Gymify your building MicroGrid objects
# Wrap every loaded building in a discrete environment. Iterating over
# `buildings` (instead of a hard-coded range(3)) keeps this cell correct when
# the list is replaced by a different number of MicroGrid objects.
building_environments = []
for building in buildings:
    env_config = {'building': building}
    environment = DiscreteEnvironment.Environment(env_config)
    building_environments.append(environment)

In [None]:
def init_qtable(env, nb_action):
    """Build a zero-initialised Q-table for every discretised state of ``env``.

    A state is a tuple ``(i, soc)`` where

    * ``i`` is every integer from ``-int(PV_rated_power - 1)`` up to
      ``int(load + 2) - 1`` (both values read from ``env.mg.parameters``);
      presumably this is the net load -- confirm against the environment;
    * ``soc`` is every multiple of 0.1 from ``round(soc_min, 1)`` up to, but
      not including, ``round(soc_max + 0.1, 1)`` (read from
      ``env.mg.battery``).

    Parameters
    ----------
    env : object
        Environment exposing ``mg.parameters`` and ``mg.battery``.
    nb_action : int
        Number of discrete actions; each state maps actions ``0..nb_action-1``
        to an initial value of 0.

    Returns
    -------
    dict
        ``Q[state][action] -> 0`` for every state/action pair.
    """
    params = env.mg.parameters
    battery = env.mg.battery

    first_lo = -int(params['PV_rated_power'] - 1)
    first_hi = int(params['load'] + 2)

    # Enumerate the SoC grid in integer tenths. np.arange with a 0.1 float
    # step has an unstable length (the stop value may or may not be emitted
    # depending on rounding error), which can add a spurious level above
    # soc_max; integer stepping makes the half-open interval exact.
    soc_lo = int(round(round(battery.soc_min, 1) * 10))
    soc_hi = int(round(round(battery.soc_max + 0.1, 1) * 10))
    soc_levels = [round(tenth / 10, 1) for tenth in range(soc_lo, soc_hi)]

    # Initialize Q(s, a) at zero for every state and action.
    Q = {}
    for i in range(first_lo, first_hi):
        for soc in soc_levels:
            Q[(i, soc)] = {a: 0 for a in range(nb_action)}

    return Q

In [None]:
def espilon_decreasing_greedy(action, epsilon, nb_action):
    """Epsilon-greedy selection around a proposed ``action``.

    Draws one uniform sample; with probability ``1 - epsilon`` the proposed
    ``action`` is returned unchanged, otherwise an action index is drawn
    uniformly from ``range(nb_action)`` via ``np.random.choice``.
    """
    keep_proposed = np.random.random() < (1 - epsilon)
    return action if keep_proposed else np.random.choice(nb_action)

def max_dict(d):
    """Return the key of ``d`` whose value is largest.

    On ties the first such key in the dict's iteration order wins; an empty
    dict raises ``ValueError`` (from ``max``).
    """
    best_key, _ = max(d.items(), key=lambda item: item[1])
    return best_key

def update_epsilon(epsilon, eps_decay=0.05, eps_min=0.1):
    """Decay the exploration rate multiplicatively, with a lower bound.

    Parameters
    ----------
    epsilon : float
        Current exploration probability.
    eps_decay : float
        Fraction removed at each call; the new value is
        ``epsilon * (1 - eps_decay)``.
    eps_min : float
        Floor below which the returned value never drops. Defaults to 0.1,
        the value that was previously hard-coded.

    Returns
    -------
    float
        ``max(epsilon * (1 - eps_decay), eps_min)``.
    """
    return max(epsilon * (1 - eps_decay), eps_min)

# Human-readable label of each action index, used when printing test runs
# for environments selected with genset=False.
action_name_grid = dict(enumerate([
    "Charge the battery from the PV",
    "Discharge the battery",
    "Import from the grid",
    "Export to the grid",
    "Charge the battery from the grid",
]))

# Labels used when testing_Q_Learning is called with genset=True.
action_name_genset = dict(enumerate([
    "Charge the battery from the PV",
    "Discharge the battery",
    "Import from the grid",
    "Export to the grid",
    "Use Generator",
    "Charge the battery from the grid",
    "Use Generator and Discharge Battery",
]))

def print_results():
    """Print the two-line column header shown above the per-step test log."""
    header = "t -     STATE  -  ACTION - COST"
    rule = "================================"
    print(header, rule, sep="\n")

In [None]:
def training_Q_Learning(env, nb_episode=50, alpha=0.1, epsilon=0.9, eps_decay=0.05, gamma=0.9):
    """Train a tabular Q-learning agent on ``env``.

    Parameters
    ----------
    env : object
        Environment exposing ``action_space.n``, ``reset()``, ``transition()``
        (current state), ``step(action)`` returning a 4-tuple
        ``(next_state, reward, done, info)`` and ``get_cost()``.
    nb_episode : int
        Index of the last episode. Episodes ``0 .. nb_episode`` are run, i.e.
        ``nb_episode + 1`` in total; kept as-is so the length of the returned
        cost list does not change for existing callers.
    alpha : float
        Learning rate of the Q update.
    epsilon : float
        Initial exploration probability of the epsilon-greedy behaviour
        policy; decayed after every episode with ``update_epsilon``.
    eps_decay : float
        Decay fraction forwarded to ``update_epsilon``.
    gamma : float
        Discount factor applied to the bootstrapped next-state value.

    Returns
    -------
    tuple
        ``(Q, record_cost)`` where ``Q[state][action]`` is the learned value
        table and ``record_cost`` holds ``env.get_cost()`` after each episode.
    """
    nb_action = env.action_space.n
    Q = init_qtable(env, nb_action)

    record_cost = []
    print_training = "Training Progressing ... "

    for e in range(nb_episode + 1):

        # "\r" rewrites the same console line to show a live progress counter.
        value_print = "\r" + print_training + "Episode " + str(e) + "/" + str(nb_episode)
        sys.stdout.write(value_print)
        sys.stdout.flush()

        env.reset()
        s = env.transition()
        a = espilon_decreasing_greedy(max_dict(Q[s]), epsilon, nb_action)
        done = False

        while not done:

            s_, r, done, _ = env.step(a)

            # Greedy action in the next state: Q-learning bootstraps from the
            # best next-state value regardless of what is explored next.
            a_greedy = max_dict(Q[s_])

            # No bootstrapping from a terminal transition.
            target = r if done else r + gamma * Q[s_][a_greedy]

            # Update the value of the action that was actually executed.
            # Equivalent to (1 - alpha) * Q + alpha * target; the former code
            # mixed both forms and subtracted alpha * Q twice.
            Q[s][a] += alpha * (target - Q[s][a])

            # Pick the behaviour action for the next step only after the
            # update, so exploration never changes which Q entry is credited
            # for the reward just observed.
            s = s_
            a = espilon_decreasing_greedy(a_greedy, epsilon, nb_action)

        record_cost.append(env.get_cost())
        epsilon = update_epsilon(epsilon, eps_decay)

    return Q, record_cost

In [None]:
def testing_Q_Learning(env, Q, genset=False):
    """Roll out the greedy policy of ``Q`` on ``env`` and log every step.

    The environment is reset with ``testing=True``; at each step the action
    with the highest Q-value for the current state is executed, the returned
    ``r`` is subtracted from the running cost, and one line
    (step index, state, action label, cumulative cost) is printed.

    Parameters
    ----------
    env : object
        Environment exposing ``reset(testing=True)``, ``transition()`` and
        ``step(action)`` returning a 4-tuple.
    Q : dict
        Table ``Q[state][action]`` as returned by ``training_Q_Learning``.
    genset : bool
        When true, action labels come from ``action_name_genset``; otherwise
        from ``action_name_grid``.

    Returns
    -------
    The cumulative cost rounded to one decimal.
    """
    labels = action_name_genset if genset else action_name_grid

    env.reset(testing=True)
    state = env.transition()
    action = max_dict(Q[state])

    step_idx = 0
    total_cost = 0
    print_results()
    finished = False

    while not finished:

        label = labels[action]
        next_state, reward, finished, _ = env.step(action)
        total_cost -= reward

        # Single-digit step indices get an extra leading space before the
        # dash so the columns stay aligned.
        separator = " -" if step_idx < 10 else "-"
        print(step_idx, separator, (int(state[0]), state[1]), label, round(total_cost, 1), "€")

        step_idx += 1
        next_action = max_dict(Q[next_state])
        state, action = next_state, next_action

    return round(total_cost, 1)

In [None]:
# Train on building 1 and add the CPU time it took to the frugality total.
start = time.process_time()
Q1, record_cost1 = training_Q_Learning(building_environments[0], epsilon=0.8, gamma=0.9)
end = time.process_time()
frugality += end - start

In [None]:
# Cost recorded at the end of each training episode for building 1.
plt.plot(record_cost1)

In [None]:
# Evaluate the learned table on building 1 (default grid action labels) and
# add the CPU time it took to the frugality total.
start = time.process_time()
total_cost1 = testing_Q_Learning(building_environments[0], Q1)
end = time.process_time()
frugality += end - start

In [None]:
# Cumulative test cost returned by testing_Q_Learning for building 1.
print("total cost for test building 1:", total_cost1, "€")

In [None]:
# Train on building 2 and add the CPU time it took to the frugality total.
start = time.process_time()
Q2, record_cost2 = training_Q_Learning(building_environments[1], epsilon=0.8, gamma=0.9)
end = time.process_time()
frugality += end - start

In [None]:
# Cost recorded at the end of each training episode for building 2.
plt.plot(record_cost2)

In [None]:
# Evaluate the learned table on building 2 (default grid action labels) and
# add the CPU time it took to the frugality total.
start = time.process_time()
total_cost2 = testing_Q_Learning(building_environments[1], Q2)
end = time.process_time()
frugality += end - start

In [None]:
# Cumulative test cost returned by testing_Q_Learning for building 2.
print("total cost for test building 2:", total_cost2, "€")

In [None]:
# Train on building 3 and add the CPU time it took to the frugality total.
start = time.process_time()
Q3, record_cost3 = training_Q_Learning(building_environments[2], epsilon=0.8, gamma=0.9)
end = time.process_time()
frugality += end - start

In [None]:
# Cost recorded at the end of each training episode for building 3.
plt.plot(record_cost3)

In [None]:
# Evaluate the learned table on building 3; genset=True makes the step log use
# the action_name_genset labels. The CPU time is added to the frugality total.
start = time.process_time()
total_cost3 = testing_Q_Learning(building_environments[2], Q3, genset=True)
end = time.process_time()
frugality += end - start

In [None]:
# Cumulative test cost returned by testing_Q_Learning for building 3.
print("total cost for test building 3:", total_cost3, "€")

In [None]:
# Summary of the run: test cost of each building plus the accumulated CPU
# seconds of all timed training and testing cells.
final_results = {
    "building_1_performance" : total_cost1,
    "building_2_performance" : total_cost2,
    "building_3_performance" : total_cost3,
    "frugality" : frugality
}
print(final_results)