In [None]:
#!pip install git+https://github.com/Total-RD/pymgrid/
#!pip install tensorforce
import numpy as np
import matplotlib.pyplot as plt
from pymgrid.Environments.pymgrid_cspla import MicroGridEnv
from tensorforce.environments import Environment
import tensorforce
from tensorforce.agents import Agent
from tensorforce.execution import Runner
import time
import json 

In [None]:
import pickle
"""
The buildings mentioned below are specific to the hackathon and are not available in this repo.
You can replace them with any MicroGrid object generated from pymgrid.
"""

# Load each pickled building in turn and prepare its train/test partition.
# NOTE(review): pickle.load can execute arbitrary code from the file — only
# use pickles that come from a trusted source.
buildings = []
for pkl_path in ('building_1.pkl', 'building_2.pkl', 'building_3.pkl'):
    with open(pkl_path, 'rb') as f:
        loaded_building = pickle.load(f)
        loaded_building.train_test_split()
    buildings.append(loaded_building)

# Keep the individual names available for any cell that refers to them.
building_1, building_2, building_3 = buildings

In [None]:
# Function to train a deep RL model with Tensorforce and then test it.
## The model is trained on building 3 and afterwards tested on all the buildings.

def train_test_deepRL(train_building_index=2, nb_hours=5846, learning_rate=1e-2,
                      horizon=1, nb_episodes=2, max_steps_per_episode=201):
    """Train a Tensorforce agent on one building and evaluate it on every building.

    All parameters are optional; the defaults reproduce the previously
    hard-coded settings, so ``train_test_deepRL()`` keeps working unchanged.

    Args:
        train_building_index: Index into the module-level ``buildings`` list of
            the microgrid used for training (default 2, i.e. building 3).
        nb_hours: ``max_episode_timesteps`` given to the Tensorforce environment.
        learning_rate: Learning rate passed to the Adam optimizer.
        horizon: Horizon of the agent's reward estimation.
        nb_episodes: Number of training episodes.
        max_steps_per_episode: Hard cap on the number of steps run in each
            training episode (an episode also stops as soon as the environment
            reports a terminal state). ``None`` disables the cap.

    Returns:
        tuple: ``(list_cost, frugality)``. ``list_cost`` holds, for each entry
        of ``buildings`` in order, the sum of the negated step rewards collected
        during its test run. ``frugality`` is the CPU time
        (``time.process_time``) spent in training plus all test runs.
    """
    building_environment_train = MicroGridEnv(
        env_config={'microgrid': buildings[train_building_index], "testing": False})

    # Creating Tensorforce environment and agent
    environment = Environment.create(
        environment=building_environment_train, max_episode_timesteps=nb_hours)

    agent = Agent.create(
        agent='tensorforce', environment=environment, update=64,
        optimizer=dict(optimizer='adam', learning_rate=learning_rate),
        objective='policy_gradient', reward_estimation=dict(horizon=horizon))

    try:
        # Training model :
        print("Begining training...")
        train_start = time.process_time()

        for _ in range(nb_episodes):
            states = environment.reset()
            terminal = False
            steps_done = 0
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)
                steps_done += 1
                # NOTE(review): when the cap is reached the episode is cut short
                # without any terminal signal to the agent — confirm this
                # truncation is intended.
                if max_steps_per_episode is not None and steps_done >= max_steps_per_episode:
                    break

        train_frugality = time.process_time() - train_start
        print("Finished training.")

        # Testing on every building (environments are built outside the timed region)
        list_cost, list_frugality_test = [], []
        building_env_test = [
            MicroGridEnv(env_config={'microgrid': building, "testing": True})
            for building in buildings
        ]

        for building_env in building_env_test:
            # Start from this test environment's own initial observation.
            # Without the reset, the first action would be picked from a stale
            # observation (the last training state, or the previous building's
            # final state).
            states = building_env.reset()

            test_start = time.process_time()
            l_rewards = []
            done = False
            while not done:
                # independent=True: act without a matching observe() call.
                action = agent.act(states, deterministic=True, independent=True)
                states, reward, done, info = building_env.step(action)
                # The cost is accumulated as the negated reward.
                l_rewards.append(-reward)
            cost = np.sum(l_rewards)
            test_end = time.process_time()

            list_cost.append(cost)
            list_frugality_test.append(test_end - test_start)

        frugality = np.sum(list_frugality_test) + train_frugality
    finally:
        # Release the agent and environment even if training/testing fails.
        agent.close()
        environment.close()

    return (list_cost, frugality)

In [None]:
# Run training + evaluation once, then split the costs per building.
list_cost, frugality = train_test_deepRL()
(cost1, cost2, cost3) = list_cost

# Report the per-building test costs followed by the overall frugality score.
for label, value in (("TEST COSTS :", list_cost), ("Total frugality:", frugality)):
    print(label, value)

**Saving results :**

In [None]:
# Gather the three per-building test costs and the frugality score in one record.
final_results = dict(zip(
    ("building_1_performance", "building_2_performance",
     "building_3_performance", "frugality"),
    (cost1, cost2, cost3, frugality),
))
print(final_results)