## Submission Notebook 

<b> Install your packages below: </b>

In [None]:
!pip install cvxpy
!pip install -r requirements.txt

In the section below, you must run your methodology for solving the problem from start to finish:

In [None]:
import pickle

# The buildings mentioned below are specific to the hackathon and are not available in this repo.
# You can replace them with any MicroGrid object generated from pymgrid.


def _load_building(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at files you trust.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Loaded one at a time, in order, so each name is bound as soon as its file is read.
building_1 = _load_building('building_1.pkl')
building_2 = _load_building('building_2.pkl')
building_3 = _load_building('building_3.pkl')

buildings = [building_1, building_2, building_3]

In [None]:
import time # Necessary to evaluate frugality
from pymgrid.Environments.pymgrid_cspla import MicroGridEnv # Imposed Environment
import numpy as np

## Import your favourite Deep Learning library for RL and other packages here
from train_per import *
from wrappers import NormalizedMicroGridEnv

In [None]:
# Below is an environment initialization without a Deep RL library; the code can
# vary depending on which library you use.
#
# One training environment is created per entry of `buildings` (instead of a
# hard-coded count), so the list stays in step with however many buildings
# were loaded.
building_environments = [
    MicroGridEnv(env_config={'microgrid': microgrid})
    for microgrid in buildings
]


<b> 3) Training of the agent </b>

In [None]:
# One reference score per building, paired positionally with `building_environments`
# by the training loop and passed to train() as `perfect_train_score`.
# NOTE(review): the origin of these numbers is not shown here — presumably the
# scores of a perfect-foresight baseline; confirm.
perfect_train_scores = [4068.5, 13568.92, 15345.97]
# Early-stop threshold: training stops once the relative difference between the
# episode score and the reference (or the recent mean score) is <= this value.
similarity_stop = 0.3
# Upper bound on the number of training episodes per building.
MAX_EPISODES = 10

def train(building_idx, building_env, perfect_train_score):
    """Train a ``DQNAgent`` on one building, checkpointing after every episode.

    The environment is wrapped in ``NormalizedMicroGridEnv`` and a fresh agent is
    trained for at most ``MAX_EPISODES`` episodes. At the end of each episode the
    target model is synchronised, the learning-rate scheduler is stepped, and the
    model is saved twice under ``./save_model/``: once as the latest checkpoint
    for this building (``per_dqn_<building_idx>``) and once as a per-episode
    snapshot tagged with ``current_time`` and the zero-padded episode number.

    Training stops early when the relative difference between the episode score
    and either ``perfect_train_score`` or the mean of the five previous episode
    scores is at most ``similarity_stop``.

    NOTE(review): ``DQNAgent``, ``deque``, ``torch`` and ``current_time`` are not
    defined in this notebook — presumably they arrive via
    ``from train_per import *``; confirm.

    Args:
        building_idx: Index of the building; used in the checkpoint file names.
        building_env: Environment to train on (wrapped before use).
        perfect_train_score: Reference score for the first early-stop criterion.

    Returns:
        None. The trained model is only available through the saved checkpoints.
    """
    import os  # function-scope import: keeps this cell independent of the import cell

    env = NormalizedMicroGridEnv(building_env)

    state_size = env.observation_space.low.size
    action_size = env.action_space.n

    agent = DQNAgent(state_size, action_size)
    scores_list = deque(maxlen=5)  # sliding window of the most recent episode scores

    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first episode finishes.
    os.makedirs("./save_model", exist_ok=True)

    for e in range(MAX_EPISODES):
        done = False
        score = 0

        state = env.reset()
        state = np.reshape(state, [1, state_size])

        while not done:
            # get action for the current state and go one step in environment
            action = agent.get_action(state)

            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])

            # save the sample <s, a, r, s'> to the replay memory
            agent.append_sample(state, action, reward, next_state, done)
            # train on every step once the replay memory holds enough samples
            if agent.memory.tree.n_entries >= agent.train_start:
                agent.train_model()

            score += reward
            state = next_state

        # --- end of episode ---
        # every episode update the target model to be same with model
        agent.update_target_model()
        agent.scheduler.step()

        print("episode:", e, "\tscore:", score, "\tmemory length:",
                agent.memory.tree.n_entries, "\tepsilon:", agent.epsilon,
                "\tlearning rate:", agent.scheduler.get_last_lr()[0])

        # Latest checkpoint (overwritten each episode) plus a per-episode snapshot.
        torch.save(agent.model, "./save_model/per_dqn_" + str(building_idx))
        torch.save(agent.model, "./save_model/per_dqn_" +  str(building_idx) + "_" +
                    current_time + "_" + str(e).zfill(5))

        # Early stop 1: close enough to the reference score.
        similarity = abs(score - perfect_train_score) / perfect_train_score
        print("Similarity to perfect score", similarity)
        if similarity <= similarity_stop:
            print("Reached similarity stop of", similarity_stop)
            return

        # Early stop 2: score has stabilised around the mean of the previous
        # episodes (only checked once the window is full).
        if len(scores_list) >= scores_list.maxlen:
            mean = np.mean(scores_list)
            similarity = abs(score - mean) / abs(mean)
            print("Similarity to stable score", similarity)
            if similarity <= similarity_stop:
                print("Reached stable score")
                return
        scores_list.append(score)

# Time the whole training phase with process CPU time.
train_start = time.process_time()

# Each environment is paired positionally with its reference score.
for building_idx, env_and_score in enumerate(zip(building_environments, perfect_train_scores)):
    building_env, perfect_train_score = env_and_score
    train(building_idx, building_env, perfect_train_score)

train_end = time.process_time()
train_frugality = train_end - train_start
print(train_frugality)

<b> 4) Test of the agent </b>

In [None]:


# Evaluate each building's latest saved model on its testing environment, always
# taking the highest-valued action, and record the accumulated reward per
# building. The whole phase is timed with process CPU time.
test_start = time.process_time()
# One slot / one testing environment per loaded building (no hard-coded count).
total_cost = [0] * len(buildings)
# NOTE: this rebinds `building_environments`; re-running the training cell after
# this one would therefore train on the testing environments.
building_environments = [
    MicroGridEnv(env_config={'microgrid': microgrid, "testing": True})
    for microgrid in buildings
]

for i, building_env in enumerate(building_environments):
    test_done = False
    test_score = 0

    test_env = NormalizedMicroGridEnv(building_env)
    state_size = test_env.observation_space.low.size
    action_size = test_env.action_space.n

    # The agent is only used as a holder for the model saved during training.
    agent = DQNAgent(state_size, action_size)
    # NOTE(security): torch.load unpickles the file; only load checkpoints you created.
    agent.model = torch.load('save_model/per_dqn_' + str(i))
    agent.model.train(False)  # switch the model out of training mode

    state = test_env.reset()
    state = np.reshape(state, [1, state_size])

    with torch.no_grad():
        while not test_done:
            state = torch.from_numpy(state).float().cpu()
            q_value = agent.model(state)
            # index of the largest value along dim 1
            _, action = torch.max(q_value, 1)
            action = int(action)
            next_state, reward, test_done, info = test_env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            test_score += reward
            state = next_state

    # The loop above only exits once the episode is done.
    total_cost[i] = test_score

test_end = time.process_time()

test_frugality = test_end - test_start
print(test_frugality)

In [None]:
# Overall frugality: CPU time spent in training plus CPU time spent in testing
# (both measured with time.process_time()).
frugality = train_frugality + test_frugality
print(frugality)

<b> 5) Store & Export Results in JSON format </b>

In [None]:
# Submission summary: the accumulated test score stored in `total_cost` for each
# of the three buildings, followed by the combined train + test CPU time.
final_results = {
    key: total_cost[position]
    for position, key in enumerate((
        "building_1_performance",
        "building_2_performance",
        "building_3_performance",
    ))
}
final_results["frugality"] = train_frugality + test_frugality
print(final_results)