In [5]:
from copy import deepcopy
from pathlib import Path
import pickle
import sys
sys.path.append('..')
import numpy as np
from citylearn import  CityLearn
from agents.marlisa import MARLISA
from agents.rbc import RBC

In [2]:
# Load environment
climate_zone = 5
params = {
        'data_path':Path("../data/Climate_Zone_"+str(climate_zone)), 
        'building_attributes':'building_attributes.json', 
        'weather_file':'weather_data.csv', 
        'solar_profile':'solar_generation_1kW.csv', 
        'carbon_intensity':'carbon_intensity.csv',
        'building_ids':["Building_"+str(i) for i in [1,2,3,4,5,6,7,8,9]],
        'buildings_states_actions':'../buildings_state_action_space.json', 
        'simulation_period': (0, 8760*4-1),
        'cost_function': ['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption','carbon_emissions'], 
        'central_agent': False,
        'save_memory': False
}

# Contain the lower and upper bounds of the states and actions, to be provided to the agent 
# to normalize the variables between 0 and 1.
# Can be obtained using observations_spaces[i].low or .high
env = CityLearn(**params)
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, 
# Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correllations among buildings
building_info = env.get_building_information()

# RBC

In [None]:
results_id = 'rbc'
data = {
    'env': None,
    'agents': None,
    'actions': [],
    'rewards': [],
}

# initialize control agent
params_agent = {
    'actions_spaces':actions_spaces, 
}
agents = RBC(**params_agent)
state = env.reset()
done = False
hour_ix = 2
hour_state = np.array([[state[0][hour_ix]]])
action = agents.select_action(hour_state)
data['actions'].append(action)
  
while not done:
    print(f'\rTime step: {env.time_step}/{env.simulation_period[1]}', end=' '*20)
    next_state, _, done, _ = env.step(action)
    hour_state = np.array([[state[0][hour_ix]]])
    action_next = agents.select_action(hour_state)
    state = next_state
    action = action_next
    data['actions'].append(action)

print()
rbc_cost = deepcopy(env.cost())

# save env
filepath = f'{results_id}.pkl'
print(f'Saving simulation results...')

with open(filepath, 'wb') as f:
    data['env'] = env
    data['agents'] = agents
    pickle.dump(data, f)

print(f'Saved simulation results to {filepath}')

# MARLISA

In [None]:
results_id = 'marlisa'
data = {
    'env': None,
    'agents': None,
    'actions': [],
    'rewards': [],
}

# initialize control agent
params_agent = {
    'building_ids':["Building_"+str(i) for i in [1,2,3,4,5,6,7,8,9]],
    'buildings_states_actions':'../buildings_state_action_space.json', 
    'building_info':building_info,
    'observation_spaces':observations_spaces, 
    'action_spaces':actions_spaces, 
    'hidden_dim':[256,256], 
    'discount':0.99, 
    'tau':5e-3, 
    'lr':3e-4, 
    'batch_size':256, 
    'replay_buffer_capacity':1e5, 
    'regression_buffer_capacity':3e4, 
    'start_training':600, # Start updating actor-critic networks
    'exploration_period':7500, # Just taking random actions
    'start_regression':500, # Start training the regression model
    'information_sharing':True, # If True -> set the appropriate 'reward_function_ma' in reward_function.py
    'pca_compression':.95, 
    'action_scaling_coef':0.5, # Actions are multiplied by this factor to prevent too aggressive actions
    'reward_scaling':5., # Rewards are normalized and multiplied by this factor
    'update_per_step':2, # How many times the actor-critic networks are updated every hourly time-step
    'iterations_as':2,# Iterations of the iterative action selection (see MARLISA paper for more info)
    'safe_exploration':True
}
agents = MARLISA(**params_agent)
state = env.reset()
done = False
j = 0
is_evaluating = False
action, coordination_vars = agents.select_action(state, deterministic=is_evaluating)
data['actions'].append(action)
  
while not done:
    print(f'\rTime step: {env.time_step}/{env.simulation_period[1]}', end=' '*20)
    next_state, reward, done, _ = env.step(action)
    action_next, coordination_vars_next = agents.select_action(next_state, deterministic=is_evaluating)
    agents.add_to_buffer(state, action, reward, next_state, done, coordination_vars, coordination_vars_next)
    coordination_vars = coordination_vars_next
    state = next_state
    action = action_next
    data['actions'].append(action)
    data['rewards'].append(reward)
    is_evaluating = (j > 3*8760)
    j += 1

print()
marlisa_cost = deepcopy(env.cost())

# save env
filepath = f'{results_id}.pkl'
print(f'Saving simulation results...')

with open(filepath, 'wb') as f:
    data['env'] = env
    data['agents'] = agents
    pickle.dump(data, f)

print(f'Saved simulation results to {filepath}')

# Cost

In [11]:
# marlisa w.r.t. rbc
cost = [{
    r_k: m_v/r_v for (r_k, r_v), (_, m_v) in zip(r.items(), m.items())
} for r, m in zip(rbc_cost, marlisa_cost)]
print(cost)

[{'ramping': 0.91902566, '1-load_factor': 0.9305743334008743, 'average_daily_peak': 0.86097056, 'peak_demand': 1.0063878, 'net_electricity_consumption': 0.98314023, 'carbon_emissions': 1.001453, 'total': 0.9503686274101599, 'coordination_score': 0.929548887851526}, {'ramping_last_yr': 0.79039854, '1-load_factor_last_yr': 0.9030067580400817, 'average_daily_peak_last_yr': 0.8035298, 'peak_demand_last_yr': 0.86645424, 'net_electricity_consumption_last_yr': 0.98141176, 'carbon_emissions_last_yr': 1.005203, 'coordination_score_last_yr': 0.8415331317480884, 'total_last_yr': 0.8842199533268318}]
