In [1]:
import os
import sys
import shutil
import pickle

glp = os.path.abspath("../code")
if not glp in sys.path: sys.path.append( glp )

from global_paths import global_paths

if not global_paths["COBS"] in sys.path: sys.path.append( global_paths["COBS"] )

In [2]:
import cobs
import torch
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
from BuildingOccupancy import Person, Meeting, WeeklyMeeting, OneTimeMeeting, BuildingOccupancy
from DefaultBuildings import Building_5ZoneAirCooled, Building_5ZoneAirCooled_SingleAgent
from Agents import agent_constructor
from CentralController import ddpg_episode_mc
import RLCritics
import StateUtilities as SU

In [14]:
dirname = '../scripts/checkpoints/011-20210311-2155/'

In [5]:
cobs.Model.set_energyplus_folder(global_paths["eplus"])

In [6]:
building = Building_5ZoneAirCooled_SingleAgent()

In [7]:
# Define the agents
agents1 = []
for agent_name, (controlled_device, controlled_device_type) in building.agent_device_pairing.items():
    new_agent = agent_constructor( controlled_device_type )
    new_agent.initialize(name = agent_name,
                         controlled_element = controlled_device,
                         global_state_keys  = building.global_state_variables)
    agents1.append(new_agent)

# Define the agents
agents2 = []
for agent_name, (controlled_device, controlled_device_type) in building.agent_device_pairing.items():
    new_agent = agent_constructor( controlled_device_type )
    new_agent.initialize(name = agent_name,
                         controlled_element = controlled_device,
                         global_state_keys  = building.global_state_variables)
    agents2.append(new_agent)

In [8]:
# Define the critics
critics1 = []
critics2 = []
ciritic_input_variables=["Minutes of Day","Day of Week","Calendar Week",
                         "Outdoor Air Temperature","Outdoor Air Humidity",
                         "Outdoor Wind Speed","Outdoor Wind Direction",
                         "Outdoor Solar Radi Diffuse","Outdoor Solar Radi Direct"]
for vartype in ["Zone Temperature","Zone People Count",
                "Zone Relative Humidity",
                "Zone VAV Reheat Damper Position","Zone CO2"]:
    ciritic_input_variables.extend( [f"SPACE{k}-1 {vartype}" for k in range(1,6)] )
for agent in agents1:
    new_critic = RLCritics.CriticMergeAndOnlyFC(
                    input_variables=ciritic_input_variables,
                    agents = agents1,
                    global_state_keys=building.global_state_variables)
    critics1.append(new_critic)

for agent in agents1:
    new_critic = RLCritics.CriticMergeAndOnlyFC(
                    input_variables=ciritic_input_variables,
                    agents = agents1,
                    global_state_keys=building.global_state_variables)
    critics2.append(new_critic)

In [15]:
agents1[0].load_models_from_disk(dirname, "episode_0_")
agents2[0].load_models_from_disk(dirname, "episode_1_")

In [16]:
critics1[0].load_models_from_disk(dirname, "episode_0_")
critics2[0].load_models_from_disk(dirname, "episode_1_")

In [17]:
params1a = agents1[0].model_actor.parameters()
params2a = agents2[0].model_actor.parameters()

params1c = critics1[0].model.parameters()
params2c = critics2[0].model.parameters()

In [18]:
for p1, p2 in zip(params1a, params2a):
    p = p1 - p2
    print(p.min(), p.max())

tensor(-0.0599, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0416, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0268, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0103, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.2567, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.1306, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0523, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0129, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.2630, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.1480, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0056, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0146, device='cuda:0', grad_fn=<MaxBackward1>)


In [19]:
for p1, p2 in zip(params1c, params2c):
    p = p1 - p2
    print(p.min(), p.max())

tensor(-0.0884, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0867, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0579, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0421, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0934, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.1027, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0458, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0411, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0430, device='cuda:0', grad_fn=<MinBackward1>) tensor(0.0669, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.0360, device='cuda:0', grad_fn=<MinBackward1>) tensor(-0.0360, device='cuda:0', grad_fn=<MaxBackward1>)


In [270]:
m1 = torch.nn.Sequential(
    torch.nn.Linear(3,7),
    torch.nn.ReLU(),
    torch.nn.Linear(7,7),
    torch.nn.ReLU(),
    torch.nn.Linear(7,1),
    torch.nn.ReLU()
)

In [271]:
loss = torch.nn.MSELoss(0.01)
opti = torch.optim.Adam(params=m1.parameters(), lr=0.5)

In [272]:
ten1 = torch.tensor([[0.01,0.0,0.0044]], requires_grad=True)
ten2 = torch.tensor([[0.01,0.0,0.0044]])

In [244]:
o1 = m1(ten1)
o1

tensor([[0.5024]], grad_fn=<ReluBackward0>)

In [245]:
o2 = m1(ten2)
o2

tensor([[0.5024]], grad_fn=<ReluBackward0>)

In [246]:
loss(o1, torch.ones(1)).backward()

  return F.mse_loss(input, target, reduction=self.reduction)


In [247]:
opti.step()

In [290]:
for p in m1.parameters():
    print(len(p.shape), float(p.norm().detach().numpy()))

2 1.4963072538375854
1 1.0531017780303955
2 1.611612319946289
1 0.5887762904167175
2 0.5188843011856079
1 0.35344937443733215


In [289]:
len(p.shape)

1