In [None]:
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [13]:
import pandas as pd
import numpy  as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO

from int.int_env     import IntEnv
from unint.unint_env import UnintEnv
from hvac.hvac_env   import HVAC_env
from soc.soc_env     import SocEnv

class HLA_Baseline(gym.Env):
    def __init__(self, mode = 'train'):
        super(HLA_Baseline, self).__init__()
        self.mode = mode # train or test
        self.Pgrid_max   = None
        self.time_step   = None
        self.action_mask = None
        self.agent_order = None
        self.month       = None
        self.month_name  = None
        self.month_name_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dcb']

        # step data
        self.now_Pgrid_price  = None
        self.now_remain_power = None

        #episode data
        self.execute_price_data = None
        self.execute_remain_power_data = None
        
        # read data
        self.price_data = pd.read_csv("csv_data/grid_price.csv")   # ['summer_price'] ['not_summer_price']
        self.pv_data    = pd.read_csv("csv_data/PhotoVoltaic.csv") # ['month']

        if self.mode == 'train':
            self.unc_power_data = pd.read_csv("csv_data/TrainingData.csv")
        else:
            self.unc_power_data = pd.read_csv("csv_data/TestingData.csv") 

        # model ---------------------------------
        self.soc_model        = PPO.load("soc/agent/soc_model_f")
        self.int_load_model   = PPO.load("int/agent/int_model_f")
        self.unint_load_model = PPO.load("unint/agent/unint_model_f")
        self.hvac_model       = PPO.load("hvac/agent/hvac_model_f")

        # int load data
        self.execute_int_load_preference_data = {}

        self.int_load_preference_data = {
            1 : pd.read_csv("csv_data/intPreference1.csv"),
            2 : pd.read_csv("csv_data/intPreference2.csv"),
            3 : pd.read_csv("csv_data/intPreference3.csv")
        }

        # unint load data
        self.unint_load_already_add = { 1 : False, 2 : False }

        self.execute_unint_load_preference_data = {}
        
        self.unint_load_preference_data = {
            1 : pd.read_csv("csv_data/unIntPreference1.csv"),
            2 : pd.read_csv("csv_data/unIntPreference2.csv")
        }

        # hvac ---------------------------------
        self.T_out_data = pd.read_csv("csv_data/TemperatureF.csv")

        self.T_set_data = {
            1 : pd.read_csv("csv_data/userSetTemperatureF.csv"),
            2 : pd.read_csv("csv_data/userSetTemperatureF2.csv"),
            3 : pd.read_csv("csv_data/userSetTemperatureF3.csv")
        }

        # load data 
        self.soc_load   = None
        self.hvac_load  = {}
        self.int_load   = {}
        self.unint_load = {}

        # for testing
        self.total_cost = 0
        self.total_preference = 0

        # save print data
        self.plt_data = {}
        self.already_open_order = { agent : 1 for agent in ['soc','int_1','int_2','int_3','unint_1','unint_2','hvac_1','hvac_2','hvac_3']}

        # observation space [ time_step, SOC, remain_power, order, pgrid_price, state, power, preference]
        lowerLimit = np.array(
            [
                # time_step
                0,
                # remain_power
                -20,
                # pgrid_price
                0,
                # order
                # 0,
                #T_out
                35,
                #SOC
                -0.5,
                # load order
                0, 

                # Int_load_1 
                0, # remain_demand
                0, # power
                -1,# preference
                0, # load order

                # Int_load_2_state
                0, # remain_demand
                0, # power
                -1,# preference
                0, # load order

                # Int_load_3_state
                0, # remain_demand
                0, # power
                -1,# preference
                0, # load order

                # Unint_load_1_state
                0,  # remain_demand
                0,  # period
                0,  # remain_period
                0,  # power
                -1, # preference
                0,  # load order

                # Unint_load_2_state
                0,  # remain_demand
                0,  # period
                0,  # remain_period
                0,  # power
                -1, # preference
                0,  # load order

                # HVAC 1
                20, # T_in
                35, # T_user_set
                0,  # load order

                # HVAC 2
                20, # T_in
                35, # T_user_set
                0,  # load order

                # HVAC 3
                20, # T_in
                35, # T_user_set
                0,  # load order

            ], 
            dtype=np.float32
        )

        upperLimit = np.array(
            [
                # time_step
                96,
                # remain_power
                30,
                # pgrid_price
                7,
                # order
                # 9,
                # T_out
                130,
                # SOC
                1.5,
                # load order
                9,  

                # Int_load_1 
                40, # remain_demand
                5,  # power
                4,  # preference
                9,  # load order

                # Int_load_2_state
                40, # remain_demand
                5,  # power
                4,  # preference
                9,  # load order

                # Int_load_3_state
                40, # remain_demand
                5,  # power
                4,  # preference
                9,  # load order

                # Unint_load_1_state
                40, # remain_demand
                7,  # period
                7,  # remain_period
                5,  # power
                4,  # preference
                9,  # load order

                # Unint_load_2_state
                40, # remain_demand
                7,  # period
                7,  # remain_period
                5,  # power
                4,  # preference
                9,  # load order

                # HVAC 1
                130, # T_in
                130, # T_set
                9,   # load order

                # HVAC 2
                130, # T_in
                130, # T_set
                9,   # load order

                # HVAC 3
                130, # T_in
                130, # T_set
                9,   # load order

            ], 
            dtype=np.float32
        )

        self.observation_space = spaces.Box(lowerLimit, upperLimit, dtype=np.float32)
        # Ation space --------------------------------------------------------------------------------------
        self.action_space = spaces.Discrete(9)

    def action_masks(self):
        return self.action_mask
    
    def _soc_interact(self):
        fixload = ( self.Pgrid_max - self.now_remain_power )
        state   = np.array([ self.time_step, fixload, self.soc_load.soc, self.now_Pgrid_price]) # [time_step, remian_power, soc, pgrid_price]
        self.soc_load.state = state
        action, _ = self.soc_model.predict(state, deterministic=True)
        _ = self.soc_load.step(action)

        power = self.soc_load.load_use_power
        preference = 0
        reward = self.soc_load.reward

        if self.mode != 'train':
            self.plt_data['soc'].append(self.soc_load.soc)
            self.plt_data['soc_action'].append(power)
            self.plt_data['soc_fixload'].append(fixload)
        
            # if self.mode == 'test_print':
            #     print('soc     - action {: .2f}  | use power {:} | reward {:}'.format( float(action[0]), power, reward ) )
            #     print(self.soc_load.state)
            #     print()

        return power, preference, reward

    def _int_load_interact(self, id):
        remain_power  = self.now_remain_power - self.int_load[id].load_power
        remain_demand = self.int_load[id].load_remain_demand
        preference    = self.execute_int_load_preference_data[id][self.time_step]
        
        #[ time_step, pgrid_price, remain_time, preference]
        state = np.array([ self.time_step, self.now_Pgrid_price, remain_demand, preference])
        self.int_load[id].state = state 
        
        if remain_power >= 0:
            action, _  = self.int_load_model.predict(state, deterministic=True)
        else:
            action = 0

        _ = self.int_load[id].step(action)

        power      = self.int_load[id].load_use_power
        preference = self.int_load[id].load_preference
        reward     = self.int_load[id].reward

        if self.mode != 'train':
            self.plt_data['int_'+str(id)+'_power'].append(power)
            self.plt_data['int_'+str(id)+'_fixload'].append(self.Pgrid_max-self.now_remain_power)

            # if self.mode == 'test_print':
            #     print('int {:}   - action {: .2f}  | use power {:} | reward {:}'.format(id, float(action), power, reward ) )
            #     print(self.int_load[id].state)
            #     print()

        return power, preference, reward
    
    def _unint_load_interact(self, id):
        remain_power = self.now_remain_power - self.unint_load[id].load_power
        preference   = self.execute_unint_load_preference_data[id][self.time_step]
        
        # [ time_step, pgrid_price, remain_time, remain_period, period_len, preference ]
        state = np.array([ self.time_step, self.now_Pgrid_price, self.unint_load[id].load_remain_demand,  self.unint_load[id].load_remain_period, self.unint_load[id].load_period, preference ])
        self.unint_load[id].state = state 

        if remain_power >= 0:
            action, _  = self.unint_load_model.predict(state, deterministic=True)
        else:
            action = 0
        
        _ = self.unint_load[id].step(action)

        power      = self.unint_load[id].load_use_power
        preference = self.unint_load[id].load_preference
        reward     = self.unint_load[id].reward

        if self.unint_load_already_add[id] == True:
            already_remain_power = self.unint_load[id].load_power
            # power = 0
        else:
            already_remain_power = 0

        if self.mode != 'train':
            self.plt_data['unint_'+str(id)+'_power'].append(self.unint_load[id].load_use_power)
            self.plt_data['unint_'+str(id)+'_fixload'].append(self.Pgrid_max - self.now_remain_power - already_remain_power)

            if self.mode == 'test_print':
                print('unint {:} - action {: .2f}  | use power {:} | reward {:}'.format(id, float(action), power, reward ) )
                print(self.unint_load[id].state)
                print()

        return power, preference, reward
    
    def _hvac_interact(self, id):
        
        remain_power = min( self.hvac_load[id].load_power, self.now_remain_power )
        T_in  = self.hvac_load[id].T_in
        T_out = self.execute_T_out[self.time_step]
        T_set = self.execute_hvac_T_set_data[id][self.time_step]
        
        # [ time_step, remain_power, price, T_in, T_out, T_set ]
        state = np.array([ self.time_step, remain_power, self.now_Pgrid_price, T_in, T_out, T_set])

        self.hvac_load[id].state = state 
        action, _  = self.hvac_model.predict(state, deterministic=True)

        state_next, _, _, _ , _ = self.hvac_load[id].step(action)

        T_in_next = state_next[3]

        power      = self.hvac_load[id].load_use_power
        preference = self.hvac_load[id].load_preference
        reward     = self.hvac_load[id].reward

        if self.mode != 'train':
            self.plt_data['T_in_'+str(id)].append(T_in_next)
            self.plt_data['hvac_'+str(id)+'_power'].append(power)
            self.plt_data['hvac_'+str(id)+'_fixload'].append(self.Pgrid_max-self.now_remain_power)

            # if self.mode == 'test_print':
            #     print('hvac {:}  - action {: .2f}  | use power {:} | reward {:}'.format( id, float(action[0]), power, reward ) )
            #     print(self.hvac_load[id].state)
            #     print()

        return power, preference, reward
    
    def _get_next_state(self):
        self.state =  np.array(
            [
                self.time_step,
                self.now_remain_power,
                self.now_Pgrid_price,
                # self.agent_order,
                self.execute_T_out[self.time_step],# T_out
                self.soc_load.soc,
                self.already_open_order['soc'],

                self.int_load[1].load_remain_demand,
                self.int_load[1].load_power,
                self.execute_int_load_preference_data[1][self.time_step],
                self.already_open_order['int_1'],  # = { agent : 0 for agent in ['soc','int_1','int_2','int_3','unint_1','unint_2','hvac_1','hvac_2','hvac_3']}

                self.int_load[2].load_remain_demand,
                self.int_load[2].load_power,
                self.execute_int_load_preference_data[2][self.time_step],
                self.already_open_order['int_2'],

                self.int_load[3].load_remain_demand,
                self.int_load[3].load_power,
                self.execute_int_load_preference_data[3][self.time_step],
                self.already_open_order['int_3'],


                self.unint_load[1].load_remain_demand,
                self.unint_load[1].load_period,
                self.unint_load[1].load_remain_period,
                self.unint_load[1].load_power,
                self.execute_unint_load_preference_data[1][self.time_step],
                self.already_open_order['unint_1'],


                self.unint_load[2].load_remain_demand,
                self.unint_load[2].load_period,
                self.unint_load[2].load_remain_period,
                self.unint_load[2].load_power,
                self.execute_unint_load_preference_data[2][self.time_step],
                self.already_open_order['unint_2'],

                self.hvac_load[1].T_in,
                self.execute_hvac_T_set_data[1][self.time_step],
                self.already_open_order['hvac_1'],

                self.hvac_load[2].T_in,
                self.execute_hvac_T_set_data[2][self.time_step],
                self.already_open_order['hvac_2'],

                self.hvac_load[3].T_in,
                self.execute_hvac_T_set_data[3][self.time_step],
                self.already_open_order['hvac_3'],
            ], 
            dtype=np.float32
        )
        
    ###########################################################################
    def step(self, action):
        self.step_power = 0
        power, preference ,agent_reward = self._soc_interact()
        self.step_power += power
        power, preference ,agent_reward = self._int_load_interact(1)

        power, preference ,agent_reward = self._int_load_interact(2)
        self.int_preference += preference
        self.step_power += power

        power, preference ,agent_reward = self._int_load_interact(3)
        self.int_preference += preference
        self.step_power += power

        power, preference ,agent_reward = self._unint_load_interact(1)
        self.unint_preference += preference
        self.step_power += power
 
        power, preference ,agent_reward = self._unint_load_interact(2)
        self.unint_preference += preference
        self.step_power += power

        power, preference ,agent_reward = self._hvac_interact(1)
        self.hvac_preference += preference
        self.step_power += power

        power, preference ,agent_reward = self._hvac_interact(2)
        self.hvac_preference += preference
        self.step_power += power


        power, preference ,agent_reward = self._hvac_interact(3)
        self.hvac_preference += preference
        self.step_power += power

        self.now_remain_power = self.now_remain_power - self.step_power

        if self.step_power > self.now_remain_power:
            self.exceed_pgrid_max += 1

        reward = 0

        self.total_cost += ( self.now_Pgrid_price*( self.Pgrid_max - self.now_remain_power ) )

        
        self.time_step += 1
        self.agent_order = 0 
        
        if self.time_step >= 96:
            done = True
            self.time_step = 95
            self.now_remain_power = self.execute_remain_power_data[self.time_step]
            self.now_Pgrid_price  = self.execute_price_data[self.time_step]
            self._get_next_state()
            self.time_step = 96
        else:
            done = False
            self.now_remain_power = self.execute_remain_power_data[self.time_step]
            self.now_Pgrid_price  = self.execute_price_data[self.time_step]
            self._get_next_state()

        # truncated = False
        return  ( self.state, float(reward), done, False, {} )
    
    ###########################################################################
    def reset(self, seed=None, options=None, day = 1, Pgrid_max = 20): 
        super().reset(seed=seed)
        self.already_open_order = { agent : 0 for agent in ['soc','int_1','int_2','int_3','unint_1','unint_2','hvac_1','hvac_2','hvac_3']}
        self.int_preference    = 0
        self.unint_preference  = 0
        self.hvac_preference   = 0
        self.exceed_pgrid_max  = 0
        self.time_step = 0
        self.agent_order = 0
        self.action_mask = np.array([True,True,True,True,True,True,True,True,True])

        if self.mode == 'train':
            # self.day   = np.random.choice(np.arange(1,360))
            self.day   = np.random.choice(np.arange(1,360))
            # self.month = self.day//30 + 1
            self.month = np.random.randint(6,10)
            remain_power_noise = np.random.normal(0, 0.5, 96)
            price_noise = np.random.normal(0,0.1,96)
            T_out_noise = np.random.rand(96)*6 - 3 
            self.Pgrid_max = np.random.randint(15,21)
        else:
            self.day   = day
            self.month = self.day
            remain_power_noise = np.zeros(96)
            price_noise = np.zeros(96)
            T_out_noise = np.zeros(96)
            self.Pgrid_max = Pgrid_max

        # self.month_name_list = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dcb']
        self.month_name = self.month_name_list[self.month-1]

        # remain_power_data
        self.execute_remain_power_data = self.Pgrid_max*np.ones(96) + self.pv_data[self.month_name] - np.array(self.unc_power_data['day'+str(self.day)+'_powerConsumption']) #+ remain_power_noise


        self.execute_price_data = np.array(self.price_data['summer_price'])     + price_noise

        # SOC
        self.soc_load = SocEnv( mode='test')
        _  = self.soc_load.reset()

        # int load ---------------------------
        self.execute_int_load_preference_data = {}

        for id in range(1,4):
            self.int_load[id] = IntEnv( mode='test')
            
            if self.mode == 'train':
                noise  = np.random.randint( -1, 2, 96)
                demand = np.random.randint(20,26)
                load_power = (np.random.rand()*2.5 + 1.5)
            else:
                noise = np.zeros(96)
                if id == 1:
                    demand = 20
                    load_power = 2.7
                elif id == 2:
                    demand = 25
                    load_power = 3
                else:
                    demand = 22
                    load_power = 2.5

            _  = self.int_load[id].reset( id=id, month=self.month , demand=np.random.randint(20,26), load_power = (np.random.rand()*2.5 + 1.5) )

            self.execute_int_load_preference_data[id] = self.int_load_preference_data[id][str(int(self.month))] + noise

            for i in range(len(self.execute_int_load_preference_data[id])):
                if self.execute_int_load_preference_data[id][i] > 4:
                    self.execute_int_load_preference_data[id][i] = 4

                elif self.execute_int_load_preference_data[id][i] < -1:
                    self.execute_int_load_preference_data[id][i] = -1

        # unint load ---------------------------
        self.execute_unint_load_preference_data = {}

        for id in range(1,3):
            self.unint_load[id] = UnintEnv( mode='test')
            period = np.random.randint(5,8)
            demand = (np.random.randint(30,35)//period)*period

            if self.mode == 'train':
                noise = np.random.randint( -1, 2, 96)
                load_power = (np.random.rand()*2.5 + 1.5) 
            else:
                noise = np.zeros(96)
                if id == 1:
                    load_power = 2.5
                    period = 6
                    demand = 30
                else:
                    load_power = 3.2
                    period = 5
                    demand = 30

            _  = self.unint_load[id].reset( id=id, month=self.month , demand=demand, period = period, load_power = load_power )

            self.execute_unint_load_preference_data[id] = self.unint_load_preference_data[id][str(int(self.month))] + noise

            for i in range(len(self.execute_unint_load_preference_data[id])):
                if self.execute_unint_load_preference_data[id][i] > 4:
                    self.execute_unint_load_preference_data[id][i] = 4

                elif self.execute_unint_load_preference_data[id][i] < -1:
                    self.execute_unint_load_preference_data[id][i] = -1

        self.unint_load_already_add = {
            1 : False,
            2 : False
        }

        # hvac --------------------------------
        self.execute_T_out = self.T_out_data[self.month_name] + T_out_noise

        self.execute_hvac_T_set_data = {}

        for id in range(1,4):
            self.hvac_load[id] = HVAC_env( mode='test')
            _  = self.hvac_load[id].reset(input_month=self.month)

            if self.mode == 'train':
                noise = np.random.randint( -1, 2, 96)
            else:
                noise = np.zeros(96)

            self.execute_hvac_T_set_data[id] = self.T_set_data[id][self.month_name] + noise

        # state  ------------------------------
        self.now_remain_power = self.execute_remain_power_data[self.time_step]
        self.now_Pgrid_price  = self.execute_price_data[self.time_step]

        self._get_next_state()

        self.total_cost = 0
        self.total_preference = 0

        if self.mode != 'train':
            self.plt_data = {
                'soc':[],
                'soc_action':[],
                'soc_fixload':[],

                'int_1_power':[],
                'int_1_fixload':[],
                'int_1_preference':self.execute_int_load_preference_data[1],
                'int_2_power':[],
                'int_2_fixload':[],
                'int_2_preference':self.execute_int_load_preference_data[2],
                'int_3_power':[],
                'int_3_fixload':[],
                'int_3_preference':self.execute_int_load_preference_data[3],

                'unint_1_power':[],
                'unint_1_fixload':[],
                'unint_1_preference':self.execute_unint_load_preference_data[1],
                'unint_2_power':[],
                'unint_2_fixload':[],
                'unint_2_preference':self.execute_unint_load_preference_data[2],

                'T_in_1':[],
                'T_set_1':self.execute_hvac_T_set_data[1],
                'hvac_1_power':[],
                'hvac_1_fixload':[],
                'T_in_2':[],
                'T_set_2':self.execute_hvac_T_set_data[2],
                'hvac_2_power':[],
                'hvac_2_fixload':[],
                'T_in_3':[],
                'T_set_3':self.execute_hvac_T_set_data[3],
                'hvac_3_power':[],
                'hvac_3_fixload':[],

                'T_out':self.execute_T_out,
                'Pgrid price':self.execute_price_data,
                'Pgrid max':self.Pgrid_max*np.ones(96),
                'pv':self.pv_data[self.month_name]
            }

        return ( self.state, {} )

    def render(self):
        if self.agent_order == 0:
            print(self.time_step,'-----------------------------')
            print('pgrid max:',self.Pgrid_max)
            print('remain_power:',self.now_remain_power)
            print('unint:',self.unint_load_already_add)


env = HLA_Baseline(mode = 'test')

month = 8
    
month_name = {1:"January",2:"February",3:"March",4:"April",5:"May",6:"June",7:"July",8:"August",9:"September",10:"October",11:"November",12:"December"}
month_title = month_name[month]
env.reset(day = month)
total_rewards = 0
action = 0
terminated = False

while not terminated:
    # env.render()
    state, reward, terminated, _ , _ = env.step(action)
    # print(env.agent_order)
    total_rewards += reward
    if terminated:
        break

int_load_remain = 0
unint_load_remain = 0

for id in range(1,4):
    int_load_remain += env.int_load[id].load_remain_demand

for id in range(1,3):
    unint_load_remain += env.unint_load[id].load_remain_demand

print('hvac  : ',env.hvac_preference)
print('int   : ',env.int_preference  , '   int_load_remain :', int_load_remain )
print('unint : ',env.unint_preference, ' unint_load_remain :', unint_load_remain)
print('total_cost :' ,env.total_cost/4)
print('total_preference :', env.total_preference)
print('exceed Pgrid max : ',env.exceed_pgrid_max )

hvac  :  244.51867248046324
int   :  126.0    int_load_remain : 0
unint :  120.0  unint_load_remain : 0
total_cost : 802.2506188453432
total_preference : 0
exceed Pgrid max :  25
