In [1]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['axes.facecolor'] = 'white'
import seaborn as sns
from matplotlib import font_manager, rc

import random
import os
import numpy as np
from collections      import deque
from tensorflow.keras.models     import Sequential
from tensorflow.keras.layers     import Dense
from tensorflow.keras.optimizers import Adam

In [2]:

class Agent():
    def __init__(self, state_size, action_size):
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=1000)
        self.learning_rate      = 0.001
        self.gamma              = 0.9
        self.exploration_rate   = 1.0
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.96
        self.brain              = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(10, input_dim=self.state_size, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        

        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        
        print('model loaded')
        return model

    def save_model(self):
            self.brain.save(self.weight_backup)

    def act(self, state):
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done): # 상태를 저장
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size, index, max_cnt):
        if len(self.memory) < sample_batch_size: # sample_batch_size 마다 했던 게임들을 바탕으로 학습
            return
        
        # 잘한게임 혹은 못한 게임은 그 게임을 복기
        if (index >= max_cnt * 0.9 or index >= 200) or (index < max_cnt * 0.1):
            if index >= max_cnt * 0.9 or index >= 200 :
                print('nice game!')
            else :
                print('bad game...')
            for state, action, reward, next_state, done in list(self.memory)[-index:]:
                target = reward 
                if not done: #게임 끝이 아니라면 (진행중인경우)
                    target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0]) # 큐함수
                target_f = self.brain.predict(state) # 예측값

                target_f[0][action] = target # 예측 행동의 q값을 (보상+다음상황에서의 )

                self.brain.fit(state,target_f, epochs=2, verbose=0)
                return
                   
        elif index < max_cnt * 0.1 :
            print('bad game...')
            
            sample_batch = random.sample(list(self.memory)[:-index], sample_batch_size) # 일부를 선택
            for state, action, reward, next_state, done in sample_batch:
                target = reward 
                if not done: #게임 끝이 아니라면 (진행중인경우)
                    target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0]) # 큐함수
                    
                target_f = self.brain.predict(state) # 예측값
                target_f[0][action] = target # 예측 행동의 q값을 (보상+다음상황에서의 )
                self.brain.fit(state,target_f, epochs=1, verbose=0)
                
                return
            
        # 일반 게임
        sample_batch = random.sample(self.memory, sample_batch_size) # 일부를 선택

        X_lst = []
        y_lst = []
        for state, action, reward, next_state, done in sample_batch:
            target = reward 
            if not done: #게임 끝이 아니라면 (진행중인경우)
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0]) # 큐함수
            target_f = self.brain.predict(state) # 예측값

            target_f[0][action] = target # 예측 행동의 q값을 (보상+다음상황에서의 )
            X_lst.append(state)
            y_lst.append(target_f)

            self.brain.fit(state,target_f, epochs=1, verbose=0)
                
        # decay
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

In [170]:
class Room :
    def __init__(self, season, climate, room_size, room_default, limit):
        self.season         = season
        self.climate        = climate
        self.room_size      = room_size
        self.room_default   = room_default
        self.limit          = limit
        
    def avg_env(self, hour) :
        
        # 계절별 일/낮/밤 평균 기온
        tmp_dict = {'spring' : [13,18,11],
                   'summer' : [26,31,24],
                   'autumn' : [10,13,7],
                   'winter' : [2,4,-8]}
        
        # 계절별 평균 상대 습도
        hum_dict = {'spring' : 55.6,
                   'summer' : 71,
                   'autumn' : 53.7,
                   'winter' : 62.5}
        
        re1 = tmp_dict[self.season][0]
        re2 = hum_dict[self.climate]
        
        return re1, re2
        
    def numofpeople(self, num) : # 
        re1 = 1 +( 0.5 * (num/self.room_size) * 36.5 )
        re2 = 60 +( 0.5 * (num/self.room_size) * 70)  # 습도에 대한 영향은 매우 미비하게
        
        return re1, re2
        
    def room_controller (self, set_temp, cnt, now1, now2) :
        '''
        set_temp : -3 ~ 3
        cnt : 냉난방을 가동한 시간
        re : 방에 가하는 에너지의 정량화
        '''
        
        elec = 30
        
        if set_temp < 0 :
            now_set = 'cool'
            if cnt == 0 :
                elec = -set_temp * 50
            elif cnt >= 1 :
                elec = -set_temp * 30 * (1 + cnt/1000)
                
        elif set_temp > 0 :
            now_set = 'heat'
            if cnt == 0 :
                elec = set_temp * 10
            else :
                elec = set_temp * 45 * (1 + cnt/800)
                
        expenditure = elec * (abs(set_temp)+1) / 2        
        re1 = now1 + (set_temp *(10/(self.room_size+1)))
        re2 = now2 - (20 *(1/(self.room_size+1)))
        
        return expenditure, re1, re2
    
    def cal_condition (self, now1,now2, base_re1, base_re2, human_re1, human_re2, controll_re1, controll_re2) :
        
        def non_zero(val,dv) :
            if val <= 15 :
                return dv
            return val
        
        human_re1 = non_zero(human_re1, now1)
        human_re2 = non_zero(human_re2, now2)
        controll_re1 = non_zero(controll_re1, now1)
        controll_re2 = non_zero(controll_re2, now2)
        
        random_ratio = ((random.randrange(-5,6)) / 100 +1)
        
        print('\nanalysis:')
        print(f'temp_impact : {(now1)}, {(base_re1)}, {(human_re1 )}, {(controll_re1)}')
        print(f'hmd_impact : {(now2)}, {(base_re2 )}, {(human_re2)}, {(controll_re2)}')
        print('\n')
        total_re1 = (now1 * 0.25) + (base_re1 * 0.1) + (human_re1 * 0.2 )  + (controll_re1 * 0.45)  * random_ratio
        total_re2 = (now2 * 0.4) + (base_re2 * 0.2) + (human_re2 * 0.05) + (controll_re2 * 0.35) * random_ratio
        
        #print(total_re1, total_re2)
        return total_re1, total_re2
 

In [171]:
def run (situation) :
    
    
    try :
        inroom_temp = situation.room_default
        inroom_hmd = 60
        inroom_people = 5

        outdoor_temp, outdoor_hmd = situation.avg_env(0)
        intemp, in_hmd = situation.numofpeople(inroom_people)

        cnt = 0
        totalexp, erg_temp, erg_hmd = situation.room_controller(0, cnt,inroom_temp,inroom_hmd )
        up_temp, up_hmd = situation.cal_condition(inroom_temp,inroom_hmd,outdoor_temp, outdoor_hmd, intemp, in_hmd, erg_temp, erg_hmd )
        
        for episode in range(1000) :

            done = False
            total_time= 0
            while done or total_time >= 600:
                # 방안의 사람 수 변화
                people_chg = random.randrange(-2,3)
                if inroom_people + people_chg <= situation.room_size and inroom_people + people_chg >= 0 :
                    inroom_people += people_chg

                intemp, in_hmd = situation.numofpeople(inroom_people)

                # 에이전트의 행동선택
                print(f'now state : \n1)outdoor : {outdoor_temp}, {outdoor_hmd}\n2)inroom : {up_temp}, {up_hmd}, {inroom_people} people\n*****')
                action = int(input())

                # 행동에 따른 전기세 및 체감쾌적도 계산
                upexp, erg_temp, erg_hmd = situation.room_controller(action, cnt,up_temp,up_hmd )
                totalexp += upexp
                if action != 0 : cnt += 1
                elif action == 0 : cnt = 0 
                print(f'냉난방 가동 시간 {cnt}')   

                # 다음 상태 계산
                up_temp, up_hmd = situation.cal_condition(up_temp,up_hmd,outdoor_temp,outdoor_hmd, intemp, in_hmd, erg_temp, erg_hmd )
                if up_temp < situation.limit[0][0] or up_temp > situation.limit[0][1] :
                    print('too hot or too cold!')
                    totalexp *= 1.5
                    done = True
                if up_hmd < situation.limit[1][0] or up_hmd > situation.limit[1][1] :
                    print('too dry or too wet!')
                    totalexp *= 1.5
                    done = True
                    
                total_time +=1
                #print('\n******누적비용 : ', totalexp, '\n현재비용 : ', upexp,  '\n---------------------------------------------\n\n')
                
        print("Episode #{} : round : {} / budget: {}".format(episode, total_time, totalexp))
        
        
        
        

In [172]:
env1 = Room('summer', 'summer', 20, 25, ((22,26), (40,80)))
run(env1)

now state : 
1)outdoor : 26, 71
2)inroom : 25, 60


analysis:
temp_impact : 25, 26, 25, 25.0
hmd_impact : 60, 71, 68.75, 59.04761904761905


now state : 
1)outdoor : 26, 71
2)inroom : 25.1, 62.304166666666674, 5 people
*****
-1
냉난방 가동 시간 1

analysis:
temp_impact : 25.1, 26, 25.1, 24.623809523809527
hmd_impact : 62.304166666666674, 71, 68.75, 61.351785714285725



******누적비용 :  115.0 
현재비용 :  100.0 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 25.197328571428574, 64.46175416666668, 7 people
*****
-1
냉난방 가동 시간 2

analysis:
temp_impact : 25.197328571428574, 26, 25.197328571428574, 24.7211380952381
hmd_impact : 64.46175416666668, 71, 72.25, 63.50937321428573



******누적비용 :  145.03 
현재비용 :  30.029999999999998 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 25.28580024285715, 66.27004790416667, 6 people
*****
-1
냉난방 가동 시간 3

analysis:
temp_impact : 25.28580024285715, 26, 25.28580024285715, 24.809609766

-1
냉난방 가동 시간 16

analysis:
temp_impact : 24.016862010794092, 26, 24.016862010794092, 23.540671534603618
hmd_impact : 71.6266341744249, 71, 70.5, 70.67425322204394



******누적비용 :  1697.29 
현재비용 :  30.449999999999996 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 24.00089009542897, 71.11164229748533, 5 people
*****
-1
냉난방 가동 시간 17

analysis:
temp_impact : 24.00089009542897, 26, 24.00089009542897, 23.524699619238497
hmd_impact : 71.11164229748533, 71, 68.75, 70.15926134510438



******누적비용 :  1727.77 
현재비용 :  30.48 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.457209630167497, 69.41011131624134, 5 people
*****
-1
냉난방 가동 시간 18

analysis:
temp_impact : 23.457209630167497, 26, 23.457209630167497, 22.981019153977023
hmd_impact : 69.41011131624134, 71, 68.75, 68.45773036386039



******누적비용 :  1758.28 
현재비용 :  30.509999999999998 
---------------------------------------------


now state : 
1)outdoor 

-1
냉난방 가동 시간 39

analysis:
temp_impact : 23.415676594865975, 26, 23.415676594865975, 22.9394861186755
hmd_impact : 68.8791505092522, 71, 74.0, 67.92676955687125



******누적비용 :  2405.92 
현재비용 :  31.14 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.563050908627705, 69.46377324205487, 6 people
*****
-1
냉난방 가동 시간 40

analysis:
temp_impact : 23.563050908627705, 26, 23.563050908627705, 23.08686043243723
hmd_impact : 69.46377324205487, 71, 70.5, 68.51139228967392



******누적비용 :  2437.09 
현재비용 :  31.169999999999998 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.904132719317126, 70.2088662172494, 6 people
*****
-1
냉난방 가동 시간 41

analysis:
temp_impact : 23.904132719317126, 26, 23.904132719317126, 23.42794224312665
hmd_impact : 70.2088662172494, 71, 70.5, 69.25648526486845



******누적비용 :  2468.29 
현재비용 :  31.200000000000003 
---------------------------------------------


now state : 
1)outdoor : 26,

-1
냉난방 가동 시간 62

analysis:
temp_impact : 24.214034917597672, 26, 24.214034917597672, 23.737844441407198
hmd_impact : 69.21367056131218, 71, 61.75, 68.26128960893124



******누적비용 :  3130.42 
현재비용 :  31.83 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.964705111579526, 68.38659056038829, 2 people
*****
-1
냉난방 가동 시간 63

analysis:
temp_impact : 23.964705111579526, 26, 23.964705111579526, 23.488514635389052
hmd_impact : 68.38659056038829, 71, 63.5, 67.43420960800734



******누적비용 :  3162.28 
현재비용 :  31.860000000000003 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 24.482440465432113, 69.51170825509801, 0 people
*****
-1
냉난방 가동 시간 64

analysis:
temp_impact : 24.482440465432113, 26, 24.482440465432113, 24.00624998924164
hmd_impact : 69.51170825509801, 71, 60.0, 68.55932730271707



******누적비용 :  3194.17 
현재비용 :  31.889999999999997 
---------------------------------------------


now state : 
1)outdoo

-1
냉난방 가동 시간 86

analysis:
temp_impact : 23.636938322021216, 26, 23.636938322021216, 23.16074784583074
hmd_impact : 69.52880300753657, 71, 74.0, 68.57642205515562



******누적비용 :  3903.34 
현재비용 :  32.55 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.763182140839618, 69.95328639951214, 8 people
*****
-1
냉난방 가동 시간 87

analysis:
temp_impact : 23.763182140839618, 26, 23.763182140839618, 23.286991664649143
hmd_impact : 69.95328639951214, 71, 74.0, 69.00090544713119



******누적비용 :  3935.92 
현재비용 :  32.580000000000005 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 23.982161137451783, 70.51463780443069, 9 people
*****
-1
냉난방 가동 시간 88

analysis:
temp_impact : 23.982161137451783, 26, 23.982161137451783, 23.50597066126131
hmd_impact : 70.51463780443069, 71, 75.75, 69.56225685204974



******누적비용 :  3968.53 
현재비용 :  32.61 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)i

-1
냉난방 가동 시간 110

analysis:
temp_impact : 25.620375420501283, 26, 25.620375420501283, 25.14418494431081
hmd_impact : 72.69546417086028, 71, 74.0, 71.74308321847933



******누적비용 :  4891.54 
현재비용 :  33.269999999999996 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 25.33090333191604, 71.8371640035472, 7 people
*****
-1
냉난방 가동 시간 111

analysis:
temp_impact : 25.33090333191604, 26, 25.33090333191604, 24.854712855725566
hmd_impact : 71.8371640035472, 71, 72.25, 70.88478305116625



******누적비용 :  4924.84 
현재비용 :  33.300000000000004 
---------------------------------------------


now state : 
1)outdoor : 26, 71
2)inroom : 24.73614245303566, 70.36465270661074, 7 people
*****
-1
냉난방 가동 시간 112

analysis:
temp_impact : 24.73614245303566, 26, 24.73614245303566, 24.259951976845187
hmd_impact : 70.36465270661074, 71, 72.25, 69.41227175422979



******누적비용 :  4958.17 
현재비용 :  33.33 
---------------------------------------------


now state : 
1)outdoor : 

ValueError: invalid literal for int() with base 10: ''

# 