In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
import plotly.graph_objects as go


sc = MinMaxScaler(feature_range=(0,1))

2024-05-27 18:46:57.236097: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-27 18:46:57.372438: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
from keras.layers import Dropout

def LSTM_model(x_train):
    model = Sequential()
    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2]), activation='tanh'))
    model.add(Dropout(0.2))  # Dropout 추가

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))
    model.add(Dropout(0.2))  # Dropout 추가

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')

    return model


In [3]:
time_steps = 144 
for_periods = 144

In [4]:
csv_names = ["하이테크센터", "로스쿨관", "1호관(본관)", "7호관(학생회관)", 
                "2호남관", "5호북관", "2호북관", "김현태인하드림센터", "5호동관",
                "5호남관", "인하드림센터", "9호관", "60주년기념관", "서호관"]


en_names = ['HighTech', 'Law School', 'No. 1 building', 'No. 7 building',
            'South of No. 2 building', 'North of No. 5 building', 'North of No. 2 building',
            'Kim Hyun Tae Inha Dream Center', 'East of No. 5 building', 'South of No. 5 building',
            'Inha Dream Center', 'No. 9 building', '60th Center', 'West Building']

def read_data(building):
    df = pd.read_csv(f"GeneratedCSV/Preprocessed_{building}.csv")
    return df

In [52]:
def ts_train_test(data, time_steps, for_periods):
    data['DateTime'] = pd.to_datetime(data['DateTime'])
    
    start_date = data.iloc[1]['DateTime']
    end_date = data.iloc[-1]['DateTime']
    mask2_date = data.iloc[-(time_steps + 1)]['DateTime']

    mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
    mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

    ts_train_scaled = data.loc[mask1,['DayOfWeek', 'Time', 'PowerUsage']].values
    ts_test_scaled = data.loc[mask2,['DayOfWeek', 'Time', 'PowerUsage']].values
    x_train = []
    y_train = []

    for i in range(time_steps, len(ts_train_scaled) - for_periods): # 4594번실행
        x_train.append(ts_train_scaled[i-time_steps:i, :])
        y_train.append(ts_train_scaled[i:i+for_periods,2])

    x_train, y_train = np.array(x_train), np.array(y_train)

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], x_train.shape[2]))
    y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))
    inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
    x_test = []

    for i in range(time_steps, len(inputs) - for_periods + 1):
        x_test.append(inputs[i-time_steps:i])
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], x_test.shape[2]))

    return x_train, y_train, x_test

'''
len(ts_train_scaled)의 길이는 데이터베이스에서 0번째 행을 제외한 행의 갯수이다.
x_train은 [0-144], y_train은 [144:288, 2]형태로 append
x_test는 마지막 144의 datapoints 즉, 하루의 데이터로 이루어진다.
'''



'\nlen(ts_train_scaled)의 길이는 데이터베이스에서 0번째 행을 제외한 행의 갯수이다.\nx_train은 [0-144], y_train은 [144:288, 2]형태로 append\nx_test는 마지막 144의 datapoints 즉, 하루의 데이터로 이루어진다.\n'

In [31]:
def preprocess_data(data):
    data['DateTime'] = pd.to_datetime(data['DateTime'])
    data['DayOfWeek'] = data['DayOfWeek'].astype(float)
    # ex 12:20:00은 13이 시간, 20이 분이므로 13*6 + 20//10 = 80이 된다.
    data['Time'] = data['Time'].apply(lambda x: int(x.split(':')[0]) * 6 + int(x.split(':')[1]) // 10).astype(float)

    # 필요한 열만 선택 (DateTime 포함)
    features = ['DateTime', 'DayOfWeek', 'Time', 'PowerUsage']
    data = data[features].copy()  # 슬라이스를 명시적으로 복사


    # 정규화
    data['DayOfWeek'] = data['DayOfWeek'] / 6.0  # 요일은 0에서 6 사이 값이므로
    data['Time'] = data['Time'] / 144.0  # 하루는 144개의 10분 단위로 구성

    return data


In [9]:
test_dataset = []
dataset = []
for i in range(len(csv_names)):
    data = read_data(csv_names[i])
    data = preprocess_data(data)
    dataset.append(data)
    train_data, y_train, test_data = ts_train_test(data, time_steps, for_periods)
    test_dataset.append(test_data)
    #model = Transformer_model(train_data)
    model = LSTM_model(train_data)
    model.fit(train_data, y_train, epochs = 20, batch_size=32, verbose = 1)
    model.summary()
    model.save(f'Model/{en_names[i]}.h5')

2024-05-27 17:34:03.321513: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-27 17:34:03.328212: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-27 17:34:03.328322: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-27 17:34:03.329362: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

Epoch 1/20


2024-05-27 17:34:08.953334: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8700
2024-05-27 17:34:09.222823: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 144, 50)           10800     
                                                                 
 dropout (Dropout)           (None, 144, 50)           0         
                                                                 
 lstm_1 (LSTM)               (None, 144, 50)           20200     
                                                                 
 dropout_1 (Dropout)         (None, 144, 50)           0         
                                                                 
 time_distributed (TimeDistr  (None, 144, 1)           51        
 ibuted)                          

In [70]:
predict_result = []
for i in range(len(csv_names)):
    model = load_model(f'Model/{en_names[i]}-{en_names[i]}.h5')
    predict_result.append(model.predict(test_dataset[i]))



In [71]:
for i in range(len(predict_result)):
    print(predict_result[i])

[[[0.53267956]
  [0.52947354]
  [0.52785647]
  [0.52744704]
  [0.5279608 ]
  [0.5288354 ]
  [0.5300704 ]
  [0.5314736 ]
  [0.5329237 ]
  [0.53434753]
  [0.5357035 ]
  [0.5367964 ]
  [0.5380491 ]
  [0.53935415]
  [0.5406464 ]
  [0.54188764]
  [0.54323334]
  [0.5445948 ]
  [0.54609454]
  [0.54762244]
  [0.5489368 ]
  [0.55043167]
  [0.55180395]
  [0.5532327 ]
  [0.55429363]
  [0.55563307]
  [0.55691534]
  [0.55829805]
  [0.55969256]
  [0.56087047]
  [0.56205946]
  [0.5632169 ]
  [0.56414425]
  [0.56526357]
  [0.56610996]
  [0.5655462 ]
  [0.56385183]
  [0.5615975 ]
  [0.5584499 ]
  [0.5546111 ]
  [0.5505853 ]
  [0.5463321 ]
  [0.54236406]
  [0.5389698 ]
  [0.53576833]
  [0.5330473 ]
  [0.53060305]
  [0.5286493 ]
  [0.5271126 ]
  [0.52592826]
  [0.52521354]
  [0.52483934]
  [0.52453715]
  [0.52431816]
  [0.5243585 ]
  [0.5245749 ]
  [0.524909  ]
  [0.52532005]
  [0.5257798 ]
  [0.5262688 ]
  [0.526774  ]
  [0.5272867 ]
  [0.52780145]
  [0.5279656 ]
  [0.5282869 ]
  [0.5285332 ]
  [0.52874

In [11]:
dataset[0]['DateTime'] = pd.to_datetime(dataset[0]['DateTime'])
# training & test set 만들기
start_date = dataset[0].iloc[1]['DateTime']
end_date = dataset[0].iloc[-1]['DateTime']
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
print(f"start_date: {start_date}")
print(f"end_date: {end_date}")

DatetimeIndex(['2024-05-26 11:10:00', '2024-05-26 11:20:00',
               '2024-05-26 11:30:00', '2024-05-26 11:40:00',
               '2024-05-26 11:50:00', '2024-05-26 12:00:00',
               '2024-05-26 12:10:00', '2024-05-26 12:20:00',
               '2024-05-26 12:30:00', '2024-05-26 12:40:00',
               ...
               '2024-05-27 09:30:00', '2024-05-27 09:40:00',
               '2024-05-27 09:50:00', '2024-05-27 10:00:00',
               '2024-05-27 10:10:00', '2024-05-27 10:20:00',
               '2024-05-27 10:30:00', '2024-05-27 10:40:00',
               '2024-05-27 10:50:00', '2024-05-27 11:00:00'],
              dtype='datetime64[ns]', length=144, freq='10T')
start_date: 2024-04-23 13:30:00
end_date: 2024-05-26 11:10:00


In [72]:
for i in range(len(predict_result)):
    # 그래프 객체 생성
    fig = go.Figure()

    # 기존 전력 사용량 데이터 추가
    fig.add_trace(go.Scatter(x=dataset[i]['DateTime'], y=dataset[i]['PowerUsage'],
                            mode='lines', name='Actual Power Usage'))

    # 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
    fig.add_trace(go.Scatter(x=predicted_dates, y=predict_result[i].flatten(),
                            mode='lines', name='Predicted Power Usage',
                            line=dict(color='red')))  # 예측 데이터에는 다른 색상을 사용

    # 레이아웃 업데이트
    fig.update_layout(
        title=f'{en_names[i]} Power Usage Over Time',
        xaxis_title='Time',
        yaxis_title='Power Usage (W)',
        xaxis_rangeslider_visible=True
    )

    # 그래프 표시
    fig.show()