In [34]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [45]:
import plotly.graph_objects as go
data = pd.read_csv("GeneratedCSV/5_north.csv")
fig = go.Figure()

fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'], mode='lines', name='Power Usage'))
fig.update_layout(
    title='Building north of No. 5 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True 
)

fig.show()

In [46]:
data

Unnamed: 0,Building,DataValue,DateTime,DayOfWeek,Time,PowerUsage
0,5호북관,17975237,2024-04-23 13:20:00,1,13:20:00,0.000000
1,5호북관,17975271,2024-04-23 13:30:00,1,13:30:00,0.693878
2,5호북관,17975305,2024-04-23 13:40:00,1,13:40:00,0.693878
3,5호북관,17975339,2024-04-23 13:50:00,1,13:50:00,0.693878
4,5호북관,17975374,2024-04-23 14:00:00,1,14:00:00,0.714286
...,...,...,...,...,...,...
2888,5호북관,18072694,2024-05-13 14:40:00,0,14:40:00,0.897959
2889,5호북관,18072736,2024-05-13 14:50:00,0,14:50:00,0.857143
2890,5호북관,18072779,2024-05-13 15:00:00,0,15:00:00,0.877551
2891,5호북관,18072823,2024-05-13 15:10:00,0,15:10:00,0.897959


In [58]:
from sklearn.preprocessing import MinMaxScaler
# time_steps:넣을 데이터 (ex) 1008는 7일 
# for_periods:예측할 요일 (ex) 144는 1일

'''
x_train, y_train: 데이터
x_train: 모델이 입력으로 사용할 데이터
y_train: 모델이 예측해야하는 실제 값. x_train를 기반으로 y_train에 해당하는 미래값을 예측
x_test: 
'''
time_steps = 288
for_periods = 288

data['DateTime'] = pd.to_datetime(data['DateTime'])
print(len(data))
# training & test set 만들기
start_date = data.iloc[1]['DateTime']
end_date = data.iloc[len(data) - time_steps + 1]['DateTime']

print(end_date)

mask2_date = data.iloc[len(data) - (time_steps + 1 + 1008)]['DateTime']
print(mask2_date)

mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

sc = MinMaxScaler(feature_range=(0,1))

ts_train_scaled = data.loc[mask1]['PowerUsage'].values.reshape(-1, 1)
ts_test_scaled = data.loc[mask2]['PowerUsage'].values.reshape(-1, 1)
x_train = []
y_train = []

print(ts_test_scaled)
print(len(ts_test_scaled))

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i,0])
    y_train.append(ts_train_scaled[i:i+for_periods,0])

       
x_train, y_train = np.array(x_train), np.array(y_train)

# 3차원으로 재구성
'''
RNN, LSTM 모델에서는 Input 데이터가 [샘플 수, 시간 단계 수, 특성 수] 형태의 3차원 배열을 기대한다.
'''
# np.reshape(samples, time stemps, features)로 만듬
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# 두 배열을 연결
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


2893
2024-05-11 15:40:00
2024-05-04 15:20:00
[[0.63265306]
 [0.63265306]
 [0.65306122]
 ...
 [0.75510204]
 [0.7755102 ]
 [0.73469388]]
1011


In [59]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten


def LSTM_model(x_train, y_train):
    model = Sequential()

    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')
    model.fit(x_train, y_train, epochs = 20, batch_size=32, verbose = 1)

    return model


In [60]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


model = LSTM_model(x_train, y_train)
model.summary()

x_train shape: (2030, 288, 1)
y_train shape: (2030, 288, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 288, 50)           10400     
                                                                 
 lstm_11 (LSTM)              (None, 288, 50)           20200     
                                                                 
 time_distributed_5 (TimeDis  (None, 288, 1)           51        
 tributed)                                                       
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_______________________

In [61]:
weights = model.get_weights()
print(weights)
print(len(weights))

prediction = model.predict(x_test)
prediction_2d = prediction.reshape(-1, 1)

#print(prediction_2d)
# prediction 값을 원래 scale로 되돌리기
#prediction = sc.inverse_transform(prediction_2d)

[array([[ 0.12013476, -0.07662782, -0.0287686 , -0.10325057,  0.13798079,
        -0.06390405,  0.13329968,  0.06387781, -0.09186907, -0.02959316,
        -0.02666635, -0.00758436, -0.12286077, -0.10713383,  0.07516688,
        -0.0631672 ,  0.16162807, -0.12265358, -0.01077202, -0.02329355,
        -0.08717702, -0.04725597,  0.08628435, -0.12628356,  0.14546682,
         0.13972287,  0.09803773, -0.1127444 ,  0.16240004, -0.10144595,
        -0.03256853, -0.14871024,  0.10911568, -0.06057211,  0.11692783,
        -0.01840843, -0.11800688, -0.03442632, -0.05259414,  0.01692483,
        -0.12867594,  0.00774354,  0.15942448,  0.10599206, -0.05798821,
         0.0467129 , -0.03713534, -0.0228331 , -0.13332455, -0.14229181,
        -0.05192118, -0.09730487,  0.13870378, -0.0531057 ,  0.08065603,
         0.03361203, -0.10092727,  0.113832  , -0.09685987,  0.07467473,
         0.04315712, -0.10905414, -0.00899824, -0.05570832, -0.15708591,
         0.11943567, -0.12043487, -0.02363233,  0.

In [62]:
# data['DateTime'] = pd.to_datetime(data['DateTime'])
# last_datetime = data['DateTime'].iloc[-1]
# print(last_datetime)
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
predict = prediction
print(predict)

#repeated_predict = np.tile(predict, (1440, 1))
#print(repeated_predict)

DatetimeIndex(['2024-05-11 15:40:00', '2024-05-11 15:50:00',
               '2024-05-11 16:00:00', '2024-05-11 16:10:00',
               '2024-05-11 16:20:00', '2024-05-11 16:30:00',
               '2024-05-11 16:40:00', '2024-05-11 16:50:00',
               '2024-05-11 17:00:00', '2024-05-11 17:10:00',
               ...
               '2024-05-13 14:00:00', '2024-05-13 14:10:00',
               '2024-05-13 14:20:00', '2024-05-13 14:30:00',
               '2024-05-13 14:40:00', '2024-05-13 14:50:00',
               '2024-05-13 15:00:00', '2024-05-13 15:10:00',
               '2024-05-13 15:20:00', '2024-05-13 15:30:00'],
              dtype='datetime64[ns]', length=288, freq='10T')
[[[0.51608616]
  [0.5904238 ]
  [0.64618766]
  [0.6871472 ]
  [0.7165391 ]
  [0.73704463]
  [0.7511784 ]
  [0.7601475 ]
  [0.7649962 ]
  [0.76731837]
  [0.76755005]
  [0.7671066 ]
  [0.76586425]
  [0.76378256]
  [0.76126564]
  [0.75860274]
  [0.7559784 ]
  [0.7528074 ]
  [0.7498318 ]
  [0.7471542 ]
  [0.744

In [63]:
import plotly.graph_objects as go

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'],
                         mode='lines', name='Actual Power Usage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=predict.flatten(),
                         mode='lines', name='Predicted Power Usage',
                         line=dict(color='red')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='Building north of No. 5 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()
