In [52]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [53]:
import plotly.graph_objects as go
data = pd.read_csv("2_south.csv")
fig = go.Figure()

fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'], mode='lines', name='Power Usage'))
fig.update_layout(
    title='Building south of No. 2 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True 
)

fig.show()

In [54]:
data

Unnamed: 0,Building,DataValue,DateTime,DayOfWeek,Time,PowerUsage
0,2호남관/4호관,44446013,2024-04-23 13:20:00,1,13:20:00,0.000000
1,2호남관/4호관,44446073,2024-04-23 13:30:00,1,13:30:00,0.845070
2,2호남관/4호관,44446134,2024-04-23 13:40:00,1,13:40:00,0.859155
3,2호남관/4호관,44446194,2024-04-23 13:50:00,1,13:50:00,0.845070
4,2호남관/4호관,44446256,2024-04-23 14:00:00,1,14:00:00,0.873239
...,...,...,...,...,...,...
2867,2호남관/4호관,44588883,2024-05-13 11:10:00,0,11:10:00,0.887324
2868,2호남관/4호관,44588946,2024-05-13 11:20:00,0,11:20:00,0.887324
2869,2호남관/4호관,44589008,2024-05-13 11:30:00,0,11:30:00,0.873239
2870,2호남관/4호관,44589069,2024-05-13 11:40:00,0,11:40:00,0.859155


In [55]:
from sklearn.preprocessing import MinMaxScaler
# time_steps:넣을 데이터 (ex) 1008는 7일 
# for_periods:예측할 요일 (ex) 144는 1일

'''
x_train, y_train: 데이터
x_train: 모델이 입력으로 사용할 데이터
y_train: 모델이 예측해야하는 실제 값. x_train를 기반으로 y_train에 해당하는 미래값을 예측
x_test: 
'''
time_steps = 288
for_periods = 288

data['DateTime'] = pd.to_datetime(data['DateTime'])
print(len(data))
# training & test set 만들기
start_date = data.iloc[1]['DateTime']
end_date = data.iloc[len(data) - time_steps + 1]['DateTime']

print(end_date)

mask2_date = data.iloc[len(data) - (time_steps + 1 + 1008)]['DateTime']
print(mask2_date)

mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

sc = MinMaxScaler(feature_range=(0,1))

ts_train_scaled = data.loc[mask1]['PowerUsage'].values.reshape(-1, 1)
ts_test_scaled = data.loc[mask2]['PowerUsage'].values.reshape(-1, 1)
x_train = []
y_train = []

print(ts_test_scaled)
print(len(ts_test_scaled))

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i,0])
    y_train.append(ts_train_scaled[i:i+for_periods,0])

       
x_train, y_train = np.array(x_train), np.array(y_train)

# 3차원으로 재구성
'''
RNN, LSTM 모델에서는 Input 데이터가 [샘플 수, 시간 단계 수, 특성 수] 형태의 3차원 배열을 기대한다.
'''
# np.reshape(samples, time stemps, features)로 만듬
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# 두 배열을 연결
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


2872
2024-05-11 12:10:00
2024-05-04 11:50:00
[[0.63380282]
 [0.61971831]
 [0.64788732]
 ...
 [0.64788732]
 [0.66197183]
 [0.66197183]]
1011


In [56]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten


def LSTM_model(x_train, y_train):
    model = Sequential()

    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')
    model.fit(x_train, y_train, epochs = 20, batch_size=32, verbose = 1)

    return model


In [57]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


model = LSTM_model(x_train, y_train)
model.summary()

x_train shape: (2009, 288, 1)
y_train shape: (2009, 288, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 288, 50)           10400     
                                                                 
 lstm_3 (LSTM)               (None, 288, 50)           20200     
                                                                 
 time_distributed_1 (TimeDis  (None, 288, 1)           51        
 tributed)                                                       
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_______________________

In [58]:
weights = model.get_weights()
print(weights)
print(len(weights))
print("x_test shape:", x_test.shape)
prediction = model.predict(x_test)
prediction_2d = prediction.reshape(-1, 1)

#print(prediction_2d)
# prediction 값을 원래 scale로 되돌리기
#prediction = sc.inverse_transform(prediction_2d)

[array([[-0.12986398, -0.11591208, -0.11752558, -0.09536427,  0.16261272,
         0.10964838, -0.05315127,  0.12641825,  0.00711598, -0.1054784 ,
        -0.03312633,  0.13793096,  0.08296887, -0.05709429,  0.10127904,
         0.01941431, -0.02123008,  0.08722524,  0.09135757,  0.00488109,
         0.14008476, -0.15328719,  0.05771961,  0.16052516, -0.15113658,
         0.12917906, -0.0044722 ,  0.05653809, -0.01227401, -0.16174139,
        -0.06997102, -0.03713786,  0.03440037, -0.01504893, -0.11224446,
         0.03220414,  0.14446633, -0.1180519 ,  0.01495821, -0.09875312,
        -0.13759188, -0.11950303, -0.08608766, -0.12362215,  0.00897099,
        -0.05241311,  0.17646152, -0.01046633, -0.15791278, -0.13252762,
        -0.08050524, -0.1458751 , -0.11599906, -0.11225615, -0.080666  ,
         0.03298657, -0.08656355, -0.12452597,  0.07912225,  0.00579498,
         0.0555276 , -0.14050171,  0.01473631, -0.01076074,  0.09559689,
         0.072106  , -0.09146464, -0.10413001, -0.

In [59]:
# data['DateTime'] = pd.to_datetime(data['DateTime'])
# last_datetime = data['DateTime'].iloc[-1]
# print(last_datetime)

predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
predict = prediction
print(predict)

#repeated_predict = np.tile(predict, (1440, 1))
#print(repeated_predict)

DatetimeIndex(['2024-05-11 12:10:00', '2024-05-11 12:20:00',
               '2024-05-11 12:30:00', '2024-05-11 12:40:00',
               '2024-05-11 12:50:00', '2024-05-11 13:00:00',
               '2024-05-11 13:10:00', '2024-05-11 13:20:00',
               '2024-05-11 13:30:00', '2024-05-11 13:40:00',
               ...
               '2024-05-13 10:30:00', '2024-05-13 10:40:00',
               '2024-05-13 10:50:00', '2024-05-13 11:00:00',
               '2024-05-13 11:10:00', '2024-05-13 11:20:00',
               '2024-05-13 11:30:00', '2024-05-13 11:40:00',
               '2024-05-13 11:50:00', '2024-05-13 12:00:00'],
              dtype='datetime64[ns]', length=288, freq='10T')
[[[0.51531243]
  [0.5972496 ]
  [0.6637672 ]
  [0.7157602 ]
  [0.7545136 ]
  [0.78252965]
  [0.8021493 ]
  [0.81489265]
  [0.8228141 ]
  [0.82640165]
  [0.82694864]
  [0.82599384]
  [0.82398164]
  [0.82131124]
  [0.8182978 ]
  [0.8151786 ]
  [0.8126606 ]
  [0.8105349 ]
  [0.80816144]
  [0.80580187]
  [0.804

In [60]:
import plotly.graph_objects as go

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'],
                         mode='lines', name='Actual Power Usage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=predict.flatten(),
                         mode='lines', name='Predicted Power Usage',
                         line=dict(color='red')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='Building south of No. 2 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()
