In [64]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [66]:
import plotly.graph_objects as go
data = pd.read_csv("GeneratedCSV/2_north.csv")
fig = go.Figure()

fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'], mode='lines', name='Power Usage'))
fig.update_layout(
    title='Building north of No. 2 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True 
)

fig.show()

In [67]:
data

Unnamed: 0,Building,DataValue,DateTime,DayOfWeek,Time,PowerUsage
0,2호북관,24335334,2024-04-23 13:20:00,1,13:20:00,0.000000
1,2호북관,24335385,2024-04-23 13:30:00,1,13:30:00,0.796875
2,2호북관,24335436,2024-04-23 13:40:00,1,13:40:00,0.796875
3,2호북관,24335488,2024-04-23 13:50:00,1,13:50:00,0.812500
4,2호북관,24335541,2024-04-23 14:00:00,1,14:00:00,0.828125
...,...,...,...,...,...,...
2899,2호북관,24471101,2024-05-13 16:30:00,0,16:30:00,0.953125
2900,2호북관,24471158,2024-05-13 16:40:00,0,16:40:00,0.890625
2901,2호북관,24471218,2024-05-13 16:50:00,0,16:50:00,0.937500
2902,2호북관,24471276,2024-05-13 17:00:00,0,17:00:00,0.906250


In [74]:
from sklearn.preprocessing import MinMaxScaler
# time_steps:넣을 데이터 (ex) 1008는 7일 
# for_periods:예측할 요일 (ex) 144는 1일

'''
x_train, y_train: 데이터
x_train: 모델이 입력으로 사용할 데이터
y_train: 모델이 예측해야하는 실제 값. x_train를 기반으로 y_train에 해당하는 미래값을 예측
x_test: 
'''
time_steps = 288
for_periods = 288

data['DateTime'] = pd.to_datetime(data['DateTime'])
print(len(data))
# training & test set 만들기
start_date = data.iloc[1]['DateTime']
end_date = data.iloc[len(data) - time_steps + 1]['DateTime']
print(start_date)
print(end_date)

mask2_date = data.iloc[len(data) - (time_steps + 1 + 1008)]['DateTime']
print(mask2_date)

mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

sc = MinMaxScaler(feature_range=(0,1))

ts_train_scaled = data.loc[mask1]['PowerUsage'].values.reshape(-1, 1)
ts_test_scaled = data.loc[mask2]['PowerUsage'].values.reshape(-1, 1)
x_train = []
y_train = []

print(ts_test_scaled)
print(len(ts_test_scaled))

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i,0])
    y_train.append(ts_train_scaled[i:i+for_periods,0])

       
x_train, y_train = np.array(x_train), np.array(y_train)

# 3차원으로 재구성
'''
RNN, LSTM 모델에서는 Input 데이터가 [샘플 수, 시간 단계 수, 특성 수] 형태의 3차원 배열을 기대한다.
'''
# np.reshape(samples, time stemps, features)로 만듬
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# 두 배열을 연결
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


2904
2024-04-23 13:30:00
2024-05-11 17:30:00
2024-05-04 17:10:00
[[0.625   ]
 [0.65625 ]
 [0.609375]
 ...
 [0.71875 ]
 [0.75    ]
 [0.71875 ]]
1011


In [69]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten


def LSTM_model(x_train, y_train):
    model = Sequential()

    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')
    model.fit(x_train, y_train, epochs = 20, batch_size=32, verbose = 1)

    return model


In [70]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


model = LSTM_model(x_train, y_train)
model.summary()

x_train shape: (2041, 288, 1)
y_train shape: (2041, 288, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 288, 50)           10400     
                                                                 
 lstm_13 (LSTM)              (None, 288, 50)           20200     
                                                                 
 time_distributed_6 (TimeDis  (None, 288, 1)           51        
 tributed)                                                       
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_______________________

In [71]:
weights = model.get_weights()
print(weights)
print(len(weights))

prediction = model.predict(x_test)
prediction_2d = prediction.reshape(-1, 1)

#print(prediction_2d)
# prediction 값을 원래 scale로 되돌리기
#prediction = sc.inverse_transform(prediction_2d)

[array([[ 1.03153512e-01, -3.88792939e-02,  8.86254162e-02,
         5.49483448e-02, -5.83625510e-02, -1.21370658e-01,
         1.09678991e-02, -2.00393535e-02,  1.60134882e-01,
        -5.13610281e-02, -2.86807809e-02, -1.07721247e-01,
         1.41398221e-01,  1.17348127e-01, -5.97809851e-02,
         8.21333975e-02,  1.38261184e-01,  3.82339135e-02,
         8.61863941e-02,  8.93841833e-02, -5.54152839e-02,
        -8.48657265e-02,  1.62263617e-01,  1.65406525e-01,
         1.67079613e-01,  1.49005741e-01, -1.49061931e-02,
        -1.43926561e-01,  1.98358763e-02, -1.59816165e-02,
        -1.18105389e-01,  1.35044336e-01, -1.35127455e-01,
        -1.49988890e-01,  5.77848293e-02,  1.20984018e-01,
        -7.70197511e-02, -2.66885180e-02,  1.06570572e-01,
        -1.10177293e-01, -1.06469259e-01,  7.16746300e-02,
         8.62410888e-02, -1.46226987e-01, -4.26228344e-02,
        -1.75591633e-02, -7.66009763e-02,  6.79248273e-02,
         1.19118243e-01,  1.27037540e-01, -1.58660278e-

In [72]:
# data['DateTime'] = pd.to_datetime(data['DateTime'])
# last_datetime = data['DateTime'].iloc[-1]
# print(last_datetime)
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
predict = prediction
print(predict)

#repeated_predict = np.tile(predict, (1440, 1))
#print(repeated_predict)

DatetimeIndex(['2024-05-11 17:30:00', '2024-05-11 17:40:00',
               '2024-05-11 17:50:00', '2024-05-11 18:00:00',
               '2024-05-11 18:10:00', '2024-05-11 18:20:00',
               '2024-05-11 18:30:00', '2024-05-11 18:40:00',
               '2024-05-11 18:50:00', '2024-05-11 19:00:00',
               ...
               '2024-05-13 15:50:00', '2024-05-13 16:00:00',
               '2024-05-13 16:10:00', '2024-05-13 16:20:00',
               '2024-05-13 16:30:00', '2024-05-13 16:40:00',
               '2024-05-13 16:50:00', '2024-05-13 17:00:00',
               '2024-05-13 17:10:00', '2024-05-13 17:20:00'],
              dtype='datetime64[ns]', length=288, freq='10T')
[[[0.54297245]
  [0.6200658 ]
  [0.67716813]
  [0.7174059 ]
  [0.74427325]
  [0.76141375]
  [0.77136195]
  [0.7747479 ]
  [0.77530533]
  [0.77401376]
  [0.7723592 ]
  [0.7692944 ]
  [0.76640046]
  [0.76366997]
  [0.76149595]
  [0.7592969 ]
  [0.7568449 ]
  [0.7551682 ]
  [0.7535702 ]
  [0.7517551 ]
  [0.749

In [73]:
import plotly.graph_objects as go

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'],
                         mode='lines', name='Actual Power Usage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=predict.flatten(),
                         mode='lines', name='Predicted Power Usage',
                         line=dict(color='red')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='Building north of No. 2 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()
