In [93]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [94]:
import plotly.graph_objects as go
data = pd.read_csv("GeneratedCSV/Hightech_data.csv")
fig = go.Figure()

fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'], mode='lines', name='Power Usage'))

fig.update_layout(
    title='HighTech Center Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True 
)

fig.show()

In [95]:
data

Unnamed: 0,Building,DataValue,DateTime,DayOfWeek,Time,PowerUsage
0,하이테크센터,27871893,2024-04-23 13:20:00,1,13:20:00,0.000000
1,하이테크센터,27871938,2024-04-23 13:30:00,1,13:30:00,0.692308
2,하이테크센터,27871983,2024-04-23 13:40:00,1,13:40:00,0.692308
3,하이테크센터,27872029,2024-04-23 13:50:00,1,13:50:00,0.707692
4,하이테크센터,27872074,2024-04-23 14:00:00,1,14:00:00,0.692308
...,...,...,...,...,...,...
2757,하이테크센터,27975472,2024-05-12 16:50:00,6,16:50:00,0.784615
2758,하이테크센터,27975524,2024-05-12 17:00:00,6,17:00:00,0.800000
2759,하이테크센터,27975575,2024-05-12 17:10:00,6,17:10:00,0.784615
2760,하이테크센터,27975622,2024-05-12 17:20:00,6,17:20:00,0.723077


* 7일간의 전력 데이터 분석을 통해 미래 1일 동안의 전력 패턴을 예측하고 싶다.
* LSTM과 GRU는 3차원 배열의 입력값을 사용한다

1. Samples: 데이터의 크기 (data size), 원본 데이터를 window size에 따라 slicing 할 경우 생기는 데이터의 갯수
2. Time steps: 과거 몇개의 데이터를 볼 것인가를 나타낸다
3. Features: X의 차원, 쉽게 말해 X의 변수 갯수.(과거 X일 동안 미래 Y일 동안의 패턴 예측)


In [96]:
from sklearn.preprocessing import MinMaxScaler
# time_steps:넣을 데이터 (ex) 1008는 7일 
# for_periods:예측할 요일 (ex) 144는 1일

'''
x_train, y_train: 데이터
x_train: 모델이 입력으로 사용할 데이터
y_train: 모델이 예측해야하는 실제 값. x_train를 기반으로 y_train에 해당하는 미래값을 예측
x_test: 
'''
time_steps = 288
for_periods = 288

data['DateTime'] = pd.to_datetime(data['DateTime'])
print(len(data))
# training & test set 만들기
start_date = data.iloc[1]['DateTime']
end_date = data.iloc[len(data) - time_steps + 1]['DateTime']

print(end_date)

mask2_date = data.iloc[len(data) - (time_steps + 1 + 1008)]['DateTime']
print(mask2_date)

mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

sc = MinMaxScaler(feature_range=(0,1))

ts_train_scaled = data.loc[mask1]['PowerUsage'].values.reshape(-1, 1)
ts_test_scaled = data.loc[mask2]['PowerUsage'].values.reshape(-1, 1)
x_train = []
y_train = []

print(ts_test_scaled)
print(len(ts_test_scaled))

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i,0])
    y_train.append(ts_train_scaled[i:i+for_periods,0])

       
x_train, y_train = np.array(x_train), np.array(y_train)

# 3차원으로 재구성
'''
RNN, LSTM 모델에서는 Input 데이터가 [샘플 수, 시간 단계 수, 특성 수] 형태의 3차원 배열을 기대한다.
'''
# np.reshape(samples, time stemps, features)로 만듬
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# 두 배열을 연결
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


2762
2024-05-10 17:50:00
2024-05-03 17:30:00
[[0.69230769]
 [0.70769231]
 [0.69230769]
 ...
 [0.89230769]
 [0.87692308]
 [0.73846154]]
1011


In [97]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten


def LSTM_model(x_train, y_train, x_test, sc):
    model = Sequential()

    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')
    model.fit(x_train, y_train, epochs = 20, batch_size=32, verbose = 1)

    return model


In [98]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


model = LSTM_model(x_train, y_train, x_test, sc)
model.summary()

x_train shape: (1899, 288, 1)
y_train shape: (1899, 288, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 288, 50)           10400     
                                                                 
 lstm_7 (LSTM)               (None, 288, 50)           20200     
                                                                 
 time_distributed_3 (TimeDis  (None, 288, 1)           51        
 tributed)                                                       
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_______________________

In [99]:
weights = model.get_weights()
print(weights)
print(len(weights))

prediction = model.predict(x_test)
prediction_2d = prediction.reshape(-1, 1)

#print(prediction_2d)
# prediction 값을 원래 scale로 되돌리기
#prediction = sc.inverse_transform(prediction_2d)

[array([[ 0.06874845,  0.08381792, -0.02850658, -0.04504905, -0.11803374,
         0.11212078,  0.08655434, -0.06164212,  0.09695321, -0.1456441 ,
        -0.00918461, -0.12009452,  0.09701686, -0.04445362,  0.1231055 ,
         0.01418796, -0.15210338,  0.15155171,  0.09142014, -0.05838758,
         0.1392414 ,  0.02511172,  0.06883254, -0.1082454 , -0.15264359,
         0.11389635, -0.06684338, -0.08600629,  0.13982266,  0.09455523,
        -0.06975077,  0.16737041, -0.14972585, -0.03189581,  0.01099834,
         0.09065924, -0.13980852, -0.13524516,  0.0796246 , -0.1076582 ,
         0.05996698,  0.14338793, -0.02939758,  0.07212169,  0.09000085,
        -0.01987894, -0.09064892,  0.00959911,  0.00665551, -0.11062614,
         0.16146897,  0.04416268,  0.15476277, -0.03001093,  0.10615626,
         0.0305892 ,  0.13835678, -0.08820353,  0.0070667 , -0.11771746,
        -0.04454052, -0.04395683,  0.00562962,  0.13708708, -0.13959084,
         0.17213623, -0.02639751,  0.08276577,  0.

In [100]:
# data['DateTime'] = pd.to_datetime(data['DateTime'])
# last_datetime = data['DateTime'].iloc[-1]
# print(last_datetime)
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
predict = prediction
print(predict)

#repeated_predict = np.tile(predict, (1440, 1))
#print(repeated_predict)

DatetimeIndex(['2024-05-10 17:50:00', '2024-05-10 18:00:00',
               '2024-05-10 18:10:00', '2024-05-10 18:20:00',
               '2024-05-10 18:30:00', '2024-05-10 18:40:00',
               '2024-05-10 18:50:00', '2024-05-10 19:00:00',
               '2024-05-10 19:10:00', '2024-05-10 19:20:00',
               ...
               '2024-05-12 16:10:00', '2024-05-12 16:20:00',
               '2024-05-12 16:30:00', '2024-05-12 16:40:00',
               '2024-05-12 16:50:00', '2024-05-12 17:00:00',
               '2024-05-12 17:10:00', '2024-05-12 17:20:00',
               '2024-05-12 17:30:00', '2024-05-12 17:40:00'],
              dtype='datetime64[ns]', length=288, freq='10T')
[[[0.4203516 ]
  [0.48619592]
  [0.53745365]
  [0.575387  ]
  [0.6023388 ]
  [0.6205002 ]
  [0.63124335]
  [0.63711464]
  [0.63880587]
  [0.6376606 ]
  [0.6346946 ]
  [0.6306628 ]
  [0.6261127 ]
  [0.6208596 ]
  [0.61545885]
  [0.61026084]
  [0.60547197]
  [0.6006298 ]
  [0.59606564]
  [0.5913911 ]
  [0.587

In [101]:
import plotly.graph_objects as go

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'],
                         mode='lines', name='Actual Power Usage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=predict.flatten(),
                         mode='lines', name='Predicted Power Usage',
                         line=dict(color='red')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='HighTech Center Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()
