In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [37]:
import plotly.graph_objects as go
data = pd.read_csv("GeneratedCSV/7building.csv")
fig = go.Figure()

fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'], mode='lines', name='Power Usage'))

fig.update_layout(
    title='The No.7th Building Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True 
)

fig.show()

In [4]:
data

Unnamed: 0,Building,DataValue,DateTime,DayOfWeek,Time,PowerUsage
0,7호관(학생회관),20439289,2024-04-23 13:20:00,1,13:20:00,0.000000
1,7호관(학생회관),20439325,2024-04-23 13:30:00,1,13:30:00,0.878049
2,7호관(학생회관),20439358,2024-04-23 13:40:00,1,13:40:00,0.804878
3,7호관(학생회관),20439393,2024-04-23 13:50:00,1,13:50:00,0.853659
4,7호관(학생회관),20439427,2024-04-23 14:00:00,1,14:00:00,0.829268
...,...,...,...,...,...,...
2866,7호관(학생회관),20498817,2024-05-13 11:00:00,0,11:00:00,0.829268
2867,7호관(학생회관),20498852,2024-05-13 11:10:00,0,11:10:00,0.853659
2868,7호관(학생회관),20498886,2024-05-13 11:20:00,0,11:20:00,0.829268
2869,7호관(학생회관),20498921,2024-05-13 11:30:00,0,11:30:00,0.853659


In [5]:
from sklearn.preprocessing import MinMaxScaler
# time_steps:넣을 데이터 (ex) 1008는 7일 
# for_periods:예측할 요일 (ex) 144는 1일

'''
x_train, y_train: 데이터
x_train: 모델이 입력으로 사용할 데이터
y_train: 모델이 예측해야하는 실제 값. x_train를 기반으로 y_train에 해당하는 미래값을 예측
x_test: 
'''
time_steps = 288
for_periods = 288

data['DateTime'] = pd.to_datetime(data['DateTime'])
print(len(data))
# training & test set 만들기
start_date = data.iloc[1]['DateTime']
end_date = data.iloc[len(data) - time_steps + 1]['DateTime']

print(end_date)

mask2_date = data.iloc[len(data) - (time_steps + 1 + 1008)]['DateTime']
print(mask2_date)

mask1 = (data['DateTime'] >= start_date) & (data['DateTime'] <= end_date)  
mask2 = (data['DateTime'] >= mask2_date) & (data['DateTime'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

sc = MinMaxScaler(feature_range=(0,1))

ts_train_scaled = data.loc[mask1]['PowerUsage'].values.reshape(-1, 1)
ts_test_scaled = data.loc[mask2]['PowerUsage'].values.reshape(-1, 1)
x_train = []
y_train = []

print(ts_test_scaled)
print(len(ts_test_scaled))

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i,0])
    y_train.append(ts_train_scaled[i:i+for_periods,0])

       
x_train, y_train = np.array(x_train), np.array(y_train)

# 3차원으로 재구성
'''
RNN, LSTM 모델에서는 Input 데이터가 [샘플 수, 시간 단계 수, 특성 수] 형태의 3차원 배열을 기대한다.
'''
# np.reshape(samples, time stemps, features)로 만듬
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# 두 배열을 연결
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))


2871
2024-05-11 12:00:00
2024-05-04 11:40:00
[[0.41463415]
 [0.43902439]
 [0.43902439]
 ...
 [0.3902439 ]
 [0.3902439 ]
 [0.43902439]]
1011


In [6]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, GRU, LSTM
from keras.optimizers import SGD
from keras.layers import TimeDistributed, Flatten


def LSTM_model(x_train, y_train):
    model = Sequential()

    '''
    unit: LSTM layer의 뉴런 수를 의미한다. 이는 레이어의 출력 차원을 정의하며, 모델의 용량(capacity)를 결정하는 parameter
    return_sequences=True: LSTM layer가 모든 시간 단계에서 출력 시퀀스를 반환할지, 아니면 마지막 시간 단계만 반환할지 결정한다.
    input_shape: 모델의 입력 차원
    activation: LSTM sell의 activation function. 주로 tahn or softmax
    '''
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))

    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))

    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    #model.add(Dense(units=1)) # 출력 layer 하나의 예측값 60

    # Compile
    model.compile(optimizer = SGD(learning_rate = 0.01, decay = 1e-7,
                                 momentum=0.9, nesterov=False), loss = 'mean_squared_error')
    model.fit(x_train, y_train, epochs = 20, batch_size=32, verbose = 1)

    return model


2024-05-13 19:26:33.841440: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-13 19:26:34.134552: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


model = LSTM_model(x_train, y_train)
model.summary()

x_train shape: (2008, 288, 1)
y_train shape: (2008, 288, 1)


2024-05-13 19:26:40.201611: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-13 19:26:40.259296: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-13 19:26:40.259357: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-13 19:26:40.260786: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

Epoch 1/20


2024-05-13 19:26:48.238518: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8700
2024-05-13 19:26:50.275219: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 288, 50)           10400     
                                                                 
 lstm_1 (LSTM)               (None, 288, 50)           20200     
                                                                 
 time_distributed (TimeDistr  (None, 288, 1)           51        
 ibuted)                                                         
                                                                 
Total params: 30,651
Trainable params: 30,651
Non-trainable params: 0
_________________________________________________________________


In [38]:
weights = model.get_weights()
print(weights)
print(len(weights))
print("x_test shape:", x_test.shape)
prediction = model.predict(x_test)
prediction_2d = prediction.reshape(-1, 1)

#print(prediction_2d)
# prediction 값을 원래 scale로 되돌리기
#prediction = sc.inverse_transform(prediction_2d)

[array([[ 0.03747724,  0.07503697,  0.13848801,  0.16013402, -0.03777966,
         0.11737185,  0.13590263,  0.09044472,  0.17396396,  0.11884862,
         0.10240501,  0.09205077, -0.10111532, -0.03087678,  0.10439843,
         0.04987404,  0.042782  , -0.09314657,  0.13267358,  0.12292118,
        -0.06110562,  0.0772635 ,  0.09386431,  0.08686042,  0.07278594,
         0.04880419,  0.12478005, -0.08422376, -0.11393694,  0.00444561,
         0.1396447 , -0.0097809 , -0.0411467 , -0.03514161,  0.12122352,
        -0.16772734, -0.04275801, -0.09547864, -0.01063798,  0.12604776,
        -0.00024767,  0.06858584,  0.12294105,  0.14265233,  0.03771618,
         0.1281582 ,  0.03816607,  0.00281942,  0.05258762,  0.08117504,
         0.16845848, -0.1339624 , -0.0685796 , -0.04717528, -0.09099311,
         0.06162306,  0.02077119, -0.08394047,  0.13512035,  0.10301469,
        -0.16003555,  0.10753001, -0.07147847,  0.1544901 ,  0.05308432,
         0.14006612, -0.12015325, -0.03931939, -0.

In [39]:
# data['DateTime'] = pd.to_datetime(data['DateTime'])
# last_datetime = data['DateTime'].iloc[-1]
# print(last_datetime)

predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
predict = prediction
print(predict)

#repeated_predict = np.tile(predict, (1440, 1))
#print(repeated_predict)

DatetimeIndex(['2024-05-11 12:00:00', '2024-05-11 12:10:00',
               '2024-05-11 12:20:00', '2024-05-11 12:30:00',
               '2024-05-11 12:40:00', '2024-05-11 12:50:00',
               '2024-05-11 13:00:00', '2024-05-11 13:10:00',
               '2024-05-11 13:20:00', '2024-05-11 13:30:00',
               ...
               '2024-05-13 10:20:00', '2024-05-13 10:30:00',
               '2024-05-13 10:40:00', '2024-05-13 10:50:00',
               '2024-05-13 11:00:00', '2024-05-13 11:10:00',
               '2024-05-13 11:20:00', '2024-05-13 11:30:00',
               '2024-05-13 11:40:00', '2024-05-13 11:50:00'],
              dtype='datetime64[ns]', length=288, freq='10T')
[[[0.38939252]
  [0.45740366]
  [0.5135108 ]
  [0.557993  ]
  [0.5926502 ]
  [0.61853886]
  [0.6377268 ]
  [0.6511661 ]
  [0.6591877 ]
  [0.66174245]
  [0.6614826 ]
  [0.6592729 ]
  [0.6551175 ]
  [0.6499851 ]
  [0.64520645]
  [0.6407491 ]
  [0.6359191 ]
  [0.6333866 ]
  [0.63130736]
  [0.62961376]
  [0.627

In [46]:
import plotly.graph_objects as go

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['DateTime'], y=data['PowerUsage'],
                         mode='lines', name='Actual Power Usage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=predict.flatten(),
                         mode='lines', name='Predicted Power Usage',
                         line=dict(color='tomato')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='Building south of No. 5 Power Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Power Usage (W)',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()
