In [1]:
import psutil
import matplotlib.pyplot as plt
import pandas as pd
import time
import csv
import random
import numpy as np
from datetime import datetime, timedelta
import plotly.graph_objects as go


In [12]:
# 기존 CSV 파일 경로
input_file = "user3_battery_data.csv"
output_file = "user3_processed_data.csv"

# 데이터를 읽어서 전처리 수행
processed_data = []
with open(input_file, mode="r", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    prev_battery_percentage = None

    for row in reader:
        # 시간 파싱
        current_time = datetime.strptime(row["time"], "%Y-%m-%d %H:%M:%S")
        current_battery_percentage = float(row["Current battery percentage"])
        
        # 배터리 사용량 계산
        battery_usage = None
        if prev_battery_percentage is not None:
            battery_usage = round(prev_battery_percentage - current_battery_percentage, 2)
        
        # 요일 추가 및 배터리 사용량 기록
        processed_data.append({
            "time": row["time"],
            "weekday": current_time.weekday(),
            "CPU utilization percentage": row["CPU utilization percentage"],
            "GPU Utilization": row["GPU Utilization"],
            "Current battery percentage": row["Current battery percentage"],
            "Battery Status": row["Battery Status"],
            "Battery Usage": battery_usage
        })
        
        prev_battery_percentage = current_battery_percentage

# 전처리된 데이터를 새로운 CSV로 저장
with open(output_file, mode="w", newline="", encoding="utf-8") as file:
    fieldnames = [
        "time", "weekday", "CPU utilization percentage", "GPU Utilization",
        "Current battery percentage", "Battery Status", "Battery Usage"
    ]
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(processed_data)

print(f"Processed data has been saved to {output_file}.")

Processed data has been saved to user3_processed_data.csv.


In [14]:
# Read CSV for visualization
df = pd.read_csv("user3_processed_data.csv")

# Plot the data for visualization using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=df["time"], y=df["Battery Usage"],
                         mode='lines', name='Battery Usage',
                         line=dict(color='blue')))

fig.update_layout(
    title='Battery Usage Over Time',
    xaxis_title='Time',
    yaxis_title='Battery Usage',
    xaxis_rangeslider_visible=True
)

fig.show()

In [15]:
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input, GRU, Dense, LSTM, TimeDistributed
from keras.optimizers.legacy import SGD
from keras.optimizers.schedules import ExponentialDecay

def LSTM_model(x_train, y_train):
    model = Sequential()

    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))
    model.add(LSTM(units=50, return_sequences=True, activation='tanh'))
    model.add(TimeDistributed(Dense(units=1)))  # 각 시간 단계마다 독립적인 예측

    # Compile
    model.compile(optimizer=SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=False),
                  loss='mean_squared_error')
    model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)


    return model

def GRU_model(x_train, y_train):

    # 모델 정의
    model = Sequential()

    # GRU 레이어 추가
    model.add(GRU(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1), activation='tanh'))
    model.add(GRU(units=50, return_sequences=True, activation='tanh'))

    # 각 타임스텝별 Dense 레이어
    model.add(TimeDistributed(Dense(units=1)))

    # 모델 컴파일
    model.compile(optimizer='adam', loss='mean_squared_error')

    # 모델 학습
    model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)

    return model


2024-12-16 18:47:45.582785: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-16 18:47:45.602163: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-16 18:47:45.602183: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-16 18:47:45.602722: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-16 18:47:45.606023: I tensorflow/core/platform/cpu_feature_guar

In [16]:
from sklearn.preprocessing import MinMaxScaler

# CSV 데이터 읽기
data = pd.read_csv("user3_processed_data.csv")

# 'time' 컬럼을 datetime 형식으로 변환
data['time'] = pd.to_datetime(data['time'])

# 필요한 변수 초기화
time_steps = 288  # 입력 데이터 시퀀스 길이 (예: 하루)
for_periods = 288  # 예측할 시퀀스 길이 (예: 하루)

# Train/Test 분리
start_date = data['time'].iloc[1]
end_date = data['time'].iloc[len(data) - time_steps]

mask2_date = data['time'].iloc[len(data) - (time_steps*2)]
print(f"mask2_date: {mask2_date}")
print(f"start_date: {start_date}")
print(f"end_data: {end_date}")
mask1 = (data['time'] >= start_date) & (data['time'] <= end_date)
mask2 = (data['time'] >= mask2_date) & (data['time'] <= end_date)

ts_train = data.loc[mask1]
ts_test = data.loc[mask2]

# MinMaxScaler로 데이터 정규화
sc = MinMaxScaler(feature_range=(0, 1))

ts_train_scaled = sc.fit_transform(ts_train['Current battery percentage'].values.reshape(-1, 1))
ts_test_scaled = sc.transform(ts_test['Current battery percentage'].values.reshape(-1, 1))

# x_train, y_train 만들기
x_train = []
y_train = []

for i in range(time_steps, len(ts_train_scaled) - for_periods):
    x_train.append(ts_train_scaled[i-time_steps:i, 0])
    y_train.append(ts_train_scaled[i:i+for_periods, 0])

x_train, y_train = np.array(x_train), np.array(y_train)

# RNN/LSTM 입력 형태로 reshape
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
y_train = np.reshape(y_train, (y_train.shape[0], y_train.shape[1], 1))

# x_test 만들기
inputs = np.concatenate((ts_train_scaled[-time_steps:], ts_test_scaled[:for_periods]))
x_test = []

for i in range(time_steps, len(inputs) - for_periods + 1):
    x_test.append(inputs[i-time_steps:i])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}")


mask2_date: 2024-12-13 00:00:00
start_date: 2024-11-16 00:10:00
end_data: 2024-12-15 00:00:00
x_train shape: (3600, 288, 1)
y_train shape: (3600, 288, 1)
x_test shape: (1, 288, 1)


In [17]:
print("x_train shape:", x_train.shape)  # (샘플 수, time_steps, 특성 수)
print("y_train shape:", y_train.shape)  # (샘플 수, for_periods, 특성 수)


#model = LSTM_model(x_train, y_train)
model = GRU_model(x_train, y_train)
model.summary()

model.save("user3_model.h5")


x_train shape: (3600, 288, 1)
y_train shape: (3600, 288, 1)


2024-12-16 18:47:58.860056: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-16 18:47:58.860171: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-16 18:47:58.879472: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Epoch 1/20


2024-12-16 18:48:00.383901: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
2024-12-16 18:48:00.570941: I external/local_xla/xla/service/service.cc:168] XLA service 0x7c4f901b0ae0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-12-16 18:48:00.570964: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Ti SUPER, Compute Capability 8.9
2024-12-16 18:48:00.570967: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 4070 Ti SUPER, Compute Capability 8.9
2024-12-16 18:48:00.573933: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


  1/113 [..............................] - ETA: 2:42 - loss: 0.5639

I0000 00:00:1734342480.624268 2573855 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 288, 50)           7950      
                                                                 
 gru_1 (GRU)                 (None, 288, 50)           15300     
                                                                 
 time_distributed (TimeDist  (None, 288, 1)            51        
 ributed)                                                        
                                                                 
Total params: 23301 (91.02 KB)
Trainable params: 23301 (91.02 KB)
Non-trainable params: 0 (0.00 Byte)
________________________________________________________________


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



In [18]:
weights = model.get_weights()
print(weights)
print(len(weights))
print("x_test shape:", x_test.shape)
lstm_prediction = model.predict(x_test)
prediction_2d = lstm_prediction.reshape(-1, 1)

[array([[-0.27970535,  0.08919875, -0.00637354, -0.2777693 ,  0.02574401,
        -0.2010888 ,  0.14460444, -0.12603949, -0.17410918,  0.40725544,
         0.20876412, -0.2076625 , -0.01328799, -0.72060126,  0.18281071,
        -0.06056387,  0.02282603,  0.1247351 ,  0.25090247, -0.00416079,
        -0.2375379 ,  0.11992358,  0.16596286, -0.13423333, -0.29242542,
        -0.4967843 , -0.01159295,  0.06314733,  0.14637542, -0.06948071,
        -0.14781891, -0.16098051, -0.24599311,  0.15842775, -0.0689439 ,
        -0.01570408, -0.27580518,  0.48110533,  0.03186019, -0.25278255,
        -0.0397405 , -0.03877957, -0.19891252,  0.03376947, -0.4971785 ,
         0.1116739 ,  0.01891263,  0.48462614,  0.10159748,  0.06533156,
         0.26129544,  0.02504449, -0.06670053, -0.0138139 , -0.18756013,
         0.08330472, -0.16960302, -0.16489469,  0.00811455,  0.06369586,
        -0.11894035, -0.1664922 , -0.06745625,  0.14501314, -0.22567633,
         0.03354136, -0.15375824,  0.03474303, -0.

In [19]:
weights = model.get_weights()
print(weights)
print(len(weights))
print("x_test shape:", x_test.shape)
gru_prediction = model.predict(x_test)
prediction_2d = gru_prediction.reshape(-1, 1)

[array([[-0.27970535,  0.08919875, -0.00637354, -0.2777693 ,  0.02574401,
        -0.2010888 ,  0.14460444, -0.12603949, -0.17410918,  0.40725544,
         0.20876412, -0.2076625 , -0.01328799, -0.72060126,  0.18281071,
        -0.06056387,  0.02282603,  0.1247351 ,  0.25090247, -0.00416079,
        -0.2375379 ,  0.11992358,  0.16596286, -0.13423333, -0.29242542,
        -0.4967843 , -0.01159295,  0.06314733,  0.14637542, -0.06948071,
        -0.14781891, -0.16098051, -0.24599311,  0.15842775, -0.0689439 ,
        -0.01570408, -0.27580518,  0.48110533,  0.03186019, -0.25278255,
        -0.0397405 , -0.03877957, -0.19891252,  0.03376947, -0.4971785 ,
         0.1116739 ,  0.01891263,  0.48462614,  0.10159748,  0.06533156,
         0.26129544,  0.02504449, -0.06670053, -0.0138139 , -0.18756013,
         0.08330472, -0.16960302, -0.16489469,  0.00811455,  0.06369586,
        -0.11894035, -0.1664922 , -0.06745625,  0.14501314, -0.22567633,
         0.03354136, -0.15375824,  0.03474303, -0.

In [18]:
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
lstm_predict = lstm_prediction
lstm_predict = lstm_predict * 100
print(lstm_predict)


DatetimeIndex(['2024-12-15 00:10:00', '2024-12-15 00:20:00',
               '2024-12-15 00:30:00', '2024-12-15 00:40:00',
               '2024-12-15 00:50:00', '2024-12-15 01:00:00',
               '2024-12-15 01:10:00', '2024-12-15 01:20:00',
               '2024-12-15 01:30:00', '2024-12-15 01:40:00',
               ...
               '2024-12-16 22:30:00', '2024-12-16 22:40:00',
               '2024-12-16 22:50:00', '2024-12-16 23:00:00',
               '2024-12-16 23:10:00', '2024-12-16 23:20:00',
               '2024-12-16 23:30:00', '2024-12-16 23:40:00',
               '2024-12-16 23:50:00', '2024-12-17 00:00:00'],
              dtype='datetime64[ns]', length=288, freq='10min')
[[[58.773518]
  [64.839745]
  [69.59312 ]
  [72.89095 ]
  [75.12395 ]
  [76.54173 ]
  [77.548836]
  [78.15229 ]
  [78.548256]
  [78.77753 ]
  [78.84775 ]
  [78.82779 ]
  [78.46591 ]
  [77.95749 ]
  [77.424904]
  [76.85251 ]
  [76.44542 ]
  [75.96283 ]
  [75.641174]
  [75.282394]
  [74.97121 ]
  [74.76627 


'T' is deprecated and will be removed in a future version, please use 'min' instead.



In [20]:
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq='10T')
print(predicted_dates)
gru_predict = gru_prediction
gru_predict = gru_predict * 100
print(gru_predict)


DatetimeIndex(['2024-12-15 00:00:00', '2024-12-15 00:10:00',
               '2024-12-15 00:20:00', '2024-12-15 00:30:00',
               '2024-12-15 00:40:00', '2024-12-15 00:50:00',
               '2024-12-15 01:00:00', '2024-12-15 01:10:00',
               '2024-12-15 01:20:00', '2024-12-15 01:30:00',
               ...
               '2024-12-16 22:20:00', '2024-12-16 22:30:00',
               '2024-12-16 22:40:00', '2024-12-16 22:50:00',
               '2024-12-16 23:00:00', '2024-12-16 23:10:00',
               '2024-12-16 23:20:00', '2024-12-16 23:30:00',
               '2024-12-16 23:40:00', '2024-12-16 23:50:00'],
              dtype='datetime64[ns]', length=288, freq='10min')
[[[63.793217]
  [67.08024 ]
  [66.298294]
  [65.23226 ]
  [66.383286]
  [68.534615]
  [68.964355]
  [70.99896 ]
  [71.76047 ]
  [71.88418 ]
  [72.921265]
  [73.28814 ]
  [73.416985]
  [72.909966]
  [72.31703 ]
  [70.64196 ]
  [69.54031 ]
  [68.49955 ]
  [66.622925]
  [65.57193 ]
  [63.680614]
  [63.306572


'T' is deprecated and will be removed in a future version, please use 'min' instead.



In [21]:

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['time'], y=data['Current battery percentage'],
                         mode='lines', name='Actual battery percentage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=gru_predict.flatten(),
                         mode='lines', name='Predicted battery percentage',
                         line=dict(color='tomato')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='GRU battery percentage',
    xaxis_title='Time',
    yaxis_title='Current battery percentage',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()

In [19]:

# 그래프 객체 생성
fig = go.Figure()

# 기존 전력 사용량 데이터 추가
fig.add_trace(go.Scatter(x=data['time'], y=data['Current battery percentage'],
                         mode='lines', name='Actual battery percentage'))

# 예측 전력 사용량 데이터 추가 (여기서 수정된 부분)
fig.add_trace(go.Scatter(x=predicted_dates, y=lstm_predict.flatten(),
                         mode='lines', name='Predicted battery percentage',
                         line=dict(color='tomato')))  # 예측 데이터에는 다른 색상을 사용

# 레이아웃 업데이트
fig.update_layout(
    title='LSTM battery percentage',
    xaxis_title='Time',
    yaxis_title='Current battery percentage',
    xaxis_rangeslider_visible=True
)

# 그래프 표시
fig.show()

In [22]:
print(f"Length of predicted_dates: {len(predicted_dates)}")
print(f"Length of gru_predict: {len(gru_predict)}")


Length of predicted_dates: 288
Length of gru_predict: 1


In [23]:
import pandas as pd

# 기존 CSV 파일 경로
input_file = "user3_processed_data.csv"
output_file = "user3_predictions.csv"

# 기존 데이터를 읽어오기
data = pd.read_csv(input_file)

# 예측 데이터와 날짜 생성
predicted_dates = pd.date_range(start=end_date, periods=for_periods, freq="10T")
gru_predict = gru_prediction.squeeze() * 100  # 1차원으로 변환하고 값 조정

# 기존 데이터에 예측값을 추가할 DataFrame 생성
predicted_df = pd.DataFrame({
    "time": predicted_dates,
    "prediction": gru_predict
})

# 기존 데이터의 'time' 컬럼을 datetime 형식으로 변환
data["time"] = pd.to_datetime(data["time"])

# 예측값과 기존 데이터를 병합
final_data = pd.merge(data, predicted_df, on="time", how="outer")

# 최종 데이터를 새로운 CSV 파일로 저장
final_data.to_csv(output_file, index=False, encoding="utf-8")

print(f"Final data with predictions has been saved to {output_file}.")


Final data with predictions has been saved to user3_predictions.csv.



'T' is deprecated and will be removed in a future version, please use 'min' instead.

