In [3]:
import pandas as pd
import numpy as np

# 날짜 생성 (2020-01-01 ~ 2025-03-31, 하루 단위)
dates = pd.date_range(start="2020-01-01", end="2025-03-31", freq="D")

# 가상의 전력 사용량 데이터 (200~500 kWh 사이 난수)
usage = np.random.randint(200, 500, size=len(dates))

# 가상의 요금 데이터 (기본요금 + 사용량 기반 요금)
price = 5000 + usage * np.random.uniform(80, 120)  # kWh당 80~120원

# 데이터프레임 생성
df = pd.DataFrame({
    "date": dates,
    "usage": usage,
    "price": price.astype(int)  # 소수점 제거
})

# CSV 저장
file_path = "electricity_data.csv"
df.to_csv(file_path, index=False, encoding="utf-8-sig")

print(f"CSV 파일이 생성되었습니다: {file_path}")
print(df.head())


CSV 파일이 생성되었습니다: electricity_data.csv
        date  usage  price
0 2020-01-01    338  42298
1 2020-01-02    244  31925
2 2020-01-03    321  40422
3 2020-01-04    293  37332
4 2020-01-05    342  42739


In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# ----------------------------------------
# 1. CSV 데이터 불러오기
# ----------------------------------------
file_path = "electricity_data.csv"  # CSV 파일 경로
df = pd.read_csv(file_path, parse_dates=["date"])
df = pd.read_csv(file_path, encoding="utf-8")  # 또는 encoding="cp949"

# 결측치 제거
df = df.dropna()

# ----------------------------------------
# 2. 데이터 전처리
# ----------------------------------------
scaler = MinMaxScaler()
price_scaled = scaler.fit_transform(df["price"].values.reshape(-1, 1))

sequence_length = 10
X, y = [], []
for i in range(len(price_scaled) - sequence_length):
    X.append(price_scaled[i:i+sequence_length])
    y.append(price_scaled[i+sequence_length])
X, y = np.array(X), np.array(y)

# Train / Test split
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# ----------------------------------------
# 3. LSTM 모델 정의 & 학습
# ----------------------------------------
model = Sequential([
    LSTM(50, activation='relu', input_shape=(sequence_length, 1)),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=30, batch_size=16, verbose=1)

# ----------------------------------------
# 4. 예측
# ----------------------------------------
y_pred_scaled = model.predict(X_test)
y_pred = scaler.inverse_transform(y_pred_scaled)  # 스케일 되돌리기
y_test_real = scaler.inverse_transform(y_test)    # 실제 값도 되돌리기

# ----------------------------------------
# 5. 퍼센타일 구간 경계값 계산 (학습 데이터 기준)
# ----------------------------------------
boundaries = {
    "하위 25%": np.percentile(df["price"], 25),
    "하위 10%": np.percentile(df["price"], 10),
    "하위 5%" : np.percentile(df["price"], 5),
    "상위 25%": np.percentile(df["price"], 75),
    "상위 10%": np.percentile(df["price"], 90),
    "상위 5%" : np.percentile(df["price"], 95)
}

# 구간 분류 함수
def classify_price(price, bounds):
    if price >= bounds["상위 5%"]:
        return "상위 5%"
    elif price >= bounds["상위 10%"]:
        return "상위 10%"
    elif price >= bounds["상위 25%"]:
        return "상위 25%"
    elif price <= bounds["하위 5%"]:
        return "하위 5%"
    elif price <= bounds["하위 10%"]:
        return "하위 10%"
    elif price <= bounds["하위 25%"]:
        return "하위 25%"
    else:
        return "중간 구간"

# ----------------------------------------
# 6. 결과 출력
# ----------------------------------------
print("\n=== 예측 결과 샘플 ===")
for i in range(10):  # 예측 10개만 출력
    pred_price = y_pred[i][0]
    category = classify_price(pred_price, boundaries)
    print(f"예측 요금: {pred_price:,.0f}원 → {category}")


Epoch 1/30


  super().__init__(**kwargs)


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1931 
Epoch 2/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0879
Epoch 3/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0879
Epoch 4/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0864
Epoch 5/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0855
Epoch 6/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0861
Epoch 7/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0842
Epoch 8/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0858
Epoch 9/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0792
Epoch 10/30
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0834
Epoch 11/30
[1m96/

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# ----------------------------------------
# 1. CSV 데이터 불러오기
# ----------------------------------------
file_path = "electricity_data.csv"  # CSV 파일 경로

# CSV 읽기 (한 번만, 날짜 파싱, 인덱스 정렬)
df = pd.read_csv(file_path, parse_dates=["date"], encoding="utf-8")
df = df.sort_values("date").reset_index(drop=True)
df['price'] = pd.to_numeric(df['price'].astype(str).str.replace(',', ''), errors='coerce')
df = df.dropna(subset=['price'])

# 시퀀스 생성은 동일 (price_scaled은 나중에 train에서 fit)
sequence_length = 10
# ... (만들기 전에 price 배열 준비)
prices = df['price'].values.reshape(-1,1)

# 슬라이딩 윈도우
X_all, y_all = [], []
for i in range(len(prices) - sequence_length):
    X_all.append(prices[i:i+sequence_length])
    y_all.append(prices[i+sequence_length])
X_all = np.array(X_all).astype(float)   # shape (N, seq_len, 1)
y_all = np.array(y_all).astype(float)   # shape (N, 1)

# Train/test split (index 기준)
train_size = int(len(X_all) * 0.8)
X_train, X_test = X_all[:train_size], X_all[train_size:]
y_train, y_test = y_all[:train_size], y_all[train_size:]

# 스케일러는 train에서만 fit
scaler = MinMaxScaler()
X_train_flat = X_train.reshape(-1,1)   # fit하기 위한 형태 변환
scaler.fit(X_train_flat)
# transform
X_train_scaled = scaler.transform(X_train_flat).reshape(X_train.shape)
X_test_scaled  = scaler.transform(X_test.reshape(-1,1)).reshape(X_test.shape)
y_train_scaled = scaler.transform(y_train)
y_test_scaled  = scaler.transform(y_test)

# 모델 학습 (activation 기본값으로)
model = Sequential([
    LSTM(50, input_shape=(sequence_length,1)),  # activation 기본 tanh
    Dense(1)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=16, validation_split=0.1,
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)])

# # 예측 및 원복
# y_pred_scaled = model.predict(X_test_scaled)
# y_pred = scaler.inverse_transform(y_pred_scaled)
# y_test_real = scaler.inverse_transform(y_test_scaled)


# 퍼센타일 경계값도 train 기준으로 계산
boundaries = {
    "하위 25%": np.percentile(df["price"], 25),
    "하위 10%": np.percentile(df["price"], 10),
    "하위 5%" : np.percentile(df["price"], 5),
    "상위 25%": np.percentile(df["price"], 75),
    "상위 10%": np.percentile(df["price"], 90),
    "상위 5%" : np.percentile(df["price"], 95)
}

# 구간 분류 함수
def classify_price(price, bounds):
    if price >= bounds["상위 5%"]:
        return "상위 5%"
    elif price >= bounds["상위 10%"]:
        return "상위 10%"
    elif price >= bounds["상위 25%"]:
        return "상위 25%"
    elif price <= bounds["하위 5%"]:
        return "하위 5%"
    elif price <= bounds["하위 10%"]:
        return "하위 10%"
    elif price <= bounds["하위 25%"]:
        return "하위 25%"
    else:
        return "중간 구간"

# ----------------------------------------
# 6. 결과 출력
# ----------------------------------------
print("\n=== 예측 결과 샘플 ===")
for i in range(10):  # 예측 10개만 출력
    pred_price = y_pred[i][0]
    category = classify_price(pred_price, boundaries)
    print(f"예측 요금: {pred_price:,.0f}원 → {category}")


Epoch 1/50


  super().__init__(**kwargs)


[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1410 - mae: 0.3043 - val_loss: 0.0784 - val_mae: 0.2377
Epoch 2/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0854 - mae: 0.2508 - val_loss: 0.0774 - val_mae: 0.2361
Epoch 3/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0827 - mae: 0.2471 - val_loss: 0.0771 - val_mae: 0.2361
Epoch 4/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0819 - mae: 0.2451 - val_loss: 0.0783 - val_mae: 0.2344
Epoch 5/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0837 - mae: 0.2495 - val_loss: 0.0771 - val_mae: 0.2360
Epoch 6/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0876 - mae: 0.2549 - val_loss: 0.0782 - val_mae: 0.2343
Epoch 7/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0848 - mae: 0.251