In [1]:
import glob
import numpy as np
import pandas as pd
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
files = glob.glob("*csv")

In [3]:
dfs = []
for file in files:
    df = pd.read_csv(file, encoding="cp949")
    dfs.append(df)

In [4]:
df = pd.concat(dfs,ignore_index = True)
df = df.iloc[:,2:]
df.columns = ["times", "temp"]
df = df.sort_values("times").reset_index(drop=True)
df["times"] = pd.to_datetime(df["times"])
_df = pd.DataFrame({"times":pd.date_range(df["times"].min(),df["times"].max(), freq="h")})
df = pd.merge(df, _df,on="times",how="outer")

# df["temp"].fillna(method="bfill").fillna(method="ffill")
df["temp"] = df["temp"].ffill().bfill()
df = df.sort_values("times").reset_index(drop=True)

In [5]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))

In [6]:
scaler.fit(df.loc[df["times"].dt.year!=2025,["temp"]])

In [7]:
df.loc[df["times"].dt.year!=2025,"temp"] = scaler.transform(
    df.loc[df["times"].dt.year!=2025,["temp"]])
df.loc[df["times"].dt.year==2025,["temp"]] = scaler.transform(
    df.loc[df["times"].dt.year==2025,["temp"]])

In [8]:
n_lags = 24
n_forecast = 3

df_lag = df.copy()
for i in range(n_lags-1, 0, -1):
    df_lag[f'lag_{i}'] = df_lag['temp'].shift(i)
df_lag['lag_0'] = df_lag['temp']
for i in range(1, n_forecast + 1):
    df_lag[f'target_{i}h'] = df_lag['temp'].shift(-i)
df_rnn = df_lag.dropna().reset_index(drop=True)
df_rnn = df_rnn.drop("temp",axis=1)

In [9]:
tr_df = df_rnn[df_rnn["times"].dt.year!=2025]
te_df = df_rnn[df_rnn["times"].dt.year==2025]

In [10]:
tr_df, val_df = train_test_split(tr_df, test_size=0.2, random_state=42, shuffle=True)

In [11]:
tr_df = tr_df.reset_index(drop = True)
val_df = val_df.reset_index(drop = True)

In [69]:
class RNNDataset(torch.utils.data.Dataset):
    def __init__(self, df, x_idx, y_idx):
        self.df = df 
        self.x_idx = x_idx
        self.y_idx = y_idx
    def __len__(self):
        return self.df.shape[0]
    def __getitem__(self, idx):
        x = self.df.iloc[idx, self.x_idx].to_numpy(dtype=np.float32)
        y = self.df.iloc[idx, self.y_idx].to_numpy(dtype=np.float32)
        x = torch.tensor(x, dtype = torch.float32)
        y = torch.tensor(y, dtype = torch.float32)        
        return x, y

In [70]:
x_idx = range(1,25)
y_idx = range(25,28)
tr_dataset = RNNDataset(tr_df, x_idx, y_idx)
val_dataset = RNNDataset(val_df, x_idx, y_idx)

In [71]:
tr_loader = torch.utils.data.DataLoader(
    tr_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=32, shuffle=True)

In [72]:
x.shape, y.shape

(torch.Size([32, 24]), torch.Size([32, 3]))

In [73]:
x,y=next(iter(tr_loader))

In [84]:
import pytorch_lightning as pl

class RNNModel(pl.LightningModule):
    def __init__(
        self, lr= 0.001,
        input_dim= 1, hidden_dim = 64, 
        num_layers = 1,
        output_dim = 3
    ):
        super().__init__()
        self.rnn = nn.RNN(
            input_dim, hidden_dim, num_layers,
            batch_first = True
        )
        self.fc = nn.Linear(
            hidden_dim, output_dim)
        self.lr = lr
    def forward(self, x):
        out, _ = self.rnn(x) # batch, seq_len, hidden_dim
        out = out[:,-1 , :] # 마지막 시퀀스 hidden만 사용
        out = self.fc(out)
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x.unsqueeze(-1))
        loss = nn.functional.mse_loss(y_hat, y)
        self.log("train_loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x.unsqueeze(-1))
        loss = nn.functional.mse_loss(y_hat, y)
        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True)
        return loss
    def configure_optimizers(self):
        return torch.optim.Adam(
            self.parameters(), lr=self.lr)

In [85]:
model = RNNModel()
trainer = pl.Trainer(max_epochs = 20)
trainer.fit(model, tr_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type   | Params
--------------------------------
0 | rnn  | RNN    | 4.3 K 
1 | fc   | Linear | 195   
--------------------------------
4.5 K     Trainable params
0         Non-trainable params
4.5 K     Total params
0.018     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [97]:
model.eval()

RNNModel(
  (rnn): RNN(1, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)

In [100]:
te_x = torch.tensor(
    te_df.iloc[:,1:25].values, dtype=torch.float32).unsqueeze(-1)
te_x = te_x.to(torch.device("cuda:0"))
with torch.no_grad():
    pred = model(te_x)
pred = pred.detach().cpu().numpy()

In [101]:
y_true = scaler.inverse_transform(te_df.iloc[:,25:])
y_pred = scaler.inverse_transform(pred)

In [102]:
[mean_absolute_error(y_true[:,i], y_pred[:,i]) for i in range(0,n_forecast)]

[0.7615870522412056, 1.1109903781990271, 1.4804764174868952]

In [None]:
    # def __init__(self, input_dim=1, hidden_dim1=64, hidden_dim2=64, dense_dim=32, output_dim=3, lr=1e-3):
    #     super().__init__()
    #     # 2단 LSTM
    #     self.lstm1 = nn.LSTM(input_dim, hidden_dim1, batch_first=True)
    #     self.lstm2 = nn.LSTM(hidden_dim1, hidden_dim2, batch_first=True)
        
    #     # Fully connected layers
    #     self.fc1 = nn.Linear(hidden_dim2, dense_dim)
    #     self.fc2 = nn.Linear(dense_dim, output_dim)
    #     self.lr = lr

    # def forward(self, x):
    #     out, _ = self.lstm1(x)       # (batch, seq_len, hidden_dim1)
    #     out, _ = self.lstm2(out)     # (batch, seq_len, hidden_dim2)
    #     out = out[:, -1, :]           # 마지막 시퀀스 hidden
    #     out = F.relu(self.fc1(out))   # Dense 32 + ReLU
    #     out = self.fc2(out)           # Dense horizon
    #     return out