In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
from sklearn.preprocessing import MinMaxScaler
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch

In [2]:
# gpu 설정
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

Using device: cpu


In [3]:
# LSTM Autoencoder 설계
class LSTMAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features):
        super().__init__()
        self.seq_len = seq_len
        self.n_features = n_features

        self.encoder1 = nn.LSTM(input_size=n_features, hidden_size=16, batch_first=True)
        self.encoder2 = nn.LSTM(input_size=16, hidden_size=8, batch_first=True)

        self.decoder_input = nn.Linear(8, 8)
        self.decoder1 = nn.LSTM(input_size=8, hidden_size=8, batch_first=True)
        self.decoder2 = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
        self.output_layer = nn.Linear(16, n_features)

    def forward(self, x):
        x, _ = self.encoder1(x)
        x, (h_n, _) = self.encoder2(x)
        h_last = h_n[-1].unsqueeze(1).repeat(1, self.seq_len, 1)
        x = self.decoder_input(h_last)
        x, _ = self.decoder1(x)
        x, _ = self.decoder2(x)
        x = self.output_layer(x)
        return x

In [None]:
# data 셋 생성1
# class SequenceDataset(Dataset):
#     def __init__(self, data):
#         self.data = data  # 이미 (n_seq, time_step, feature) 형태
#     def __len__(self):
#         return len(self.data)
#     def __getitem__(self, idx):
#         x = self.data[idx]
#         return torch.from_numpy(x).float(), torch.from_numpy(x).float()


# data 셋 생성2
class SequenceDataset(Dataset):
    def __init__(self, data, seq_len):
        self.data = data
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data) - self.seq_len + 1

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.seq_len]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(x, dtype=torch.float32)

In [5]:
# Load & preprocess
def data_load(path):
    df = pd.read_csv(path)
    df['dates'] = pd.to_datetime(df['dates'])
    df.sort_values('dates', inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    return df

def data_sclaer(df,col):

    df_scale = df.copy()

    sclaer = MinMaxScaler()
    
    df_scale[col] = sclaer.fit_transform(df_scale[[col]])


    return df_scale, sclaer

# 학습용 데이터 shape 맞추기
def data_shape(data,time_steps,col_list):
    '''
    data,time_steps,col_list
    
    return data(shape(row,time_steps,feature))
    '''
    data_list = []

    for i in tqdm(range(len(data)-time_steps+1)):
        data_list.append(data.loc[i:(i+time_steps-1),col_list].values)

    data_list = np.array(data_list)

    print(data_list.shape)

    return data_list

In [None]:
# 파라미터 정의
time_step = 120
batch = 32
EPOCHS = 50
feature = 3
train_col = ['rn', 'vl', 'wl']

In [7]:
# 데이터 불러오기
gwangjoo_train_df = data_load('inter_data/2920010001045020.csv')
changwon_train_df = data_load('inter_data/4812110001018020.csv')

# 데이터 sclaer

gwangjoo_train_data,g_rn_sclaer = data_sclaer(gwangjoo_train_df,'rn')
gwangjoo_train_data,g_vl_sclaer = data_sclaer(gwangjoo_train_data,'vl')
gwangjoo_train_data,g_wl_sclaer = data_sclaer(gwangjoo_train_data,'wl')
changwon_train_data,c_rn_sclaer = data_sclaer(changwon_train_df,'rn')
changwon_train_data,c_vl_sclaer = data_sclaer(changwon_train_data,'vl')
changwon_train_data,c_wl_sclaer = data_sclaer(changwon_train_data,'wl')

# # 데이터 shape
gwangjoo_train_data = data_shape(gwangjoo_train_data,time_step,train_col)
changwon_train_data = data_shape(changwon_train_data,time_step,train_col)

  0%|          | 1223/394500 [00:11<55:57, 117.13it/s]  

: 

In [None]:
train_dataset = SequenceDataset(gwangjoo_train_data)
train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True,num_workers=0)

In [None]:
# 모델 정의
model = LSTMAutoencoder(seq_len=time_step, n_features=feature).to(DEVICE)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
best_loss = float('inf')  # 초기값 설정
save_dir = "torch_rv_60"
os.makedirs(save_dir, exist_ok=True)  # 저장 폴더 없으면 생성

In [16]:
# 모델 학습
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0

    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {avg_loss:.6f}")

    # ✅ Best loss 기준 저장
    if avg_loss < best_loss:
        best_loss = avg_loss
        save_path = os.path.join(save_dir, f"best_model_epoch{epoch+1}_loss{avg_loss:.6f}.pt")
        torch.save(model.state_dict(), save_path)
        print(f"Best model saved at epoch {epoch+1} with loss {avg_loss:.6f}")

Epoch 1/50 | Loss: 0.007571
Best model saved at epoch 1 with loss 0.007571
Epoch 2/50 | Loss: 0.005274
Best model saved at epoch 2 with loss 0.005274
Epoch 3/50 | Loss: 0.002758
Best model saved at epoch 3 with loss 0.002758
Epoch 4/50 | Loss: 0.002519
Best model saved at epoch 4 with loss 0.002519
Epoch 5/50 | Loss: 0.002427
Best model saved at epoch 5 with loss 0.002427
Epoch 6/50 | Loss: 0.002352
Best model saved at epoch 6 with loss 0.002352
Epoch 7/50 | Loss: 0.002305
Best model saved at epoch 7 with loss 0.002305
Epoch 8/50 | Loss: 0.002257
Best model saved at epoch 8 with loss 0.002257
Epoch 9/50 | Loss: 0.002220
Best model saved at epoch 9 with loss 0.002220
Epoch 10/50 | Loss: 0.002179
Best model saved at epoch 10 with loss 0.002179
Epoch 11/50 | Loss: 0.002135
Best model saved at epoch 11 with loss 0.002135
Epoch 12/50 | Loss: 0.002111
Best model saved at epoch 12 with loss 0.002111
Epoch 13/50 | Loss: 0.002073
Best model saved at epoch 13 with loss 0.002073
Epoch 14/50 | Los

In [None]:
# 1. Dataset & DataLoader for changwon
fine_dataset = SequenceDataset(changwon_train_data)
fine_loader = DataLoader(fine_dataset, batch_size=batch, shuffle=True)

In [18]:
# 4. 추가 학습 파라미터
EPOCHS_FINE = 50
best_loss_fine = float('inf')
fine_save_dir = "torch_rvl_60"
os.makedirs(fine_save_dir, exist_ok=True)

# 5. Fine-tuning loop
for epoch in range(EPOCHS_FINE):
    model.train()
    epoch_loss = 0

    for x, y in fine_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(fine_loader)
    print(f"[FineTune Epoch {epoch+1}/{EPOCHS_FINE}] Loss: {avg_loss:.6f}")

    # ✅ best weight 저장
    if avg_loss < best_loss_fine:
        best_loss_fine = avg_loss
        save_path = os.path.join(fine_save_dir, f"finetune_changwon_best_epoch{epoch+1}_loss{avg_loss:.6f}.pt")
        torch.save(model.state_dict(), save_path)
        print(f"Fine-tuned model saved at epoch {epoch+1} with loss {avg_loss:.6f}")


[FineTune Epoch 1/50] Loss: 0.001653
Fine-tuned model saved at epoch 1 with loss 0.001653
[FineTune Epoch 2/50] Loss: 0.001577
Fine-tuned model saved at epoch 2 with loss 0.001577
[FineTune Epoch 3/50] Loss: 0.001555
Fine-tuned model saved at epoch 3 with loss 0.001555
[FineTune Epoch 4/50] Loss: 0.001546
Fine-tuned model saved at epoch 4 with loss 0.001546
[FineTune Epoch 5/50] Loss: 0.001534
Fine-tuned model saved at epoch 5 with loss 0.001534
[FineTune Epoch 6/50] Loss: 0.001526
Fine-tuned model saved at epoch 6 with loss 0.001526
[FineTune Epoch 7/50] Loss: 0.001518
Fine-tuned model saved at epoch 7 with loss 0.001518
[FineTune Epoch 8/50] Loss: 0.001512
Fine-tuned model saved at epoch 8 with loss 0.001512
[FineTune Epoch 9/50] Loss: 0.001508
Fine-tuned model saved at epoch 9 with loss 0.001508
[FineTune Epoch 10/50] Loss: 0.001504
Fine-tuned model saved at epoch 10 with loss 0.001504
[FineTune Epoch 11/50] Loss: 0.001500
Fine-tuned model saved at epoch 11 with loss 0.001500
[FineT