In [None]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

In [None]:
# Mount Google Drive at /content/drive (Colab-only); the CSV inputs and the
# output directories used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
SELECTED_CSV_PATH = "/content/drive/MyDrive/LSTMAE_PROJECT/random_selected_stocks_sets_nodup.csv"

# Each column of this CSV is one stock set (the original note describes it as
# 10 columns x 30 rows, named selected_stocks1~10).
selected_df = pd.read_csv(SELECTED_CSV_PATH)

# Build one list per column: NaN cells are dropped and every remaining entry is
# coerced to a whitespace-stripped string.
stock_sets = [
    [str(entry).strip() for entry in selected_df[column].dropna().tolist()]
    for column in selected_df.columns
]


In [None]:
# ✅ LSTM-AE definition
class LSTMAutoEncoder(torch.nn.Module):
    """LSTM autoencoder that reconstructs a batch-first input sequence.

    The encoder LSTM's final hidden state is projected to ``latent_dim``,
    projected back to ``hidden_dim``, repeated once per time step and fed to a
    decoder LSTM whose hidden size equals ``input_dim``; the decoder's output
    sequence is returned as the reconstruction.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of the encoder LSTM (and decoder input size).
        latent_dim: Size of the bottleneck vector.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        nn = torch.nn
        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which parameters are initialised.
        self.encoder = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.latent = nn.Linear(in_features=hidden_dim, out_features=latent_dim)
        self.decoder_input = nn.Linear(in_features=latent_dim, out_features=hidden_dim)
        self.decoder = nn.LSTM(input_size=hidden_dim, hidden_size=input_dim, batch_first=True)

    def forward(self, x):
        """Return the reconstruction of ``x``, with the same shape as ``x``."""
        n_steps = x.size(1)

        # encoder(x) -> (outputs, (h_n, c_n)); only the last layer's h_n is used.
        hidden_states = self.encoder(x)[1][0]
        code = self.latent(hidden_states[-1])

        # Expand the latent code back to hidden_dim and repeat it for every time step.
        step_input = self.decoder_input(code).unsqueeze(1)
        repeated = step_input.repeat(1, n_steps, 1)

        reconstruction = self.decoder(repeated)[0]
        return reconstruction

def create_sequences(data, window_size):
    """Slice ``data`` into every overlapping window of ``window_size`` rows.

    Windows start at each index ``i`` for which ``data[i:i + window_size]`` is a
    full window, i.e. ``i`` in ``0 .. len(data) - window_size`` inclusive. The
    previous implementation stopped one start index early and therefore dropped
    the last complete window (and returned nothing when
    ``len(data) == window_size``). Since each window is its own reconstruction
    target, that final window is valid input.

    Args:
        data: Indexable sequence/array whose first axis is time.
        window_size: Number of consecutive rows per window.

    Returns:
        ``np.ndarray`` stacking the windows along a new first axis; an empty
        array when ``data`` has fewer than ``window_size`` rows.
    """
    n_windows = len(data) - window_size + 1  # may be <= 0 -> range() is empty
    return np.array([data[start:start + window_size] for start in range(n_windows)])

# ✅ Environment / hyperparameters
# Feature columns fed to the model. The Korean column names translate to: close,
# change, change rate, open, high, low, volume, trading value, market cap,
# listed shares.
features = ['종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']
window_size = 30   # rows per input sequence
batch_size = 32    # training mini-batch size
n_epochs = 10      # training epochs per stock
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_path = "/content/drive/MyDrive/LSTMAE_PROJECT/filtered_kospi_data_300.csv"
weight_save_dir = "/content/drive/MyDrive/LSTMAE_PROJECT/ensemble_weights"
thresh_save_dir = "/content/drive/MyDrive/LSTMAE_PROJECT/ensemble_thresholds"
os.makedirs(weight_save_dir, exist_ok=True)
os.makedirs(thresh_save_dir, exist_ok=True)

# Seed the RNGs so weight initialisation and DataLoader shuffling (and hence the
# saved weights/thresholds) are repeatable across Restart & Run All.
# Exact reproducibility on GPU may additionally depend on backend settings.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# ✅ Load the full dataset
# NOTE(review): the saved cell output echoes the read_csv line as a warning
# source — presumably a pandas DtypeWarning (mixed-type column); consider
# passing an explicit dtype= for the affected column. TODO confirm.
df_all = pd.read_csv(data_path)
df_all['날짜'] = pd.to_datetime(df_all['날짜'])

# Model / threshold settings used by the per-stock training loop below.
HIDDEN_DIM = 64
LATENT_DIM = 16
THRESHOLD_K = 3  # threshold = mean + k * std of the reconstruction errors


def _build_sequences(stock_df):
    """Turn one stock's rows into scaled sliding-window sequences.

    The rows are sorted by date and reindexed to a daily calendar
    (``freq='D'``), so dates missing from the data become new rows whose
    feature values are linearly interpolated (then back/forward filled at the
    edges). Features are min-max scaled per stock before windowing.
    """
    stock_df = stock_df.sort_values('날짜').set_index('날짜')
    full_index = pd.date_range(stock_df.index.min(), stock_df.index.max(), freq='D')
    stock_df = stock_df.reindex(full_index)
    # .ffill()/.bfill() replace the deprecated fillna(method=...) calls that
    # flooded the cell output with warnings.
    stock_df[['종목코드', '종목명']] = stock_df[['종목코드', '종목명']].ffill()
    stock_df[features] = stock_df[features].interpolate(method='linear').bfill().ffill()

    # Normalisation (scaler is fit on this stock's full history)
    scaled = MinMaxScaler().fit_transform(stock_df[features].values)
    return create_sequences(scaled, window_size)


def _train_autoencoder(dataset):
    """Train a fresh LSTMAutoEncoder on ``dataset`` with MSE loss and return it."""
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    model = LSTMAutoEncoder(
        input_dim=len(features), hidden_dim=HIDDEN_DIM, latent_dim=LATENT_DIM
    ).to(device)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    model.train()
    for _ in range(n_epochs):
        for (x,) in loader:
            x = x.to(device)
            loss = criterion(model(x), x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    return model


def _reconstruction_errors(model, dataset):
    """Return the per-sequence mean squared reconstruction error as a 1-D array."""
    model.eval()
    chunks = []
    with torch.no_grad():
        for (x,) in DataLoader(dataset, batch_size=64, shuffle=False):
            x = x.to(device)
            per_sequence = torch.mean((x - model(x)) ** 2, dim=[1, 2])
            chunks.append(per_sequence.cpu().numpy())
    return np.concatenate(chunks)


# Stripping the name column is loop-invariant, so do it once instead of once per stock.
stock_names = df_all['종목명'].str.strip()

for set_idx, stock_list in enumerate(stock_sets, 1):
    weights_dict = {}
    thresholds_dict = {}

    for stock in tqdm(stock_list, desc=f"SET {set_idx}"):
        # Best-effort per stock: a failure is reported and the loop moves on.
        try:
            df = df_all[stock_names == stock.strip()].copy()
            if df.empty:
                print(f"[{stock}] 데이터 없음 - 스킵")
                continue

            sequences = _build_sequences(df)
            if len(sequences) < 10:
                print(f"[{stock}] 시퀀스 부족 - 스킵")
                continue

            dataset = TensorDataset(torch.tensor(sequences, dtype=torch.float32))

            # Training
            model = _train_autoencoder(dataset)

            # Store weights as CPU copies so the checkpoint can be loaded on a
            # CPU-only machine and no GPU memory is held per stock.
            weights_dict[stock] = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

            # Threshold from reconstruction errors. Note these are computed on
            # the same sequences the model was trained on, not a held-out split.
            errors = _reconstruction_errors(model, dataset)
            threshold = np.mean(errors) + THRESHOLD_K * np.std(errors)
            thresholds_dict[stock] = float(threshold)

        except Exception as e:
            print(f"[{stock}] 처리 실패: {e}")

    # Save everything for this set in one weights file and one thresholds CSV
    torch.save(weights_dict, f"{weight_save_dir}/ensemble_weights_set{set_idx}.pt")
    thresh_df = pd.DataFrame(list(thresholds_dict.items()), columns=['종목명', '임계값'])
    thresh_df.to_csv(f"{thresh_save_dir}/ensemble_thresholds_set{set_idx}.csv", index=False, encoding='utf-8-sig')

    # Report the number of stocks actually saved (skipped/failed ones are excluded).
    print(f"\n[SET {set_idx}] {len(weights_dict)}개 종목 모델 및 임계값 저장 완료.\n")


  df_all = pd.read_csv(data_path)
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[featu


[SET 1] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(me


[SET 2] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].


[SET 3] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[[

[중앙제지(1우B)] 시퀀스 부족 - 스킵


  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(me


[SET 4] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[[


[SET 5] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[[


[SET 6] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].


[SET 7] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[[


[SET 8] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(me


[SET 9] 30개 종목 모델 및 임계값 저장 완료.



  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].

[대신밸류리츠] 시퀀스 부족 - 스킵


  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[['종목코드', '종목명']] = df[['종목코드', '종목명']].fillna(method='ffill')
  df[features] = df[features].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
SET 10: 100%|██████████| 30/30 [04:00<00:00,  8.03s/it]


[SET 10] 30개 종목 모델 및 임계값 저장 완료.




