In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import RobustScaler # 使用 RobustScaler 更好地处理异常值
from sklearn.model_selection import TimeSeriesSplit # 保持时序交叉验证
import warnings
import gc # 导入垃圾回收模块
import os # 用于文件操作

warnings.filterwarnings('ignore')

In [2]:
# 设置随机种子
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    # 确保在 CUDA 可用时设置 CUDA 相关的种子
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

set_seed(42)

In [3]:

class StockDataProcessor:
    def __init__(self, window_size=20):
        self.window_size = window_size

    def _calculate_single_stock_features(self, df_single_stock):
        """为单个股票的DataFrame计算特征（假设已按日期排序）"""
        features = pd.DataFrame(index=df_single_stock.index)

        # 基础价格特征
        features['returns'] = df_single_stock['收盘'].pct_change()
        features['log_returns'] = np.log(df_single_stock['收盘'] / df_single_stock['收盘'].shift(1))

        # 价格相关特征
        features['high_low_ratio'] = df_single_stock['最高'] / df_single_stock['最低'].replace(0, np.nan)
        features['close_open_ratio'] = df_single_stock['收盘'] / df_single_stock['开盘'].replace(0, np.nan)

        # 成交量比率
        volume_ma = df_single_stock['成交量'].rolling(window=20, min_periods=1).mean()
        features['volume_ratio'] = df_single_stock['成交量'] / volume_ma.replace(0, np.nan)

        # RSI
        delta = df_single_stock['收盘'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14, min_periods=1).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14, min_periods=1).mean()
        rs = gain / loss.replace(0, np.nan)
        features['rsi'] = 100 - (100 / (1 + rs))

        # 移动平均
        for period in [5, 10, 20, 60]:
            ma = df_single_stock['收盘'].rolling(window=period, min_periods=1).mean()
            features[f'ma_{period}'] = ma
            features[f'ma_ratio_{period}'] = df_single_stock['收盘'] / ma.replace(0, np.nan)

        # 布林带
        ma20 = df_single_stock['收盘'].rolling(window=20, min_periods=1).mean()
        std20 = df_single_stock['收盘'].rolling(window=20, min_periods=1).std()
        features['bb_upper'] = ma20 + 2 * std20
        features['bb_lower'] = ma20 - 2 * std20
        bb_diff = features['bb_upper'] - features['bb_lower']
        features['bb_width'] = bb_diff / ma20.replace(0, np.nan)
        features['bb_position'] = ((df_single_stock['收盘'] - features['bb_lower']) /
                                   bb_diff.replace(0, np.nan))

        # MACD
        exp1 = df_single_stock['收盘'].ewm(span=12, adjust=False, min_periods=1).mean()
        exp2 = df_single_stock['收盘'].ewm(span=26, adjust=False, min_periods=1).mean()
        features['macd'] = exp1 - exp2
        features['macd_signal'] = features['macd'].ewm(span=9, adjust=False, min_periods=1).mean()
        features['macd_diff'] = features['macd'] - features['macd_signal']

        # OBV
        features['obv'] = (np.sign(df_single_stock['收盘'].diff().fillna(0)) * df_single_stock['成交量']).fillna(0).cumsum()
        features['volume_ma'] = df_single_stock['成交量'].rolling(window=20, min_periods=1).mean()

        # 波动率
        features['volatility'] = df_single_stock['收盘'].pct_change().rolling(window=20, min_periods=1).std()

        # 价格位置
        price_min = df_single_stock['最低'].rolling(window=20, min_periods=1).min()
        price_max = df_single_stock['最高'].rolling(window=20, min_periods=1).max()
        price_range = price_max - price_min
        features['price_position'] = ((df_single_stock['收盘'] - price_min) /
                                      price_range.replace(0, np.nan))

        # 新增特征: Stochastic Oscillator (%K and %D)
        low_14 = df_single_stock['最低'].rolling(window=14, min_periods=1).min()
        high_14 = df_single_stock['最高'].rolling(window=14, min_periods=1).max()
        stoch_k_numerator = df_single_stock['收盘'] - low_14
        stoch_k_denominator = (high_14 - low_14).replace(0, np.nan)
        features['stoch_k'] = 100 * (stoch_k_numerator / stoch_k_denominator)
        features['stoch_d'] = features['stoch_k'].rolling(window=3, min_periods=1).mean()

        # 新增特征: Rate of Change (ROC)
        for period in [10, 20]:
             features[f'roc_{period}'] = df_single_stock['收盘'].pct_change(periods=period) * 100

        features = features.replace([np.inf, -np.inf], np.nan)

        return features

In [4]:
 def calculate_all_features(self, df_all_stocks):
        """对所有股票应用特征计算（按股票分组并排序）"""
        original_index = df_all_stocks.index
        df_all_stocks_sorted = df_all_stocks.sort_values(['股票代码', '日期'])

        all_features_list = []
        for stock_code, group_df in df_all_stocks_sorted.groupby('股票代码'):
            single_stock_features = self._calculate_single_stock_features(group_df.copy())
            all_features_list.append(single_stock_features)

        if not all_features_list:
            if not df_all_stocks.empty:
                sample_cols_df = self._calculate_single_stock_features(df_all_stocks.iloc[:1].copy())
                return pd.DataFrame(columns=sample_cols_df.columns, index=original_index)
            return pd.DataFrame(index=original_index)

        final_features_df = pd.concat(all_features_list)
        final_features_df = final_features_df.reindex(original_index)
        return final_features_df

In [5]:
   def prepare_sequences(self, stock_data, features_df, max_sequences=None):
        """准备序列数据用于LSTM，确保特征与目标正确对齐，并支持内存限制"""
        X, y, dates, stocks = [], [], [], []

        # 预估特征数量，用于初始化空 NumPy 数组
        num_expected_features = features_df.shape[1] if features_df is not None and not features_df.empty else 0
        if num_expected_features == 0 and not stock_data.empty:
            print("警告: 计算的特征数量为0。请检查特征计算逻辑。")
            # 如果没有特征，则无法构建序列，直接返回空数组
            return np.array([]).reshape(0, self.window_size, 0), np.array([]), [], []

        sequence_count = 0 # 跟踪已生成的序列数量

        for stock_code in stock_data['股票代码'].unique():
            current_stock_data_slice = stock_data[stock_data['股票代码'] == stock_code]
            current_stock_features_slice = features_df.loc[current_stock_data_slice.index]

            temp_df = current_stock_data_slice[['日期', '收盘']].join(current_stock_features_slice)
            temp_df = temp_df.sort_values('日期').reset_index(drop=True)

            temp_df['future_return'] = temp_df['收盘'].shift(-1) / temp_df['收盘'] - 1
            temp_df['future_return'] = temp_df['future_return'].replace([np.inf, -np.inf], np.nan)

            feature_column_names = current_stock_features_slice.columns.tolist()

            for i in range(self.window_size - 1, len(temp_df) - 1):
                if max_sequences and sequence_count >= max_sequences:
                    break # 达到最大序列数，停止生成

                if pd.isna(temp_df['future_return'].iloc[i]):
                    continue

                feature_seq_start_idx = i - (self.window_size - 1)
                feature_seq_end_idx = i + 1

                feature_values_for_seq = temp_df[feature_column_names].iloc[feature_seq_start_idx:feature_seq_end_idx].values

                if feature_values_for_seq.shape[0] != self.window_size:
                    continue

                X.append(feature_values_for_seq.astype(np.float32)) # 立即转换为float32
                y.append(temp_df['future_return'].iloc[i].astype(np.float32)) # 立即转换为float32
                dates.append(temp_df['日期'].iloc[i])
                stocks.append(stock_code)
                sequence_count += 1

            if max_sequences and sequence_count >= max_sequences:
                break # 如果外层循环也达到限制，则停止

        if not X:
             return np.array([]).reshape(0, self.window_size, num_expected_features), np.array([]), [], []

        # 返回 NumPy 数组，类型已为 float32
        return np.array(X), np.array(y), dates, stocks

In [6]:
#======================== 模型部分 ========================

class AttentionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=3, dropout=0.3):
        super(AttentionLSTM, self).__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0, # 如果只有一层，dropout设为0
            bidirectional=True
        )

        self.attention = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1)
        )

        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        attention_weights = torch.softmax(self.attention(lstm_out), dim=1)
        context = torch.sum(attention_weights * lstm_out, dim=1)
        out = self.fc(context)
        return out


In [7]:
class StockDataset(Dataset):
    def __init__(self, X, y=None):
        # 确保X是np.array并且是float32类型
        self.X = X.astype(np.float32) if isinstance(X, np.ndarray) else np.array(X, dtype=np.float32)
        # 确保y是np.array并且是float32类型
        self.y = y.astype(np.float32) if y is not None and isinstance(y, np.ndarray) else (np.array(y, dtype=np.float32) if y is not None else None)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return torch.from_numpy(self.X[idx]), torch.tensor(self.y[idx]) # 使用from_numpy避免复制
        else:
            return torch.from_numpy(self.X[idx])

In [8]:
# ======================== 训练和预测 ========================

class StockPredictor:
    def __init__(self, window_size=20, hidden_size=128, num_layers=3, dropout=0.3):
        self.window_size = window_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = []
        self.scalers = []
        self.feature_cols = None # 用于存储训练时使用的特征列

    def train(self, train_data, num_epochs=50, batch_size=64, n_splits=5, max_sequences=None):
        processor = StockDataProcessor(self.window_size)

        print("计算技术指标特征...")
        features = processor.calculate_all_features(train_data)
        features = features.ffill().bfill()
        features = features.replace([np.inf, -np.inf], np.nan)
        features = features.fillna(0) # 最终填充0

        if features.empty:
            raise ValueError("特征计算结果为空，无法继续训练。")

        # 存储使用的特征列，供预测时使用
        self.feature_cols = features.columns.tolist()

        print(f"特征形状: {features.shape}")
        print(f"特征中缺失值数量: {features.isna().sum().sum()}")
        print(f"特征中无穷值检查: {np.isinf(features.values).sum()}")

        print("准备序列数据...")
        # 传递 max_sequences 参数
        X, y, _, _ = processor.prepare_sequences(train_data, features, max_sequences=max_sequences)

        # 清理features DataFrame内存
        del features
        gc.collect()
        torch.cuda.empty_cache()

        if X.size == 0 or len(X) == 0:
            raise ValueError("没有足够的数据用于训练，序列为空。")

        # 确保X中没有NaN或Inf (在prepare_sequences中已转换为float32并尝试处理，这里是最终检查)
        if np.isnan(X).any() or np.isinf(X).any():
            print("警告: 序列X中存在NaN或Inf，进行最终清理...")
            X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

        print(f"序列数量: {len(X)}, 每个序列长度: {X.shape[1]}, 特征数量: {X.shape[-1]}")

        # 使用 TimeSeriesSplit 进行交叉验证
        tscv = TimeSeriesSplit(n_splits=n_splits)

        for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
            print(f"\n训练第 {fold + 1}/{n_splits} 个模型...")

            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            # 内存优化：在每个折叠内仅保留当前折叠所需的数据
            # 由于 X, y 已经是 NumPy 数组，直接切片并不会复制大数据

            # 初始化并拟合 RobustScaler
            scaler = RobustScaler()
            # Reshape X_train to 2D for scaler: (num_samples * sequence_length, num_features)
            X_train_2d = X_train.reshape(-1, X_train.shape[-1])
            X_train_scaled_2d = scaler.fit_transform(X_train_2d)
            X_train_scaled = X_train_scaled_2d.reshape(X_train.shape)

            # Transform X_val
            X_val_2d = X_val.reshape(-1, X_val.shape[-1])
            X_val_scaled_2d = scaler.transform(X_val_2d)
            X_val_scaled = X_val_scaled_2d.reshape(X_val.shape)

            train_dataset = StockDataset(X_train_scaled, y_train)
            val_dataset = StockDataset(X_val_scaled, y_val)

            # 释放当前折叠中不需要的原始数据副本
            del X_train, X_val, y_train, y_val, X_train_scaled_2d, X_val_2d, X_val_scaled_2d
            gc.collect()
            torch.cuda.empty_cache()

            # DataLoader 中设置 num_workers=0 以避免多进程内存复制问题
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

            model = AttentionLSTM(
                input_size=X.shape[-1], # num_features
                hidden_size=self.hidden_size,
                num_layers=self.num_layers,
                dropout=self.dropout
            ).to(self.device)

            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.5, verbose=True)

            best_val_loss = float('inf')
            patience_counter = 0
            best_model_state = None

            for epoch in range(num_epochs):
                model.train()
                train_loss = 0
                for batch_X, batch_y in train_loader:
                    batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device).squeeze() # Squeeze target for MSELoss

                    optimizer.zero_grad()
                    outputs = model(batch_X).squeeze(-1) # Squeeze model output
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()
                    train_loss += loss.item()

                # 定期清理GPU内存
                torch.cuda.empty_cache()
                gc.collect()

                model.eval()
                val_loss = 0
                with torch.no_grad():
                    for batch_X, batch_y in val_loader:
                        batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device).squeeze()
                        outputs = model(batch_X).squeeze(-1)
                        val_loss += criterion(outputs, batch_y).item()

                avg_train_loss = train_loss / len(train_loader)
                avg_val_loss = val_loss / len(val_loader)

                scheduler.step(avg_val_loss)

                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    patience_counter = 0
                    best_model_state = model.state_dict()
                else:
                    patience_counter += 1

                if epoch % 5 == 0 or epoch == num_epochs - 1: # 每5个epoch打印一次
                    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}, LR: {optimizer.param_groups[0]['lr']:.7f}")

                if patience_counter >= 10: # 早停耐心值
                    print(f"早停 at epoch {epoch + 1}")
                    break

                # 每个epoch结束后再次清理GPU内存
                torch.cuda.empty_cache()
                gc.collect()

            if best_model_state is not None:
                model.load_state_dict(best_model_state)
                self.models.append(model)
                self.scalers.append(scaler)
            else:
                print(f"警告: 第 {fold + 1} 折未能成功训练模型。")

            # 清理当前折叠的模型和加载器
            del train_loader, val_loader, train_dataset, val_dataset, model
            gc.collect()
            torch.cuda.empty_cache()

        # 训练结束后，清理大块序列数据
        del X, y
        gc.collect()
        torch.cuda.empty_cache()


    def predict(self, test_data):
        if not self.models or not self.scalers or self.feature_cols is None:
            print("错误: 模型或缩放器未训练或特征列未设置。")
            return pd.DataFrame({'涨幅最大股票代码': [''] * 10, '涨幅最小股票代码': [''] * 10})

        processor = StockDataProcessor(self.window_size)

        print("为测试数据计算特征...")
        features = processor.calculate_all_features(test_data)
        features = features.ffill().bfill()
        features = features.replace([np.inf, -np.inf], np.nan)
        features = features.fillna(0) # 最终填充0

        if features.empty:
            print("错误: 测试数据的特征计算结果为空。")
            return pd.DataFrame({'涨幅最大股票代码': [''] * 10, '涨幅最小股票代码': [''] * 10})

        # 确保测试数据使用的特征列与训练时一致
        # 如果 test_data 缺少训练时的某些特征，需要填充或报错
        missing_features = [col for col in self.feature_cols if col not in features.columns]
        if missing_features:
            print(f"警告: 测试数据缺少训练时使用的特征: {missing_features}。将填充0。")
            for col in missing_features:
                features[col] = 0.0
        # 确保特征顺序一致
        features = features[self.feature_cols]

        last_prediction_input_date = test_data['日期'].max()
        print(f"将基于日期 {last_prediction_input_date} 的数据进行预测。")

        all_stock_codes = test_data['股票代码'].unique()

        X_predict_sequences = []
        predict_stock_order = []

        for stock_code in all_stock_codes:
            current_stock_data_slice = test_data[test_data['股票代码'] == stock_code]
            # 从全局 features 中获取当前股票的特征
            current_stock_features_slice = features.loc[current_stock_data_slice.index]

            temp_df = current_stock_data_slice[['日期']].join(current_stock_features_slice)
            temp_df = temp_df.sort_values('日期').reset_index(drop=True)

            if temp_df.empty or temp_df['日期'].iloc[-1] != last_prediction_input_date:
                continue
            if len(temp_df) < self.window_size:
                continue

            last_sequence = temp_df[self.feature_cols].iloc[-self.window_size:].values # 使用保存的特征列顺序

            if last_sequence.shape[0] != self.window_size:
                continue

            if np.isnan(last_sequence).any() or np.isinf(last_sequence).any():
                last_sequence = np.nan_to_num(last_sequence, nan=0.0, posinf=0.0, neginf=0.0)

            X_predict_sequences.append(last_sequence.astype(np.float32))
            predict_stock_order.append(stock_code)

        if not X_predict_sequences:
            print("没有为任何股票准备好有效的预测序列。")
            return pd.DataFrame({'涨幅最大股票代码': [''] * 10, '涨幅最小股票代码': [''] * 10})

        X_predict_array = np.array(X_predict_sequences) # (num_stocks_to_predict, window_size, num_features)

        ensemble_predictions = np.zeros(len(predict_stock_order))

        for model_idx, (model, scaler) in enumerate(zip(self.models, self.scalers)):
            X_pred_2d = X_predict_array.reshape(-1, X_predict_array.shape[-1])
            X_pred_scaled_2d = scaler.transform(X_pred_2d) # 使用对应的 scaler
            X_pred_scaled = X_pred_scaled_2d.reshape(X_predict_array.shape)

            X_tensor = torch.from_numpy(X_pred_scaled).to(self.device) # 使用 from_numpy

            model.eval()
            with torch.no_grad():
                current_model_preds = model(X_tensor).cpu().numpy().squeeze(-1)
            ensemble_predictions += current_model_preds

            # 预测后立即清理GPU内存
            torch.cuda.empty_cache()
            gc.collect()

        if self.models:
            ensemble_predictions /= len(self.models)

        predictions_map = {stock_code: pred for stock_code, pred in zip(predict_stock_order, ensemble_predictions)}

        valid_predictions = {k: v for k, v in predictions_map.items() if pd.notna(v) and np.isfinite(v)}
        sorted_stocks = sorted(valid_predictions.items(), key=lambda item: item[1], reverse=True)

        top_10 = [stock[0] for stock in sorted_stocks[:10]]
        bottom_10_sorted = sorted_stocks[-10:] # 取最后10个
        bottom_10 = [stock[0] for stock in bottom_10_sorted]

        while len(top_10) < 10:
            top_10.append('')
        while len(bottom_10) < 10:
            bottom_10.append('')

        result_df = pd.DataFrame({
            '涨幅最大股票代码': top_10,
            '涨幅最小股票代码': bottom_10
        })

        return result_df


# ======================== 主程序 ========================
def main():
    print("=== 开始股票预测模型训练 (内存优化版) ===")

    try:
        print("1. 读取训练数据...")
        # 即使数据量大，也尝试一次性读取，因为后续会通过 max_sequences 和分块处理
        # 比赛数据量对于 train.csv (2015-2025) 来说，如果直接读取，需要足够的内存
        # 如果还是爆内存，考虑像参考代码一样，先保存为 feather 或 parquet 格式，然后再分块读取。
        # 或者在 main 函数中也实现分块读取（但不计算特征）。
        # 这里为了简化，假设 train.csv 能够被一次性加载。
        train_data = pd.read_csv('/kaggle/input/2025bdc/train.csv', encoding='utf-8')
        test_data = pd.read_csv('/kaggle/input/2025bdc/test.csv', encoding='utf-8')

        # 检查文件是否存在，防止路径错误
        if train_data.empty or test_data.empty:
            raise FileNotFoundError("训练或测试数据文件可能为空或路径错误。")

        print(f"原始训练数据形状: {train_data.shape}")
        print(f"原始测试数据形状: {test_data.shape}")

        # 列名检查和映射
        required_columns = ['日期', '股票代码', '开盘', '最高', '最低', '收盘', '成交量']
        column_mapping = {
            '日期': ['date', 'Date', 'DATE', 'trade_date'], '股票代码': ['code', 'Code', 'CODE', 'stock_code', 'symbol', 'ts_code'],
            '开盘': ['open', 'Open', 'OPEN'], '最高': ['high', 'High', 'HIGH'],
            '最低': ['low', 'Low', 'LOW'], '收盘': ['close', 'Close', 'CLOSE'],
            '成交量': ['volume', 'Volume', 'VOLUME', 'vol']
        }
        for df in [train_data, test_data]:
            missing_cols_in_df = [col for col in required_columns if col not in df.columns]
            if missing_cols_in_df:
                for chinese_col, possible_names in column_mapping.items():
                    if chinese_col in missing_cols_in_df:
                        for eng_col in possible_names:
                            if eng_col in df.columns:
                                df.rename(columns={eng_col: chinese_col}, inplace=True)
                                break
            final_missing = [col for col in required_columns if col not in df.columns]
            if final_missing:
                raise ValueError(f"数据帧中仍缺少必要的列: {final_missing}，即使在尝试映射后。")

        # 数据预处理
        for df in [train_data, test_data]:
            df['日期'] = pd.to_datetime(df['日期'])
            numeric_cols_to_convert = ['开盘', '最高', '最低', '收盘', '成交量', '成交额', '换手率', '振幅', '涨跌额', '涨跌幅']
            for col in numeric_cols_to_convert:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            df.dropna(subset=['开盘', '最高', '最低', '收盘', '成交量'], inplace=True) # 只对核心列进行dropna
            if '股票代码' in df.columns:
                 df['股票代码'] = df['股票代码'].astype(str)

        print(f"清理后训练数据形状: {train_data.shape}")
        print(f"清理后测试数据形状: {test_data.shape}")

        if train_data.empty or test_data.empty:
            print("错误：预处理后数据为空！无法继续。")
            return

        print("2. 初始化预测器并训练模型...")
        predictor = StockPredictor(
            window_size=20,
            hidden_size=128,
            num_layers=3,
            dropout=0.3
        )

        # 训练模型，并设定 max_sequences 限制训练数据量
        # 这个参数非常关键，用于控制传入LSTM的序列数量，防止OOM
        # 根据你的显存大小调整这个值
        # 100000 序列可能在 8GB 或 16GB GPU 上是可行的，如果仍OOM，请降低
        predictor.train(
            train_data,
            num_epochs=50,
            batch_size=64,
            n_splits=5,
            max_sequences=300000 # 限制训练序列总数，防止内存溢出。请根据你的显存大小调整。
        )

        print("3. 生成预测结果...")
        result = predictor.predict(test_data)

        print("4. 保存结果...")
        result.to_csv('result.csv', index=False, encoding='utf-8')
        print("预测结果已保存到 result.csv")
        print("预测结果样本:")
        print(result.head())

        # 清理最终内存
        del train_data, test_data, result, predictor
        gc.collect()
        torch.cuda.empty_cache()

        print("\n=== 内存优化训练完成 ===")

    except Exception as e:
        print(f"训练或预测过程发生错误: {e}")
        import traceback
        traceback.print_exc() # 打印详细错误栈

        # 确保在出错时也尝试释放GPU内存
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

if __name__ == "__main__":
    main()

=== 开始股票预测模型训练 (内存优化版) ===
1. 读取训练数据...
训练或预测过程发生错误: [Errno 2] No such file or directory: '/kaggle/input/2025bdc/train.csv'


Traceback (most recent call last):
  File "C:\Users\骆鞠恒\AppData\Local\Temp\ipykernel_66300\3324082121.py", line 289, in main
    train_data = pd.read_csv('/kaggle/input/2025bdc/train.csv', encoding='utf-8')
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\programming\1111ANCONDAZ_END\Lib\site-packages\pandas\io\parsers\readers.py", line 912, in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\programming\1111ANCONDAZ_END\Lib\site-packages\pandas\io\parsers\readers.py", line 577, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\programming\1111ANCONDAZ_END\Lib\site-packages\pandas\io\parsers\readers.py", line 1407, in __init__
    self._engine = self._make_engine(f, self.engine)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\programming\1111ANCONDAZ_END\Lib\site-packages\pandas\io\parsers\