# 股票预测模型

本 notebook 用于逐只股票加载对应的预训练模型，并基于最新一个交易日的特征生成涨跌幅（pct_chg）预测结果。

In [25]:
# 导入必要的库
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from mamba import Mamba, MambaConfig
import glob
import os

## 模型定义

In [26]:
# Model hyper-parameters and runtime flags
class Args:
    """Container for the settings shared by the model and the prediction helpers."""

    def __init__(self):
        # Hidden width (used as the Mamba d_model) and number of Mamba layers
        self.hidden, self.layer = 16, 2
        # Whether a CUDA device is usable in this process
        self.cuda = torch.cuda.is_available()


args = Args()

# Model architecture
class Net(nn.Module):
    """Input projection -> Mamba stack -> linear head -> tanh.

    The hidden width and layer count are read from the module-level ``args``
    object (``args.hidden`` / ``args.layer``).
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        hidden_dim = args.hidden
        self.config = MambaConfig(d_model=hidden_dim, n_layers=args.layer)
        # Keep this order: the Sequential indexes its children 0..3, and a
        # state_dict loaded into this module addresses parameters by those indices.
        stages = [
            nn.Linear(in_dim, hidden_dim),
            Mamba(self.config),
            nn.Linear(hidden_dim, out_dim),
            nn.Tanh(),
        ]
        self.mamba = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the pipeline to ``x`` and return the output flattened to 1-D."""
        return self.mamba(x).flatten()

## 数据处理和预测函数

In [27]:
def handle_outliers(df, columns, n_sigmas=3):
    """Clip each listed column to ``mean +/- n_sigmas * std`` of that column.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame holding the columns to clip.
        columns: iterable of column names to process.
        n_sigmas: half-width of the allowed band, in standard deviations.

    Returns:
        ``df`` itself, with the listed columns clipped.
    """
    for name in columns:
        series = df[name]
        center = series.mean()
        spread = series.std()
        df[name] = series.clip(lower=center - n_sigmas * spread,
                               upper=center + n_sigmas * spread)
    return df

def predict_stock(model, data):
    """Run one forward pass in eval mode and return the first predicted value.

    The input tensor is moved to the device that holds the model's parameters,
    so the call works whether or not the caller has moved the model to the GPU.

    Args:
        model: a ``torch.nn.Module``; switched to eval mode as a side effect.
        data: input tensor accepted by ``model``.

    Returns:
        The first element of the flattened model output, as a NumPy scalar.

    Raises:
        IndexError: if the model output contains no elements.
    """
    model.eval()
    # Follow the model's actual device instead of a global CUDA flag: a model
    # left on the CPU on a CUDA-capable machine would otherwise receive a GPU
    # tensor and fail with a device mismatch.
    first_param = next(model.parameters(), None)
    with torch.no_grad():
        if first_param is not None:
            data = data.to(first_param.device)
        output = model(data)
    # .cpu() is a no-op for tensors that are already on the CPU
    return output.detach().cpu().numpy().flatten()[0]

## 加载预训练模型并进行预测

In [28]:
# Candidate input columns for the per-stock models. Columns that are entirely
# NaN for a given stock are filtered out before use, so the number of model
# inputs can differ between stocks.
# NOTE(review): judging by the names, the rows below group into daily quotes,
# turnover, valuation ratios, share capital / market value, moving averages,
# MACD / KDJ / RSI / Bollinger indicators and volume indicators; the `_bfq`
# suffix presumably marks indicators computed on unadjusted prices — confirm
# against the data source.
features = [
    'open', 'high', 'low', 'vol', 'amount',
    'turnover_rate', 'turnover_rate_f', 'volume_ratio',
    'pe', 'pe_ttm', 'pb', 'ps', 'ps_ttm',
    'total_share', 'float_share', 'free_share', 'total_mv', 'circ_mv',
    'ma_bfq_5', 'ma_bfq_10', 'ma_bfq_20', 'ma_bfq_30',
    'ema_bfq_5', 'ema_bfq_10', 'ema_bfq_20',
    'macd_dif_bfq', 'macd_dea_bfq', 'macd_bfq',
    'kdj_k_bfq', 'kdj_d_bfq',
    'rsi_bfq_6', 'rsi_bfq_12',
    'boll_upper_bfq', 'boll_mid_bfq', 'boll_lower_bfq',
    'vr_bfq', 'obv_bfq'
]

# Read every stock's CSV and predict with that stock's own pretrained model.
data_list = []
# glob returns paths in arbitrary order; sort so the result table is reproducible.
stock_files = sorted(glob.glob('stock/merged_stock data/*.csv'))

for stock_file in stock_files:
    # Stock code = file name without its extension ("000001.SZ.csv" -> "000001.SZ").
    ts_code = os.path.splitext(os.path.basename(stock_file))[0]

    # Load the history and order it chronologically so the last row is the latest day.
    data = pd.read_csv(stock_file)
    if data.empty:
        # Nothing to predict from; an empty input would otherwise fail inside the model.
        print(f'Skipping {ts_code}: {stock_file} contains no rows')
        continue
    data['trade_date'] = pd.to_datetime(data['trade_date'], format='%Y%m%d')
    data = data.sort_values('trade_date').reset_index(drop=True)

    # Remove columns that are not model inputs (tolerate files that lack some of them).
    data = data.drop(columns=['pre_close', 'change', 'pct_chg', 'close'], errors='ignore')

    # Ignore features that have no values at all for this stock.
    valid_features = [f for f in features if not data[f].isna().all()]

    # Clip outliers, then fill gaps forward and finally backward (for leading NaNs).
    data = handle_outliers(data, valid_features)
    for feature in valid_features:
        # fillna(method=...) is deprecated in pandas 2.x; ffill()/bfill() are equivalent.
        data[feature] = data[feature].ffill().bfill()

    # Model input: the most recent row, shaped (1, 1, n_features).
    test_data = data[valid_features].values[-1:, :]
    test_tensor = torch.from_numpy(test_data).float().unsqueeze(0)

    # Build the network and load this stock's weights.
    model = Net(len(valid_features), 1)
    clean_ts_code = ts_code.replace('\\', '').replace('/', '')
    model_path = os.path.join('models', f'{clean_ts_code}_model.pth')
    # map_location='cpu' lets checkpoints saved on a GPU load on CPU-only machines;
    # the model is moved to the GPU afterwards when one is available.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints,
    # and consider weights_only=True if the installed torch version supports it.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    if args.cuda:
        model = model.cuda()

    # Scale the model output by 100 to express it as a percentage.
    pred_pct_chg = predict_stock(model, test_tensor) * 100

    data_list.append({
        'ts_code': ts_code,
        'pct_chg': pred_pct_chg
    })

## 输出预测结果

In [30]:
# Show frames in full: no row/column truncation and no width limit
for display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(display_option, None)
# Render floats with six decimal places
pd.set_option('display.float_format', lambda value: '%.6f' % value)

# Build the result table from the collected per-stock predictions
result_df = pd.DataFrame(data_list)
print("预测结果：")
display(result_df)

# Write the predictions to predictions.csv in the working directory
result_df.to_csv('predictions.csv', index=False)

预测结果：


Unnamed: 0,ts_code,pct_chg
0,000001.SZ,0.081912
1,000002.SZ,2.035935
2,000063.SZ,1.128226
3,000100.SZ,0.778605
4,000157.SZ,-0.01346
5,000166.SZ,1.00561
6,000301.SZ,-0.093606
7,000333.SZ,0.041038
8,000338.SZ,-0.872489
9,000408.SZ,-0.183143
