In [8]:
import json
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the raw backtest records from JSON.
# The encoding is pinned to UTF-8 because the payload carries non-ASCII text
# (the market_value strings end in a '亿' suffix) and the platform default
# encoding used by open() is not UTF-8 everywhere (notably on Windows).
with open('./full_data/2024_stock_backtest_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Data preprocessing: flatten the nested per-date records into one row per
# (date, stock) pair and convert the string-encoded numbers to floats.
#   - market_value: the '亿' suffix is stripped and the number multiplied by 1e8
#   - *_opening_increase: the '%' sign is stripped and the number divided by 100
#   - next_close_increase: passed to float() as-is (no '%' handling)
#   - limit_ups / limit_downs: copied through unchanged
stock_data = [
    {
        'date': day['date'],
        'code': quote['code'],
        'price': float(quote['price']),
        'market_value': float(quote['market_value'].replace('亿', '')) * 1e8,
        'limit_ups': quote['limit_ups'],
        'limit_downs': quote['limit_downs'],
        'current_opening_increase': float(quote['current_opening_increase'].replace('%', '')) / 100,
        'next_opening_increase': float(quote['next_opening_increase'].replace('%', '')) / 100,
        'next_close_increase': float(quote['next_close_increase']),
    }
    for day in data
    for quote in day['data']
]

df = pd.DataFrame(stock_data)

# Feature extraction.
# Rows of `df` interleave many stocks, so the row-to-row deltas are taken
# within each stock code; a plain diff() over the whole frame would subtract
# one stock's price from another's. Rows keep their original relative order
# inside each group (presumably chronological, following the input file --
# TODO confirm).
feature_columns = ['price', 'price_diff', 'price_pct_change', 'market_value', 'market_value_diff']
target_column = 'next_close_increase'

per_stock = df.groupby('code', sort=False)
df['price_diff'] = per_stock['price'].diff()
df['price_pct_change'] = per_stock['price'].pct_change()
df['market_value_diff'] = per_stock['market_value'].diff()

# The first observation of every stock has no predecessor (NaN deltas), and a
# zero previous price yields +/-inf. Such rows would turn the training loss
# into NaN, so they are excluded from the modelling sample. `df` itself keeps
# every row.
model_frame = (
    df[feature_columns + [target_column]]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
if len(model_frame) < 2:
    raise ValueError(
        'Not enough usable rows to build a train/test split: each stock code '
        'needs at least two records to derive the diff-based features.'
    )

# `next_close_increase` is already the forward-looking label of its own row
# (judging by the column name), so features and target are paired row by row
# rather than shifted against each other, which would attach another row's
# label to these features.
features = model_frame[feature_columns].to_numpy(dtype=float)
y = model_frame[target_column].reset_index(drop=True)

# Split the dataset first, then fit the scaler on the training rows only, so
# that test-set statistics do not leak into the training features.
# NOTE(review): the split is a random shuffle; a chronological split may be
# more appropriate for time-ordered data -- confirm the intent.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

# Data standardisation
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility: the full standardised feature matrix.
scaled_features = scaler.transform(features)
X = scaled_features

# Build the LSTM model: two stacked 50-unit LSTM layers, each followed by
# Dropout(0.2), and a single-unit dense output. The declared input shape is
# (number of columns in X_train, 1).
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1),
])

# Adam optimizer with mean-squared-error loss, then print the layer summary.
model.compile(optimizer='adam', loss='mse')
model.summary()

# Train the model.
# A trailing axis of length 1 is appended so the 2-D (rows, columns) matrices
# become 3-D arrays of shape (rows, columns, 1).
X_train_reshaped = np.expand_dims(X_train, axis=-1)
X_test_reshaped = np.expand_dims(X_test, axis=-1)

# The test split doubles as the validation set reported after each epoch.
model.fit(
    X_train_reshaped,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test_reshaped, y_test),
)

# Evaluate the model on the test split.
predictions = model.predict(X_test_reshaped)

# `predictions` comes back as a column of shape (n, 1) (the model ends in a
# single-unit Dense layer) while y_test is 1-D of length n. Subtracting them
# directly is not an element-wise comparison: broadcasting expands the
# operands to an (n, n) matrix (or pandas rejects the 2-D result), so both are
# flattened to 1-D before the error is computed.
squared_errors = (np.asarray(predictions).ravel() - np.asarray(y_test, dtype=float).ravel()) ** 2
print("模型评估：")
print("均方误差（MSE）：", np.mean(squared_errors))

# Save the trained model to 'stock_prediction_model.h5' in the working directory.
# NOTE(review): the '.h5' suffix presumably selects Keras' HDF5 format --
# confirm against the installed Keras version.
model.save('stock_prediction_model.h5')


AnalyzeExit: Terminating analyze.py execution