In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams

# 设置字体，解决中文乱码
rcParams['font.sans-serif'] = ['SimHei']
rcParams['axes.unicode_minus'] = False
# 配置路径
data_dir = 'E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图'
save_dir = os.path.join(data_dir, '结果图_预测')
os.makedirs(save_dir, exist_ok=True)


In [3]:
def evaluate(true, pred):
    rmse = np.sqrt(mean_squared_error(true, pred))
    mae = mean_absolute_error(true, pred)
    return rmse, mae

def forecast_arima(series, name):
    train, test = series[:-10], series[-10:]
    try:
        model = ARIMA(train, order=(5, 1, 2))
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=10)

        rmse, mae = evaluate(test, forecast)

        plt.figure(figsize=(10, 5))
        plt.plot(series, label='历史收盘价')
        plt.plot(forecast.index, forecast, label='ARIMA预测', color='red')
        plt.title(f'{name} - ARIMA预测')
        plt.xlabel('日期'); plt.ylabel('收盘价')
        plt.legend()
        plt.tight_layout()
        plt.savefig(f'{save_dir}/{name}_ARIMA预测.png')
        plt.close()

        return rmse, mae
    except Exception as e:
        print(f'[ARIMA] {name} 出错: {e}')
        return None, None

def forecast_prophet(series, name):
    prophet_df = series.reset_index()
    prophet_df.columns = ['ds', 'y']
    try:
        model = Prophet()
        model.fit(prophet_df[:-10])

        future = model.make_future_dataframe(periods=10)
        forecast = model.predict(future)

        rmse, mae = evaluate(prophet_df['y'][-10:], forecast['yhat'][-10:])

        fig = model.plot(forecast)
        plt.title(f'{name} - Prophet预测')
        plt.tight_layout()
        fig.savefig(f'{save_dir}/{name}_Prophet预测.png')
        plt.close()

        return rmse, mae
    except Exception as e:
        print(f'[Prophet] {name} 出错: {e}')
        return None, None


In [4]:
results = []

for file in os.listdir(data_dir):
    if file.endswith('_with_ta.csv'):
        path = os.path.join(data_dir, file)
        name = file.split('_')[0]

        try:
            df = pd.read_csv(path, parse_dates=['日期'])
            df = df.sort_values('日期')
            df.set_index('日期', inplace=True)
            series = df['收盘']

            print(f'正在处理：{name}')

            arima_rmse, arima_mae = forecast_arima(series, name)
            prophet_rmse, prophet_mae = forecast_prophet(series, name)

            results.append({
                '股票': name,
                'ARIMA_RMSE': arima_rmse,
                'ARIMA_MAE': arima_mae,
                'Prophet_RMSE': prophet_rmse,
                'Prophet_MAE': prophet_mae
            })
        except Exception as e:
            print(f'跳过 {file}: {e}')

# 保存所有评价指标
result_df = pd.DataFrame(results)
result_df[['ARIMA_RMSE', 'ARIMA_MAE', 'Prophet_RMSE', 'Prophet_MAE']] = result_df[['ARIMA_RMSE', 'ARIMA_MAE', 'Prophet_RMSE', 'Prophet_MAE']].round(3)
result_df.to_csv(os.path.join(save_dir, '预测结果评估指标.csv'), index=False)
result_df


正在处理：000333


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
23:51:33 - cmdstanpy - INFO - Chain [1] start processing
23:51:34 - cmdstanpy - INFO - Chain [1] done processing


正在处理：000858


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
23:51:36 - cmdstanpy - INFO - Chain [1] start processing
23:51:36 - cmdstanpy - INFO - Chain [1] done processing


正在处理：002415


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  return get_prediction_index(
23:51:38 - cmdstanpy - INFO - Chain [1] start processing
23:51:38 - cmdstanpy - INFO - Chain [1] done processing


正在处理：300750


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
23:51:40 - cmdstanpy - INFO - Chain [1] start processing
23:51:40 - cmdstanpy - INFO - Chain [1] done processing


正在处理：600519


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
23:51:42 - cmdstanpy - INFO - Chain [1] start processing
23:51:42 - cmdstanpy - INFO - Chain [1] done processing


正在处理：600893


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
23:51:43 - cmdstanpy - INFO - Chain [1] start processing
23:51:43 - cmdstanpy - INFO - Chain [1] done processing


正在处理：601318


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
23:51:45 - cmdstanpy - INFO - Chain [1] start processing
23:51:45 - cmdstanpy - INFO - Chain [1] done processing


正在处理：688981


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
23:51:46 - cmdstanpy - INFO - Chain [1] start processing
23:51:47 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,股票,ARIMA_RMSE,ARIMA_MAE,Prophet_RMSE,Prophet_MAE
0,333,1.47,1.252,4.766,4.611
1,858,4.334,3.934,6.361,5.626
2,2415,0.149,0.108,1.096,1.011
3,300750,8.525,6.722,7.638,6.52
4,600519,29.311,24.265,132.261,125.881
5,600893,0.63,0.585,1.123,1.001
6,601318,1.371,1.307,1.481,1.255
7,688981,1.697,1.539,20.862,20.789


In [5]:
import os
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from matplotlib import rcParams

# 中文字体设置
rcParams['font.sans-serif'] = ['SimHei']
rcParams['axes.unicode_minus'] = False

# 文件夹路径
folder = 'E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图'
output_folder = os.path.join(folder, 'lightgbm预测图')
os.makedirs(output_folder, exist_ok=True)

# 存放评估结果
results = []

# 遍历文件夹中所有带 _with_ta.csv 的文件
for filename in os.listdir(folder):
    if filename.endswith('_with_ta.csv'):
        filepath = os.path.join(folder, filename)
        try:
            # 读取数据
            df = pd.read_csv(filepath, parse_dates=['日期'])
            df = df.sort_values('日期')

            # 构造目标变量（次日收盘价）
            df['target'] = df['收盘'].shift(-1)

            # 构造特征
            features = ['MA20', 'MACD', 'MACD_signal', 'MACD_hist', 'RSI', 'BOLL_mid', 'BOLL_upper', 'BOLL_lower']
            df = df.dropna()

            X = df[features]
            y = df['target']

            # 划分训练/测试集
            train_size = int(len(df) * 0.8)
            X_train, X_test = X[:train_size], X[train_size:]
            y_train, y_test = y[:train_size], y[train_size:]
            dates_test = df['日期'][train_size:]

            # 模型训练与预测
            model = lgb.LGBMRegressor()
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # 评估指标
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            mae = mean_absolute_error(y_test, y_pred)

            # 提取股票代码和名称
            stock_code = filename.split('_')[0]
            stock_name = filename.split('_')[1] if '_' in filename else filename

            results.append({
                '股票代码': stock_code,
                '文件名': filename,
                'RMSE': round(rmse, 3),
                'MAE': round(mae, 3)
            })

            # 绘图
            plt.figure(figsize=(12, 6))
            plt.plot(dates_test, y_test.values, label='真实值')
            plt.plot(dates_test, y_pred, label='预测值')
            plt.title(f'{stock_code} 预测效果（LightGBM）')
            plt.xlabel('日期')
            plt.ylabel('收盘价')
            plt.legend()
            plt.grid(True)
            plt.tight_layout()

            # 保存图像
            output_path = os.path.join(output_folder, f'{stock_code}_预测图.png')
            plt.savefig(output_path)
            plt.close()

        except Exception as e:
            print(f"{filename} 处理失败: {e}")

# 输出评估结果表
results_df = pd.DataFrame(results)
print("预测结果评估：")
print(results_df)

# 可选：保存为 Excel/CSV 文件
results_df.to_csv(os.path.join(output_folder, 'LightGBM预测指标汇总.csv'), index=False, encoding='utf-8-sig')


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 942, number of used features: 8
[LightGBM] [Info] Start training from score 58.820234
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000074 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 942, number of used features: 8
[LightGBM] [Info] Start training from score 187.693705
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000060 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 9

In [6]:
import pandas as pd

# 路径设置
origin_path = "E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图/结果图_预测/预测结果评估指标.csv"
lightgbm_path = "E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图/lightgbm预测图/LightGBM预测指标汇总.csv"

# 读取两个文件
origin_df = pd.read_csv(origin_path)
lightgbm_df = pd.read_csv(lightgbm_path)

# 提取股票名称，方便 merge
lightgbm_df['股票'] = lightgbm_df['股票代码']

# 合并数据
merged_df = pd.merge(origin_df, lightgbm_df[['股票', 'RMSE', 'MAE']], on='股票', how='left')
merged_df.rename(columns={
    'RMSE': 'LightGBM_RMSE',
    'MAE': 'LightGBM_MAE'
}, inplace=True)

# 保存合并后的新文件
merged_df.to_csv("E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图/结果图_预测/预测结果评估指标_更新.csv", index=False, encoding='utf-8-sig')


In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# 读取合并后的评估指标文件
df = pd.read_csv(
    "E:/python数据处理/基于技术指标与时间序列建模的股票价格预测研究/股票数据/结果图/结果图_预测/预测结果评估指标_更新.csv"
)

# 设置中文支持
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

x = df['股票']
bar_width = 0.25
index = range(len(x))

# 保存图像的文件夹路径
save_folder = r"E:\python数据处理\基于技术指标与时间序列建模的股票价格预测研究\股票数据\结果图\预测对比图"
os.makedirs(save_folder, exist_ok=True)  # 文件夹不存在则创建

# RMSE对比图
plt.figure(figsize=(14,7))
plt.bar(index, df['ARIMA_RMSE'], bar_width, label='ARIMA_RMSE')
plt.bar([i + bar_width for i in index], df['Prophet_RMSE'], bar_width, label='Prophet_RMSE')
plt.bar([i + 2*bar_width for i in index], df['LightGBM_RMSE'], bar_width, label='LightGBM_RMSE')

plt.xticks([i + bar_width for i in index], x)
plt.ylabel('RMSE')
plt.title('ARIMA、Prophet与LightGBM模型RMSE对比')
plt.legend()
plt.tight_layout()

# 保存RMSE图
rmse_path = os.path.join(save_folder, '模型RMSE对比图.png')
plt.savefig(rmse_path)
plt.close()

# MAE对比图
plt.figure(figsize=(14,7))
plt.bar(index, df['ARIMA_MAE'], bar_width, label='ARIMA_MAE')
plt.bar([i + bar_width for i in index], df['Prophet_MAE'], bar_width, label='Prophet_MAE')
plt.bar([i + 2*bar_width for i in index], df['LightGBM_MAE'], bar_width, label='LightGBM_MAE')

plt.xticks([i + bar_width for i in index], x)
plt.ylabel('MAE')
plt.title('ARIMA、Prophet与LightGBM模型MAE对比')
plt.legend()
plt.tight_layout()

# 保存MAE图
mae_path = os.path.join(save_folder, '模型MAE对比图.png')
plt.savefig(mae_path)
plt.close()

print(f"对比图已保存到：\n{rmse_path}\n{mae_path}")


对比图已保存到：
E:\python数据处理\基于技术指标与时间序列建模的股票价格预测研究\股票数据\结果图\预测对比图\模型RMSE对比图.png
E:\python数据处理\基于技术指标与时间序列建模的股票价格预测研究\股票数据\结果图\预测对比图\模型MAE对比图.png
