In [1]:
import pandas as pd
import numpy as np
import datetime
import os

from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

%matplotlib inline
check_and_make_directories([TRAINED_MODEL_DIR])
import itertools
import talib

In [30]:
df = pd.read_csv("A股.csv",usecols=lambda x: x != 'Unnamed: 0')
df.rename(columns={
    'time': 'date',
    'code': 'tic'
}, inplace=True)
df_cleaned = df.dropna(subset=['close'])
df_raw = df_cleaned

In [25]:
# 查询最早的时间
earliest_date = df_raw['date'].min()
print(f"最早的时间是: {earliest_date}")
# 查询最晚的时间
latest_date = df_raw['date'].max()
print(f"最晚的时间是: {latest_date}")

最早的时间是: 2005-01-04
最晚的时间是: 2025-09-10


In [26]:
TRAIN_START_DATE = '2005-01-01'
TRAIN_END_DATE = '2014-12-31'
TRADE_START_DATE = '2015-01-01'
TRADE_END_DATE = '2025-09-10'

In [None]:
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import talib

def calculate_macd_for_ticker(data):
    """
    为单个股票计算MACD指标
    :param data: 单个股票的 DataFrame 数据
    :return: 包含 MACD 列的 DataFrame
    """
    # 计算 MACD 指标
    macd, macd_signal, macd_histogram = talib.MACD(data['close'], 
                           fastperiod=12, 
                           slowperiod=26, 
                           signalperiod=9)
    
    # 将结果添加到数据中
    data['macd'] = macd
    data['macd_signal'] = macd_signal
    data['macd_histogram'] = macd_histogram
    return data

def calculate_macd_threaded(df):
    """
    多线程计算 MACD 指标
    :param df: 原始 DataFrame
    :return: 包含 MACD 列的 DataFrame
    """
    # 按股票分组并排序
    grouped = df.groupby('tic').apply(lambda x: x.sort_values('date'))
    
    # 使用线程池并行处理每个股票
    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(executor.map(calculate_macd_for_ticker, 
                                  [group for _, group in grouped]))
    
    # 合并所有结果
    result_df = pd.concat(results).reset_index(drop=True)
    return result_df

# 应用函数
df_raw = calculate_macd_threaded(df_raw)


In [32]:
df_raw.head()

Unnamed: 0,date,tic,open,close,high,low,volume,money,macd
0,2005-01-04,000001.XSHE,1.25,1.24,1.25,1.23,9264005.0,11465603.0,
1,2005-01-04,000882.XSHE,1.91,1.92,1.93,1.9,439106.0,839582.0,
2,2005-01-04,000537.XSHE,2.48,2.46,2.49,2.44,227832.0,560524.0,
3,2005-01-04,600538.XSHG,2.43,2.33,2.43,2.33,535624.0,1266614.0,
4,2005-01-04,600784.XSHG,1.62,1.58,1.62,1.56,1493072.0,2369522.0,


In [28]:
# 计算布林带指标
def calculate_bollinger_bands(group):
    # 确保数据按日期排序
    group = group.sort_values('date')
    
    # 计算布林带
    close_prices = group['close'].values
    upper, middle, lower = talib.BBANDS(close_prices, 
                                       timeperiod=20, 
                                       nbdevup=2, 
                                       nbdevdn=2, 
                                       matype=0)
    
    # 添加到组中
    group = group.copy()
    group['bb_upper'] = upper      # 布林带上轨
    group['bb_middle'] = middle    # 布林带中轨（20日移动平均线）
    group['bb_lower'] = lower      # 布林带下轨
    
    return group

# 按股票代码分组计算布林带
df_raw = df_raw.groupby('tic', group_keys=False).apply(calculate_bollinger_bands, include_groups=False).reset_index(drop=True)

print("布林带计算完成！")
print("新增的列:", ['bb_upper', 'bb_middle', 'bb_lower'])

KeyError: 'tic'