In [1]:
# src/data_ingestion/downloader.py
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import numpy as np  
import datetime
import ta  # 技术分析库

In [2]:
# 定义下载数据的函数
def download_stock_data(tickers, start_date, end_date, auto_adjust=True, prepost=False):
    """
    Download price history with yfinance and append derived indicator columns.

    :param tickers: ticker symbol or list of ticker symbols (forwarded to ``yf.download``)
    :param start_date: start of the requested range (forwarded as ``start``)
    :param end_date: end of the requested range (forwarded as ``end``)
    :param auto_adjust: forwarded unchanged to ``yf.download``
    :param prepost: forwarded unchanged to ``yf.download``
    :return: a pandas DataFrame.
        * One ticker downloaded: flat columns (the downloaded price columns
          followed by ``Daily Return``, ``Log Return``, ``Range Pct``, ``MA10``,
          ``MA50``, ``Volatility`` and ``RSI``).
        * Several tickers downloaded: the same set of columns per ticker, under
          a two-level column index whose outer level is the ticker symbol.
    :raises KeyError: if the downloaded data lacks a ``Close``, ``High`` or
        ``Low`` column.
    """
    raw = yf.download(
        tickers=tickers,
        start=start_date,
        end=end_date,
        group_by='ticker',
        auto_adjust=auto_adjust,
        prepost=prepost,
        threads=True,   # download tickers in parallel
        progress=True   # show the progress bar
    )

    # Some yfinance versions hand back flat columns for a single ticker;
    # there is no ticker level to strip in that case.
    if not isinstance(raw.columns, pd.MultiIndex):
        return _add_indicators(raw)

    # With group_by='ticker' the ticker is the outer column level. Prefer the
    # level name when it is present, otherwise fall back to the position.
    ticker_level = 'Ticker' if 'Ticker' in raw.columns.names else 0
    symbols = raw.columns.get_level_values(ticker_level).unique()

    if len(symbols) == 1:
        # Single ticker: flatten to plain column names.
        flat = raw.copy()
        flat.columns = flat.columns.droplevel(ticker_level)
        return _add_indicators(flat)

    # Several tickers: dropping the ticker level would leave duplicate column
    # names (one 'Close' per ticker), so indicators are computed per ticker and
    # the results are stitched back together under the ticker symbol.
    per_ticker = {
        symbol: _add_indicators(raw.xs(symbol, axis=1, level=ticker_level))
        for symbol in symbols
    }
    return pd.concat(per_ticker, axis=1, names=['Ticker'])


def _add_indicators(prices):
    """Return a copy of a single-ticker price frame with indicator columns appended."""
    out = prices.copy()
    out.columns = out.columns.rename(None)  # drop any leftover column-axis label

    close = out['Close']
    out['Daily Return'] = close.pct_change() * 100                 # day-over-day change, in percent
    out['Log Return'] = np.log(close / close.shift(1))             # natural-log return
    out['Range Pct'] = (out['High'] - out['Low']) / out['Low'] * 100  # intraday high-low range, in percent of the low
    out['MA10'] = close.rolling(10).mean()                         # 10-row simple moving average
    out['MA50'] = close.rolling(50).mean()                         # 50-row simple moving average
    # 20-row rolling std of log returns, scaled by sqrt(252)
    out['Volatility'] = out['Log Return'].rolling(20).std() * np.sqrt(252)
    out['RSI'] = ta.momentum.RSIIndicator(close).rsi()             # RSI with the ta library's default settings

    return out

In [3]:
# Fetch PDD price data for 2025-01-01 .. 2025-04-01 and preview the first rows
ticker = ['PDD']
start_date = '2025-01-01'
end_date = '2025-04-01'

df = download_stock_data(
    tickers=ticker,
    start_date=start_date,
    end_date=end_date,
)
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Daily Return,Log Return,Range Pct,MA10,MA50,Volatility,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-01-02,95.589996,98.459999,95.5,96.889999,4898800,,,3.099475,,,,
2025-01-03,97.989998,98.480003,95.919998,96.82,4668900,-0.072247,-0.000723,2.668896,,,,
2025-01-06,101.410004,103.300003,98.440002,99.019997,10435500,2.272255,0.022468,4.937018,,,,
2025-01-07,99.129997,100.779999,98.629997,100.620003,6759000,1.615841,0.016029,2.179866,,,,
2025-01-08,99.059998,101.349998,98.599998,100.32,6102600,-0.298154,-0.002986,2.789047,,,,
