In [1]:
import os
import pandas as pd
import akshare as ak
import glob
import json
from datetime import datetime
from utilities.get_stock_data import get_stock_market_data, get_industry_stock_mapping_data

In [2]:
# Market data frame used by the rest of the notebook: rows are looked up by the
# "代码" column, and fields such as "名称", "总市值", "流通市值", "市盈率-动态",
# "市净率", "60日涨跌幅" and "年初至今涨跌幅" are read from it.
stock_zh_a_spot_em_df = get_stock_market_data()
# Stock-to-industry mapping: later cells read the "行业" value of the row whose
# "代码" matches a holding.
industry_stock_mapping_df = get_industry_stock_mapping_data()

In [3]:
def validate_stock_name(stock_code, stock_name, df):
    """Check that ``stock_name`` is the name listed for ``stock_code`` in ``df``.

    Args:
        stock_code: Code matched against the "代码" column of ``df``.
        stock_name: Name expected in the "名称" column of the matching row.
        df: DataFrame providing the "代码" and "名称" columns.

    Returns:
        None when the first matching row carries ``stock_name``.

    Raises:
        ValueError: If no row matches ``stock_code`` (or one of the two
            columns is missing), or if the listed name differs from
            ``stock_name``.
    """
    try:
        listed_names = df.loc[df["代码"] == stock_code, "名称"]
        listed_name = listed_names.values[0]
    except (IndexError, KeyError):
        raise ValueError(f"Stock code {stock_code} not found")

    if listed_name == stock_name:
        return
    raise ValueError(
        f"Stock name mismatch for {stock_code}: {stock_name} != {listed_name}"
    )

In [4]:
# TODO: Concurrent processing of multiple stocks in an industry
def stock_analysis(industry_name, stock_code, stock_name, days=29):
    """Collect valuation and fund-flow metrics for a single stock.

    Reads the stock's row from the module-level ``stock_zh_a_spot_em_df``
    frame and fetches its fund-flow history through
    ``ak.stock_individual_fund_flow``.

    Args:
        industry_name: Industry label; passed through to the result.
        stock_code: Stock code string. Its first character selects the
            market: "6" -> "sh", "0" or "3" -> "sz", anything else -> "bj".
        stock_name: Stock name; used in log lines and passed through.
        days: Number of most recent fund-flow rows to analyse. Expected to be
            a positive integer.

    Returns:
        ``None`` when fewer than ``days`` fund-flow rows are available.
        Otherwise a list, in this order: industry name, stock code, stock
        name, total market value, circulating market value, dynamic P/E, P/B,
        main net inflow summed over the window, price change (%) between the
        first and last close of the window, 60-day change, year-to-date
        change. The two market values and the net inflow are divided by 1e8,
        i.e. expressed in 亿 (hundreds of millions); the net inflow and the
        price change are rounded to two decimals.

    Raises:
        IndexError: If ``stock_code`` has no row in ``stock_zh_a_spot_em_df``.
    """
    print(f"Processing {stock_name} ({stock_code}) in {industry_name} industry...")

    # Determine the market based on the stock code
    if stock_code.startswith("6"):
        market = "sh"
    elif stock_code.startswith(("0", "3")):
        market = "sz"
    else:
        market = "bj"

    # Filter the market frame once instead of once per extracted field.
    spot_rows = stock_zh_a_spot_em_df[stock_zh_a_spot_em_df["代码"] == stock_code]
    if spot_rows.empty:
        # Same exception type the bare `.values[0]` lookup raised, but with a
        # message that names the offending code.
        raise IndexError(f"Stock code {stock_code} not found in market data")

    def _spot_value(column):
        # First matching row's value for `column`, keeping the column dtype.
        return spot_rows[column].values[0]

    # 1e8 converts to 亿 (hundreds of millions), matching the report headers.
    stock_total_market_value = _spot_value("总市值") / 1e8
    stock_circulating_market_value = _spot_value("流通市值") / 1e8
    stock_pe_dynamic = _spot_value("市盈率-动态")
    stock_pb = _spot_value("市净率")
    stock_60d_change = _spot_value("60日涨跌幅")
    stock_ytd_change = _spot_value("年初至今涨跌幅")

    # Extract the historical data of the stock
    fund_flow_history = ak.stock_individual_fund_flow(stock=stock_code, market=market)
    if len(fund_flow_history) < days:
        print(
            f"Skipping {stock_name} ({stock_code}) due to insufficient data for the last {days} days."
        )
        return None
    fund_flow_window = fund_flow_history.iloc[-days:]

    # Main net inflow over the window, in 亿
    stock_main_net_flow = round(fund_flow_window["主力净流入-净额"].sum() / 1e8, 2)

    # Price change from the first to the last close inside the window
    first_close = fund_flow_window.iloc[0]["收盘价"]
    last_close = fund_flow_window.iloc[-1]["收盘价"]
    stock_price_change_percentage = round(
        (last_close - first_close) / first_close * 100, 2
    )

    return [
        industry_name,
        stock_code,
        stock_name,
        stock_total_market_value,
        stock_circulating_market_value,
        stock_pe_dynamic,
        stock_pb,
        stock_main_net_flow,
        stock_price_change_percentage,
        stock_60d_change,
        stock_ytd_change,
    ]

In [5]:
DIR_PATH = "data/holding_stocks"
days = 29

# Column layout of the holdings report: the account name followed by the
# values returned by stock_analysis, in the same order.
REPORT_COLUMNS = [
    "账户",
    "行业",
    "代码",
    "名称",
    "总市值(亿)",
    "流通市值(亿)",
    "市盈率-动态",
    "市净率",
    f"{days}日主力净流入-总净额(亿)",
    f"{days}日涨跌幅(%)",
    "60日涨跌幅(%)",
    "年初至今涨跌幅(%)",
]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end, instead of growing the frame one `.loc` assignment at a time.
report_rows = []

# Each JSON file is one account mapping stock code -> stock name; the file
# stem is used as the account name. sorted() keeps the account order (and so
# the report row order) stable, since glob makes no ordering guarantee.
for file in sorted(glob.glob(os.path.join(DIR_PATH, "*.json"))):
    account_name = os.path.splitext(os.path.basename(file))[0]
    # The holdings contain Chinese stock names: decode as UTF-8 explicitly
    # rather than with the platform default encoding. The file is closed
    # before any per-stock work starts.
    with open(file, "r", encoding="utf-8") as f:
        holding_stocks = json.load(f)

    for stock_code, stock_name in holding_stocks.items():
        # Fail fast on a typo in the holdings file
        validate_stock_name(stock_code, stock_name, stock_zh_a_spot_em_df)
        industry_name = industry_stock_mapping_df[
            industry_stock_mapping_df["代码"] == stock_code
        ]["行业"].values[0]
        result = stock_analysis(
            industry_name=industry_name,
            stock_code=stock_code,
            stock_name=stock_name,
            days=days,
        )
        # stock_analysis returns None when the stock lacks enough history
        if result is not None:
            report_rows.append([account_name] + result)

df = pd.DataFrame(report_rows, columns=REPORT_COLUMNS)

Processing 凌云股份 (600480) in 汽车零部件 industry...
Processing 普洛药业 (000739) in 化学制药 industry...
Processing 润贝航科 (001316) in 贸易行业 industry...
Processing 欣旺达 (300207) in 电池 industry...
Processing 上能电气 (300827) in 光伏设备 industry...
Processing 德固特 (300950) in 专用设备 industry...
Processing 怡合达 (301029) in 通用设备 industry...
Processing 润贝航科 (001316) in 贸易行业 industry...
Processing 上能电气 (300827) in 光伏设备 industry...
Processing 韵达股份 (002120) in 物流行业 industry...
Processing 精达股份 (600577) in 电网设备 industry...
Processing 雪榕生物 (300511) in 农牧饲渔 industry...


In [6]:
# Define the report date
# Define the report date: the "日期" value of the last row of the sector
# fund-flow history.
last_date = ak.stock_sector_fund_flow_hist(symbol="证券").iloc[-1]["日期"]
# pd.to_datetime normalises the value first, so formatting works whether the
# API hands back a date object, a Timestamp or a date string.
last_date_str = pd.to_datetime(last_date).strftime("%Y%m%d")

# Make sure the target directory exists; to_csv does not create it.
os.makedirs(f"{DIR_PATH}/reports", exist_ok=True)

# Output the df to a CSV file
report_path = f"{DIR_PATH}/reports/持股报告-{last_date_str}.csv"
df.to_csv(report_path, index=True)
print(f"Report saved to {report_path}")

0it [00:00, ?it/s]

Report saved to data/holding_stocks/reports/持股报告-20250714.csv
