# Import

In [1]:
import os
import sys
import logging
import time
import math
import multiprocessing
import pandas as pd
import numpy as np
import sqlalchemy
import exchange_calendars as xcals
from dotenv import load_dotenv

# import exchange_calendars as xcals
from datetime import datetime, timedelta

# import pytz
# import pandas as pd
# from IPython.display import display, HTML
from sqlalchemy import create_engine, text
from sqlalchemy.dialects.postgresql import insert
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache

import akshare as ak

# Init

In [2]:
t_start = time.time()

load_dotenv()  # take environment variables from .env.

# module_path = os.getenv("LOCAL_AKSHARE_DEV_MODULE")
# if module_path is not None and module_path not in sys.path:
#     sys.path.insert(0, module_path)
import akshare as ak  # noqa: E402

print(ak.__version__)

# Database connection settings, read from the environment (populated by load_dotenv above).
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Build the URL from its components instead of interpolating them into a string:
# URL.create() escapes special characters (e.g. "@", "/", ":") that may appear in the
# password, which would otherwise corrupt a hand-formatted URL.
alchemyEngine = create_engine(
    sqlalchemy.engine.URL.create(
        "postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    ),
    pool_recycle=3600,
)

# Notebook-level logger: ERROR and above go to both etl.log and the console.
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

# logging.getLogger() returns the same logger object on every call, so re-running this
# cell would otherwise stack another pair of handlers on it (duplicated log lines and
# one more open handle on etl.log per run).
if not logger.handlers:
    formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")

    file_handler = logging.FileHandler("etl.log")
    console_handler = logging.StreamHandler()

    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

# Trading calendar used by last_trade_date().
xshg = xcals.get_calendar("XSHG")

1.16.83


# Helper functions

In [3]:
def update_on_conflict(table_def, conn, df: pd.DataFrame, primary_keys):
    """
    Insert new records, update existing records without nullifying columns not included in the dataframe.

    Args:
        table_def: Either a table name (str), which is reflected from the database through
            `conn`, or a table definition: an object exposing `.columns`, or a mapped class
            exposing `.__table__`.
        conn: Open SQLAlchemy connection used to reflect (if needed) and execute.
        df: Rows to write; every column of `df` becomes a value of the INSERT.
        primary_keys: Column names forming the conflict target (`index_elements`).

    Returns:
        None. Nothing is executed when `df` is empty.
    """
    # An empty record list would be turned into a single defaults-only INSERT rather than
    # a no-op, so bail out explicitly.
    if df.empty:
        return

    # Callers in this notebook pass both table names and table definitions; resolve a name
    # into a reflected Table so the rest of the function sees a single shape.
    if isinstance(table_def, str):
        table_def = sqlalchemy.Table(
            table_def, sqlalchemy.MetaData(), autoload_with=conn
        )

    # Create an insert statement from the DataFrame records
    insert_stmt = insert(table_def).values(df.to_dict(orient="records"))

    if hasattr(table_def, "__table__"):
        table_columns = table_def.__table__.columns
    else:
        table_columns = table_def.columns

    # Columns to overwrite on conflict: those that exist in the table AND in the frame,
    # excluding the conflict keys. Table columns absent from the frame keep their value.
    frame_columns = set(df.columns)
    key_columns = set(primary_keys)
    update_dict = {
        c.name: insert_stmt.excluded[c.name]
        for c in table_columns
        if c.name in frame_columns and c.name not in key_columns
    }

    if update_dict:
        on_conflict_stmt = insert_stmt.on_conflict_do_update(
            index_elements=primary_keys, set_=update_dict
        )
    else:
        # The frame carries nothing but key columns: DO UPDATE rejects an empty SET,
        # and there is nothing to update anyway.
        on_conflict_stmt = insert_stmt.on_conflict_do_nothing(
            index_elements=primary_keys
        )

    conn.execute(on_conflict_stmt)


def ignore_on_conflict(table, conn, df, primary_keys):
    """
    Insert new records, ignore existing records.

    Args:
        table: Table name (str), reflected from the database through `conn`; an
            already-built table object is used as-is.
        conn: Open SQLAlchemy connection used to reflect (if needed) and execute.
        df: Rows to insert; every column of `df` becomes a value of the INSERT.
        primary_keys: Column names forming the conflict target (`index_elements`).

    Returns:
        None. Nothing is executed when `df` is empty.
    """
    # An empty record list would be turned into a single defaults-only INSERT rather than
    # a no-op, so bail out explicitly (this also skips the reflection round-trip).
    if df.empty:
        return

    if isinstance(table, str):
        table = sqlalchemy.Table(table, sqlalchemy.MetaData(), autoload_with=conn)

    insert_stmt = insert(table).values(df.to_dict(orient="records"))
    on_conflict_stmt = insert_stmt.on_conflict_do_nothing(index_elements=primary_keys)
    conn.execute(on_conflict_stmt)


def get_max_for_column(conn, symbol, table, col_for_max="date", non_null_col=None):
    """
    Return max(`col_for_max`) from `table`.

    The result is optionally restricted to rows where `non_null_col` is not null and/or
    to rows whose `symbol` column equals `symbol` (bound as a query parameter). The table
    and column names are interpolated into the SQL text as given.
    """
    fragments = [f"SELECT max({col_for_max}) FROM {table} where 1=1"]
    if non_null_col is not None:
        fragments.append(f" and {non_null_col} is not null")
    if symbol is not None:
        fragments.append(" and symbol = :symbol")

    statement = text("".join(fragments))
    if symbol is None:
        row = conn.execute(statement).fetchone()
    else:
        row = conn.execute(statement, {"symbol": symbol}).fetchone()
    return row[0]


def saveAsCsv(file_name_main: str, df):
    """
    Write `df` to `<file_name_main>_<YYYYmmdd_HHMMSS>.csv`, without the index column.

    The timestamp suffix is the current local time.
    """
    # utf_8_sig writes a UTF-8 byte-order mark, so Chinese characters are encoded and
    # displayed properly.
    target_path = "{}_{:%Y%m%d_%H%M%S}.csv".format(file_name_main, datetime.now())
    df.to_csv(target_path, encoding="utf_8_sig", index=False)


@lru_cache()
def last_trade_date():
    """
    Return the most recent date, today included, that `xshg` reports as a session.

    The result is memoized by lru_cache, so it is computed once per kernel.
    """
    one_day = timedelta(days=1)
    candidate = datetime.now().date()
    # Walk backwards one calendar day at a time until the calendar accepts the date.
    while True:
        if xshg.is_session(candidate):
            return candidate
        candidate -= one_day

# fund_etf_spot_em 

In [21]:
fund_etf_spot_em_df = ak.fund_etf_spot_em()
print(fund_etf_spot_em_df)

          代码          名称    最新价  IOPV实时估值  基金折价率    涨跌额    涨跌幅       成交量  \
0     159698       粮食ETF  0.912    0.9061  -0.65  0.023   2.59     89849   
1     159616       农牧ETF  0.720    0.7176  -0.33  0.017   2.42    143881   
2     159827     农业50ETF  0.728    0.7268  -0.17  0.016   2.25     47874   
3     516810     农业50ETF  0.688    0.6877  -0.04  0.015   2.23    144733   
4     159825       农业ETF  0.681    0.6803  -0.10  0.014   2.10   1245589   
...      ...         ...    ...       ...    ...    ...    ...       ...   
1000  159529     标普消费ETF  1.586    1.3032 -21.70 -0.065  -3.94  12096490   
1001  517900     银行ETF优选  1.312    1.3080  -0.31 -0.090  -6.42    444315   
1002  520890  港股通红利低波ETF  1.228    1.2051  -1.90 -0.136  -9.97     84746   
1003  159333   港股央企红利ETF  1.238    1.1952  -3.58 -0.138 -10.03    225864   
1004  159331     红利港股ETF  1.148    1.1050  -3.89 -0.128 -10.03    153303   

               成交额    开盘价  ...    小单净流入-净额  小单净流入-净占比      现手     买一     卖一  \
0     8.

In [22]:
fund_etf_spot_em_df[fund_etf_spot_em_df["代码"]=='159338']

Unnamed: 0,代码,名称,最新价,IOPV实时估值,基金折价率,涨跌额,涨跌幅,成交量,成交额,开盘价,...,小单净流入-净额,小单净流入-净占比,现手,买一,卖一,最新份额,流通市值,总市值,数据日期,更新时间
646,159338,中证A500ETF,0.967,0.9681,0.11,-0.002,-0.21,37598911,3646593000.0,0.968,...,5395202.0,0.15,-26221,0.967,0.968,29642090000.0,28663897796,28663897796,2024-12-27,2024-12-27 15:34:24+08:00


In [7]:
# Get the latest fund / ETF data set for today (or the latest trading date) and keep a CSV copy.
# Persisting into the database is currently disabled (see the commented-out block at the end).

# Source column (as returned by akshare) -> table column.
# The key order is also the column order of the selected frame and of the CSV file.
etf_spot_columns = {
    "代码": "code",
    "名称": "name",
    "最新价": "latest_price",
    "IOPV实时估值": "iopv",
    "基金折价率": "fund_discount_rate",
    "涨跌额": "change_amount",
    "涨跌幅": "change_rate",
    "成交量": "volume",
    "成交额": "turnover",
    "开盘价": "opening_price",
    "最高价": "highest_price",
    "最低价": "lowest_price",
    "昨收": "previous_close",
    "换手率": "turnover_rate",
    "量比": "volume_ratio",
    "委比": "order_ratio",
    "外盘": "external_disc",
    "内盘": "internal_disc",
    "主力净流入-净额": "main_force_net_inflow_amount",
    "主力净流入-净占比": "main_force_net_inflow_ratio",
    "超大单净流入-净额": "super_large_net_inflow_amount",
    "超大单净流入-净占比": "super_large_net_inflow_ratio",
    "大单净流入-净额": "large_net_inflow_amount",
    "大单净流入-净占比": "large_net_inflow_ratio",
    "中单净流入-净额": "medium_net_inflow_amount",
    "中单净流入-净占比": "medium_net_inflow_ratio",
    "小单净流入-净额": "small_net_inflow_amount",
    "小单净流入-净占比": "small_net_inflow_ratio",
    "流通市值": "circulating_market_value",
    "总市值": "total_market_value",
    "最新份额": "latest_shares",
    "数据日期": "date",
    "更新时间": "update_time",
}

df = ak.fund_etf_spot_em()
df = df[list(etf_spot_columns)]

# The CSV copy keeps the original (Chinese) column names.
saveAsCsv("fund_etf_spot_em", df)

# Rename the columns of df to match the table's column names
df = df.rename(columns=etf_spot_columns)

# with alchemyEngine.begin() as conn:
#     update_on_conflict("fund_etf_spot_em", conn, df, ["code", "date"])

# fund_etf_perf_em

In [9]:
# Download the exchange-traded fund ranking and keep a CSV copy with the original column names.
fund_exchange_rank_em_df = ak.fund_exchange_rank_em()
saveAsCsv("fund_exchange_rank_em", fund_exchange_rank_em_df)

# Source column -> fund_etf_perf_em column.
column_mapping = {
    "序号": "id",
    "基金代码": "fundcode",
    "基金简称": "fundname",
    "类型": "type",
    "日期": "date",
    "单位净值": "unitnav",
    "累计净值": "accumulatednav",
    "近1周": "pastweek",
    "近1月": "pastmonth",
    "近3月": "past3months",
    "近6月": "past6months",
    "近1年": "pastyear",
    "近2年": "past2years",
    "近3年": "past3years",
    "今年来": "ytd",
    "成立来": "sinceinception",
    "成立日期": "inceptiondate",
}

# Rename to the table's column names, then drop the rows where `date` is null or NaT.
fund_exchange_rank_em_df = fund_exchange_rank_em_df.rename(
    columns=column_mapping
).dropna(subset=["date"])

with alchemyEngine.begin() as conn:
    update_on_conflict("fund_etf_perf_em", conn, fund_exchange_rank_em_df, ["fundcode"])

# Get a full list of ETF funds

In [19]:
fund_etf_spot_ths_df = ak.fund_etf_spot_ths()
print(fund_etf_spot_ths_df)

        序号    基金代码              基金名称  当前-单位净值  当前-累计净值  前一日-单位净值  前一日-累计净值  \
0        1  562900    易方达中证现代农业主题ETF   0.7358   0.7358    0.7192    0.7192   
1        2  159825       富国中证农业主题ETF   0.6800   0.6800    0.6659    0.6659   
2        3  516810       华夏中证农业主题ETF   0.6876   0.6876    0.6734    0.6734   
3        4  159827              农业50   0.7261   0.7261    0.7113    0.7113   
4        5  159587       广发国证粮食产业ETF   1.1771   1.1771    1.1545    1.1545   
...    ...     ...               ...      ...      ...       ...       ...   
1069  1070  588810      富国上证科创板芯片ETF      NaN      NaN       NaN       NaN   
1070  1071  588870   汇添富上证科创板50成份ETF      NaN      NaN       NaN       NaN   
1071  1072  588930             科创板AI      NaN      NaN       NaN       NaN   
1072  1073  588950  景顺长城上证科创板50成份ETF      NaN      NaN       NaN       NaN   
1073  1074  588960     富国上证科创板新能源ETF      NaN      NaN       NaN       NaN   

         增长值   增长率 赎回状态 申购状态      最新-交易日  最新-单位净值  最新-累计净值 基金类型

In [17]:
len(fund_etf_spot_ths_df)

1074

In [20]:
fund_etf_spot_ths_df[fund_etf_spot_ths_df["基金代码"]=='159338']

Unnamed: 0,序号,基金代码,基金名称,当前-单位净值,当前-累计净值,前一日-单位净值,前一日-累计净值,增长值,增长率,赎回状态,申购状态,最新-交易日,最新-单位净值,最新-累计净值,基金类型,查询日期
568,569,159338,国泰中证A500ETF,0.9681,0.9681,0.9694,0.9694,-0.0013,-0.13,开放,开放,2024-12-27,0.9681,0.9681,股票型,2024-12-27


In [32]:
fund_etf_category_sina_df = ak.fund_etf_category_sina(symbol="ETF基金")

In [33]:
len(fund_etf_category_sina_df)

1000

In [14]:
fund_etf_category_sina_df[fund_etf_category_sina_df["代码"].str.contains("159338")]

Unnamed: 0,代码,名称,最新价,涨跌额,涨跌幅,买入,卖出,昨收,今开,最高,最低,成交量,成交额


In [34]:
fund_etf_category_sina_df.rename(columns={"代码": "基金代码"}, inplace=True)

In [35]:
fund_etf_category_sina_df["基金代码"] = fund_etf_category_sina_df["基金代码"].str[2:]

In [36]:
fund_etf_category_sina_df

Unnamed: 0,基金代码,名称,最新价,涨跌额,涨跌幅,买入,卖出,昨收,今开,最高,最低,成交量,成交额
0,159998,计算机ETF,0.916,-0.012,-1.293,0.916,0.917,0.928,0.928,0.940,0.914,52633800,48897600
1,159997,电子ETF,1.154,-0.017,-1.452,1.154,1.155,1.171,1.167,1.180,1.150,47619029,55198721
2,159996,家电ETF,1.354,-0.013,-0.951,1.354,1.355,1.367,1.363,1.365,1.349,44515700,60383932
3,159995,芯片ETF,1.271,-0.010,-0.781,1.271,1.272,1.281,1.280,1.305,1.267,882091117,1133797660
4,159994,5GETF,0.975,-0.019,-1.911,0.975,0.977,0.994,0.993,0.997,0.972,56563830,55842533
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,510630,消费30ETF,0.972,0.002,0.206,0.972,0.973,0.970,0.968,0.977,0.965,19329200,18777710
996,510600,申万上证50ETF,3.462,-0.010,-0.288,3.460,3.465,3.472,3.472,3.482,3.450,1552256,5377778
997,510590,中证500ETF平安,6.090,0.026,0.429,6.079,6.091,6.064,6.103,6.156,6.082,344100,2107168
998,510580,中证500ETF易方达,2.958,0.009,0.305,2.955,2.957,2.949,2.944,2.996,2.944,23999476,71216192


In [48]:
merged_df = fund_etf_spot_ths_df.merge(fund_etf_category_sina_df, on=["基金代码"], how='outer', indicator=True)


In [49]:
len(merged_df)

1076

In [51]:
merged_df.columns

Index(['序号', '基金代码', '基金名称', '当前-单位净值', '当前-累计净值', '前一日-单位净值', '前一日-累计净值',
       '增长值', '增长率', '赎回状态', '申购状态', '最新-交易日', '最新-单位净值', '最新-累计净值', '基金类型',
       '查询日期', '名称', '最新价', '涨跌额', '涨跌幅', '买入', '卖出', '昨收', '今开', '最高', '最低',
       '成交量', '成交额', '_merge'],
      dtype='object')

In [43]:
# NOTE: "right_only" keeps the rows that exist only in the right-hand frame of the merge
# (fund_etf_category_sina_df), i.e. funds that are missing from fund_etf_spot_ths_df.
# That is the opposite of what the variable name suggests.
rows_in_df1_not_in_df2 = merged_df[merged_df["_merge"] == "right_only"]

In [44]:
rows_in_df1_not_in_df2 = rows_in_df1_not_in_df2.drop(columns="_merge")

In [45]:
len(rows_in_df1_not_in_df2)

2

In [46]:
rows_in_df1_not_in_df2

Unnamed: 0,序号,基金代码,基金名称,当前-单位净值,当前-累计净值,前一日-单位净值,前一日-累计净值,增长值,增长率,赎回状态,...,涨跌额,涨跌幅,买入,卖出,昨收,今开,最高,最低,成交量,成交额
941,,511990,,,,,,,,,...,0.01,0.01,100.0,100.004,99.994,100.002,100.006,99.999,153567924,15357247445
950,,511880,,,,,,,,,...,0.016,0.016,101.602,101.603,101.586,101.6,101.604,101.6,114771578,11660992120


In [6]:
# Retrieve the ETF list from Sina.
fund_etf_category_sina_df = ak.fund_etf_category_sina(symbol="ETF基金")

# Only two source columns are used: 代码 and 名称.
# 代码 is split into an exchange prefix and a numeric symbol, e.g. sz159998 -> exch=sz,
# symbol=159998; the combined code itself is not kept.
code_parts = fund_etf_category_sina_df["代码"].str.extract(r"([a-z]+)(\d+)")
df = pd.DataFrame(
    {
        "name": fund_etf_category_sina_df["名称"],
        "exch": code_parts[0],
        "symbol": code_parts[1],
    }
)

# Insert new list entries, update the existing ones (keyed by exchange + symbol).
with alchemyEngine.begin() as conn:
    update_on_conflict("fund_etf_list_sina", conn, df, ["exch", "symbol"])

# Get historical trades

In [8]:
end_date = datetime.now().strftime("%Y%m%d")
start_date = (datetime.now() - timedelta(days=20)).strftime("%Y%m%d")
# start_date = '19700101' # For entire history.


# Function to fetch and process ETF data
def fetch_and_process_etf(symbol):
    """
    Fetch the daily history of one ETF and insert it into `fund_etf_daily_em`.

    The date window is taken from the notebook-level `start_date` / `end_date` at call
    time. Rows that already exist for (symbol, date) are left untouched.

    Args:
        symbol: ETF code passed to `ak.fund_etf_hist_em`.

    Returns:
        The renamed/reordered DataFrame that was inserted, or None when no rows were
        returned or when any step failed (the failure is logged, not raised).
    """
    try:
        df = ak.fund_etf_hist_em(
            symbol=symbol,
            period="daily",
            start_date=start_date,
            end_date=end_date,
            adjust="qfq",
        )

        # if df contains no row at all, return immediately
        if df.empty:
            return None

        df["symbol"] = symbol
        df = df.rename(
            columns={
                "日期": "date",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "成交量": "volume",
                "成交额": "turnover",
                "振幅": "amplitude",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amount",
                "换手率": "turnover_rate",
            }
        )
        # Keep only the table's columns, in the table's order.
        df = df[
            [
                "symbol",
                "date",
                "open",
                "close",
                "high",
                "low",
                "volume",
                "turnover",
                "amplitude",
                "change_rate",
                "change_amount",
                "turnover_rate",
            ]
        ]
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("fund_etf_daily_em", conn, df, ["symbol", "date"])
    except Exception:
        # Best effort per symbol: one failing ETF must not abort the whole batch.
        # Use the notebook's configured `logger` (etl.log + console) rather than the
        # root logger, which has none of those handlers attached.
        logger.error(
            f"failed to get daily trade history data for {symbol}", exc_info=True
        )
        return None
    return df


# Fetch the ETF list (symbols previously stored in fund_etf_list_sina)
etf_list_df = pd.read_sql("SELECT symbol FROM fund_etf_list_sina", alchemyEngine)

# get the number of CPU cores; used as the thread pool size (one worker per core)
num_cores = multiprocessing.cpu_count()

# Use ThreadPoolExecutor to fetch data in parallel: one task per symbol.
# fetch_and_process_etf catches and logs its own errors, so each result is either
# a DataFrame or None; `results` follows the order of etf_list_df["symbol"].
with ThreadPoolExecutor(max_workers=num_cores) as executor:
    futures = [
        executor.submit(fetch_and_process_etf, symbol)
        for symbol in etf_list_df["symbol"]
    ]
    results = [future.result() for future in futures]

# Calculate ETF Performance Metrics

## Get historical bond rate (risk-free interest rate)

In [9]:
# NOTE: this rebinds the notebook-level `start_date`, which fetch_and_process_etf reads
# at call time; re-run the "Get historical trades" cell before calling it again.
# start_date = (datetime.now() - timedelta(days=20)).strftime('%Y%m%d')
start_date = None  # For entire history.

# Source column -> bond_metrics_em column.
bond_rate_columns = {
    "日期": "date",
    "中国国债收益率2年": "china_yield_2y",
    "中国国债收益率5年": "china_yield_5y",
    "中国国债收益率10年": "china_yield_10y",
    "中国国债收益率30年": "china_yield_30y",
    "中国国债收益率10年-2年": "china_yield_spread_10y_2y",
    "中国GDP年增率": "china_gdp_growth",
    "美国国债收益率2年": "us_yield_2y",
    "美国国债收益率5年": "us_yield_5y",
    "美国国债收益率10年": "us_yield_10y",
    "美国国债收益率30年": "us_yield_30y",
    "美国国债收益率10年-2年": "us_yield_spread_10y_2y",
    "美国GDP年增率": "us_gdp_growth",
}

bzur = ak.bond_zh_us_rate(start_date).rename(columns=bond_rate_columns)

# Insert the dates that are not stored yet; existing dates are left untouched.
with alchemyEngine.begin() as conn:
    ignore_on_conflict("bond_metrics_em", conn, bzur, ["date"])

                                               

## Calc / Update metrics in fund_etf_perf_em table

In [6]:
interval = 250  # assume 250 trading days annually; also the row LIMIT used by update_etf_metrics
end_date = last_trade_date()  # upper bound of the bond-yield query in update_etf_metrics
# start_date = (end_date - timedelta(days=interval)).strftime('%Y%m%d')
# start_date = '19700101' # For entire history.

# load historical data from daily table and calc metrics, then update perf table
def update_etf_metrics(symbol):
    """
    Compute Sharpe ratio, Sortino ratio and max drawdown for one ETF and store them.

    Loads the latest `interval` daily rows of `symbol` from `fund_etf_daily_em`, uses
    `china_yield_2y` from `bond_metrics_em` (divided by 365.25) as the daily risk-free
    rate, and writes the rounded metrics into `fund_etf_perf_em` for `fundcode = symbol`.
    Reads the notebook-level `interval` and `end_date`.

    Both ratios are computed on the per-row (daily) excess returns as loaded; no
    annualization factor is applied. Max drawdown is expressed in percent and is <= 0.

    Args:
        symbol: ETF code, matched against `fund_etf_daily_em.symbol` and
            `fund_etf_perf_em.fundcode`.

    Returns:
        The working DataFrame (returns merged with yields, plus the derived columns),
        or None when the symbol has no history or when any step failed (the failure
        is logged, not raised).
    """
    try:
        with alchemyEngine.begin() as conn:
            # load the latest (top) `interval` records of historical market data records from `fund_etf_daily_em` table for `symbol`, order by `date`.
            # select columns: date, change_rate
            query = """SELECT date, change_rate FROM fund_etf_daily_em WHERE symbol = '{}' ORDER BY date DESC LIMIT {}""".format(
                symbol, interval
            )
            df = pd.read_sql(query, conn, parse_dates=["date"])

            # A symbol without any stored history has nothing to compute. Without this
            # guard, `.iloc[-1]` below raises IndexError and the symbol is reported as
            # a failure.
            if df.empty:
                return None

            # Rows are sorted newest-first, so the last one is the oldest date.
            start_date = df["date"].iloc[-1]
            # get 2-years CN bond IR as risk-free IR from bond_metrics_em table.
            # select date, china_yield_2y where date is between start_date and end_date (inclusive).
            query = """SELECT date, china_yield_2y FROM bond_metrics_em WHERE date BETWEEN '{}' AND '{}' and china_yield_2y <> 'nan'""".format(
                start_date, end_date
            )
            bme_df = pd.read_sql(query, conn, parse_dates=["date"])
            # Convert annualized rate to a daily rate
            bme_df["china_yield_2y_daily"] = bme_df["china_yield_2y"] / 365.25

            # Attach to each trading day the most recent yield at or before that date.
            df = pd.merge_asof(
                df.sort_values("date"),
                bme_df.sort_values("date"),
                on="date",
                direction="backward",
            ).dropna(subset=["change_rate"])

            df["excess_return"] = df["change_rate"] - df["china_yield_2y_daily"]
            # Mean of the daily excess returns (NOT annualized).
            mean_excess_return = np.mean(df["excess_return"])

            # Calculate the standard deviation of the excess returns
            std_dev = df["excess_return"].std()

            # Sharpe ratio; a zero or NaN deviation yields a non-finite value, which is
            # stored as NULL below.
            sharpe_ratio = mean_excess_return / std_dev

            # Downside deviation (Sortino denominator): std of the negative excess returns
            downside_dev = df[df["excess_return"] < 0]["excess_return"].std()

            # Sortino ratio; undefined (None) when the downside deviation is 0 or NaN
            sortino_ratio = (
                mean_excess_return / downside_dev if downside_dev > 0 else None
            )

            # To calculate max drawdown, get the cumulative returns (change_rate is in percent)
            df["cumulative_returns"] = np.cumprod(1 + df["change_rate"] / 100.0) - 1
            # Calculate the maximum cumulative return up to each point
            peak = np.maximum.accumulate(df["cumulative_returns"])
            # Drawdown relative to the running peak, in percent
            drawdown = (df["cumulative_returns"] - peak) / (1 + peak) * 100
            # Calculate max drawdown
            max_drawdown = np.min(drawdown)  # This is a negative number

            # update the `sharperatio, sortinoratio, maxdrawdown` columns for `symbol` in the table `fund_etf_perf_em` using the calculated metrics.
            update_query = text(
                "UPDATE fund_etf_perf_em SET sharperatio = :sharperatio, sortinoratio = :sortinoratio, maxdrawdown = :maxdrawdown WHERE fundcode = :fundcode"
            )
            # Non-finite metrics (NaN / inf) are written as NULL.
            params = {
                "sharperatio": round(sharpe_ratio, 2)
                if sharpe_ratio is not None and math.isfinite(sharpe_ratio)
                else None,
                "sortinoratio": round(sortino_ratio, 2)
                if sortino_ratio is not None and math.isfinite(sortino_ratio)
                else None,
                "maxdrawdown": round(max_drawdown, 2)
                if math.isfinite(max_drawdown)
                else None,
                "fundcode": symbol,
            }
            conn.execute(update_query, params)

    except Exception:
        # Best effort per symbol: one failing ETF must not abort the whole batch.
        # Use the notebook's configured `logger` (etl.log + console) rather than the
        # root logger, which has none of those handlers attached.
        logger.error(f"failed to update ETF metrics for {symbol}", exc_info=True)
        return None
    return df


# Fetch the ETF list (symbols previously stored in fund_etf_list_sina)
etf_list_df = pd.read_sql("SELECT symbol FROM fund_etf_list_sina", alchemyEngine)

# pool size: half of the CPU cores, rounded up
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel: one task per symbol.
# update_etf_metrics catches and logs its own errors, so each result is either
# a DataFrame or None; `results` follows the order of etf_list_df["symbol"].
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_etf_metrics, symbol) for symbol in etf_list_df["symbol"]
    ]
    results = [future.result() for future in futures]

# Dividend Event

In [4]:
fund_fh_em_df = ak.fund_fh_em()
print(fund_fh_em_df)

                                               

KeyboardInterrupt: 

In [50]:
# sort fund_fh_em_df by column `权益登记日` (date column), descending.
fund_fh_em_df = fund_fh_em_df.sort_values(by='权益登记日', ascending=False)
print(fund_fh_em_df)

          序号    基金代码            基金简称       权益登记日        除息日期      分红  \
34075  34076  508098  嘉实京东仓储基础设施REIT  2024-05-06  2024-05-06  0.0689   
25861  25862  015933     中泰安悦6个月定开债A  2024-04-30  2024-04-30  0.0220   
25863  25864  015934     中泰安悦6个月定开债C  2024-04-30  2024-04-30  0.0200   
26348  26349  018566     恒生前海恒源泓利债券A  2024-04-29  2024-04-29  0.0630   
17279  17280  007062     中加聚盈四个月定开债C  2024-04-29  2024-04-29  0.0150   
...      ...     ...             ...         ...         ...     ...   
27707  27708  080001       长盛成长价值混合A  2003-02-27  2003-02-28  0.0200   
31122  31123  202001        南方稳健成长混合  2002-09-19  2002-09-20  0.0150   
0          1  000001          华夏成长混合  2002-06-25  2002-06-26  0.0270   
26653  26654  040001          华安创新混合  2002-06-24  2002-06-24  0.0220   
31121  31122  202001        南方稳健成长混合  2002-04-19  2002-04-22  0.0250   

            分红发放日  
34075  2024-05-08  
25861  2024-05-06  
25863  2024-05-06  
26348  2024-04-30  
17279  2024-04-30  
...           .

In [53]:
# filter fund_fh_em_df by `基金简称` column, just include value containing `ETF` and not containing "联接"
fund_fh_em_df = fund_fh_em_df[fund_fh_em_df['基金简称'].str.contains('ETF') & ~fund_fh_em_df['基金简称'].str.contains('联接')]
print(fund_fh_em_df)

          序号    基金代码               基金简称       权益登记日        除息日期      分红  \
28794  28795  159691  工银瑞信中证港股通高股息精选ETF  2024-04-24  2024-04-25  0.0103   
34216  34217  511090     鹏扬中债-30年期国债ETF  2024-04-23  2024-04-24  1.5000   
34250  34251  511220        海富通上证城投债ETF  2024-04-10  2024-04-11  0.0650   
34271  34272  511270       海富通上证10年期ETF  2024-03-27  2024-03-28  0.6500   
34215  34216  511060        海富通上证5年期ETF  2024-03-27  2024-03-28  0.7000   
...      ...     ...                ...         ...         ...     ...   
34081  34082  510050          华夏上证50ETF  2008-11-18  2008-11-19  0.0600   
28797  28798  159901        易方达深证100ETF  2007-07-11  2007-07-11  0.1200   
34129  34130  510180         华安上证180ETF  2006-11-22  2006-11-23  0.0450   
34080  34081  510050          华夏上证50ETF  2006-11-15  2006-11-16  0.0370   
34079  34080  510050          华夏上证50ETF  2006-05-18  2006-05-19  0.0240   

            分红发放日  
28794  2024-04-29  
34216  2024-04-29  
34250  2024-04-16  
34271  2024-04-02  

In [5]:
from marten.data.api.em import EastMoneyAPI
from marten.data.tabledef import fund_dividend_events

df = EastMoneyAPI.fund_fh_em()

  0%|          | 0/371 [00:00<?, ?it/s]2024-05-12 17:48:49 - [master] - marten.utils.logger - INFO - page #1, size: 100
2024-05-12 17:48:49 - [master] - marten.utils.logger - INFO -          0        1           2           3       4           5  6
0   000001   华夏成长混合  2002-06-25  2002-06-26   0.027  2002-06-27  1
1   000001   华夏成长混合  2003-06-24  2003-06-25   0.033  2003-06-26  1
2   000001   华夏成长混合  2004-06-24  2004-06-25    0.03  2004-06-28  1
3   000001   华夏成长混合  2004-12-27  2004-12-28    0.03  2004-12-29  1
4   000001   华夏成长混合  2006-04-20  2006-04-21    0.02  2006-04-24  1
..     ...      ...         ...         ...     ...         ... ..
95  000015  华夏纯债债券A  2023-12-19  2023-12-19   0.115  2023-12-20  1
96  000015  华夏纯债债券A  2024-01-29  2024-01-29  0.0035  2024-01-30  1
97  000015  华夏纯债债券A  2024-04-29  2024-04-29   0.021  2024-04-30  1
98  000016  华夏纯债债券C  2016-01-06  2016-01-06    0.03  2016-01-07  1
99  000016  华夏纯债债券C  2018-12-20  2018-12-20   0.055  2018-12-21  1

[100 rows x 7

KeyboardInterrupt: 

In [None]:
from marten.data.api.em import EastMoneyAPI
from marten.data.tabledef import fund_dividend_events

# Source column -> fund_dividend_events column.
dividend_columns = {
    "序号": "id",
    "基金代码": "symbol",
    "基金简称": "short_name",
    "权益登记日": "rights_registration_date",
    "除息日期": "ex_dividend_date",
    "分红": "dividend",
    "分红发放日": "dividend_payment_date",
}

df = EastMoneyAPI.fund_fh_em()
df = df.rename(columns=dividend_columns)

with alchemyEngine.begin() as conn:
    max_id = get_max_for_column(
        conn, symbol=None, table="fund_dividend_events", col_for_max="id"
    )
    # Incremental load: when the table already has rows, only the ids from
    # (largest stored id - 10) upwards are written again.
    if max_id is not None:
        start_id = max_id - 10
        df = df[df["id"] >= start_id]
    update_on_conflict(
        fund_dividend_events, conn, df, ["id", "rights_registration_date"]
    )

In [7]:
# print out row in `df` with any of the columns is null
print(df[df.isnull().any(axis=1)])

          id  symbol  short_name rights_registration_date ex_dividend_date  \
2976    2977  000930       博时黄金I               2015-03-30       2015-03-30   
2977    2978  000930       博时黄金I               2015-04-01       2015-04-01   
2978    2979  000930       博时黄金I               2015-04-07       2015-04-07   
2979    2980  000930       博时黄金I               2015-04-09       2015-04-09   
2980    2981  000930       博时黄金I               2015-04-10       2015-04-10   
...      ...     ...         ...                      ...              ...   
3082    3083  000930       博时黄金I               2017-07-11       2017-07-11   
3083    3084  000930       博时黄金I               2017-07-25       2017-07-25   
3084    3085  000930       博时黄金I               2017-08-22       2017-08-22   
34467  34468  519029      华夏稳增混合               2007-02-02       2007-02-02   
36979  36980  970124  国元元赢六个月定开债               2023-09-19       2023-09-19   

       dividend dividend_payment_date  
2976     0.0002        

In [6]:
# convert `NaT` values in `ex_dividend_date` or `dividend_payment_date` columns of `df` to None
# `where(..., None)` applied directly to a datetime64 column keeps the column's dtype, so
# the missing entries stay NaT. Casting to object dtype first lets None be stored as-is.
for date_col in ("ex_dividend_date", "dividend_payment_date"):
    df[date_col] = df[date_col].astype(object).where(df[date_col].notna(), None)

In [8]:
# from marten.data.tabledef import fund_dividend_events

# Upsert the dividend events held in `df` (loaded and renamed in an earlier cell).
with alchemyEngine.begin() as conn:
    # Largest id already stored; None when the query finds no value.
    max_id = get_max_for_column(
        conn, symbol=None, table="fund_dividend_events", col_for_max="id"
    )
    # Incremental load: when the table already has rows, only the ids from
    # (largest stored id - 10) upwards are written again.
    if max_id is not None:
        start_id = max_id - 10
        df = df[df["id"] >= start_id]
    # Conflict target is (id, rights_registration_date); matching rows are updated.
    update_on_conflict(
        fund_dividend_events, conn, df, ["id", "rights_registration_date"]
    )

# China Market Indices

In [4]:
sziss = ak.stock_zh_index_spot_sina()

  0%|          | 0/8 [00:00<?, ?it/s]

In [5]:
sziss

Unnamed: 0,代码,名称,最新价,涨跌额,涨跌幅,昨收,今开,最高,最低,成交量,成交额
0,sh000001,上证指数,3400.1420,2.065,0.061,3398.0765,3397.2939,3418.9520,3388.3215,500488130,586090438939
1,sh000002,Ａ股指数,3564.1909,2.044,0.057,3562.1471,3561.3219,3583.9092,3551.7382,500037850,585347943108
2,sh000003,Ｂ股指数,269.2420,2.204,0.825,267.0382,267.1302,269.7887,267.1302,319799,127013756
3,sh000004,工业指数,2925.3930,-2.205,-0.075,2927.5978,2929.2120,2948.9912,2918.9401,285439591,415746099733
4,sh000005,商业指数,2683.3937,6.380,0.238,2677.0136,2672.3525,2703.0735,2662.6060,49294748,45839110151
...,...,...,...,...,...,...,...,...,...,...,...
555,sz980030,消费电子,5436.8200,-76.487,-1.387,5513.3070,5504.1830,5547.0620,5423.6190,2650952831,62535745674
556,sz980032,新能电池,9862.9650,12.523,0.127,9850.4420,9866.2780,9973.7190,9791.3340,563860724,19886428347
557,sz980035,化肥农药,1625.4290,22.297,1.391,1603.1320,1604.2790,1632.3530,1600.5710,1633025356,15204240837
558,sz980076,通用航空,2989.7060,21.474,0.723,2968.2320,3020.9700,3048.8160,2981.0570,1229405244,27741675698


In [6]:
sziss[sziss['代码'].str.contains("000510")]

Unnamed: 0,代码,名称,最新价,涨跌额,涨跌幅,昨收,今开,最高,最低,成交量,成交额
142,sh000510,中证A500,4678.6835,-6.159,-0.131,4684.843,4686.8211,4716.0068,4668.7906,244105315,459399515617


In [1]:
cn_index_list = [
    ("上证系列指数", "sh"),
    ("深证系列指数", "sz"),
    # ("指数成份", ""),
    ("中证系列指数", "csi"),
]


In [4]:
import akshare as ak
szise = ak.stock_zh_index_spot_em("中证系列指数")
# configure the jupyter notebook to display entire dataframe
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [5]:
szise

Unnamed: 0,序号,代码,名称,最新价,涨跌幅,涨跌额,成交量,成交额,振幅,最高,最低,今开,昨收,量比
0,1,H50047,180反两,448.44,1.84,8.12,120695300.0,207566400000.0,1.96,452.37,443.74,445.08,440.32,0.74
1,2,H30050,AMAC印刷,1927.14,1.84,34.84,2818596.0,2740858000.0,2.64,1932.75,1882.74,1882.74,1892.3,0.97
2,3,931160,通信设备,7005.01,1.31,90.7,24116050.0,65779150000.0,4.31,7161.69,6863.71,6897.44,6914.31,0.99
3,4,H30046,AMAC皮革,783.24,1.18,9.16,2345058.0,1279018000.0,2.69,790.06,769.26,776.39,774.08,1.43
4,5,H30083,300反向,1508.85,1.12,16.73,174724100.0,340486900000.0,1.1,1514.38,1497.9,1500.56,1492.12,0.68
5,6,H30404,300日报酬反向,1139.77,1.12,12.59,174724100.0,340486900000.0,1.1,1143.95,1131.5,1133.51,1127.18,0.68
6,7,931271,通信设备主题,1693.22,1.0,16.83,21453720.0,65507860000.0,3.87,1727.53,1662.72,1673.51,1676.39,0.96
7,8,931992,疫苗生物,1210.82,0.96,11.56,6398468.0,12772060000.0,2.17,1226.75,1200.73,1200.73,1199.26,1.06
8,9,H50046,180反向,3689.96,0.93,33.95,120695300.0,207566400000.0,0.98,3706.28,3670.45,3676.02,3656.01,0.74
9,10,H30044,AMAC纺织,1253.41,0.83,10.29,5847258.0,3399189000.0,1.47,1258.14,1239.85,1244.15,1243.12,1.22


In [35]:
cn_index_list = [
    ("上证系列指数", "sh"),
    ("深证系列指数", "sz"),
    # ("指数成份", ""),
    ("中证系列指数", "csi"),
]

def update_cn_indices_em(symbol, src):
    """
    Fetch the spot quotes of one index family and upsert them into `index_spot_em`.

    Args:
        symbol: Index family name passed to `ak.stock_zh_index_spot_em`
            (e.g. "上证系列指数").
        src: Source tag stored in the `src` column of every row (e.g. "sh").

    Returns:
        The renamed DataFrame including the `src` column, or None when any step failed
        (the failure is logged, not raised).
    """
    try:
        szise = ak.stock_zh_index_spot_em(symbol)
        # Rename the source columns to the table's column names.
        szise = szise.rename(
            columns={
                "序号": "seq",
                "代码": "symbol",
                "名称": "name",
                "最新价": "close",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amount",
                "成交量": "volume",
                "成交额": "amount",
                "振幅": "amplitude",
                "最高": "high",
                "最低": "low",
                "今开": "open",
                "昨收": "prev_close",
                "量比": "volume_ratio",
            }
        )
        szise["src"] = src
        with alchemyEngine.begin() as conn:
            update_on_conflict("index_spot_em", conn, szise, ["symbol"])

    except Exception:
        # Best effort per index family: one failure must not abort the others.
        # Use the notebook's configured `logger` (etl.log + console) rather than the
        # root logger, which has none of those handlers attached.
        logger.error(f"failed to update index_spot_em for {symbol}", exc_info=True)
        return None
    return szise


# pool size: half of the CPU cores, rounded up
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to refresh the index families in parallel: one task per
# (symbol, src) pair of cn_index_list. update_cn_indices_em catches and logs its own
# errors, so each result is either a DataFrame or None.
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_cn_indices_em, symbol, src) for symbol, src in cn_index_list
    ]
    results = [future.result() for future in futures]

In [5]:
# get daily historical data
def update_cn_indices(symbol, src):
    """Download the daily history of one index and write it to ``index_daily_em``.

    Args:
        symbol: Index code; stored in the ``symbol`` column of every row.
        src: Prefix prepended to ``symbol`` to build the identifier passed to
            ``ak.stock_zh_index_daily_em``.

    Returns:
        The fetched DataFrame with the ``symbol`` column added, or ``None``
        when the download is empty or any step raised an exception.
    """
    try:
        szide = ak.stock_zh_index_daily_em(f"{src}{symbol}")

        # Nothing was returned for this index, so there is nothing to store.
        if szide.empty:
            return None

        szide["symbol"] = symbol
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("index_daily_em", conn, szide, ["symbol", "date"])

    except Exception:
        # Log through the notebook's configured `logger` (file + console
        # handlers) rather than the root `logging` module, so the failure and
        # its traceback are also written to etl.log.
        logger.exception(f"failed to update index_daily_em for {symbol}")
        return None
    return szide


# Load the (src, symbol) pairs stored in index_spot_em. The context manager
# closes the connection even when the query raises, which the previous manual
# connect()/close() pair did not.
with alchemyEngine.connect() as conn:
    cn_index_fulllist = pd.read_sql("SELECT src, symbol FROM index_spot_em", conn)

# Size the pool to half of the CPU cores, rounded up.
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Download and store the daily history of every listed index in parallel.
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_cn_indices, symbol, src)
        for symbol, src in zip(cn_index_fulllist["symbol"], cn_index_fulllist["src"])
    ]
    results = [future.result() for future in futures]

ERROR:root:failed to update index_daily_em for 395033
Traceback (most recent call last):
  File "/var/folders/fb/phz99gmn7cld34rh4f6ppmlw0000gn/T/ipykernel_43190/5112864.py", line 4, in update_cn_indices
    szide = ak.stock_zh_index_daily_em(f"{src}{symbol}")
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/ProgramData/python/akshare/akshare/index/index_stock_zh.py", line 361, in stock_zh_index_daily_em
    temp_df.columns = ["date", "open", "close", "high", "low", "volume", "amount", "_"]
    ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/pandas/core/generic.py", line 6218, in __setattr__
    return object.__setattr__(self, name, value)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "properties.pyx", line 69, in pandas._libs.properties.AxisProperty.__set__
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/pandas/core/generic.py", line 767, in _set_axis
    s

In [3]:
# Spot-check the daily history of a single symbol over December 2024.
# Plain string literal (the f-prefix had no placeholders) and YYYYMMDD string
# dates, matching the string-typed start/end date parameters of the API.
ak.stock_zh_index_daily_em("sz159338", "20241201", "20241228")

Unnamed: 0,date,open,close,high,low,volume,amount
0,2024-12-02,0.954,0.965,0.968,0.952,30045818,2890296000.0
1,2024-12-03,0.966,0.964,0.969,0.958,25928823,2495512000.0
2,2024-12-04,0.963,0.957,0.966,0.955,26556581,2550768000.0
3,2024-12-05,0.956,0.957,0.961,0.953,25940648,2482138000.0
4,2024-12-06,0.957,0.971,0.976,0.956,32053013,3098645000.0
5,2024-12-09,0.97,0.968,0.976,0.962,28151659,2727136000.0
6,2024-12-10,1.001,0.976,1.006,0.973,39186399,3863707000.0
7,2024-12-11,0.975,0.976,0.978,0.969,28753230,2804462000.0
8,2024-12-12,0.976,0.984,0.985,0.973,30112638,2947205000.0
9,2024-12-13,0.979,0.964,0.979,0.961,38522987,3730307000.0


In [20]:
# Fetch and print the table returned by ak.index_all_cni(). Per the printed
# output it has one row per index, with Chinese column headers (index code,
# short name, sample count, close, change rate, ...).
index_all_cni_df = ak.index_all_cni()
print(index_all_cni_df)

          指数代码          指数简称  样本数        收盘点位     涨跌幅     PE滚动           成交量  \
0       399001          深证成指  500  10673.9684  0.0067  22.0486  168675.47268   
1       399002          深成指R  500  14060.4834  0.0067  22.0486  168675.47268   
2       399003          成份Ｂ指   10   7978.7635  0.0003  10.0533      46.49350   
3       399004        深证100R  100   6603.9413  0.0040  19.9701   43832.93902   
4       399005         中小100  100   6532.8930  0.0092  20.9075   33011.66858   
...        ...           ...  ...         ...     ...      ...           ...   
1309  CNB20012  国证政金5-10(全价)    0    109.3994  0.0012      NaN   18570.39000   
1310  CNB20013   高等级绿色债券（全价）    0         NaN     NaN      NaN           NaN   
1311  CNB20014  高等级非贴标绿债（全价）    0         NaN     NaN      NaN           NaN   
1312  CNB20015   高等级贴标绿债（全价）    0         NaN     NaN      NaN           NaN   
1313   RETHKDG    中华房地产信托基金R   30   2543.4213  0.0060      NaN      87.98968   

              成交额            总市值       

# China Stock Markets

In [4]:
# Point both the HTTP and HTTPS proxy environment variables of this process
# at the local proxy.
proxy = "http://localhost:8089"
os.environ.update(http_proxy=proxy, https_proxy=proxy)

In [5]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from requests.packages.urllib3 import disable_warnings

# Disable SSL warnings due to unverified HTTPS requests
disable_warnings(InsecureRequestWarning)

# Capture the pristine Session methods exactly once. When this cell is re-run,
# requests.Session.send/request are already our wrappers; capturing them again
# as the "originals" would make the wrappers call themselves and end in a
# RecursionError. The wrappers therefore carry the real original in an
# `_unpatched` attribute, which is recovered here on re-execution.
orig_send = getattr(requests.Session.send, "_unpatched", requests.Session.send)
original_request_method = getattr(
    requests.Session.request, "_unpatched", requests.Session.request
)


def new_send(*args, **kwargs):
    """Forward to the original ``Session.send`` with SSL verification disabled.

    SECURITY: ``verify=False`` is forced for every request made through
    ``requests`` in this process, so server certificates are never checked.
    """
    kwargs["verify"] = False
    return orig_send(*args, **kwargs)


def custom_request(self, method, url, **kwargs):
    """Forward to the original ``Session.request`` with a forced 300 s timeout."""
    # The timeout is overridden unconditionally: a value supplied by the
    # caller (e.g. the third-party library) is replaced, not just defaulted.
    kwargs["timeout"] = 300  # seconds
    return original_request_method(self, method, url, **kwargs)


# Remember the real originals on the wrappers so a re-run can find them.
new_send._unpatched = orig_send
custom_request._unpatched = original_request_method

# Monkey-patch requests with our new_send function
requests.Session.send = new_send
# Monkey-patch the requests.Session.request method
requests.Session.request = custom_request

# Now, when the third-party library makes a request, SSL verification will be disabled

# Now, when the third-party library makes a request, SSL verification will be disabled

In [4]:
# Fetch the A-share spot snapshot, retrying on any exception. The final
# failed attempt re-raises instead of sleeping.

retry_attempts = 3
retry_delay = 5  # seconds

for attempt in range(retry_attempts):
    try:
        stock_zh_a_spot_em_df = ak.stock_zh_a_spot_em()
    except Exception as e:
        print(f'Attempt {attempt+1} failed with error: {e}')
        if attempt >= retry_attempts - 1:
            raise
        print(f'Retrying in {retry_delay} seconds...')
        time.sleep(retry_delay)
    else:
        # Success: stop retrying.
        break


  0%|          | 0/57 [00:00<?, ?it/s]

In [5]:
# Display the spot snapshot fetched by the retry loop in the previous cell.
stock_zh_a_spot_em_df

Unnamed: 0,序号,代码,名称,最新价,涨跌幅,涨跌额,成交量,成交额,振幅,最高,...,量比,换手率,市盈率-动态,市净率,总市值,流通市值,涨速,5分钟涨跌,60日涨跌幅,年初至今涨跌幅
0,1,301479,弘景光电,142.98,20.00,23.83,45797.0,6.302239e+08,13.00,142.98,...,1.79,32.24,65.60,7.46,9.085902e+09,2.030807e+09,0.00,0.00,241.24,241.24
1,2,301209,联合化学,82.92,20.00,13.82,53859.0,4.097076e+08,20.14,82.92,...,1.14,21.98,100.25,8.89,6.633600e+09,2.031540e+09,0.00,0.00,182.43,206.54
2,3,301212,联盛化学,28.34,19.98,4.72,87691.0,2.246427e+08,23.92,28.34,...,3.16,9.35,68.47,2.26,3.060720e+09,2.656875e+09,0.04,1.58,36.18,37.11
3,4,300157,新锦动力,3.19,19.92,0.53,588626.0,1.841945e+08,12.78,3.19,...,3.38,8.36,8.62,-10.02,2.314308e+09,2.246483e+09,0.00,0.00,14.34,10.00
4,5,688328,深科达,17.46,15.32,2.32,123690.0,2.151213e+08,7.66,17.96,...,7.32,13.09,28.82,2.00,1.649207e+09,1.649207e+09,-0.06,0.17,22.87,16.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5709,5710,000018,神城A退,,,,,,,,...,,0.00,,,,,,,0.00,0.00
5710,5711,000015,PT中浩A,,,,,,,,...,,0.00,,,,,,,0.00,0.00
5711,5712,000013,*ST石化A,,,,,,,,...,,0.00,,,,,,,0.00,0.00
5712,5713,000005,ST星源,,,,,,,,...,,0.00,,,,,,,0.00,0.00


In [None]:
# Chinese -> English column names applied before the upsert below.
spot_column_names = {
    "序号": "serial_no",
    "代码": "code",
    "名称": "name",
    "最新价": "latest_price",
    "涨跌幅": "price_change_pct",
    "涨跌额": "price_change_amt",
    "成交量": "volume",
    "成交额": "turnover",
    "振幅": "amplitude",
    "最高": "highest",
    "最低": "lowest",
    "今开": "open_today",
    "昨收": "close_yesterday",
    "量比": "volume_ratio",
    "换手率": "turnover_rate",
    "市盈率-动态": "pe_ratio_dynamic",
    "市净率": "pb_ratio",
    "总市值": "total_market_value",
    "流通市值": "circulating_market_value",
    "涨速": "rise_speed",
    "5分钟涨跌": "five_min_change",
    "60日涨跌幅": "sixty_day_change_pct",
    "年初至今涨跌幅": "ytd_change_pct",
}
# Rename in place so later cells see the English headers (e.g. the "code" column).
stock_zh_a_spot_em_df.rename(columns=spot_column_names, inplace=True)

# Write the snapshot in a single transaction, keyed on "code".
with alchemyEngine.begin() as conn:
    update_on_conflict("stock_zh_a_spot_em", conn, stock_zh_a_spot_em_df, ["code"])

In [7]:
def retry_ak_stock_zh_a_hist(
    symbol, period, start_date, end_date, adjust, retry_attempts=3, retry_delay=5
):
    """Call ``ak.stock_zh_a_hist`` and retry when it raises.

    Args:
        symbol, period, start_date, end_date, adjust: Forwarded positionally,
            in this order, to ``ak.stock_zh_a_hist``.
        retry_attempts: Total number of attempts (default 3, the previous
            hard-coded value). Must be at least 1.
        retry_delay: Seconds to sleep between attempts (default 5, the
            previous hard-coded value).

    Returns:
        Whatever ``ak.stock_zh_a_hist`` returns on the first successful attempt.

    Raises:
        ValueError: If ``retry_attempts`` is smaller than 1.
        Exception: The exception of the final attempt is re-raised unchanged.
    """
    if retry_attempts < 1:
        # Without this guard the loop would not run and None would be
        # returned silently.
        raise ValueError("retry_attempts must be at least 1")

    for attempt in range(retry_attempts):
        try:
            return ak.stock_zh_a_hist(symbol, period, start_date, end_date, adjust)
        except Exception as e:
            print(f'Attempt {attempt+1} failed with error: {e}')
            if attempt >= retry_attempts - 1:
                # Out of attempts: surface the last error to the caller.
                raise
            print(f'Retrying in {retry_delay} seconds...')
            time.sleep(retry_delay)

In [8]:
# get daily historical data
def stock_zh_a_hist(code, start_date="19700101", end_date="20240401"):
    """Download the daily "hfq"-adjusted history of one stock and store it.

    Args:
        code: Stock code; also written to the ``symbol`` column.
        start_date: First day to request, ``YYYYMMDD`` (default requests the
            entire history).
        end_date: Last day to request, ``YYYYMMDD``. The default is the fixed
            date this notebook has always used, so rows after it are only
            fetched when the caller passes a later value.

    Returns:
        The renamed DataFrame with the ``symbol`` column added, or ``None``
        when the download is empty or any step raised an exception.
    """
    try:
        stock_zh_a_hist_df = retry_ak_stock_zh_a_hist(
            symbol=code,
            period="daily",
            start_date=start_date,
            end_date=end_date,
            adjust="hfq",
        )

        # Nothing was returned for this stock, so there is nothing to store.
        if stock_zh_a_hist_df.empty:
            return None

        stock_zh_a_hist_df["symbol"] = code
        # Rename the columns from Chinese to English.
        stock_zh_a_hist_df.rename(
            columns={
                "日期": "date",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "成交量": "volume",
                "成交额": "turnover",
                "振幅": "amplitude",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amt",
                "换手率": "turnover_rate",
            },
            inplace=True,
        )

        with alchemyEngine.begin() as conn:
            ignore_on_conflict(
                "stock_zh_a_hist_em", conn, stock_zh_a_hist_df, ["symbol", "date"]
            )

    except Exception:
        # Log through the notebook's configured `logger` (file + console
        # handlers) rather than the root `logging` module, so the failure and
        # its traceback are also written to etl.log.
        logger.exception(f"failed to update stock_zh_a_hist_em for {code}")
        return None
    return stock_zh_a_hist_df


# Size the pool to half of the CPU cores, rounded up.
num_proc = (multiprocessing.cpu_count() + 1) // 2

# Download and store the history of every stock code in the snapshot in parallel.
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(stock_zh_a_hist, stock_code)
        for stock_code in stock_zh_a_spot_em_df["code"]
    ]
    results = [pending.result() for pending in futures]

In [1]:
# Limit: a single call returns the current trading day's quotes for all
# "two-network" (两网) and delisted stocks.
import akshare as ak

stock_zh_a_stop_em_df = ak.stock_zh_a_stop_em()
print(stock_zh_a_stop_em_df)

     序号      代码    名称     最新价    涨跌幅    涨跌额      成交量         成交额   振幅      最高  \
0     1  404004  汇车退债  54.961  11.72  5.767  27718.0  15233902.4  0.0  54.961   
1     2  400197   天润3   0.120   9.09  0.010   7354.0     88248.0  0.0   0.120   
2     3  400241   洪涛3   0.130   8.33  0.010  63566.0    826358.0  0.0   0.130   
3     4  400205   泛海3   0.130   8.33  0.010  33524.0    435810.7  0.0   0.130   
4     5  400191   粤泰5   0.130   8.33  0.010   4675.0     60775.0  0.0   0.130   
..  ...     ...   ...     ...    ...    ...      ...         ...  ...     ...   
95   96  400201   海投5   0.600   0.00  0.000  13923.0    835389.6  0.0   0.600   
96   97  400199  阳光城3   0.090   0.00  0.000  30530.0    274770.0  0.0   0.090   
97   98  400198   搜特3     NaN    NaN    NaN      NaN         NaN  NaN     NaN   
98   99  400196  R紫鑫1     NaN    NaN    NaN      NaN         NaN  NaN     NaN   
99  100  400195   泰禾3   0.120   0.00  0.000   4031.0     48372.0  0.0   0.120   

        最低      今开      昨收 

In [2]:
# Print the table returned by ak.stock_staq_net_stop(); per the printed output
# it has three columns: serial number, code and name.
import akshare as ak

stock_staq_net_stop_df = ak.stock_staq_net_stop()
print(stock_staq_net_stop_df)

     序号      代码    名称
0     1  404004  汇车退债
1     2  400197   天润3
2     3  400241   洪涛3
3     4  400205   泛海3
4     5  400191   粤泰5
..  ...     ...   ...
95   96  400201   海投5
96   97  400199  阳光城3
97   98  400198   搜特3
98   99  400196  R紫鑫1
99  100  400195   泰禾3

[100 rows x 3 columns]


# Get HK Market Indices

In [4]:
# Refresh the HK index list: pull the spot table, switch its Chinese headers
# to English names, then upsert it keyed on "symbol".
hk_index_list_df = ak.stock_hk_index_spot_em().rename(
    columns={
        "序号": "seq",
        "内部编号": "internal_code",
        "代码": "symbol",
        "名称": "name",
        "最新价": "close",
        "涨跌额": "change_amount",
        "涨跌幅": "change_rate",
        "今开": "open",
        "最高": "high",
        "最低": "low",
        "昨收": "prev_close",
        "成交量": "volume",
        "成交额": "amount",
    }
)

with alchemyEngine.begin() as conn:
    update_on_conflict("hk_index_spot_em", conn, hk_index_list_df, ["symbol"])

In [3]:
# Fetch the daily history of one HK index symbol for inspection in the next cell.
import akshare as ak
df = ak.stock_hk_index_daily_em(symbol="HSSCFCF")

In [None]:
# Display the frame fetched in the previous cell; the recorded output has the
# columns date, open, high, low and latest.
df

Unnamed: 0,date,open,high,low,latest
0,2025-02-28,3745.14,3748.59,3647.13,3663.66
1,2025-03-03,3676.52,3706.3,3633.67,3642.96
2,2025-03-04,3581.99,3607.43,3548.27,3600.07
3,2025-03-05,3638.58,3714.27,3638.58,3707.46
4,2025-03-06,3742.2,3778.07,3730.96,3770.53
5,2025-03-07,3751.94,3828.45,3744.78,3790.97
6,2025-03-10,3795.4,3819.58,3749.25,3784.51
7,2025-03-11,3738.05,3801.43,3727.0,3794.09
8,2025-03-12,3795.95,3816.3,3753.06,3772.79
9,2025-03-13,3783.54,3808.38,3749.43,3779.75


In [5]:
# get daily historical data
def update_hk_indices(symbol):
    """Download the daily history of one HK index and write it to ``hk_index_daily_em``.

    Args:
        symbol: Index symbol passed to ``ak.stock_hk_index_daily_em``; also
            stored in the ``symbol`` column of every row.

    Returns:
        The fetched DataFrame (``latest`` renamed to ``close``, ``symbol``
        column added), or ``None`` when the download is empty or any step
        raised an exception.
    """
    try:
        shide = ak.stock_hk_index_daily_em(symbol=symbol)

        # Nothing was returned for this index, so there is nothing to store.
        if shide.empty:
            return None

        shide["symbol"] = symbol
        shide = shide.rename(
            columns={
                "latest": "close",
            }
        )
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("hk_index_daily_em", conn, shide, ["symbol", "date"])

    except Exception:
        # Log through the notebook's configured `logger` (file + console
        # handlers) rather than the root `logging` module, so the failure and
        # its traceback are also written to etl.log.
        logger.exception(f"failed to update hk_index_daily_em for {symbol}")
        return None
    return shide

# Size the pool to half of the CPU cores, rounded up.
num_proc = (multiprocessing.cpu_count() + 1) // 2

# Download and store the daily history of every HK index in the list in parallel.
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_hk_indices, hk_symbol)
        for hk_symbol in hk_index_list_df["symbol"]
    ]
    results = [pending.result() for pending in futures]

# Get US market indices

In [10]:
# Symbols passed one by one to ak.index_us_stock_sina() by update_us_indices().
idx_symbol_list = [".IXIC", ".DJI", ".INX", ".NDX"]


def update_us_indices(symbol):
    """Download the history of one US index and upsert it into ``us_index_daily_sina``.

    Args:
        symbol: Index symbol passed to ``ak.index_us_stock_sina``; also stored
            in the ``symbol`` column of every row.

    Returns:
        The fetched DataFrame with the ``symbol`` column added, or ``None`` if
        fetching or writing raised an exception.
    """
    try:
        iuss = ak.index_us_stock_sina(symbol=symbol)
        iuss['symbol'] = symbol
        with alchemyEngine.begin() as conn:
            update_on_conflict("us_index_daily_sina", conn, iuss, ["symbol", "date"])

    except Exception:
        # Log through the notebook's configured `logger` (file + console
        # handlers) rather than the root `logging` module, so the failure and
        # its traceback are also written to etl.log.
        logger.exception(f"failed to update us_index_daily_sina for {symbol}")
        return None
    return iuss


# Size the pool to half of the CPU cores, rounded up.
num_proc = (multiprocessing.cpu_count() + 1) // 2

# Download and store every US index in the list in parallel.
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_us_indices, us_symbol) for us_symbol in idx_symbol_list
    ]
    results = [pending.result() for pending in futures]

# China Bond

In [1]:
# NOTE(review): the recorded run of this cell failed with
# "JSONDecodeError: Can not decode value starting with character '<'".
import akshare as ak
bond_zh_hs_spot_df = ak.bond_zh_hs_spot()
print(bond_zh_hs_spot_df)

  0%|          | 0/10 [00:00<?, ?it/s]

JSONDecodeError: Can not decode value starting with character '<'

In [7]:
# Print the daily history of one bond symbol; the recorded output has the
# columns date, open, high, low, close and volume.
bond_zh_hs_daily_df = ak.bond_zh_hs_daily(symbol="sz149999")
print(bond_zh_hs_daily_df)

          date     open     high      low    close   volume
0   2022-08-03  100.000  100.000  100.000  100.000  1200000
1   2022-08-04  100.000  100.000  100.000  100.000   500000
2   2022-08-15  100.277  100.277  100.277  100.277   800000
3   2022-11-23   97.736   97.743   97.736   97.743  1000000
4   2022-12-07   97.779   97.779   97.779   97.779    50000
5   2022-12-08   97.779   97.780   97.779   97.780   100000
6   2023-01-12   96.769   96.769   96.769   96.769    20000
7   2023-02-02   96.804   96.804   96.804   96.804   280000
8   2023-02-08   96.881   96.882   96.881   96.882  2000000
9   2023-02-09   97.324   97.331   97.324   97.331   300000
10  2023-02-13   97.281   97.281   97.281   97.281   500000
11  2023-03-15   98.339   98.340   98.339   98.340   400000
12  2023-04-26   99.916   99.923   99.916   99.923   800000
13  2023-06-30  100.099  100.099  100.099  100.099   300000
14  2023-07-03  100.151  100.151  100.151  100.151   200000
15  2023-07-05  100.138  100.138  100.13

In [9]:
# The two comments below list the `indicator` and `period` values; they are
# kept in Chinese because they are passed to the API as-is.
# indicator="财富"; choice of {"全价", "净价", "财富", "平均市值法久期", "平均现金流法久期", "平均市值法凸性", "平均现金流法凸性", "平均现金流法到期收益率", "平均市值法到期收益率", "平均基点价值", "平均待偿期", "平均派息率", "指数上日总市值", "财富指数涨跌幅", "全价指数涨跌幅", "净价指数涨跌幅", "现券结算量"}
# period="总值"; choice of {"总值", "1年以下", "1-3年", "3-5年", "5-7年", "7-10年", "10年以上", "0-3个月", "3-6个月", "6-9个月", "9-12个月", "0-6个月", "6-12个月"}
bond_new_composite_index_cbond_df = ak.bond_new_composite_index_cbond(
    indicator="全价", period="10年以上"
)
print(bond_new_composite_index_cbond_df)

            date     value
0     2002-01-04  100.8196
1     2002-01-07  101.0637
2     2002-01-08  100.3187
3     2002-01-09  100.5000
4     2002-01-10  100.6126
...          ...       ...
5580  2024-04-22  132.0262
5581  2024-04-23  132.5291
5582  2024-04-24  131.8973
5583  2024-04-25  131.4804
5584  2024-04-26  130.9411

[5585 rows x 2 columns]


In [23]:
# Same call with a different indicator/period pair; this rebinds
# bond_new_composite_index_cbond_df, replacing the frame from the earlier cell.
bond_new_composite_index_cbond_df = ak.bond_new_composite_index_cbond(
    indicator="指数上日总市值", period="1年以下"
)
print(bond_new_composite_index_cbond_df)

            date        value
0     2002-01-04     449.1471
1     2002-01-07     449.0152
2     2002-01-08     450.5622
3     2002-01-09     451.1244
4     2002-01-10     451.1926
...          ...          ...
5580  2024-04-22  197981.8415
5581  2024-04-23  200107.2504
5582  2024-04-24  199722.0332
5583  2024-04-25  199308.4572
5584  2024-04-26  199274.9573

[5585 rows x 2 columns]


# Currencies

In [5]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'NoneType' object has no attribute 'find_all'".
ret = ak.currency_name_code()
ret

AttributeError: 'NoneType' object has no attribute 'find_all'

In [12]:
# NOTE(review): the recorded run of this cell failed with "KeyError: '美元'".
ret = ak.currency_pair_map()
ret

  0%|          | 0/5 [00:00<?, ?it/s]

                                             

KeyError: '美元'

In [14]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'NoneType' object has no attribute 'text'".
currency_hist_df = ak.currency_hist(
    symbol="usd-jpy", period="每日", start_date="20050101", end_date="20220808"
)
print(currency_hist_df)

AttributeError: 'NoneType' object has no attribute 'text'

In [10]:
# Print the table returned by ak.currency_boc_safe(); per the printed output it
# has a date column followed by one column per currency (Chinese headers).
currency_boc_safe_df = ak.currency_boc_safe()
print(currency_boc_safe_df)

              日期      美元      欧元      日元       港元      英镑      澳元    新西兰元  \
0     1994-01-01  870.00     NaN  7.7800  112.660     NaN     NaN     NaN   
1     1994-01-03  870.00     NaN  7.7800  112.660     NaN     NaN     NaN   
2     1994-01-04  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
3     1994-01-05  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
4     1994-01-06  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
...          ...     ...     ...     ...      ...     ...     ...     ...   
7459  2024-04-15  709.79  761.79  4.6677   90.577  891.63  464.51  426.80   
7460  2024-04-16  710.28  759.69  4.6419   90.705  889.83  461.51  422.88   
7461  2024-04-17  710.25  759.73  4.6333   90.690  889.00  460.63  422.18   
7462  2024-04-18  710.20  759.80  4.6363   90.707  888.54  459.29  421.49   
7463  2024-04-19  710.46  758.73  4.6329   90.711  888.70  458.70  421.03   

        新加坡元    瑞士法郎  ...     里亚尔       福林     兹罗提   丹麦克朗    瑞典克朗    挪威克朗  

In [12]:
# List the (Chinese) column headers that the rename in the next cell maps.
currency_boc_safe_df.columns

Index(['日期', '美元', '欧元', '日元', '港元', '英镑', '澳元', '新西兰元', '新加坡元', '瑞士法郎', '加元',
       '林吉特', '卢布', '兰特', '韩元', '迪拉姆', '里亚尔', '福林', '兹罗提', '丹麦克朗', '瑞典克朗',
       '挪威克朗', '里拉', '比索', '泰铢', '澳门元'],
      dtype='object')

In [13]:
# Rename the Chinese column headers in place: the date column becomes "Date"
# and each currency column becomes a three-letter code.
# NOTE(review): the date column is renamed to "Date" (capital D), while the
# later upsert of this frame uses ["date"] as its key — confirm which spelling
# the currency_boc_safe table expects.
currency_boc_safe_df.rename(
    columns={
        "日期": "Date",
        "美元": "USD",
        "欧元": "EUR",
        "日元": "JPY",
        "港元": "HKD",
        "英镑": "GBP",
        "澳元": "AUD",
        "新西兰元": "NZD",
        "新加坡元": "SGD",
        "瑞士法郎": "CHF",
        "加元": "CAD",
        "林吉特": "MYR",
        "卢布": "RUB",
        "兰特": "ZAR",
        "韩元": "KRW",
        "迪拉姆": "AED",
        "里亚尔": "QAR",
        "福林": "HUF",
        "兹罗提": "PLN",
        "丹麦克朗": "DKK",
        "瑞典克朗": "SEK",
        "挪威克朗": "NOK",
        "里拉": "TRY",
        "比索": "PHP",
        "泰铢": "THB",
        "澳门元": "MOP",
    },
    inplace=True,
)

In [15]:
# Upsert the renamed frame in a single transaction.
# NOTE(review): the key here is "date", but the rename cell produces a column
# called "Date" — confirm the two agree with the table definition.
with alchemyEngine.begin() as conn:
    update_on_conflict("currency_boc_safe", conn, currency_boc_safe_df, ["date"])

# SGE Spot

In [3]:
# Fetch the SGE spot symbol table (serial number + product, per the later display).
ssts = ak.spot_symbol_table_sge()

In [4]:
# Rename the two Chinese headers in place to "serial" and "product".
ssts.rename(columns={"序号": "serial", "品种": "product"}, inplace=True)

In [5]:
# Display the renamed symbol table.
ssts

Unnamed: 0,serial,product
0,1,Au99.99
1,2,Au99.95
2,3,Au100g
3,4,Pt99.95
4,5,Ag(T+D)
5,6,Au(T+D)
6,7,mAu(T+D)
7,8,Au(T+N1)
8,9,Au(T+N2)
9,10,Ag99.99


In [18]:
# Upsert the symbol table in a single transaction, keyed on "product".
with alchemyEngine.begin() as conn:
    update_on_conflict("spot_symbol_table_sge", conn, ssts, ["product"])

In [11]:
# Fetch and display the SGE history of one product. `symbol` is reused by the
# later cell that adds it to the frame as a column.
symbol = "iAu100g"
spot_hist_sge_df = ak.spot_hist_sge(symbol=symbol)
spot_hist_sge_df

Unnamed: 0,date,open,close,low,high
0,2017-01-13,266.00,266.00,266.00,266.00
1,2017-01-26,245.01,245.01,245.01,245.01
2,2017-02-20,273.00,273.00,273.00,273.00
3,2017-02-27,276.00,276.00,276.00,276.00
4,2017-03-10,269.50,269.50,269.50,269.50
...,...,...,...,...,...
145,2023-01-06,400.00,399.40,399.00,400.00
146,2023-06-12,448.00,448.00,448.00,448.00
147,2023-06-16,441.90,441.90,441.90,441.90
148,2023-07-13,353.00,353.00,353.00,353.00


In [24]:
# Tag every row with the product symbol as the first column.
# DataFrame.insert raises ValueError when the column already exists, so a
# re-run of this cell just overwrites the existing column instead of failing.
if "symbol" in spot_hist_sge_df.columns:
    spot_hist_sge_df["symbol"] = symbol
else:
    spot_hist_sge_df.insert(0, "symbol", symbol)

In [25]:
# Display the frame with the added "symbol" column.
spot_hist_sge_df

Unnamed: 0,symbol,date,open,close,high,low
0,Au99.95,2016-12-19,262.90,262.80,262.30,262.94
1,Au99.95,2016-12-20,262.80,261.77,261.30,262.80
2,Au99.95,2016-12-21,259.10,260.85,259.10,261.00
3,Au99.95,2016-12-22,260.30,259.40,259.30,260.30
4,Au99.95,2016-12-23,259.65,259.98,259.01,259.99
...,...,...,...,...,...,...
1762,Au99.95,2024-04-15,561.00,560.36,550.01,561.50
1763,Au99.95,2024-04-16,567.00,564.26,562.00,568.00
1764,Au99.95,2024-04-17,566.10,564.50,562.00,566.10
1765,Au99.95,2024-04-18,562.00,563.17,562.00,563.30


In [26]:
# Upsert the tagged history in a single transaction, keyed on (symbol, date).
with alchemyEngine.begin() as conn:
    update_on_conflict("spot_hist_sge", conn, spot_hist_sge_df, ["symbol", "date"])

# Global Spot Goods

In [9]:
# Presumably the accepted `symbol` values (to be confirmed against the API):
# 波罗的海干散货指数 (Baltic Dry Index), 钢坯价格指数 (steel billet price index),
# 澳大利亚粉矿价格 (Australian iron-ore fines price).
spot_goods_df = ak.spot_goods(symbol="澳大利亚粉矿价格")
print(spot_goods_df)

             日期      指数   涨跌额   涨跌幅
0    2020-12-28  1123.0   0.0  0.00
1    2020-12-29  1113.0 -10.0 -0.89
2    2020-12-30  1076.0 -37.0 -3.32
3    2020-12-31  1077.0   1.0  0.09
4    2021-01-04  1084.0   7.0  0.65
..          ...     ...   ...   ...
829  2024-04-29   870.0  -4.0 -0.46
830  2024-04-30   873.0   3.0  0.34
831  2024-05-06   875.0   2.0  0.23
832  2024-05-07   892.0  17.0  1.94
833  2024-05-08   882.0 -10.0 -1.12

[834 rows x 4 columns]


# Fund Flow

In [8]:
# Print the fund-flow history of a single security on the "sz" market.
import akshare as ak
# only 100 days history is returned
stock_individual_fund_flow_df = ak.stock_individual_fund_flow(
    stock="159338", market="sz"
)
print(stock_individual_fund_flow_df)

            日期    收盘价   涨跌幅     主力净流入-净额  主力净流入-净占比     超大单净流入-净额  超大单净流入-净占比  \
0   2024-10-15  0.932 -2.31 -705559643.0     -22.38 -1.333374e+09      -42.29   
1   2024-10-16  0.923 -0.97 -130647988.0      -6.54 -3.234564e+08      -16.19   
2   2024-10-17  0.915 -0.87 -212742274.0     -14.24 -3.590518e+08      -24.04   
3   2024-10-18  0.952  4.04    -943280.0      -0.04 -2.257763e+08       -9.08   
4   2024-10-21  0.955  0.32 -218298336.0      -8.36 -3.771800e+08      -14.45   
5   2024-10-22  0.964  0.94  -98784964.0      -5.68 -2.610927e+08      -15.02   
6   2024-10-23  0.965  0.10 -156128041.0      -7.44 -3.397692e+08      -16.19   
7   2024-10-24  0.955 -1.04 -268649210.0     -14.09 -7.252321e+08      -38.04   
8   2024-10-25  0.965  1.05  -37405280.0      -1.76 -1.031548e+08       -4.85   
9   2024-10-28  0.967  0.21 -286574037.0     -10.58 -8.040532e+08      -29.67   
10  2024-10-29  0.958 -0.93 -169232224.0      -6.76 -2.994341e+08      -11.96   
11  2024-10-30  0.954 -0.42 

In [5]:
# Print the fund-flow history of one sector (symbol "电源设备").
stock_sector_fund_flow_hist_df = ak.stock_sector_fund_flow_hist(symbol="电源设备")
print(stock_sector_fund_flow_hist_df)

             日期     主力净流入-净额  主力净流入-净占比   超大单净流入-净额  超大单净流入-净占比     大单净流入-净额  \
0    2023-10-26  -73910569.0      -2.44  -1913321.0       -0.06  -71997248.0   
1    2023-10-27   78430051.0       2.18   1727283.0        0.05   76702768.0   
2    2023-10-30 -129940087.0      -3.70 -79973607.0       -2.28  -49966480.0   
3    2023-10-31  -79302200.0      -2.09  -9708024.0       -0.26  -69594176.0   
4    2023-11-01  -19266774.0      -0.69 -15652038.0       -0.56   -3614736.0   
..          ...          ...        ...         ...         ...          ...   
116  2024-04-18 -146704271.0      -3.14 -40991199.0       -0.88 -105713072.0   
117  2024-04-19 -194834566.0      -5.00 -93215766.0       -2.39 -101618800.0   
118  2024-04-22 -154895774.0      -4.25 -29532302.0       -0.81 -125363472.0   
119  2024-04-23  -54391548.0      -1.81 -47213740.0       -1.57   -7177808.0   
120  2024-04-24 -155432353.0      -5.47 -36173745.0       -1.27 -119258608.0   

     大单净流入-净占比     中单净流入-净额  中单净流入-净占比 

# Fund Holding

In [8]:
# Print the stock holdings of fund 515450 for 2023; the recorded output
# contains rows for several quarters of that year.
fund_portfolio_hold_em_df = ak.fund_portfolio_hold_em(symbol="515450", date="2023")
print(fund_portfolio_hold_em_df)

      序号    股票代码  股票名称  占净值比例     持股数     持仓市值              季度
0      1  600177   雅戈尔   4.32  505.39  3310.33  2023年4季度股票投资明细
1      2  000895  双汇发展   3.95  113.16  3022.50  2023年4季度股票投资明细
2      3  601006  大秦铁路   3.57  378.92  2732.02  2023年4季度股票投资明细
3      4  603156  养元饮品   3.32  119.59  2542.40  2023年4季度股票投资明细
4      5  600019  宝钢股份   2.81  362.32  2148.56  2023年4季度股票投资明细
..   ...     ...   ...    ...     ...      ...             ...
222  223  688432   有研硅   0.06    0.70    11.48  2023年1季度股票投资明细
223  224  688084  晶品特装   0.04    0.09     7.31  2023年1季度股票投资明细
224  225  601061  中信金属   0.02    0.46     3.00  2023年1季度股票投资明细
225  226  688535  华海诚科   0.02    0.09     3.00  2023年1季度股票投资明细
226  227  601133  柏诚股份   0.01    0.15     1.75  2023年1季度股票投资明细

[227 rows x 7 columns]


In [15]:
# NOTE(review): the recorded run of this cell failed with
# "KeyError: '占净值比例'" for symbol 561790 / date 2025.
fund_portfolio_hold_em_df = ak.fund_portfolio_hold_em(symbol="561790", date="2025")
print(fund_portfolio_hold_em_df)

KeyError: '占净值比例'

# Bond Holding

In [18]:
# Print the bond holdings of fund 511220 for 2024.
fund_portfolio_bond_hold_em_df = ak.fund_portfolio_bond_hold_em(
    symbol="511220", date="2024"
)
print(fund_portfolio_bond_hold_em_df)

   序号    债券代码    债券名称  占净值比例     持仓市值              季度
0   1  152414   20昆高新   1.28  6757.58  2024年1季度债券投资明细
1   2  127574   17宜城投   1.13  6005.41  2024年1季度债券投资明细
2   3  152883  21宜昌01   1.02  5400.66  2024年1季度债券投资明细
3   4  152840   21湖城债   1.02  5387.39  2024年1季度债券投资明细
4   5  152893  21开福02   1.02  5396.81  2024年1季度债券投资明细


# Carbon Emission

In [7]:
# Print the domestic carbon-trading history for symbol "深圳" (Shenzhen).
energy_carbon_domestic_df = ak.energy_carbon_domestic(symbol="深圳")
print(energy_carbon_domestic_df)

              日期    成交价      成交量           成交额  地点
0     2013-06-19  29.00      0.0      0.000000  深圳
1     2013-06-20  29.00      0.0      0.000000  深圳
2     2013-06-21  29.00      0.0      0.000000  深圳
3     2013-06-22  29.00      0.0      0.000000  深圳
4     2013-06-23  29.00      0.0      0.000000  深圳
...          ...    ...      ...           ...  ..
2154  2021-03-31  53.44      3.0     28.909999  深圳
2155  2021-04-01  51.48    229.0   1697.350049  深圳
2156  2021-04-02  50.47  12562.0  69828.731367  深圳
2157  2021-04-06  47.84   1007.0   5936.440137  深圳
2158  2021-04-07  44.48   1410.0   7453.210097  深圳

[2159 rows x 5 columns]


In [8]:
# NOTE(review): the recorded run of this cell failed with an SSLError
# (CERTIFICATE_VERIFY_FAILED) for host www.bjets.com.cn.
energy_carbon_bj_df = ak.energy_carbon_bj()
print(energy_carbon_bj_df)

SSLError: HTTPSConnectionPool(host='www.bjets.com.cn', port=443): Max retries exceeded with url: /article/jyxx/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)')))

In [9]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'NoneType' object has no attribute 'find_all'".
energy_carbon_sz_df = ak.energy_carbon_sz()
print(energy_carbon_sz_df)

AttributeError: 'NoneType' object has no attribute 'find_all'

In [10]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'NoneType' object has no attribute 'find_all'".
energy_carbon_eu_df = ak.energy_carbon_eu()
print(energy_carbon_eu_df)

AttributeError: 'NoneType' object has no attribute 'find_all'

# Consumer Oil

In [11]:
# Print the oil price adjustment history; per the printed output each row has
# an adjustment date, gasoline and diesel prices, and their changes.
energy_oil_hist_df = ak.energy_oil_hist()
print(energy_oil_hist_df)

           调整日期  汽油价格  柴油价格   汽油涨跌   柴油涨跌
0    2000-06-06  2935  2430    NaN    NaN
1    2000-07-15  3135  2610  200.0  180.0
2    2000-08-18  3405  2770  270.0  160.0
3    2000-09-20  3615  3070  210.0  300.0
4    2000-10-20  3435  3440 -180.0  370.0
..          ...   ...   ...    ...    ...
276  2024-01-18  8780  7745  -50.0  -50.0
277  2024-02-01  8980  7945  200.0  200.0
278  2024-03-05  9105  8065  125.0  120.0
279  2024-04-02  9305  8255  200.0  190.0
280  2024-04-17  9505  8450  200.0  195.0

[281 rows x 5 columns]


In [12]:
# Print the per-region oil price detail for the adjustment dated 2024-01-18.
energy_oil_detail_df = ak.energy_oil_detail(date="20240118")
print(energy_oil_detail_df)

            日期   地区       V_0      V_92      V_95      V_89     ZDE_0  \
0   2024-01-18   上海  7.300000  8.120000  7.630000  7.120000 -0.040000   
1   2024-01-18   云南  7.391245  8.382528  7.809826  7.192800 -0.042650   
2   2024-01-18  内蒙古  7.190000  8.110000  7.600000       NaN -0.040000   
3   2024-01-18   北京  7.370000  8.160000  7.660000  7.170000 -0.040000   
4   2024-01-18   吉林  7.240000  8.230000  7.630000       NaN -0.040000   
5   2024-01-18   四川  7.370000  8.290000  7.760000  7.200000 -0.040000   
6   2024-01-18   天津  7.320000  8.090000  7.660000  7.100000 -0.040000   
7   2024-01-18   宁夏  7.200000  7.990000  7.560000  7.140000 -0.050000   
8   2024-01-18   安徽  7.360000  8.160000  7.620000  7.140000 -0.040000   
9   2024-01-18   山东  7.230000  8.180000  7.630000  7.080000 -0.050000   
10  2024-01-18   山西  7.390000  8.220000  7.610000  7.130000 -0.040000   
11  2024-01-18   广东  7.330000  8.320000  7.680000  7.130000 -0.040000   
12  2024-01-18   广西  7.380000  8.340000  7.720000  

# Volatility

In [13]:
# NOTE(review): the recorded run of this cell failed with a ConnectionError —
# the host realized.oxford-man.ox.ac.uk could not be resolved.
article_oman_rv_df = ak.article_oman_rv(symbol="FTSE", index="rk_th2")
print(article_oman_rv_df)

ConnectionError: HTTPSConnectionPool(host='realized.oxford-man.ox.ac.uk', port=443): Max retries exceeded with url: /theme/js/visualization-data.js?20191111113154 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x1255f20f0>: Failed to resolve 'realized.oxford-man.ox.ac.uk' ([Errno 8] nodename nor servname provided, or not known)"))

# Electricity

In [14]:
# Print the electricity consumption table; per the printed output the first
# column is a year.month period followed by consumption figures and their
# year-on-year changes.
macro_china_society_electricity_df = ak.macro_china_society_electricity()
print(macro_china_society_electricity_df)

        统计时间       全社会用电量  全社会用电量同比     各行业用电量合计  各行业用电量合计同比    第一产业用电量  \
0    2003.12  188912117.0     15.29  166531685.0       15.77  5958327.0   
1    2004.10  175828690.0     15.17  156100929.0       16.03  4979893.0   
2    2004.11  194584023.0     15.13  172937132.0       15.87  5479353.0   
3     2004.3   48045510.0     15.70   42431023.0       16.41  1125688.0   
4     2004.9  157131146.0     14.92  139454527.0       15.71  4518047.0   
..       ...          ...       ...          ...         ...        ...   
210   2023.7  519650000.0      5.20          NaN         NaN  7160000.0   
211   2023.8  608260000.0      5.00          NaN         NaN  8590000.0   
212   2023.9  686370000.0      5.60          NaN         NaN  9760000.0   
213   2024.2  153160000.0     11.00          NaN         NaN  1920000.0   
214   2024.3  233730000.0      9.80          NaN         NaN  2880000.0   

     第一产业用电量同比      第二产业用电量  第二产业用电量同比      第三产业用电量  第三产业用电量同比  城乡居民生活用电量合计  \
0         0.95  1394

# Options

In [None]:
# Daily statistics - Shanghai Stock Exchange
# Target URL: http://www.sse.com.cn/assortment/options/date/
# Description: Shanghai Stock Exchange - Products - Stock Options - Daily Statistics
# Limit: a single call returns the data for the given date
option_daily_stats_sse_df = ak.option_daily_stats_sse(date="20230103")
print(option_daily_stats_sse_df)

   合约标的代码  合约标的名称  合约数量  总成交额     总成交量   认购成交量   认沽成交量   认沽/认购  未平仓合约总数  \
0  510050   50ETF   NaN   NaN  1880165  985368  894797   90.81  1734483   
1  510300  300ETF   NaN   NaN  1120252  584659  535593   91.61  1236419   
2  510500  500ETF   NaN   NaN   514148  249701  264447  105.91   531542   

   未平仓认购合约数  未平仓认沽合约数         交易日  
0    898542    835941  2023-01-03  
1    600556    635863  2023-01-03  
2    245894    285648  2023-01-03  


In [17]:
# Interface: option_daily_stats_szse
# Target URL: https://investor.szse.cn/market/option/day/index.html
# Description: Shenzhen Stock Exchange - Market Data - Options Data - Daily Overview
# Limit: a single call returns the data for the given date
option_daily_stats_szse_df = ak.option_daily_stats_szse(date="20241225")
print(option_daily_stats_szse_df)

   合约标的代码    合约标的名称     成交量   认购成交量   认沽成交量  认沽/认购持仓比  未平仓合约总数  未平仓认购合约数  \
0  159901  深证100ETF   47378   23427   23951     84.43    78262     42435   
1  159915    创业板ETF  989120  526910  462210     81.64   785530    432475   
2  159919  沪深300ETF  123436   71481   51955     78.29   134769     75590   
3  159922  中证500ETF  245909  100685  145224     89.66   190874    100640   

   未平仓认沽合约数         交易日  
0     35827  2024-12-25  
1    353055  2024-12-25  
2     59179  2024-12-25  
3     90234  2024-12-25  


In [18]:
# Daily option quotes
# API: option_sse_daily_sina
# Source URL: https://stock.finance.sina.com.cn/futures/view/optionsCffexDP.php
# Description: daily option quote data
# Limit: each call returns the daily quote history for one `symbol`
# (presumably the option contract code - confirm against the akshare docs).
# The recorded output has one row per date with open/high/low/close and volume columns.
option_sse_daily_sina_df = ak.option_sse_daily_sina(symbol="10002273")
print(option_sse_daily_sina_df)

            日期      开盘      最高      最低      收盘       成交量
0   2020-02-04  0.2200  0.2870  0.2151  0.2850  13729899
1   2020-02-05  0.2868  0.3159  0.2711  0.3010  10716172
2   2020-02-06  0.3050  0.3581  0.2939  0.3420   8849637
3   2020-02-07  0.3265  0.3416  0.3110  0.3410   3538617
4   2020-02-10  0.3251  0.3389  0.3117  0.3390   3569910
5   2020-02-11  0.3402  0.3737  0.3391  0.3621   4518172
6   2020-02-12  0.3600  0.3776  0.3550  0.3721   1020605
7   2020-02-13  0.3756  0.3898  0.3556  0.3610   1956981
8   2020-02-14  0.3662  0.3848  0.3626  0.3833   1130476
9   2020-02-17  0.3800  0.4415  0.3800  0.4359   2606707
10  2020-02-18  0.4250  0.4334  0.4042  0.4121    827370
11  2020-02-19  0.4066  0.4300  0.4051  0.4122    720243
12  2020-02-20  0.4200  0.4666  0.4050  0.4585   2621812
13  2020-02-21  0.4550  0.4799  0.4477  0.4550   1892291
14  2020-02-24  0.4373  0.4457  0.4106  0.4157   2880770
15  2020-02-25  0.3822  0.4050  0.3660  0.3987    898663
16  2020-02-26  0.3725  0.4157 

In [19]:
# Option value analysis - financial options
# API: option_value_analysis_em
# Source URL: https://data.eastmoney.com/other/valueAnal.html
# Description: Eastmoney - Data Center - Featured Data - Option value analysis
# Limit: a single call returns all available data
option_value_analysis_em_df = ak.option_value_analysis_em()
print(option_value_analysis_em_df)

         期权代码           期权名称     最新价    时间价值   内在价值  隐含波动率    理论价格     标的名称  \
0    10008568   50ETF沽6月2950  0.2666  0.0706  0.196  20.80  0.2598  上证50ETF   
1    10008567   50ETF购6月2950  0.1038  0.1038  0.000  22.22  0.0866  上证50ETF   
2    10008558   50ETF沽6月2900  0.2250  0.0790  0.146  19.67  0.2265  上证50ETF   
3    10008557   50ETF沽6月2850  0.1943  0.0983  0.096  19.71  0.1954  上证50ETF   
4    10008556   50ETF沽6月2800  0.1665  0.1205  0.046  19.82  0.1668  上证50ETF   
..        ...            ...     ...     ...    ...    ...     ...      ...   
577  10008395  50ETF购1月2695A  0.0891  0.0301  0.059  17.70  0.0949  上证50ETF   
578  10008394  50ETF购1月2646A  0.1267  0.0187  0.108  18.41  0.1298  上证50ETF   
579  10008393  50ETF购1月2597A  0.1685  0.0115  0.157  19.04  0.1698  上证50ETF   
580  10008392  50ETF购1月2548A  0.2179  0.0119  0.206  23.58  0.2135  上证50ETF   
581  10008391  50ETF购1月2499A  0.2674  0.0124  0.255  28.22  0.2598  上证50ETF   

     标的最新价  标的近一年波动率         到期日  
0    2.754     1

In [40]:
# List the SSE-50 index option contract months available from the Sina endpoint.
# NOTE: despite the `_df` suffix, the recorded output is a dict that maps the
# underlying index name to a list of contract codes (e.g. 'ho2406'), not a DataFrame.
option_cffex_sz50_list_sina_df = ak.option_cffex_sz50_list_sina()
print(option_cffex_sz50_list_sina_df)

{'上证50指数': ['ho2406', 'ho2405', 'ho2409', 'ho2412', 'ho2503', 'ho2407']}


In [48]:
# Spot quotes for one SSE-50 index option contract month.
# The previously hard-coded symbol "ho2212" is not among the contracts returned by
# option_cffex_sz50_list_sina(), and the call failed with KeyError: 'up'
# (presumably because the contract is no longer listed). Query a contract month
# that is currently listed instead, so the cell keeps working as contracts roll over.
sz50_listed_contracts = ak.option_cffex_sz50_list_sina()["上证50指数"]
option_cffex_sz50_spot_sina_df = ak.option_cffex_sz50_spot_sina(
    symbol=sz50_listed_contracts[0]
)
print(option_cffex_sz50_spot_sina_df)

KeyError: 'up'

In [39]:
# Daily quote history for a single SSE-50 index option contract.
# Other example symbols (presumably call / put of the same strike): ho2303C2325   ho2303P2325
# The recorded output has date, open, high, low, close and volume columns.
option_cffex_sz50_daily_sina_df = ak.option_cffex_sz50_daily_sina(symbol="ho2407P2550")
print(option_cffex_sz50_daily_sina_df)

         date   open   high    low  close  volume
0  2024-04-22  177.8  182.8  176.0  182.8       8
1  2024-04-23  186.2  189.4  186.2  188.8       3
2  2024-04-24  187.6  187.6  187.6  187.6       1


In [3]:
# 50ETF option volatility index
# API: index_option_50etf_qvix
# Source URL: http://1.optbbs.com/s/vix.shtml?50ETF
# Description: 50ETF option volatility index (QVIX); also known as China's "fear index"
# Limit: a single call returns all available data
# TODO add to ETL collection
# NOTE(review): in the recorded run this call raised
# UnicodeDecodeError ('utf-8' codec can't decode byte 0xa1), so no data was printed.
index_option_50etf_qvix_df = ak.index_option_50etf_qvix()
print(index_option_50etf_qvix_df)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa1 in position 147398: invalid start byte

In [6]:
# 300ETF option volatility index
# API: index_option_300etf_qvix
# Source URL: https://1.optbbs.com/s/vix.shtml?300ETF
# Description: 300ETF option volatility index (QVIX)
# Limit: a single call returns all available data
# TODO add to ETL collection
# NOTE: in the recorded output the earliest rows (from 2015-02-09) are all NaN.
index_option_300etf_qvix_df = ak.index_option_300etf_qvix()
print(index_option_300etf_qvix_df)

            date   open   high    low  close
0     2015-02-09    NaN    NaN    NaN    NaN
1     2015-02-10    NaN    NaN    NaN    NaN
2     2015-02-11    NaN    NaN    NaN    NaN
3     2015-02-12    NaN    NaN    NaN    NaN
4     2015-02-13    NaN    NaN    NaN    NaN
...          ...    ...    ...    ...    ...
2240  2024-04-29  16.27  18.20  16.20  16.88
2241  2024-04-30  17.27  17.29  15.86  15.91
2242  2024-05-06  17.23  17.30  17.00  17.00
2243  2024-05-07  16.65  16.77  16.20  16.20
2244  2024-05-08  16.15  16.15  15.83  16.04

[2245 rows x 5 columns]


# Interbank Interest Rate

In [1]:
# Interbank offered rate history.
# Arguments (string values are passed to akshare verbatim):
#   market    - "上海银行同业拆借市场": Shanghai interbank lending market
#   symbol    - "Shibor人民币": Shibor, CNY
#   indicator - "3月": 3-month tenor
# The recorded output has report date ("报告日"), rate ("利率") and change ("涨跌") columns.
# `ak` comes from the notebook's Import cell; it is not re-imported here.
rate_interbank_df = ak.rate_interbank(
    market="上海银行同业拆借市场", symbol="Shibor人民币", indicator="3月"
)
print(rate_interbank_df)

                                             

             报告日      利率    涨跌
0     2006-10-08  2.6110  0.00
1     2006-10-09  2.6248  1.38
2     2006-10-10  2.6325  0.77
3     2006-10-11  2.6338  0.13
4     2006-10-12  2.6380  0.42
...          ...     ...   ...
4389  2024-04-30  2.0020 -0.30
4390  2024-05-06  2.0010 -0.10
4391  2024-05-07  1.9960 -0.50
4392  2024-05-08  1.9900 -0.60
4393  2024-05-09  1.9910  0.10

[4394 rows x 3 columns]




# Fixing Repo Rate (回购定盘利率)

In [3]:
# Repo rate history between `start_date` and `end_date` (YYYYMMDD strings).
# The recorded output has one row per date with FR001/FR007/FR014 and
# FDR001/FDR007/FDR014 columns (presumably the fixing repo rate tenors - confirm).
repo_rate_hist_df = ak.repo_rate_hist(start_date="20231001", end_date="20240509")
print(repo_rate_hist_df)

           date  FR001  FR007  FR014  FDR001  FDR007  FDR014
0    2023-10-07   1.70   1.85   1.82    1.70  1.8224  1.8200
1    2023-10-08   1.56   1.80   1.78    1.53  1.7500  1.7500
2    2023-10-09   1.81   2.01   1.85    1.74  1.8000  1.8000
3    2023-10-10   1.93   2.05   2.22    1.84  1.8600  1.9000
4    2023-10-11   1.96   2.14   2.20    1.87  1.9200  2.0000
..          ...    ...    ...    ...     ...     ...     ...
144  2024-04-30   2.05   2.10   2.10    1.96  2.1200  2.1500
145  2024-05-06   1.89   1.95   2.00    1.78  1.8700  1.9000
146  2024-05-07   1.85   1.90   1.95    1.75  1.8400  1.8500
147  2024-05-08   1.88   1.90   1.95    1.85  1.8600  1.8500
148  2024-05-09   1.83   1.90   1.95    1.73  1.8500  1.8929

[149 rows x 7 columns]


In [4]:
# Query repo rates by series name; "回购定盘利率" is the fixing repo rate series
# named in this section's heading. No date range is passed; the recorded output
# has one row per date with FR001/FR007/FR014 columns.
repo_rate_query_df = ak.repo_rate_query(symbol="回购定盘利率")
print(repo_rate_query_df)

           date  FR001  FR007  FR014
0    2021-05-10   1.67   1.90   2.05
1    2021-05-11   1.95   2.00   2.00
2    2021-05-12   1.96   2.00   2.10
3    2021-05-13   1.97   2.07   2.05
4    2021-05-14   1.82   2.05   2.10
..          ...    ...    ...    ...
745  2024-04-30   2.05   2.10   2.10
746  2024-05-06   1.89   1.95   2.00
747  2024-05-07   1.85   1.90   1.95
748  2024-05-08   1.88   1.90   1.95
749  2024-05-09   1.83   1.90   1.95

[750 rows x 4 columns]


# Finally

In [None]:
# Calculate and print out the wall-clock time taken to execute all the code above.
# `t_start` is captured with time.time() at the top of the Init cell.
print(f"Time taken: {time.time() - t_start} seconds")