# Import

In [2]:
import os
import sys
import logging
import time
import math
import multiprocessing
import pandas as pd
import numpy as np
import sqlalchemy
import exchange_calendars as xcals
from dotenv import load_dotenv

# import exchange_calendars as xcals
from datetime import datetime, timedelta

# import pytz
# import pandas as pd
# from IPython.display import display, HTML
from sqlalchemy import create_engine, text
from sqlalchemy.dialects.postgresql import insert
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache

# Init

In [3]:
t_start = time.time()

load_dotenv()  # take environment variables from .env.

# module_path = os.getenv("LOCAL_AKSHARE_DEV_MODULE")
# if module_path is not None and module_path not in sys.path:
#     sys.path.insert(0, module_path)
import akshare as ak  # noqa: E402

print(ak.__version__)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Create an engine instance
alchemyEngine = create_engine(
    f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}",
    pool_recycle=3600,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

file_handler = logging.FileHandler("etl.log")
console_handler = logging.StreamHandler()

# Step 4: Create a formatter
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")

# Step 5: Attach the formatter to the handlers
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Step 6: Add the handlers to the logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

xshg = xcals.get_calendar("XSHG")

1.13.49


# Helper functions

In [4]:
def update_on_conflict(table, conn, df: pd.DataFrame, primary_keys):
    """
    Insert new records, update existing records without nullifying columns not included in the dataframe
    """
    # Load the table metadata
    table = sqlalchemy.Table(table, sqlalchemy.MetaData(), autoload_with=conn)

    # Create an insert statement from the DataFrame records
    insert_stmt = insert(table).values(df.to_dict(orient="records"))
    # Build a dictionary of column values to be updated, excluding primary keys and non-existent columns
    update_dict = {
        c.name: insert_stmt.excluded[c.name]
        for c in table.columns
        if c.name in df.columns and c.name not in primary_keys
    }
    # Construct the on_conflict_do_update statement
    on_conflict_stmt = insert_stmt.on_conflict_do_update(
        index_elements=primary_keys, set_=update_dict
    )
    # Execute the on_conflict_do_update statement
    conn.execute(on_conflict_stmt)


def ignore_on_conflict(table, conn, df, primary_keys):
    """
    Insert new records, ignore existing records
    """
    table = sqlalchemy.Table(table, sqlalchemy.MetaData(), autoload_with=conn)
    insert_stmt = insert(table).values(df.to_dict(orient="records"))
    on_conflict_stmt = insert_stmt.on_conflict_do_nothing(index_elements=primary_keys)
    conn.execute(on_conflict_stmt)


def saveAsCsv(file_name_main: str, df):
    """
    Save dataframe to CSV file
    """
    # save to file
    # Get the current timestamp to append to the filename
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Save the dataframe to a csv file with timestamp as suffix. Need to properly encode and display Chinese characters.
    df.to_csv(f"{file_name_main}_{current_time}.csv", encoding="utf_8_sig", index=False)


@lru_cache()
def last_trade_date():
    current_date = datetime.now().date()
    # Iterate backwards from current_date until a valid session is found
    last_session = current_date
    while not xshg.is_session(last_session):
        last_session -= timedelta(days=1)
    return last_session

# fund_etf_spot_em 

In [4]:
# Get laste fund / ETF data set for today (or latest trading date), and persists into database.

df = ak.fund_etf_spot_em()
df = df[
    [
        "代码",
        "名称",
        "最新价",
        "IOPV实时估值",
        "基金折价率",
        "涨跌额",
        "涨跌幅",
        "成交量",
        "成交额",
        "开盘价",
        "最高价",
        "最低价",
        "昨收",
        "换手率",
        "量比",
        "委比",
        "外盘",
        "内盘",
        "主力净流入-净额",
        "主力净流入-净占比",
        "超大单净流入-净额",
        "超大单净流入-净占比",
        "大单净流入-净额",
        "大单净流入-净占比",
        "中单净流入-净额",
        "中单净流入-净占比",
        "小单净流入-净额",
        "小单净流入-净占比",
        "流通市值",
        "总市值",
        "最新份额",
        "数据日期",
        "更新时间",
    ]
]

saveAsCsv("fund_etf_spot_em", df)

# Rename the columns of df to match the table's column names
df = df.rename(
    columns={
        "数据日期": "date",
        "更新时间": "update_time",
        "代码": "code",
        "名称": "name",
        "最新价": "latest_price",
        "IOPV实时估值": "iopv",
        "基金折价率": "fund_discount_rate",
        "涨跌额": "change_amount",
        "涨跌幅": "change_rate",
        "成交量": "volume",
        "成交额": "turnover",
        "开盘价": "opening_price",
        "最高价": "highest_price",
        "最低价": "lowest_price",
        "昨收": "previous_close",
        "换手率": "turnover_rate",
        "量比": "volume_ratio",
        "委比": "order_ratio",
        "外盘": "external_disc",
        "内盘": "internal_disc",
        "主力净流入-净额": "main_force_net_inflow_amount",
        "主力净流入-净占比": "main_force_net_inflow_ratio",
        "超大单净流入-净额": "super_large_net_inflow_amount",
        "超大单净流入-净占比": "super_large_net_inflow_ratio",
        "大单净流入-净额": "large_net_inflow_amount",
        "大单净流入-净占比": "large_net_inflow_ratio",
        "中单净流入-净额": "medium_net_inflow_amount",
        "中单净流入-净占比": "medium_net_inflow_ratio",
        "小单净流入-净额": "small_net_inflow_amount",
        "小单净流入-净占比": "small_net_inflow_ratio",
        "流通市值": "circulating_market_value",
        "总市值": "total_market_value",
        "最新份额": "latest_shares",
    }
)

with alchemyEngine.begin() as conn:
    update_on_conflict("fund_etf_spot_em", conn, df, ["code", "date"])

# fund_etf_perf_em

In [9]:
fund_exchange_rank_em_df = ak.fund_exchange_rank_em()

saveAsCsv("fund_exchange_rank_em", fund_exchange_rank_em_df)

column_mapping = {
    "序号": "id",
    "基金代码": "fundcode",
    "基金简称": "fundname",
    "类型": "type",
    "日期": "date",
    "单位净值": "unitnav",
    "累计净值": "accumulatednav",
    "近1周": "pastweek",
    "近1月": "pastmonth",
    "近3月": "past3months",
    "近6月": "past6months",
    "近1年": "pastyear",
    "近2年": "past2years",
    "近3年": "past3years",
    "今年来": "ytd",
    "成立来": "sinceinception",
    "成立日期": "inceptiondate",
}
fund_exchange_rank_em_df.rename(columns=column_mapping, inplace=True)

# remove rows where `date` is null or NaT
fund_exchange_rank_em_df.dropna(subset=['date'], inplace=True)

with alchemyEngine.begin() as conn:
    update_on_conflict("fund_etf_perf_em", conn, fund_exchange_rank_em_df, ["fundcode"])

# Get a full list of ETF fund

In [6]:
# retrieve list from Sina
fund_etf_category_sina_df = ak.fund_etf_category_sina(symbol="ETF基金")

# keep only 2 columns from `fund_etf_category_sina_df`: 代码, 名称.
# split `代码` values by `exchange code` and `symbol` and store into 2 columns. No need to keep the `代码` column.
# for example: 代码=sz159998, split into `exch=sz`, `symbol=159998`.
df = fund_etf_category_sina_df[["代码", "名称"]].copy()
df.columns = ["code", "name"]
df[["exch", "symbol"]] = df["code"].str.extract(r"([a-z]+)(\d+)")
df.drop(columns=["code"], inplace=True)

# Now, use the update_on_conflict function to insert or update the data
with alchemyEngine.begin() as conn:
    update_on_conflict("fund_etf_list_sina", conn, df, ["exch", "symbol"])

# Get historical trades

In [8]:
end_date = datetime.now().strftime("%Y%m%d")
start_date = (datetime.now() - timedelta(days=20)).strftime("%Y%m%d")
# start_date = '19700101' # For entire history.


# Function to fetch and process ETF data
def fetch_and_process_etf(symbol):
    try:
        df = ak.fund_etf_hist_em(
            symbol=symbol,
            period="daily",
            start_date=start_date,
            end_date=end_date,
            adjust="qfq",
        )

        # if df contains no row at all, return immediately
        if df.empty:
            return None
        
        df["symbol"] = symbol
        df = df.rename(
            columns={
                "日期": "date",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "成交量": "volume",
                "成交额": "turnover",
                "振幅": "amplitude",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amount",
                "换手率": "turnover_rate",
            }
        )
        df = df[
            [
                "symbol",
                "date",
                "open",
                "close",
                "high",
                "low",
                "volume",
                "turnover",
                "amplitude",
                "change_rate",
                "change_amount",
                "turnover_rate",
            ]
        ]
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("fund_etf_daily_em", conn, df, ["symbol", "date"])
    except Exception:
        logging.error(
            f"failed to get daily trade history data for {symbol}", exc_info=True
        )
        return None
    return df


# Fetch the ETF list
etf_list_df = pd.read_sql("SELECT symbol FROM fund_etf_list_sina", alchemyEngine)

# get the number of CPU cores
num_cores = multiprocessing.cpu_count()

# Use ThreadPoolExecutor to fetch data in parallel
with ThreadPoolExecutor(max_workers=num_cores) as executor:
    futures = [
        executor.submit(fetch_and_process_etf, symbol)
        for symbol in etf_list_df["symbol"]
    ]
    results = [future.result() for future in futures]

# Calculate ETF Performance Metrics

## Get historical bond rate (risk-free interest rate)

In [9]:
# start_date = (datetime.now() - timedelta(days=20)).strftime('%Y%m%d')
start_date = None  # For entire history.

bzur = ak.bond_zh_us_rate(start_date)
bzur = bzur.rename(
    columns={
        "日期": "date",
        "中国国债收益率2年": "china_yield_2y",
        "中国国债收益率5年": "china_yield_5y",
        "中国国债收益率10年": "china_yield_10y",
        "中国国债收益率30年": "china_yield_30y",
        "中国国债收益率10年-2年": "china_yield_spread_10y_2y",
        "中国GDP年增率": "china_gdp_growth",
        "美国国债收益率2年": "us_yield_2y",
        "美国国债收益率5年": "us_yield_5y",
        "美国国债收益率10年": "us_yield_10y",
        "美国国债收益率30年": "us_yield_30y",
        "美国国债收益率10年-2年": "us_yield_spread_10y_2y",
        "美国GDP年增率": "us_gdp_growth",
    }
)
with alchemyEngine.begin() as conn:
    ignore_on_conflict("bond_metrics_em", conn, bzur, ["date"])

                                               

## Calc / Update metrics in fund_etf_perf_em table

In [6]:
interval = 250  # assume 250 trading days annualy
end_date = last_trade_date()
# start_date = (end_date - timedelta(days=interval)).strftime('%Y%m%d')
# start_date = '19700101' # For entire history.

# load historical data from daily table and calc metrics, then update perf table
def update_etf_metrics(symbol):
    try:
        with alchemyEngine.begin() as conn:
            # load the latest (top) `interval` records of historical market data records from `fund_etf_daily_em` table for `symbol`, order by `date`.
            # select columns: date, change_rate
            query = """SELECT date, change_rate FROM fund_etf_daily_em WHERE symbol = '{}' ORDER BY date DESC LIMIT {}""".format(
                symbol, interval
            )
            df = pd.read_sql(query, conn, parse_dates=["date"])
    
            # get oldest df['date'] as state_date
            start_date = df['date'].iloc[-1]
            # get 2-years CN bond IR as risk-free IR from bond_metrics_em table. 1-year series (natural dates).
            # select date, china_yield_2y from table `bond_metrics_em`, where date is between start_date and end_date (inclusive). Load into a dataframe.
            query = """SELECT date, china_yield_2y FROM bond_metrics_em WHERE date BETWEEN '{}' AND '{}' and china_yield_2y <> 'nan'""".format(
                start_date, end_date
            )
            bme_df = pd.read_sql(query, conn, parse_dates=["date"])
            # Convert annualized rate to a daily rate
            bme_df["china_yield_2y_daily"] = bme_df["china_yield_2y"] / 365.25

            # merge df with bme_df by matching dates.
            df = pd.merge_asof(
                df.sort_values("date"),
                bme_df.sort_values("date"),
                on="date",
                direction="backward",
            ).dropna(subset=["change_rate"])

            # calculate the Sharpe ratio, Sortino ratio, and max drawdown with the time series data inside df.
            df["excess_return"] = df["change_rate"] - df["china_yield_2y_daily"]
            # Annualize the excess return
            annualized_excess_return = np.mean(df["excess_return"])

            # Calculate the standard deviation of the excess returns
            std_dev = df["excess_return"].std()

            # Sharpe ratio
            sharpe_ratio = annualized_excess_return / std_dev

            # Calculate the downside deviation (Sortino ratio denominator)
            downside_dev = df[df["excess_return"] < 0]["excess_return"].std()

            # Sortino ratio
            sortino_ratio = (
                annualized_excess_return / downside_dev if downside_dev > 0 else None
            )

            # To calculate max drawdown, get the cummulative_returns
            df["cumulative_returns"] = np.cumprod(1 + df["change_rate"]/100.) - 1
            # Calculate the maximum cumulative return up to each point
            peak = np.maximum.accumulate(df["cumulative_returns"])
            # Calculate drawdown as the difference between the current value and the peak
            drawdown = (df["cumulative_returns"] - peak) / (1 + peak) * 100
            # Calculate max drawdown
            max_drawdown = np.min(drawdown)  # This is a negative number

            # update the `sharperatio, sortinoratio, maxdrawdown` columns for `symbol` in the table `fund_etf_perf_em` using the calculated metrics.
            update_query = text(
                "UPDATE fund_etf_perf_em SET sharperatio = :sharperatio, sortinoratio = :sortinoratio, maxdrawdown = :maxdrawdown WHERE fundcode = :fundcode"
            )
            params = {
                "sharperatio": round(sharpe_ratio, 2)
                if sharpe_ratio is not None and math.isfinite(sharpe_ratio)
                else None,
                "sortinoratio": round(sortino_ratio, 2)
                if sortino_ratio is not None and math.isfinite(sortino_ratio)
                else None,
                "maxdrawdown": round(max_drawdown, 2)
                if math.isfinite(max_drawdown)
                else None,
                "fundcode": symbol,
            }
            conn.execute(update_query, params)

    except Exception:
        logging.error(f"failed to update ETF metrics for {symbol}", exc_info=True)
        return None
    return df


# Fetch the ETF list
etf_list_df = pd.read_sql("SELECT symbol FROM fund_etf_list_sina", alchemyEngine)

# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_etf_metrics, symbol) for symbol in etf_list_df["symbol"]
    ]
    results = [future.result() for future in futures]

# China Market Indices

In [35]:
cn_index_list = [
    ("上证系列指数", "sh"),
    ("深证系列指数", "sz"),
    # ("指数成份", ""),
    ("中证系列指数", "csi"),
]

def update_cn_indices_em(symbol, src):
    try:
        szise = ak.stock_zh_index_spot_em(symbol)
        szise = szise.rename(
            columns={
                "序号": "seq",
                "代码": "symbol",
                "名称": "name",
                "最新价": "close",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amount",
                "成交量": "volume",
                "成交额": "amount",
                "振幅": "amplitude",
                "最高": "high",
                "最低": "low",
                "今开": "open",
                "昨收": "prev_close",
                "量比": "volume_ratio",
            }
        )
        szise["src"] = src
        with alchemyEngine.begin() as conn:
            update_on_conflict("index_spot_em", conn, szise, ["symbol"])

    except Exception:
        logging.error(f"failed to update index_spot_em for {symbol}", exc_info=True)
        return None
    return szise


# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_cn_indices_em, symbol, src) for symbol, src in cn_index_list
    ]
    results = [future.result() for future in futures]

In [5]:
# get daily historical data
def update_cn_indices(symbol, src):
    try:
        szide = ak.stock_zh_index_daily_em(f"{src}{symbol}")

        # if shide is empty, return immediately
        if szide.empty:
            return None

        szide["symbol"] = symbol
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("index_daily_em", conn, szide, ["symbol", "date"])

    except Exception:
        logging.error(f"failed to update index_daily_em for {symbol}", exc_info=True)
        return None
    return szide


conn = alchemyEngine.connect()
cn_index_fulllist = pd.read_sql("SELECT src, symbol FROM index_spot_em", conn)
conn.close()

# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_cn_indices, symbol, src)
        for symbol, src in zip(cn_index_fulllist["symbol"], cn_index_fulllist["src"])
    ]
    results = [future.result() for future in futures]

ERROR:root:failed to update index_daily_em for 395033
Traceback (most recent call last):
  File "/var/folders/fb/phz99gmn7cld34rh4f6ppmlw0000gn/T/ipykernel_43190/5112864.py", line 4, in update_cn_indices
    szide = ak.stock_zh_index_daily_em(f"{src}{symbol}")
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/jx/ProgramData/python/akshare/akshare/index/index_stock_zh.py", line 361, in stock_zh_index_daily_em
    temp_df.columns = ["date", "open", "close", "high", "low", "volume", "amount", "_"]
    ^^^^^^^^^^^^^^^
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/pandas/core/generic.py", line 6218, in __setattr__
    return object.__setattr__(self, name, value)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "properties.pyx", line 69, in pandas._libs.properties.AxisProperty.__set__
  File "/Users/jx/.pyenv/versions/3.12.2/envs/venv_3.12.2/lib/python3.12/site-packages/pandas/core/generic.py", line 767, in _set_axis
    s

# China Stock Markets

In [4]:
# Set proxy settings
proxy = "http://localhost:8089"
os.environ["http_proxy"] = proxy
os.environ["https_proxy"] = proxy

In [5]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from requests.packages.urllib3 import disable_warnings

# Disable SSL warnings due to unverified HTTPS requests
disable_warnings(InsecureRequestWarning)

# Save the original requests.Session.send
orig_send = requests.Session.send
# Save the original Session.request method
original_request_method = requests.Session.request


def new_send(*args, **kwargs):
    # Set 'verify' to False to disable SSL verification
    kwargs["verify"] = False
    return orig_send(*args, **kwargs)


def custom_request(self, method, url, **kwargs):
    # Set a custom timeout if not already provided
    kwargs["timeout"] = 300  # Set your desired timeout here, in seconds
    # if "timeout" not in kwargs:
    #     kwargs["timeout"] = 300  # Set your desired timeout here, in seconds
    return original_request_method(self, method, url, **kwargs)


# Monkey-patch requests with our new_send function
requests.Session.send = new_send
# Monkey-patch the requests.Session.request method
requests.Session.request = custom_request

# Now, when the third-party library makes a request, SSL verification will be disabled

In [5]:
# add retry logic to the following action when exception occurs

retry_attempts = 3
retry_delay = 5  # seconds

for attempt in range(retry_attempts):
    try:
        stock_zh_a_spot_em_df = ak.stock_zh_a_spot_em()
        break
    except Exception as e:
        print(f'Attempt {attempt+1} failed with error: {e}')
        if attempt < retry_attempts - 1:
            print(f'Retrying in {retry_delay} seconds...')
            time.sleep(retry_delay)
        else:
            raise
# stock_zh_a_spot_em_df = ak.stock_zh_a_spot_em()


In [6]:
stock_zh_a_spot_em_df

Unnamed: 0,序号,代码,名称,最新价,涨跌幅,涨跌额,成交量,成交额,振幅,最高,...,量比,换手率,市盈率-动态,市净率,总市值,流通市值,涨速,5分钟涨跌,60日涨跌幅,年初至今涨跌幅
0,1,301121,紫建电子,39.00,20.00,6.50,81730.0,3.042025e+08,20.43,39.00,...,2.46,27.65,62.87,1.68,2.761324e+09,1.152911e+09,0.00,0.00,16.45,-6.16
1,2,300891,惠云钛业,9.24,20.00,1.54,345683.0,3.152275e+08,12.60,9.24,...,9.28,10.40,90.06,2.82,3.696025e+09,3.071528e+09,0.00,0.00,9.74,-4.74
2,3,300644,南京聚隆,21.60,20.00,3.60,187403.0,4.012109e+08,9.06,21.60,...,4.18,21.94,25.47,2.63,2.328612e+09,1.845021e+09,0.00,0.00,28.27,4.10
3,4,300255,常山药业,11.77,19.98,1.96,452180.0,5.298323e+08,4.79,11.77,...,1.74,4.94,-20.89,4.06,1.081735e+10,1.077949e+10,0.00,0.00,19.25,-9.53
4,5,301395,仁信新材,17.42,19.97,2.90,59264.0,1.001740e+08,12.40,17.42,...,7.04,16.36,83.66,1.52,2.524506e+09,6.311266e+08,0.00,0.00,7.73,-7.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,5603,300282,*ST三盛,0.97,-11.01,-0.12,269801.0,2.746221e+07,12.84,1.09,...,0.85,7.21,-6.90,0.28,3.630773e+08,3.630773e+08,1.04,0.00,-62.84,-62.11
5603,5604,300506,名家汇,1.57,-11.80,-0.21,553684.0,8.883187e+07,14.04,1.76,...,0.87,9.67,-7.57,2.65,1.092087e+09,8.988524e+08,0.00,0.00,-60.25,-68.09
5604,5605,300116,保力新,0.65,-12.16,-0.09,3436568.0,2.370547e+08,13.51,0.74,...,1.95,8.37,-28.41,11.41,2.876634e+09,2.667500e+09,0.00,-1.52,-52.90,-56.08
5605,5606,300799,*ST左江,7.06,-12.95,-1.05,114314.0,8.546251e+07,10.97,7.91,...,1.20,16.92,-5.55,1.24,7.203318e+08,4.770795e+08,0.00,-0.56,-84.02,-75.49


In [None]:
stock_zh_a_spot_em_df.rename(
    columns={
        "序号": "serial_no",
        "代码": "code",
        "名称": "name",
        "最新价": "latest_price",
        "涨跌幅": "price_change_pct",
        "涨跌额": "price_change_amt",
        "成交量": "volume",
        "成交额": "turnover",
        "振幅": "amplitude",
        "最高": "highest",
        "最低": "lowest",
        "今开": "open_today",
        "昨收": "close_yesterday",
        "量比": "volume_ratio",
        "换手率": "turnover_rate",
        "市盈率-动态": "pe_ratio_dynamic",
        "市净率": "pb_ratio",
        "总市值": "total_market_value",
        "流通市值": "circulating_market_value",
        "涨速": "rise_speed",
        "5分钟涨跌": "five_min_change",
        "60日涨跌幅": "sixty_day_change_pct",
        "年初至今涨跌幅": "ytd_change_pct",
    },
    inplace=True,
)

with alchemyEngine.begin() as conn:
    update_on_conflict("stock_zh_a_spot_em", conn, stock_zh_a_spot_em_df, ["code"])

In [7]:
def retry_ak_stock_zh_a_hist(symbol, period, start_date, end_date, adjust):
    retry_attempts = 3
    retry_delay = 5  # seconds
    for attempt in range(retry_attempts):
        try:
            stock_zh_a_hist_df = ak.stock_zh_a_hist(
                symbol, period, start_date, end_date, adjust
            )
            return stock_zh_a_hist_df
        except Exception as e:
            print(f'Attempt {attempt+1} failed with error: {e}')
            if attempt < retry_attempts - 1:
                print(f'Retrying in {retry_delay} seconds...')
                time.sleep(retry_delay)
            else:
                raise

In [8]:
# get daily historical data
def stock_zh_a_hist(code):

    try:
        stock_zh_a_hist_df = retry_ak_stock_zh_a_hist(
            symbol=code,
            period="daily",
            start_date="19700101",   ### entire history
            end_date="20240401",
            adjust="hfq",
        )

        # if shide is empty, return immediately
        if stock_zh_a_hist_df.empty:
            return None

        stock_zh_a_hist_df["symbol"] = code
        ## rename stock_zh_a_hist_df columns from Chinese to English:
        stock_zh_a_hist_df.rename(
            columns={
                "日期": "date",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "成交量": "volume",
                "成交额": "turnover",
                "振幅": "amplitude",
                "涨跌幅": "change_rate",
                "涨跌额": "change_amt",
                "换手率": "turnover_rate",
            },
            inplace=True,
        )

        with alchemyEngine.begin() as conn:
            ignore_on_conflict(
                "stock_zh_a_hist_em", conn, stock_zh_a_hist_df, ["symbol", "date"]
            )

    except Exception:
        logging.error(f"failed to update stock_zh_a_hist_em for {code}", exc_info=True)
        return None
    return stock_zh_a_hist_df


# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(stock_zh_a_hist, code) for code in stock_zh_a_spot_em_df["code"]
    ]
    results = [future.result() for future in futures]

# Get HK Market Indices

In [4]:
# refresh the list

hk_index_list_df = ak.stock_hk_index_spot_em()
hk_index_list_df = hk_index_list_df.rename(
    columns={
        "序号": "seq",
        "内部编号": "internal_code",
        "代码": "symbol",
        "名称": "name",
        "最新价": "close",
        "涨跌额": "change_amount",
        "涨跌幅": "change_rate",
        "今开": "open",
        "最高": "high",
        "最低": "low",
        "昨收": "prev_close",
        "成交量": "volume",
        "成交额": "amount",
    }
)

# saveAsCsv("hk_index_spot_em", df)

with alchemyEngine.begin() as conn:
    update_on_conflict("hk_index_spot_em", conn, hk_index_list_df, ["symbol"])

In [5]:
# get daily historical data
def update_hk_indices(symbol):
    try:
        shide = ak.stock_hk_index_daily_em(symbol=symbol)

        # if shide is empty, return immediately
        if shide.empty:
            return None
        
        shide["symbol"] = symbol
        shide = shide.rename(
            columns={
                "latest":"close",
            }
        )
        with alchemyEngine.begin() as conn:
            ignore_on_conflict("hk_index_daily_em", conn, shide, ["symbol", "date"])

    except Exception:
        logging.error(f"failed to update hk_index_daily_em for {symbol}", exc_info=True)
        return None
    return shide

# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [
        executor.submit(update_hk_indices, symbol) for symbol in hk_index_list_df["symbol"]
    ]
    results = [future.result() for future in futures]

# Get US market indices

In [10]:
idx_symbol_list = [".IXIC", ".DJI", ".INX", ".NDX"]


def update_us_indices(symbol):
    try:
        iuss = ak.index_us_stock_sina(symbol=symbol)
        iuss['symbol'] = symbol
        with alchemyEngine.begin() as conn:
            update_on_conflict("us_index_daily_sina", conn, iuss, ["symbol", "date"])

    except Exception:
        logging.error(
            f"failed to update us_index_daily_sina for {symbol}", exc_info=True
        )
        return None
    return iuss


# get the number of CPU cores
num_proc = int((multiprocessing.cpu_count() + 1) / 2.0)

# Use ThreadPoolExecutor to calculate metrics in parallel
with ThreadPoolExecutor(max_workers=num_proc) as executor:
    futures = [executor.submit(update_us_indices, symbol) for symbol in idx_symbol_list]
    results = [future.result() for future in futures]

# China Bond

In [1]:
bond_zh_hs_spot_df = ak.bond_zh_hs_spot()
print(bond_zh_hs_spot_df)

  0%|          | 0/148 [00:00<?, ?it/s]

             代码      名称      最新价    涨跌额    涨跌幅     买入       卖出       昨收  \
0      sh010107   21国债⑺  100.010    0.0    0.0    0.0    0.000  100.010   
1      sh010303   03国债⑶  100.010    0.0    0.0    0.0    0.000  100.010   
2      sh010504   05国债⑷  103.017  0.267   0.26  100.0  103.095  102.750   
3      sh010512   05国债⑿  100.050    0.0    0.0    0.0    0.000  100.050   
4      sh010609   06国债⑼  100.000    0.0    0.0    0.0    0.000  100.000   
...         ...     ...      ...    ...    ...    ...      ...      ...   
11021  sz149995  22赣水Y1    0.000  0.000  0.000    0.0    0.000  100.629   
11022  sz149996  22华股02    0.000  0.000  0.000    0.0    0.000  100.706   
11023  sz149997  22穗交Y1    0.000  0.000  0.000    0.0    0.000  102.201   
11024  sz149998  22知投G2    0.000  0.000  0.000    0.0    0.000  100.361   
11025  sz149999  22电建04    0.000  0.000  0.000    0.0    0.000  100.769   

          今开     最高     最低   成交量     成交额  
0        0.0    0.0    0.0     0       0  
1        0.0 

In [7]:
bond_zh_hs_daily_df = ak.bond_zh_hs_daily(symbol="sz149999")
print(bond_zh_hs_daily_df)

          date     open     high      low    close   volume
0   2022-08-03  100.000  100.000  100.000  100.000  1200000
1   2022-08-04  100.000  100.000  100.000  100.000   500000
2   2022-08-15  100.277  100.277  100.277  100.277   800000
3   2022-11-23   97.736   97.743   97.736   97.743  1000000
4   2022-12-07   97.779   97.779   97.779   97.779    50000
5   2022-12-08   97.779   97.780   97.779   97.780   100000
6   2023-01-12   96.769   96.769   96.769   96.769    20000
7   2023-02-02   96.804   96.804   96.804   96.804   280000
8   2023-02-08   96.881   96.882   96.881   96.882  2000000
9   2023-02-09   97.324   97.331   97.324   97.331   300000
10  2023-02-13   97.281   97.281   97.281   97.281   500000
11  2023-03-15   98.339   98.340   98.339   98.340   400000
12  2023-04-26   99.916   99.923   99.916   99.923   800000
13  2023-06-30  100.099  100.099  100.099  100.099   300000
14  2023-07-03  100.151  100.151  100.151  100.151   200000
15  2023-07-05  100.138  100.138  100.13

In [9]:
# indicator="财富"; choice of {"全价", "净价", "财富", "平均市值法久期", "平均现金流法久期", "平均市值法凸性", "平均现金流法凸性", "平均现金流法到期收益率", "平均市值法到期收益率", "平均基点价值", "平均待偿期", "平均派息率", "指数上日总市值", "财富指数涨跌幅", "全价指数涨跌幅", "净价指数涨跌幅", "现券结算量"}
# period="总值"; choice of {"总值", "1年以下", "1-3年", "3-5年", "5-7年", "7-10年", "10年以上", "0-3个月", "3-6个月", "6-9个月", "9-12个月", "0-6个月", "6-12个月"}
bond_new_composite_index_cbond_df = ak.bond_new_composite_index_cbond(
    indicator="全价", period="10年以上"
)
print(bond_new_composite_index_cbond_df)

            date     value
0     2002-01-04  100.8196
1     2002-01-07  101.0637
2     2002-01-08  100.3187
3     2002-01-09  100.5000
4     2002-01-10  100.6126
...          ...       ...
5580  2024-04-22  132.0262
5581  2024-04-23  132.5291
5582  2024-04-24  131.8973
5583  2024-04-25  131.4804
5584  2024-04-26  130.9411

[5585 rows x 2 columns]


In [21]:
bond_new_composite_index_cbond_df = ak.bond_new_composite_index_cbond(
    indicator="指数上日总市值", period="0-3个月"
)
print(bond_new_composite_index_cbond_df)

KeyError: 'ZSZSZ_07'

# Currencies

In [5]:
ret = ak.currency_name_code()
ret

AttributeError: 'NoneType' object has no attribute 'find_all'

In [12]:
ret = ak.currency_pair_map()
ret

  0%|          | 0/5 [00:00<?, ?it/s]

                                             

KeyError: '美元'

In [14]:
currency_hist_df = ak.currency_hist(
    symbol="usd-jpy", period="每日", start_date="20050101", end_date="20220808"
)
print(currency_hist_df)

AttributeError: 'NoneType' object has no attribute 'text'

In [10]:
currency_boc_safe_df = ak.currency_boc_safe()
print(currency_boc_safe_df)

              日期      美元      欧元      日元       港元      英镑      澳元    新西兰元  \
0     1994-01-01  870.00     NaN  7.7800  112.660     NaN     NaN     NaN   
1     1994-01-03  870.00     NaN  7.7800  112.660     NaN     NaN     NaN   
2     1994-01-04  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
3     1994-01-05  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
4     1994-01-06  870.00     NaN  7.7196  112.660     NaN     NaN     NaN   
...          ...     ...     ...     ...      ...     ...     ...     ...   
7459  2024-04-15  709.79  761.79  4.6677   90.577  891.63  464.51  426.80   
7460  2024-04-16  710.28  759.69  4.6419   90.705  889.83  461.51  422.88   
7461  2024-04-17  710.25  759.73  4.6333   90.690  889.00  460.63  422.18   
7462  2024-04-18  710.20  759.80  4.6363   90.707  888.54  459.29  421.49   
7463  2024-04-19  710.46  758.73  4.6329   90.711  888.70  458.70  421.03   

        新加坡元    瑞士法郎  ...     里亚尔       福林     兹罗提   丹麦克朗    瑞典克朗    挪威克朗  

In [12]:
currency_boc_safe_df.columns

Index(['日期', '美元', '欧元', '日元', '港元', '英镑', '澳元', '新西兰元', '新加坡元', '瑞士法郎', '加元',
       '林吉特', '卢布', '兰特', '韩元', '迪拉姆', '里亚尔', '福林', '兹罗提', '丹麦克朗', '瑞典克朗',
       '挪威克朗', '里拉', '比索', '泰铢', '澳门元'],
      dtype='object')

In [13]:
currency_boc_safe_df.rename(
    columns={
        "日期": "Date",
        "美元": "USD",
        "欧元": "EUR",
        "日元": "JPY",
        "港元": "HKD",
        "英镑": "GBP",
        "澳元": "AUD",
        "新西兰元": "NZD",
        "新加坡元": "SGD",
        "瑞士法郎": "CHF",
        "加元": "CAD",
        "林吉特": "MYR",
        "卢布": "RUB",
        "兰特": "ZAR",
        "韩元": "KRW",
        "迪拉姆": "AED",
        "里亚尔": "QAR",
        "福林": "HUF",
        "兹罗提": "PLN",
        "丹麦克朗": "DKK",
        "瑞典克朗": "SEK",
        "挪威克朗": "NOK",
        "里拉": "TRY",
        "比索": "PHP",
        "泰铢": "THB",
        "澳门元": "MOP",
    },
    inplace=True,
)

In [15]:
with alchemyEngine.begin() as conn:
    update_on_conflict("currency_boc_safe", conn, currency_boc_safe_df, ["date"])

# SGE Spot

In [16]:
ssts = ak.spot_symbol_table_sge()

In [17]:
ssts.rename(columns={"序号": "serial", "品种": "product"}, inplace=True)

Unnamed: 0,序号,品种
0,1,Au99.99
1,2,Au99.95
2,3,Au100g
3,4,Pt99.95
4,5,Ag(T+D)
5,6,Au(T+D)
6,7,mAu(T+D)
7,8,Au(T+N1)
8,9,Au(T+N2)
9,10,Ag99.99


In [18]:
with alchemyEngine.begin() as conn:
    update_on_conflict("spot_symbol_table_sge", conn, ssts, ["product"])

In [23]:
symbol = "Au99.95"
spot_hist_sge_df = ak.spot_hist_sge(symbol=symbol)

In [24]:
spot_hist_sge_df.insert(0, "symbol", symbol)

In [25]:
spot_hist_sge_df

Unnamed: 0,symbol,date,open,close,high,low
0,Au99.95,2016-12-19,262.90,262.80,262.30,262.94
1,Au99.95,2016-12-20,262.80,261.77,261.30,262.80
2,Au99.95,2016-12-21,259.10,260.85,259.10,261.00
3,Au99.95,2016-12-22,260.30,259.40,259.30,260.30
4,Au99.95,2016-12-23,259.65,259.98,259.01,259.99
...,...,...,...,...,...,...
1762,Au99.95,2024-04-15,561.00,560.36,550.01,561.50
1763,Au99.95,2024-04-16,567.00,564.26,562.00,568.00
1764,Au99.95,2024-04-17,566.10,564.50,562.00,566.10
1765,Au99.95,2024-04-18,562.00,563.17,562.00,563.30


In [26]:
with alchemyEngine.begin() as conn:
    update_on_conflict("spot_hist_sge", conn, spot_hist_sge_df, ["symbol", "date"])

# fund flow

In [4]:
# only 100 days history is returned
stock_individual_fund_flow_df = ak.stock_individual_fund_flow(
    stock="600094", market="sh"
)
print(stock_individual_fund_flow_df)

             日期   收盘价   涨跌幅    主力净流入-净额  主力净流入-净占比  超大单净流入-净额  超大单净流入-净占比  \
0    2023-10-26  3.07 -1.92  -8999539.0      -9.56 -3145738.0       -3.34   
1    2023-10-27  3.09  0.65   4255258.0       6.13   237245.0        0.34   
2    2023-10-30  3.16  2.27  -4168290.0      -3.14   286991.0        0.22   
3    2023-10-31  3.20  1.27  -6544701.0      -6.33   629591.0        0.61   
4    2023-11-01  3.23  0.94  -4527106.0      -4.19 -4706897.0       -4.36   
..          ...   ...   ...         ...        ...        ...         ...   
116  2024-04-18  3.79  2.16  12136552.0       9.43  5920911.0        4.60   
117  2024-04-19  3.85  1.58  18959501.0      20.91  9479434.0       10.45   
118  2024-04-22  3.75 -2.60   8203747.0      12.07  3957814.0        5.82   
119  2024-04-23  3.60 -4.00  -9632473.0     -14.93 -5013468.0       -7.77   
120  2024-04-24  3.64  1.11  -3622226.0      -4.45 -2117372.0       -2.60   

      大单净流入-净额  大单净流入-净占比   中单净流入-净额  中单净流入-净占比    小单净流入-净额  小单净流入-净占比  
0 

In [5]:
stock_sector_fund_flow_hist_df = ak.stock_sector_fund_flow_hist(symbol="电源设备")
print(stock_sector_fund_flow_hist_df)

             日期     主力净流入-净额  主力净流入-净占比   超大单净流入-净额  超大单净流入-净占比     大单净流入-净额  \
0    2023-10-26  -73910569.0      -2.44  -1913321.0       -0.06  -71997248.0   
1    2023-10-27   78430051.0       2.18   1727283.0        0.05   76702768.0   
2    2023-10-30 -129940087.0      -3.70 -79973607.0       -2.28  -49966480.0   
3    2023-10-31  -79302200.0      -2.09  -9708024.0       -0.26  -69594176.0   
4    2023-11-01  -19266774.0      -0.69 -15652038.0       -0.56   -3614736.0   
..          ...          ...        ...         ...         ...          ...   
116  2024-04-18 -146704271.0      -3.14 -40991199.0       -0.88 -105713072.0   
117  2024-04-19 -194834566.0      -5.00 -93215766.0       -2.39 -101618800.0   
118  2024-04-22 -154895774.0      -4.25 -29532302.0       -0.81 -125363472.0   
119  2024-04-23  -54391548.0      -1.81 -47213740.0       -1.57   -7177808.0   
120  2024-04-24 -155432353.0      -5.47 -36173745.0       -1.27 -119258608.0   

     大单净流入-净占比     中单净流入-净额  中单净流入-净占比 

# Carbon Emission

In [7]:
energy_carbon_domestic_df = ak.energy_carbon_domestic(symbol="深圳")
print(energy_carbon_domestic_df)

              日期    成交价      成交量           成交额  地点
0     2013-06-19  29.00      0.0      0.000000  深圳
1     2013-06-20  29.00      0.0      0.000000  深圳
2     2013-06-21  29.00      0.0      0.000000  深圳
3     2013-06-22  29.00      0.0      0.000000  深圳
4     2013-06-23  29.00      0.0      0.000000  深圳
...          ...    ...      ...           ...  ..
2154  2021-03-31  53.44      3.0     28.909999  深圳
2155  2021-04-01  51.48    229.0   1697.350049  深圳
2156  2021-04-02  50.47  12562.0  69828.731367  深圳
2157  2021-04-06  47.84   1007.0   5936.440137  深圳
2158  2021-04-07  44.48   1410.0   7453.210097  深圳

[2159 rows x 5 columns]


In [8]:
energy_carbon_bj_df = ak.energy_carbon_bj()
print(energy_carbon_bj_df)

SSLError: HTTPSConnectionPool(host='www.bjets.com.cn', port=443): Max retries exceeded with url: /article/jyxx/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)')))

In [9]:
energy_carbon_sz_df = ak.energy_carbon_sz()
print(energy_carbon_sz_df)

AttributeError: 'NoneType' object has no attribute 'find_all'

In [10]:
energy_carbon_eu_df = ak.energy_carbon_eu()
print(energy_carbon_eu_df)

AttributeError: 'NoneType' object has no attribute 'find_all'

# Consumer Oil

In [11]:
energy_oil_hist_df = ak.energy_oil_hist()
print(energy_oil_hist_df)

           调整日期  汽油价格  柴油价格   汽油涨跌   柴油涨跌
0    2000-06-06  2935  2430    NaN    NaN
1    2000-07-15  3135  2610  200.0  180.0
2    2000-08-18  3405  2770  270.0  160.0
3    2000-09-20  3615  3070  210.0  300.0
4    2000-10-20  3435  3440 -180.0  370.0
..          ...   ...   ...    ...    ...
276  2024-01-18  8780  7745  -50.0  -50.0
277  2024-02-01  8980  7945  200.0  200.0
278  2024-03-05  9105  8065  125.0  120.0
279  2024-04-02  9305  8255  200.0  190.0
280  2024-04-17  9505  8450  200.0  195.0

[281 rows x 5 columns]


In [12]:
energy_oil_detail_df = ak.energy_oil_detail(date="20240118")
print(energy_oil_detail_df)

            日期   地区       V_0      V_92      V_95      V_89     ZDE_0  \
0   2024-01-18   上海  7.300000  8.120000  7.630000  7.120000 -0.040000   
1   2024-01-18   云南  7.391245  8.382528  7.809826  7.192800 -0.042650   
2   2024-01-18  内蒙古  7.190000  8.110000  7.600000       NaN -0.040000   
3   2024-01-18   北京  7.370000  8.160000  7.660000  7.170000 -0.040000   
4   2024-01-18   吉林  7.240000  8.230000  7.630000       NaN -0.040000   
5   2024-01-18   四川  7.370000  8.290000  7.760000  7.200000 -0.040000   
6   2024-01-18   天津  7.320000  8.090000  7.660000  7.100000 -0.040000   
7   2024-01-18   宁夏  7.200000  7.990000  7.560000  7.140000 -0.050000   
8   2024-01-18   安徽  7.360000  8.160000  7.620000  7.140000 -0.040000   
9   2024-01-18   山东  7.230000  8.180000  7.630000  7.080000 -0.050000   
10  2024-01-18   山西  7.390000  8.220000  7.610000  7.130000 -0.040000   
11  2024-01-18   广东  7.330000  8.320000  7.680000  7.130000 -0.040000   
12  2024-01-18   广西  7.380000  8.340000  7.720000  

# Volatility

In [13]:
article_oman_rv_df = ak.article_oman_rv(symbol="FTSE", index="rk_th2")
print(article_oman_rv_df)

ConnectionError: HTTPSConnectionPool(host='realized.oxford-man.ox.ac.uk', port=443): Max retries exceeded with url: /theme/js/visualization-data.js?20191111113154 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x1255f20f0>: Failed to resolve 'realized.oxford-man.ox.ac.uk' ([Errno 8] nodename nor servname provided, or not known)"))

# Electricity

In [14]:
macro_china_society_electricity_df = ak.macro_china_society_electricity()
print(macro_china_society_electricity_df)

        统计时间       全社会用电量  全社会用电量同比     各行业用电量合计  各行业用电量合计同比    第一产业用电量  \
0    2003.12  188912117.0     15.29  166531685.0       15.77  5958327.0   
1    2004.10  175828690.0     15.17  156100929.0       16.03  4979893.0   
2    2004.11  194584023.0     15.13  172937132.0       15.87  5479353.0   
3     2004.3   48045510.0     15.70   42431023.0       16.41  1125688.0   
4     2004.9  157131146.0     14.92  139454527.0       15.71  4518047.0   
..       ...          ...       ...          ...         ...        ...   
210   2023.7  519650000.0      5.20          NaN         NaN  7160000.0   
211   2023.8  608260000.0      5.00          NaN         NaN  8590000.0   
212   2023.9  686370000.0      5.60          NaN         NaN  9760000.0   
213   2024.2  153160000.0     11.00          NaN         NaN  1920000.0   
214   2024.3  233730000.0      9.80          NaN         NaN  2880000.0   

     第一产业用电量同比      第二产业用电量  第二产业用电量同比      第三产业用电量  第三产业用电量同比  城乡居民生活用电量合计  \
0         0.95  1394

# Finally

In [None]:
# calculate and print outthe time taken to execute all the codes above
print(f"Time taken: {time.time() - t_start} seconds")