# Analytics

## Import

In [1]:
import os
import sys
import logging
import multiprocessing
import pandas as pd
import numpy as np
import sqlalchemy
import exchange_calendars as xcals
from dotenv import load_dotenv

# import exchange_calendars as xcals
from datetime import datetime, timedelta

# import pytz
# import pandas as pd
# from IPython.display import display, HTML
from sqlalchemy import create_engine
from sqlalchemy.dialects.postgresql import insert, TEXT
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache

from neuralprophet import NeuralProphet, set_log_level

# Disable logging messages unless there is an error
set_log_level("ERROR")

# from prophet.plot import (
#     plot_plotly,
#     plot_components_plotly,
#     add_changepoints_to_plot,
#     plot_yearly,
#     plot_seasonality_plotly,
# )

Importing plotly failed. Interactive plots will not work.
Importing plotly failed. Interactive plots will not work.


## Init

In [2]:
load_dotenv()  # take environment variables from .env.

# Optionally put a local development checkout of akshare ahead of the installed package.
module_path = os.getenv("LOCAL_AKSHARE_DEV_MODULE")
if module_path is not None and module_path not in sys.path:
    sys.path.insert(0, module_path)
import akshare as ak  # noqa: E402

print(ak.__version__)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

# Create an engine instance.
# The URL is assembled from its components instead of an f-string so that special
# characters in the credentials (e.g. "@" or "/") are escaped correctly.
alchemyEngine = create_engine(
    sqlalchemy.engine.URL.create(
        "postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    ),
    pool_recycle=3600,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

# Attach the handlers only once: re-running this cell in a live kernel would otherwise
# add another pair of handlers each time and every record would be logged repeatedly.
if not logger.handlers:
    file_handler = logging.FileHandler("etl.log")
    console_handler = logging.StreamHandler()

    # Same format for the log file and the console
    formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

xshg = xcals.get_calendar("XSHG")

1.12.93


## Helper Functions

In [3]:
def predict(
    symbol, y_column, country=None, fourier_terms="auto", show_uncertainty=False
) -> (NeuralProphet, pd.DataFrame, pd.DataFrame):  # type: ignore
    """Forecast one symbol using the ``fund_etf_daily_em`` table.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``predict_generic`` with the table name fixed.

    Returns:
        Whatever ``predict_generic`` returns, i.e. the tuple
        ``(model, history, forecast)``.
    """
    return predict_generic(
        table="fund_etf_daily_em",
        symbol=symbol,
        y_column=y_column,
        country=country,
        fourier_terms=fourier_terms,
        show_uncertainty=show_uncertainty,
    )


def predict_generic(
    table,
    symbol,
    y_column,
    country=None,
    fourier_terms="auto",
    show_uncertainty=False,
) -> (NeuralProphet, pd.DataFrame, pd.DataFrame):  # type: ignore
    """Fit a NeuralProphet model on one symbol's history and forecast 60 periods ahead.

    Args:
        table: Table or view to read from (optionally schema-qualified). It is
            queried on its ``symbol`` column and ordered by its ``date`` column.
        symbol: Value matched against the ``symbol`` column.
        y_column: Name of the column to forecast; it is renamed to ``y``.
        country: Optional country passed to ``add_country_holidays``.
        fourier_terms: Passed through as ``yearly_seasonality``.
        show_uncertainty: When true, the model is additionally asked for the
            5% and 95% quantiles.

    Returns:
        ``(m, df, forecast)``: the fitted model, the full history frame with
        ``date`` renamed to ``ds`` and ``y_column`` renamed to ``y``, and the
        forecast frame requested for the history plus 60 future periods.

    Raises:
        ValueError: If ``table`` is not a plain identifier, if no rows exist
            for ``symbol``, or if ``y_column`` is not a column of ``table``.
    """
    # A table name cannot be sent as a bound parameter, so only plain
    # (optionally dotted) identifiers are accepted before it is interpolated.
    if not all(part.isidentifier() for part in str(table).split(".")):
        raise ValueError(f"Invalid table name: {table!r}")

    # The symbol is sent as a bound parameter instead of being spliced into the SQL text.
    query = sqlalchemy.text(
        f"SELECT * FROM {table} where symbol = :symbol order by date"
    )
    df = pd.read_sql(
        query, alchemyEngine, params={"symbol": symbol}, parse_dates=["date"]
    )
    if df.empty:
        raise ValueError(f"No rows found in {table} for symbol {symbol!r}")
    if y_column not in df.columns:
        raise ValueError(f"Column {y_column!r} not found in {table}")

    df = df.rename(
        columns={
            "date": "ds",
            y_column: "y",
        }
    )

    model_kwargs = {
        "daily_seasonality": False,
        "weekly_seasonality": False,
        "yearly_seasonality": fourier_terms,
    }
    if show_uncertainty:
        # NeuralProphet expresses uncertainty through quantiles; `mcmc_samples` is a
        # Prophet argument. The 5% / 95% pair matches the 0.9 confidence level used
        # in the bond trial of this notebook.
        model_kwargs["quantiles"] = [0.05, 0.95]
    m = NeuralProphet(**model_kwargs)  # a fresh model is created for every fit
    if country is not None:
        m.add_country_holidays(country_name=country)

    # Fit on the ds / y columns only, mirroring the `df[["ds", "y"]]` selection of the
    # bond trial; the remaining `SELECT *` columns are not registered with the model.
    train_df = df[["ds", "y"]]
    m.fit(train_df)

    # NeuralProphet builds the future frame from the history it is given;
    # n_historic_predictions=True keeps the fitted history in the forecast.
    future = m.make_future_dataframe(
        train_df, periods=60, n_historic_predictions=True
    )
    forecast = m.predict(future)

    return (m, df, forecast)

## Asset 1: Bond

### Trial 1 - Bond IR Spread

#### load data from table

In [None]:
# Load all records from the `bond_metrics_em` table whose `china_yield_2y` is not NaN,
# parsing the `date` column as datetime.
query = "SELECT * FROM bond_metrics_em where china_yield_2y <> 'NaN'"
df = pd.read_sql(query, alchemyEngine, parse_dates=["date"])

# Display the first few rows of the dataframe
df.head()

In [None]:
# Summary statistics of the bond metrics frame.
df.describe()

#### transform DF to Prophet schema

In [None]:
# Rename to the ds / y column names and keep just those two columns.
# The US spread is renamed to "cov" as well but is dropped by the selection;
# it is only referenced by the commented-out lagged regressor in the fitting cell.
df = df.rename(
    columns={
        "date": "ds",
        "china_yield_spread_10y_2y": "y",
        "us_yield_spread_10y_2y": "cov",
    }
)[["ds", "y"]]
fig = df.plot(x="ds", y=["y"], figsize=(10, 6))

#### fitting

In [None]:
# Target coverage of the prediction interval.
confidence_level = 0.9

# Tail mass on each side of the interval (0.05 for a 0.9 confidence level).
boundaries = round((1 - confidence_level) / 2, 2)
# NeuralProphet only accepts quantiles value in between 0 and 1
quantiles = [boundaries, confidence_level + boundaries]

m = NeuralProphet(
    quantiles=quantiles,
    # Trend configured with 50 changepoints
    n_changepoints=50,
    # Yearly seasonality is enabled; weekly and daily seasonality are switched off
    yearly_seasonality=True,
    weekly_seasonality=False,
    daily_seasonality=False,
    # Auto-regression on the last 10 observations through four layers of width 32
    ar_layers=[32, 32, 32, 32],
    n_lags=10,
)

# Optional extras, currently disabled:
# m.add_lagged_regressor("cov", n_lags=3)
# m = m.add_country_holidays("CN")

# Hold out 1/16 of the data as the test set, then 1/16 of what remains as the
# calibration set (cal_df is passed as calibration_df to conformal_predict later on).
train_df, test_df = m.split_df(df, freq="D", valid_p=1.0 / 16)
train_df, cal_df = m.split_df(train_df, freq="D", valid_p=1.0 / 16)

train_df.shape, cal_df.shape, test_df.shape

In [None]:
# Fit on the training split only and display what fit() returns.
metrics = m.fit(train_df, freq="D")
metrics

In [None]:
# Evaluate the fitted model on the held-out test split.
test_metrics = m.test(test_df)
test_metrics

#### predicting

In [None]:
# Build the prediction frame from test_df, requesting 365 future periods and
# predictions for the historic rows as well.
# NOTE(review): the model is autoregressive (n_lags=10) — confirm NeuralProphet
# honours periods=365 here rather than limiting the horizon — TODO confirm.
future = m.make_future_dataframe(test_df, n_historic_predictions=True, periods=365)
forecast = m.predict(future)

#### plotting

In [None]:
# Plot the forecast with the plotly backend, highlighting the 1-step-ahead forecast.
# NOTE(review): the import cell's output reports "Importing plotly failed" —
# confirm plotly is installed before relying on this backend.
m.set_plotting_backend("plotly")
m.highlight_nth_step_ahead_of_each_forecast(1)
m.plot(forecast)

In [None]:
# Break the forecast down into the listed components
# (the lagged-regressor component stays disabled).
m.plot_components(
    forecast,
    components=[
        "trend",
        "seasonality",
        "autoregression",
        "uncertainty",
        #   "lagged_regressors"
    ],
)

In [None]:
# Plot the model parameters for the listed components
# (the lagged-regressor component stays disabled).
m.plot_parameters(
    components=[
        "trend",
        "seasonality",
        "autoregression",
        # "lagged_regressors"
    ]
)

#### Conformal Predict

In [None]:
method = "cqr"  # naive / cqr. ref: https://neuralprophet.com/how-to-guides/feature-guides/uncertainty_quantification.html
# Complement of the confidence level chosen in the fitting cell.
alpha = 1 - confidence_level

# Conformal prediction for test_df, using cal_df as the calibration set.
cfm_forecast = m.conformal_predict(
    test_df,
    calibration_df=cal_df,
    alpha=alpha,
    method=method,
    plotting_backend="plotly-static",
    show_all_PI=True,
)

In [None]:
# Display the frame returned by conformal_predict.
cfm_forecast

In [None]:
# naive_qhat =   naive_forecast.iloc[-1]["yhat1"] - naive_forecast.iloc[-1]["yhat1 - qhat1"]
# naive_qhat

In [None]:
# Plot the conformal forecast with the 1-step-ahead forecast highlighted.
m.highlight_nth_step_ahead_of_each_forecast(1).plot(
    cfm_forecast, plotting_backend="plotly"
)

### 城投债ETF 511220

In [None]:
# Forecast the closing price of ETF 511220 and summarise the history frame it was fitted on.
symbol = "511220"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
symbol = "511220"
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 30年国债ETF 511090

In [None]:
# Forecast the closing price of ETF 511090.
symbol = "511090"
m, df, forecast = predict(symbol, y_column="close")

In [None]:
# Plot the forecast with the model's own plot method.
fig1 = m.plot(forecast)

In [None]:
# Plot the forecast components with the model's own plot_components method.
fig2 = m.plot_components(forecast)

## Asset 2: Domestic Stock

### 创业板50ETF 159949

In [None]:
# Forecast the closing price of ETF 159949 and summarise the history frame it was fitted on.
symbol = "159949"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
symbol = "159949"
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 红利低波50ETF 515450

In [None]:
# Forecast the closing price of ETF 515450 and summarise the history frame it was fitted on.
symbol = "515450"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_seasonality_plotly is not imported in this notebook (its import is
# commented out); the model's plot_parameters shows the fitted seasonality instead.
# Yearly is the only seasonality the predict helper enables.
m.plot_parameters(components=["seasonality"])

In [None]:
# Refit on the change rate, this time passing country="China" so that the helper
# adds that country's holidays to the model.
m, df, forecast = predict(symbol, "change_rate", country="China")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 红利低波100ETF 515100

In [None]:
# Forecast the closing price of ETF 515100 and summarise the history frame it was fitted on.
symbol = "515100"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_seasonality_plotly is not imported in this notebook (its import is
# commented out); the model's plot_parameters shows the fitted seasonality instead.
# Yearly is the only seasonality the predict helper enables.
m.plot_parameters(components=["seasonality"])

In [None]:
# NOTE(review): predictive_samples() is part of Prophet's API — confirm that the
# NeuralProphet model returned by predict() provides it before relying on this cell.
m.predictive_samples(forecast)

## Asset 3: Commodities

### 黄金ETF 518880

In [None]:
# Forecast the closing price of ETF 518880 and summarise the history frame it was fitted on.
symbol = "518880"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 大宗商品ETF 510170

In [None]:
# Forecast the closing price of ETF 510170 and summarise the history frame it was fitted on.
symbol = "510170"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
symbol = "510170"
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 能源ETF 159930

In [None]:
# Forecast the closing price of ETF 159930 and summarise the history frame it was fitted on.
symbol = "159930"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 能源化工ETF 159981

In [None]:
# Forecast the closing price of ETF 159981 and summarise the history frame it was fitted on.
symbol = "159981"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 豆粕 159985

In [None]:
# Forecast the closing price of ETF 159985 and summarise the history frame it was fitted on.
symbol = "159985"
m, df, forecast = predict(symbol, y_column="close")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

## Asset 4: Overseas

### 纳斯达克100 513110

In [None]:
# The ETF itself has insufficient market data, so the US index is modelled
# instead of the ETF's own daily history.
table = "us_index_daily_sina_view"
symbol = ".IXIC"
m, df, forecast = predict_generic(table, symbol, y_column="close", country="US")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict_generic(table, symbol, "change_rate", "US")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 标普500ETF 513500

In [None]:
# Forecast the closing price of ETF 513500 with US holidays and summarise
# the history frame it was fitted on.
symbol = "513500"
m, df, forecast = predict(symbol, y_column="close", country="US")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
symbol = "513500"
m, df, forecast = predict(symbol, "change_rate", "US")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

### 日本东证指数ETF 513800

In [None]:
# Forecast the closing price of ETF 513800 with JP holidays and summarise
# the history frame it was fitted on.
symbol = "513800"
m, df, forecast = predict(symbol, y_column="close", country="JP")
df.describe()

In [None]:
# Prophet's plot_plotly is not imported in this notebook (its import is commented out),
# so the NeuralProphet model's own plot method is used.
m.plot(forecast)

In [None]:
m, df, forecast = predict(symbol, "change_rate", "JP")
# Prophet's plot_components_plotly is not imported in this notebook (its import is
# commented out), so the NeuralProphet model's own plot_components method is used.
m.plot_components(forecast)

# Output forecast and time-series component graphs for the top-N ETFs ranked by Sortino ratio

In [None]:
top_n = 100
since_inception = "6 months"
# Rank ETFs that have existed for at least `since_inception` by Sortino ratio, best first.
query = f"""
    SELECT fundcode, fundname, sortinoratio sortino 
    FROM fund_etf_perf_em 
    where 
        sortinoratio is not null 
        and sortinoratio <> 'nan'
        and inceptiondate <= CURRENT_DATE - interval '{since_inception}'
    order by sortinoratio desc limit {top_n}
"""
# The ranking gets its own name: the loop below rebinds `df` to each fund's history,
# which previously overwrote the very frame being iterated.
top_funds = pd.read_sql(query, alchemyEngine)

# Ensure the directory for saving images exists.
# It can be overridden through the ETF_FORECAST_OUTPUT_DIR environment variable;
# the default is the location that used to be hardcoded.
output_dir = os.getenv("ETF_FORECAST_OUTPUT_DIR", '/Users/jx/Downloads/ETF_forecast')
os.makedirs(output_dir, exist_ok=True)

for _, row in top_funds.iterrows():
    fundcode = row['fundcode']
    # Replace spaces with underscores for the filename; path separators are replaced
    # too, otherwise they would be interpreted as sub-directories by savefig().
    fundname = (
        str(row['fundname']).replace(' ', '_').replace('/', '_').replace('\\', '_')
    )
    sortino = row['sortino']
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    file_prefix = os.path.join(output_dir, f'{fundcode}_{fundname}_{sortino}')

    # Predict and forecast 'close'
    m, df, forecast = predict(fundcode, 'close')
    # savefig() is a matplotlib figure method, so that backend is requested explicitly
    # instead of depending on whichever default backend the model picks.
    fig1 = m.plot(forecast, plotting_backend="matplotlib")
    fig1.savefig(f'{file_prefix}_forecast_{timestamp}.png')

    # Predict and forecast 'change_rate'
    m, df, forecast = predict(fundcode, 'change_rate')
    fig2 = m.plot_components(forecast, plotting_backend="matplotlib")
    fig2.savefig(f'{file_prefix}_components_{timestamp}.png')
    # NOTE(review): figures are never closed; with top_n = 100 this keeps up to 200
    # figures alive — consider closing them once matplotlib is imported in this notebook.