In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import qlib
from qlib.data.dataset import DataHandler
from qlib.data import D
import numpy as np

qlib.init()

[58817:MainThread](2023-02-12 19:09:19,606) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[58817:MainThread](2023-02-12 19:09:19,799) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[58817:MainThread](2023-02-12 19:09:19,800) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/Users/weiyuhui/.qlib/qlib_data/cn_data')}


In [2]:
test_dates = ['2019-01-02', '2022-06-30']
val_dates = ['2017-07-01', '2018-12-31']
train_dates = ['2010-01-01', '2016-12-31']
feature_map = {
    "$close": "close",
    "$open": "open",
    "$factor": "factor",
    "Ref($close,1)": "prev_close",
    "$close/$factor": "real_price",
}
codes = D.list_instruments(
    D.instruments('csi500'), start_time=test_dates[0], end_time=test_dates[-1], as_list=True
)
# Need all the prev data to avoid suspended code in the market during the time window
df = D.features(codes, list(feature_map.keys()), freq="day", end_time=test_dates[-1])
df.rename(feature_map, axis=1, inplace=True)
price = df

In [3]:
def get_price(instrument, date):
    return price.loc[instrument, date]


def get_volatility(df, history=30):
    return np.std(df['$close'][-30:])
    

def concat_df(dfs, df_names):
    dfs_new = []
    for df, name in zip(dfs, df_names):
        df['strategy'] = name
        dfs_new.append(df)
    dfs_new = pd.concat(dfs_new)
    return dfs_new

def get_benchmark(start_date, end_date):
    df = D.features(['SH000905'], fields=list(feature_map.keys()), freq="day", start_time=start_date, end_time=end_date)
    df = df.droplevel(0)
    df['nav'] = df['$close'] / df['$close'][0] * 1000000
    return df

def add_trading_days(df):
    acts = [a for a in df.columns if '_A' in a]
    act_val = df[acts].values
    act_val = np.nan_to_num(act_val)
    trad_days = np.all(act_val == np.roll(act_val, shift=-1, axis=0), axis=1).astype(int)
    df['has trades'] = trad_days
    return df

def turnover(df):
    pos = [p for p in df.columns if '_P' in p]
    df_pos = df[pos].fillna(0).values
    df_pos_1 = np.roll(df_pos, 1, axis=0)
    df_diff = df_pos - df_pos_1
    df_diff[0, :] = 0.
    df_diff_neg = np.sum(np.where(df_diff < 0, -df_diff, 0), axis=1)
    df_diff_pos = np.sum(np.where(df_diff > 0, df_diff, 0), axis=1)
    df['turnover'] = (df_diff_pos)
    df['cum_turnover'] = df['turnover'].cumsum()
    return df

In [126]:
b = get_benchmark(train_dates[0], val_dates[1])
b['volatility_30'] = b['$close'].rolling(30).std()
b['volatility_10'] = b['$close'].rolling(10).std()
fig = px.line(b, x=b.index, y='volatility_10')
fig.show()

In [5]:
for i in [0.3, 0.2, 0.15, 0.12, 0.1]:
    vol_th = len(b[b['volatility_10'] > i])
    rate = vol_th / len(b)
    print('threshold {}: transaction days {}/{} ({}%)'.format(i, vol_th, len(b), rate))         

threshold 0.3: transaction days 90/2187 (0.0411522633744856%)
threshold 0.2: transaction days 216/2187 (0.09876543209876543%)
threshold 0.15: transaction days 352/2187 (0.16095107453132146%)
threshold 0.12: transaction days 520/2187 (0.23776863283036123%)
threshold 0.1: transaction days 724/2187 (0.3310470964791952%)


In [79]:
df_names = [
    'sa25_vol10th25_top10',
    'sa25_vol10th20_top10',
    'sa25_vol10th15_top10',
    'sa25_wc_sl', 'sa25_evalsa90_wc_sl_asp_top10',
    'sa25_vol5_th10', 'sa25_vol5_th15', 'sa25_vol5_th18', 'sa25_vol5_th20',
]

dfs = [pd.read_csv('records/' + name + '.csv', index_col=0) for name in df_names]
dfs = [turnover(df) for df in dfs]
dfs.append(get_benchmark(test_dates[0], test_dates[1]))
df_vol = concat_df(dfs, df_names + ['SH000905'])
names = df_names + ['SH000905']
temp_df = df_vol.loc[df_vol['strategy'].isin(names)]
fig = px.line(temp_df, x=temp_df.index, y='nav', color='strategy')
fig.show()

# volN = compute volatility on a window of length N
# thM = perform transactions only when vol([t-N, t]) > 0.M

'''
Volatility threshold logic computed with the following code
avg_vol_w = self._ds.volatility(date, self.vol_window)
if avg_vol_w < self.vol_threshold:
    return portfolio
return super().take_step(date, action, portfolio)
'''

In [28]:
names = ['sa25_wc_sl','sa25_evalsa90_wc_sl_asp_top10',
         'sa25_vol10th25_top10', 'sa25_vol10th20_top10', 'sa25_vol10th15_top10']
temp_df = df_vol.loc[df_vol['strategy'].isin(names)]
fig = px.line(temp_df, x=temp_df.index, y='cum_turnover', color='strategy',
              labels={
                     "cum_turnover": "cum. turnover (%) - cum. positions bought",},
             )
fig.show()

In [125]:
import plotly.graph_objects as go

df_names = [
    'vw5_vt15',
    'vw5_vt18',
    'vw5_vt21',
    'vw5_vt24',
    #'vw5_vt20_top5',
    #'sa25_wc_sl', 'sa25_evalsa90_wc_sl_asp_top10',
]

def add_trading_days(df):
    acts = [a for a in df.columns if '_A' in a]
    act_val = df[acts].values
    act_val = np.nan_to_num(act_val)
    no_days = np.all(act_val == np.roll(act_val, shift=-1, axis=0), axis=1).astype(int)
    trad_days = (~no_days.astype(bool)).astype(int)
    df['has trades'] = trad_days
    return df

dfs = [pd.read_csv('records/' + name + '.csv', index_col=0) for name in df_names]
dfs = [turnover(df) for df in dfs]
dfs.append(get_benchmark(test_dates[0], test_dates[1]))
df_vol = concat_df(dfs, df_names + ['SH000905'])
add_trading_days(df_vol)
df_vol['has trades'] *= df_vol['nav']
bm = df_vol[df_vol['strategy'] == 'SH000905'][:-1]

for name in df_names:
    temp_df = df_vol[df_vol['strategy'] == name][:-1]
    vol_window = int(name.split('vw')[-1].split('_')[0])
    vol_th = float('.' + name.split('vt')[-1].split('_')[0])

    b = get_benchmark(test_dates[0], test_dates[1])
    b['volatility'] = b['$close'].rolling(vol_window).std()
    b['vol th'] = (vol_th-np.min(b['volatility']))/(np.max(b['volatility'])-np.min(b['volatility'])) * np.max(temp_df['nav'])
    b['volatility'] = (b['volatility']-np.min(b['volatility']))/(np.max(b['volatility'])-np.min(b['volatility']))
    b['volatility'] *= np.max(temp_df['nav'])

    # plotly setup
    fig=go.Figure(go.Scatter(y=temp_df['nav'], x=temp_df.index,
                             mode='lines', name="Nav"))
    fig.add_traces(go.Scatter(y=bm['nav'], x=temp_df.index,
                             mode='lines', name="Benchmark nav"))
    fig.add_traces(go.Scatter(y=b['volatility'], x=temp_df.index,
                             mode='lines', name="Volatility (scaled)"))
    fig.add_traces(go.Scatter(y=b['vol th'], x=temp_df.index,
                             mode='lines', name="Threshold (scaled)"))
    fig.add_traces(go.Scatter(y=temp_df['has trades'], x=temp_df.index,
                              mode='markers', marker=dict(color='black', size=8), name="Trading days"))
    fig.update_layout(
        title=name,
        xaxis_title="Dates",
    )
    fig.show()

'''
Volatility threshold logic computed with the following code
avg_vol_w = self._ds.volatility(date, self.vol_window)
if avg_vol_w < self.vol_threshold:
    if self.prev_action is not None:
        action = self.prev_action
self.prev_action = action
return super().take_step(date, action, portfolio)
'''

'\nVolatility threshold logic computed with the following code\navg_vol_w = self._ds.volatility(date, self.vol_window)\nif avg_vol_w < self.vol_threshold:\n    if self.prev_action is not None:\n        action = self.prev_action\nself.prev_action = action\nreturn super().take_step(date, action, portfolio)\n'

In [128]:
temp_df = df_vol.loc[df_vol['strategy'].isin(names)]
fig = px.line(temp_df, x=temp_df.index, y='cum_turnover', color='strategy',
              labels={
                     "cum_turnover": "cum. turnover (%) - cum. positions bought",},
             )
fig.show()