In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import qlib
from qlib.data.dataset import DataHandler
from qlib.data import D
import numpy as np

# Initialise qlib with no arguments, i.e. using whatever defaults the library ships with.
qlib.init()

[99064:MainThread](2023-02-13 09:13:48,220) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[99064:MainThread](2023-02-13 09:13:48,536) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[99064:MainThread](2023-02-13 09:13:48,537) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/Users/weiyuhui/.qlib/qlib_data/cn_data')}


In [2]:
# [start, end] date strings for the three data splits, in chronological order.
train_dates = ['2010-01-01', '2016-12-31']
val_dates = ['2017-07-01', '2018-12-31']
test_dates = ['2019-01-02', '2022-06-30']

# qlib expression -> column name used for it in the `price` frame.
feature_map = {
    "$close": "close",
    "$open": "open",
    "$factor": "factor",
    "Ref($close,1)": "prev_close",
    "$close/$factor": "real_price",
}

# Instrument codes qlib returns for the 'csi500' universe over the test window.
codes = D.list_instruments(
    D.instruments('csi500'),
    start_time=test_dates[0],
    end_time=test_dates[1],
    as_list=True,
)

# No start_time is passed on purpose: the whole history up to the end of the
# test window is loaded, so that instruments suspended during the window still
# have earlier rows to fall back on.
df = D.features(codes, list(feature_map), freq="day", end_time=test_dates[1]).rename(
    columns=feature_map
)
price = df

In [3]:
def get_price(instrument, date):
    """Return the entry of the global ``price`` frame at ``.loc[instrument, date]``.

    NOTE(review): ``price`` comes straight from ``D.features``; presumably it is
    indexed by (instrument, datetime), in which case this yields one row of
    renamed feature columns. Confirm against the qlib output.
    """
    key = (instrument, date)
    return price.loc[key]
    

def concat_df(dfs, df_names):
    """Label each frame with its strategy name and stack them into one frame.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames to combine.
    df_names : iterable of str
        Names paired positionally with ``dfs``. Pairing uses ``zip``, so any
        surplus entries on either side are ignored.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the inputs, each carrying a ``'strategy'``
        column holding its name. Original indices are kept as they are.

    The input frames are not modified: the column is added to a copy, so the
    same frames can safely be passed again under different names.
    """
    tagged = [frame.assign(strategy=name) for frame, name in zip(dfs, df_names)]
    return pd.concat(tagged)

def get_benchmark(start_date, end_date, code='SH000905', initial_nav=1000000):
    """Load a benchmark series from qlib and rescale its close into a NAV curve.

    Parameters
    ----------
    start_date, end_date
        Passed to ``D.features`` as ``start_time`` / ``end_time``.
    code : str, optional
        Instrument code to load; defaults to ``'SH000905'``.
    initial_nav : number, optional
        Value the ``'nav'`` column takes on the first returned row; defaults to
        1,000,000.

    Returns
    -------
    pandas.DataFrame
        The qlib features for ``code`` with the first index level dropped and a
        ``'nav'`` column equal to ``$close / first $close * initial_nav``.
        Columns keep their raw qlib expression names (e.g. ``'$close'``); they
        are not renamed through ``feature_map``.

    Raises
    ------
    IndexError
        If qlib returns no rows for the requested window.
    """
    df = D.features([code], fields=list(feature_map.keys()), freq="day",
                    start_time=start_date, end_time=end_date)
    df = df.droplevel(0)
    close = df['$close']
    # .iloc[0] selects the first row by position explicitly; a bare `close[0]`
    # depends on the deprecated integer-key fallback of Series.__getitem__.
    df['nav'] = close / close.iloc[0] * initial_nav
    return df

def add_trading_days(df):
    """Add a 0/1 ``'has trades'`` column derived from the ``'_A'`` columns.

    Every column whose name contains ``'_A'`` is read, NaNs are replaced via
    ``np.nan_to_num``, and each row is compared with the row below it. The flag
    is 1 when all of those columns are equal to the next row's values, else 0.
    ``df`` is modified in place and also returned.

    NOTE(review): the comparison uses ``np.roll``, so the last row is compared
    with the first one. The flag is also set when values do NOT change, which
    seems at odds with the column name. Confirm the intended polarity before
    relying on it.
    """
    action_cols = [col for col in df.columns if '_A' in col]
    actions = np.nan_to_num(df[action_cols].values)
    following = np.roll(actions, shift=-1, axis=0)
    same_as_next = (actions == following).all(axis=1)
    df['has trades'] = same_as_next.astype(int)
    return df

def turnover(df):
    """Add per-row and cumulative turnover columns computed from ``'_P'`` columns.

    Every column whose name contains ``'_P'`` is read with NaNs treated as 0.
    For each row, the increases relative to the previous row are summed across
    those columns; decreases are ignored. The first row has no predecessor and
    always gets 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ``'_P'`` columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with two new columns: ``'turnover'`` (sum of
        positive row-over-row changes) and ``'cum_turnover'`` (its running sum).
        An empty frame is returned with both columns present and no rows.
    """
    position_cols = [col for col in df.columns if '_P' in col]
    positions = df[position_cols].fillna(0).values
    # Prepending the first row makes the first difference zero without any
    # wrap-around, and it also works when the frame has no rows.
    daily_change = np.diff(positions, axis=0, prepend=positions[:1])
    bought = np.where(daily_change > 0, daily_change, 0).sum(axis=1)
    df['turnover'] = bought
    df['cum_turnover'] = df['turnover'].cumsum()
    return df

In [18]:
# Runs to load from records/sticky_actions/. Variants that are switched off are
# kept in the comments so they can be re-enabled easily.
df_names = [
    # 'sa25_nc_sl0', 'sa25_nc_sl', 'sa25_wc_sl0',
    'sa25_wc_sl',
    # 'sa50_nc_sl0', 'sa50_nc_sl', 'sa50_wc_sl0',
    'sa50_wc_sl',
    # 'sa75_nc_sl0', 'sa75_nc_sl',
    'sa75_wc_sl',
    'sa75_wc_sl0',
    'sa25_evalsa80_wc_sl',
    'sa25_evalsa90_wc_sl',
    'sa25_evalsa90_wc_sl_asp',
    'sa25_evalsa100_wc_sl',
    'sa25_evalsa90_wc_sl_asp_top10',
    'sa25_evalsa90_wc_sl_asp_top20',
    # 'sa25_wc_sl_buy_new', 'sa25_evalsa90_wc_sl_asp_top10_buy_new',
    'sa25_evalsa90_wc_sl_asp_top10_ss50',
    'sa25_evalsa90_wc_sl_asp_top10_ss50_s3',
    'sa25_evalsa90_wc_sl_asp_top10_ss50_s6',
]

# Runs whose name contains 'wc' are read into their own list. Every name that
# is currently active contains 'wc', so this covers the same runs as `dfs`.
dfs_wc = list(map(
    turnover,
    (pd.read_csv('records/sticky_actions/' + name + '.csv', index_col=0)
     for name in df_names if 'wc' in name),
))

# All runs, each with turnover columns, followed by the benchmark as last item.
dfs = list(map(
    turnover,
    (pd.read_csv('records/sticky_actions/' + name + '.csv', index_col=0)
     for name in df_names),
))
dfs.append(get_benchmark(*test_dates))

# Long-format frames keyed by the 'strategy' column.
df = concat_df(dfs, [*df_names, 'SH000905'])
df_no_bench = concat_df(dfs[:-1], df_names)
df_wc = concat_df(dfs_wc, list(filter(lambda name: 'wc' in name, df_names)))

In [19]:
# Plot the 'nav' column against the frame index, one line per selected strategy.
names = [
    'sa25_wc_sl',
    'SH000905',
    'sa25_evalsa80_wc_sl',
    'sa25_evalsa90_wc_sl',
    'sa25_evalsa90_wc_sl_asp',
    'sa25_evalsa100_wc_sl',
    'sa25_evalsa90_wc_sl_asp_top10',
    'sa25_evalsa90_wc_sl_asp_top20',
    'sa50_wc_sl',
    'sa75_wc_sl',
]
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='nav',
    color='strategy',
)
fig.show()

In [20]:
# NAV of three of the loaded runs plus the SH000905 benchmark series.
names = [
    'sa25_wc_sl',
    'SH000905',
    'sa50_wc_sl',
    'sa75_wc_sl',
]
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='nav',
    color='strategy',
    title="Sticky actions only in training",
)
fig.show()

In [7]:
# NAV of the '_ss50' runs next to two of the other runs and the SH000905 series.
names = [
    'sa25_wc_sl',
    'sa25_evalsa90_wc_sl_asp_top10',
    'SH000905',
    'sa25_evalsa90_wc_sl_asp_top10_ss50',
    'sa25_evalsa90_wc_sl_asp_top10_ss50_s3',
    'sa25_evalsa90_wc_sl_asp_top10_ss50_s6',
]
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='nav',
    color='strategy',
    title="90% sticky actions in eval and topk=10 + sample 50 stocks",
)
fig.show()

In [8]:
# Per-row 'turnover' for every loaded run; the benchmark frame is left out.
turnover_labels = {"turnover": "turnover (%) - n. positions bought per day"}
fig = px.line(
    df_no_bench,
    x=df_no_bench.index,
    y='turnover',
    color='strategy',
    labels=turnover_labels,
)
fig.show()

In [9]:
# Cumulative turnover for the selected runs.
# `names` is read again by the following cell, so it stays a notebook-level name.
names = [
    'sa25_wc_sl',
    'sa25_evalsa80_wc_sl',
    'sa25_evalsa90_wc_sl',
    'sa25_evalsa90_wc_sl_asp',
    'sa25_evalsa100_wc_sl',
    'sa25_evalsa90_wc_sl_asp_top10',
    'sa25_evalsa90_wc_sl_asp_top20',
    'sa50_wc_sl',
    'sa75_wc_sl',
]
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='cum_turnover',
    color='strategy',
    labels={"cum_turnover": "cum. turnover (%) - cum. positions bought"},
)
fig.show()

In [34]:
# Same cumulative-turnover plot as the previous cell, but reading from `df_vol`.
# NOTE(review): `df_vol` is not assigned anywhere in the visible notebook, so on a
# fresh kernel this cell raises NameError. It presumably came from a cell that
# was deleted or run out of order. TODO: restore the cell that builds it.
# `names` is whatever the most recently executed cell left behind.
temp_df = df_vol.loc[df_vol['strategy'].isin(names)]
fig = px.line(temp_df, x=temp_df.index, y='cum_turnover', color='strategy',
              labels={
                     "cum_turnover": "cum. turnover (%) - cum. positions bought",},
             )
fig.show()

In [12]:
# Runs recorded under records/train/.
df_names_train = [
    'train_sa25_wc_sl',
    'train_sa25_wc_sl_top10',
    'train_sa25_wc_sl_evalsa90',
    'train_sa25_wc_sl_evalsa90_top10',
    'train_sa75_wc_sl',
    'train_sa75_wc_sl_top10',
    'train_sa75_wc_sl_evalsa90',
    'train_sa75_wc_sl_evalsa90_top10',
]

# Read each run, add its turnover columns, then append the benchmark for the
# training window as the last item.
dfs = list(map(
    turnover,
    (pd.read_csv('records/train/' + name + '.csv', index_col=0)
     for name in df_names_train),
))
dfs.append(get_benchmark(*train_dates))

# This rebinds `df`: cells above that plot from `df` will show these runs
# unless the cell that loads the sticky_actions records is executed again.
df = concat_df(dfs, [*df_names_train, 'SH000905'])

In [13]:
# NAV of every train run plus the benchmark series.
names = [*df_names_train, 'SH000905']
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='nav',
    color='strategy',
)
fig.show()

In [14]:
# Runs recorded under records/val/.
df_names_val = [
    'val_sa25_wc_sl',
    'val_sa25_wc_sl_top10',
    'val_sa25_wc_sl_evalsa90',
    'val_sa25_wc_sl_evalsa90_top10',
    'val_sa75_wc_sl',
    'val_sa75_wc_sl_top10',
    'val_sa75_wc_sl_evalsa90',
    'val_sa75_wc_sl_evalsa90_top10',
]

# Read each run, add its turnover columns, then append the benchmark for the
# validation window as the last item.
dfs = list(map(
    turnover,
    (pd.read_csv('records/val/' + name + '.csv', index_col=0)
     for name in df_names_val),
))
dfs.append(get_benchmark(*val_dates))

# This rebinds `df` once more; any earlier contents are replaced.
df = concat_df(dfs, [*df_names_val, 'SH000905'])

In [15]:
# NAV of every validation run plus the benchmark series.
names = [*df_names_val, 'SH000905']
temp_df = df[df['strategy'].isin(names)]
fig = px.line(
    temp_df,
    x=temp_df.index,
    y='nav',
    color='strategy',
)
fig.show()