In [1]:
import warnings
import sys
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import datetime
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, plot, iplot

# Initialise plotly's notebook integration so iplot() output renders inline.
init_notebook_mode(connected=True)
# Extend the import search path with the local project checkouts. These calls
# must run before the non-standard imports that follow them in this cell
# (presumably misc, data_handler, plotly_examples and backtest live under
# these directories -- verify on your machine).
# NOTE(review): absolute Windows paths are machine-specific; the notebook will
# not import on a host where the project is checked out elsewhere.
sys.path.append("C:/dev/pycmqlib3/")
sys.path.append("C:/dev/pycmqlib3/misc_scripts/")
# Suppresses every Python warning for the remainder of the kernel session,
# including pandas deprecation / chained-assignment warnings raised by later cells.
warnings.filterwarnings('ignore')
import misc
import data_handler as dh
import plotly_examples
import backtest
import alphalens as al
%matplotlib inline

In [2]:
# Product-code universes used by the later cells. Leaf lists are literal
# codes; composite lists are built by unpacking the leaves in a fixed order,
# so the ordering of `all_markets` is deterministic.

# Industrial metals: ferrous products, ferrous-related mixed group, base metals.
ferrous_products_mkts = ['rb', 'hc', 'i', 'j', 'jm']
ferrous_mixed_mkts = ['ru', 'FG', 'ZC', 'SM', 'SF']  # commented out upstream: 'UR'
base_metal_mkts = ['cu', 'al', 'zn', 'pb', 'ni', 'sn']  # commented out upstream: 'ss'
precious_metal_mkts = ['au', 'ag']
ind_metal_mkts = [*ferrous_products_mkts, *ferrous_mixed_mkts, *base_metal_mkts]

# Petrochemicals, then the full industrial set.
petro_chem_mkts = ['l', 'pp', 'v', 'TA', 'MA', 'bu', 'sc', 'fu']  # commented out upstream: 'pg', 'eb', 'eg', 'lu'
ind_all_mkts = [*ind_metal_mkts, *petro_chem_mkts]

# Agricultural groups.
ags_oil_mkts = ['m', 'RM', 'y', 'p', 'OI', 'a', 'c', 'cs']  # commented out upstream: 'b'
ags_soft_mkts = ['CF', 'SR', 'jd', 'AP']  # commented out upstream: 'sp', 'rr', 'SA', 'CJ'
ags_all_mkts = [*ags_oil_mkts, *ags_soft_mkts]

# Financial futures.
eq_fut_mkts = ['IF', 'IH', 'IC']
bond_fut_mkts = ['T', 'TF']  # commented out upstream: 'TS'
fin_all_mkts = [*eq_fut_mkts, *bond_fut_mkts]

# Top-level aggregates: commodities first, financials last.
commod_all_mkts = [*ind_all_mkts, *ags_all_mkts, *precious_metal_mkts]
all_markets = [*commod_all_mkts, *fin_all_mkts]


In [5]:
# Load one continuous-contract frame per product via misc.nearby and glue them
# side by side into `all_df`, with columns keyed by (asset, field).

# Columns kept from each per-product frame.
field_list = ['open', 'high', 'low', 'close', 'volume', 'openInterest', 'contract', 'shift']

start_date = datetime.date(2014,7,1)
end_date = datetime.date(2020,7,10)

sim_products = all_markets
need_shift = 2
freq = 'd'
delim = '-'

# Keyword-argument templates passed to misc.nearby; they differ only in roll_rule.
args = {'n': 1, 'roll_rule': '-35b', 'freq': freq, 'need_shift': need_shift}
ferrous_products_args = args
ferro_mixed_mkt_args = args
base_args = {'n': 1, 'roll_rule': '-30b', 'freq': freq, 'need_shift': need_shift}
eq_args = {'n': 1, 'roll_rule': '-1b', 'freq': freq, 'need_shift': need_shift}
bond_args = {'n': 1, 'roll_rule': '-30b', 'freq': freq, 'need_shift': need_shift}
precious_args = {'n': 1, 'roll_rule': '-25b', 'freq': freq, 'need_shift': need_shift}

df_list = []
for idx, asset in enumerate(sim_products):
    # Pick the template for this product; anything not matched uses `args`.
    # NOTE(review): 'ni' is in base_metal_mkts but is not listed below, so it
    # falls through to the default template -- confirm this is intentional.
    if asset in eq_fut_mkts:
        arg_template = eq_args
    elif asset in ['cu', 'al', 'zn', 'pb', 'sn']:
        arg_template = base_args
    elif asset in bond_fut_mkts:
        arg_template = bond_args
    elif asset in precious_metal_mkts:
        arg_template = precious_args
    else:
        arg_template = args
    # Build a fresh kwargs dict per asset. Writing start_date/end_date straight
    # into the template would mutate the shared dicts (args, eq_args, ...) that
    # several names alias, leaving stale per-asset dates in the configuration.
    use_args = dict(
        arg_template,
        start_date=max(backtest.sim_start_dict[asset], start_date),
        end_date=end_date,
    )
    print("loading mkt = %s, args = %s" % (asset, use_args))
    df = misc.nearby(asset, **use_args)
    if freq == 'm':
        df = misc.cleanup_mindata(df, asset)
    # For minute data, the first product also contributes the shared
    # 'date' / 'min_id' columns.
    if (idx == 0) and (freq == 'm'):
        df_list.append(df[['date', 'min_id']])
    # Copy before relabelling so the rename never acts on a slice of `df`.
    xdf = df[field_list].copy()
    xdf.columns = [(asset, col) for col in xdf.columns]
    df_list.append(xdf)

# Column-wise concat; rows are aligned on the frames' index.
all_df = pd.concat(df_list, axis = 1, sort = False)

loading mkt = rb, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loading mkt = hc, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loading mkt = i, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loading mkt = j, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loading mkt = jm, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loading mkt = ru, args = {'n': 1, 'roll_rule': '-35b', 'freq': 'd', 'need_shift': 2, 'start_date': datetime.date(2014, 7, 1), 'end_date': datetime.date(2020, 7, 10)}
loadin

In [12]:
# Select the (asset, field) columns for the chosen products, optionally
# resample minute bars, then add a per-product log-return column (asset, 'lr').
product_list = all_markets
if freq == 'm':
    prod_fields = ['date', 'min_id'] + [(asset, field) for field in field_list for asset in product_list]
    df = all_df[prod_fields].copy()

    bar_freq = '85m'

    # Keys 's1'..'s4' map to lists of split points handed to dh.day_split1.
    split_dict = {'s1': [300, 2100],
                 's2': [300, 1500, 2100],
                 's3': [300, 1500, 1930, 2100],
                 's4': [300, 1500, 1630, 1930, 2100],}
    # A bar_freq containing 's' selects a day-split scheme from split_dict;
    # anything else is passed to dh.conv_ohlc_freq1 as a bar frequency.
    if 's' in bar_freq:
        df = dh.day_split1(df, split_dict[bar_freq])
    else:
        df = dh.conv_ohlc_freq1(df, bar_freq)
else:
    prod_fields = [(asset, field) for field in field_list for asset in product_list]
    df = all_df[prod_fields].copy()

for asset in product_list:
    # Cast once and reuse: previously only the first operand was cast to float,
    # so the shifted operand kept the column's original dtype and np.log could
    # fail or behave inconsistently on a non-float 'close' column.
    # NOTE(review): returns use 'close' exactly as loaded; whether roll gaps
    # are already adjusted depends on misc.nearby / need_shift -- confirm.
    log_close = np.log(df[(asset, 'close')].astype('float'))
    df[(asset, 'lr')] = log_close - log_close.shift(1)

log_ret = df[[(asset, 'lr') for asset in product_list]].copy()
print(log_ret[-5:])

            (rb, lr)  (hc, lr)   (i, lr)   (j, lr)  (jm, lr)  (ru, lr)  \
2020-07-06 -0.000276 -0.000277  0.006014 -0.001872 -0.001679  0.011851   
2020-07-07  0.004413  0.006635  0.021747  0.005073 -0.003366 -0.007094   
2020-07-08  0.017999  0.018835  0.027011  0.016900  0.008393  0.010857   
2020-07-09  0.006734  0.005394  0.001268 -0.001310  0.017810  0.004216   
2020-07-10 -0.009440 -0.005664  0.002532 -0.016123 -0.017392 -0.018877   

            (FG, lr)  (ZC, lr)  (SM, lr)  (SF, lr)  ...  (SR, lr)  (jd, lr)  \
2020-07-06  0.008687 -0.011408  0.017737  0.017410  ...  0.005456  0.004063   
2020-07-07  0.015842  0.003579 -0.013734 -0.004069  ... -0.000778  0.010086   
2020-07-08  0.001963 -0.001430  0.020381 -0.012652  ...  0.021167 -0.000753   
2020-07-09  0.008461 -0.004303 -0.001507 -0.006906  ... -0.001524 -0.021572   
2020-07-10 -0.000648  0.000718  0.005114 -0.010449  ... -0.006313  0.001026   

            (AP, lr)  (au, lr)  (ag, lr)  (IF, lr)  (IH, lr)  (IC, lr)  \
2020-0

In [31]:
# Summary statistics and correlation matrix of log returns over a sub-window.
# NOTE: this rebinds the notebook-level start_date / end_date.
start_date = datetime.date(2017,1,1)
end_date = datetime.date(2020,7,10)
log_ret = df[[(asset, 'lr') for asset in product_list]].copy()
log_ret.columns = product_list
# NOTE(review): the index is compared against datetime.date objects; this
# presumes the index holds dates comparable with datetime.date -- confirm.
selected_ret = log_ret[(log_ret.index >= start_date) & (log_ret.index <= end_date)].copy()

stats = ['mean', 'median', 'std', 'skew', 'kurt']
# Build the table from column-wise reductions in one step. The previous
# element-wise fill used chained assignment (stats_df[func].loc[asset] = ...),
# which writes to a temporary under pandas Copy-on-Write and leaves the table
# unchanged; it also started from an integer-typed frame of zeros.
stats_df = pd.DataFrame(
    {func: getattr(selected_ret, func)() for func in stats},
    index = product_list,
    columns = stats,
)

print(stats_df)

print(selected_ret.corr())

    
    
    

        mean    median       std      skew       kurt
rb  0.001043  0.001081  0.015525 -0.364604   2.912296
hc  0.000705  0.000815  0.014967 -0.451322   3.207996
i   0.001281  0.001104  0.021319 -0.075854   0.958822
j   0.000767  0.001002  0.020495 -0.286044   1.677116
jm  0.000602  0.000973  0.018809 -0.261771   2.617953
ru -0.001266 -0.000426  0.017590 -0.628481   3.248586
FG  0.000568  0.000688  0.012017 -0.051896   4.182972
ZC  0.000590  0.000158  0.011435  0.061404   1.692298
SM  0.000457  0.000000  0.014718  0.050441   3.167991
SF  0.000342  0.000390  0.013535  0.075230   4.004065
cu  0.000056  0.000210  0.009953 -0.899055   6.893127
al  0.000041  0.000323  0.009127 -0.242621   4.893156
zn  0.000126  0.000000  0.012603 -0.170487   1.374773
pb  0.000120  0.000273  0.011540 -0.310654   3.084594
ni  0.000210  0.001159  0.015481  0.027521   1.636824
sn -0.000137  0.000000  0.010522 -0.526983  10.094866
l  -0.000208  0.000000  0.011524 -0.046126   4.034986
pp  0.000036  0.000108  0.01