In [1]:
# import libraries
import os
from os.path import isfile, isdir, join
import numpy as np
import pandas as pd
from datetime import datetime, date
from dateutil.relativedelta import relativedelta
from bs4 import BeautifulSoup
import re
from IPython.display import display
from zipfile import ZipFile
import pickle
import unicodedata
import pytz
from joblib import Parallel, delayed
import shutil
import difflib
import random
import math
from shutil import copyfile
import itertools

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import QuantileTransformer, MinMaxScaler, StandardScaler

import matplotlib as mpl
from matplotlib import pyplot as plt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

!pip install cvxpy
import cvxpy as cp

import warnings
warnings.filterwarnings("ignore")

Collecting cvxpy
  Downloading cvxpy-1.2.0-cp37-cp37m-manylinux_2_24_x86_64.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 4.2 MB/s 
Collecting scs>=1.1.6
  Downloading scs-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.7 MB)
[K     |████████████████████████████████| 10.7 MB 48.1 MB/s 
Collecting osqp>=0.4.1
  Downloading osqp-0.6.2.post5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (295 kB)
[K     |████████████████████████████████| 295 kB 68.1 MB/s 
[?25hCollecting qdldl
  Downloading qdldl-0.1.5.post2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 69.4 MB/s 
[?25hInstalling collected packages: qdldl, scs, osqp, cvxpy
Successfully installed cvxpy-1.2.0 osqp-0.6.2.post5 qdldl-0.1.5.post2 scs-3.2.0
(CVXPY) May 15 05:36:38 AM: Encountered unexpected exception importing solver GLOP:
AttributeError("module 'ortools'

In [2]:
# log
def log(msg):
    now = datetime.strftime(datetime.now(tz=pytz.timezone('Hongkong')), '%Y-%m-%d %H:%M:%S')
    print(f'[{now}] {msg}')
    
# pickle
def save_pkl(obj, filename):
    pickle.dump(obj, open(filename, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
    return
def load_pkl(filename):
    return pickle.load(open(filename, 'rb'))

def get_size(path='.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)
    return total_size

def dl_txt(full_path):
    copyfile(full_path, full_path.split('/')[-1])
    print(f"Copied {full_path.split('/')[-1]}")
    return

def clear_output():
    for file in os.listdir():
        os.remove(file)
    return

fig_num = 0
def new_plot():
    global fig_num
    fig_num += 1
    plt.figure(fig_num)
    
import signal as signal_
class TimeoutException(Exception):   # Custom exception class
    pass
def timeout_handler(signum, frame):   # Custom signal handler
    raise TimeoutException
signal_.signal(signal_.SIGALRM, timeout_handler)

# function to remove any rows/columns with all NaN
def df_drop_na(df):
    df = df.loc[lambda x: x.notnull().sum(axis=1) > 0]
    df = df[df.notnull().sum(axis=0).loc[lambda x: x>0].index.tolist()]
    return df

def align_index(dfs):
    for i in range(len(dfs)):
        if i==0:
            idx, col = dfs[i].index, dfs[i].columns
        else:
            idx, col = idx & dfs[i].index, col & dfs[i].columns
    idx, col = idx.sort_values().tolist(), col.sort_values().tolist()
    new_dfs = tuple([df.reindex(index=idx, columns=col) for df in list(dfs)])
    return new_dfs

In [3]:
# params
params = dict()
params['bt_start_date'] = '2014-01-01' # bt - back testing
params['bt_end_date'] = '2018-03-27'
params['seed'] = 0
horizons = {'6m':21*6, '9m':21*9, '12m':21*12}

In [4]:
# load returns
ret = pd.read_csv('../input/hkml-download-returns/ret.csv').assign(date = lambda x: pd.to_datetime(x.date)).set_index('date')
exret = pd.read_csv('../input/hkml-download-returns/exret.csv').assign(date = lambda x: pd.to_datetime(x.date)).set_index('date')

# filter returns to testing period
exret = exret.loc[lambda x: (x.index>=params['bt_start_date']) & (x.index<=params['bt_end_date'])]
exret = df_drop_na(exret)
ret, exret = align_index((ret, exret))
log(f'Shape of ret: {ret.shape}')
log(f'Shape of exret: {exret.shape}')

# load 10-K signals
feats_10k = pd.merge(pd.read_csv('../input/hkml-signal-extraction-10k-cpu/feats.csv'),
                 pd.read_csv('../input/hkml-signal-extraction-gpu/feats.csv'),
                 how='inner', on=['doc_id','cik','entity','filing_date','stock']) \
    .rename(columns={'filing_date':'date'}) \
    .assign(date = lambda x: pd.to_datetime(x.date),
            cik = lambda x: x.cik.astype(str).str.zfill(10)) \
    .replace([np.inf, -np.inf], np.nan)
feat_names = [c for c in feats_10k.columns if 'feat' in c]
feats_10k = feats_10k.rename(columns={c:c+'_10k' for c in feat_names})
feats_10k = feats_10k.loc[:,['stock','date'] + [c for c in feats_10k.columns if 'feat' in c]]
log(f'Shape of 10-K feats: {feats_10k.shape}')

# load 10-Q signals
feats_10q = pd.read_csv('../input/hkml-signal-extraction-10q-cpu/feats.csv') \
    .rename(columns={'filing_date':'date'}) \
    .assign(date = lambda x: pd.to_datetime(x.date),
            cik = lambda x: x.cik.astype(str).str.zfill(10)) \
    .replace([np.inf, -np.inf])
feat_names = [c for c in feats_10q.columns if 'feat' in c]
feats_10q = feats_10q.rename(columns={c:c+'_10q' for c in feat_names})
feats_10q = feats_10q.loc[:,['stock','date'] + [c for c in feats_10q.columns if 'feat' in c]]
log(f'Shape of 10-Q feats: {feats_10q.shape}')

# load 8-k signal
feats_8k = load_pkl('../input/hkml-signal-extraction-pre/feats_8k')
log(f'Shape of 8-K feats: {feats_8k.shape}')

# load LTR signal
feats_lgbm_ltr = load_pkl(f'../input/hkml-lightgbm-ltr/pred_val_out').rename(columns={0:'feat_lgbm_ltr_12m'})
log(f'Shape of LGBM Learning-to-Rank feats: {feats_lgbm_ltr.shape}')

# load LTR signal
feats_lgbm_binary_clf = load_pkl(f'../input/hkml-lightgbm-binary-clf/pred_prob_test_out')
log(f'Shape of LGBM binary classifier feats: {feats_lgbm_binary_clf.shape}')

# combine all signals into single df
feats = feats_10k \
    .merge(feats_10q, how='outer', on=['stock','date']) \
    .merge(feats_8k, how='outer', on=['stock','date']) \
    .merge(feats_lgbm_ltr, how='outer', on=['stock','date']) \
    .merge(feats_lgbm_binary_clf, how='outer', on=['stock','date']) \
    .sort_values(['stock','date']) \
    .groupby('stock') \
    .apply(lambda x: x.ffill()) \
    .loc[lambda x: (x.date>=params['bt_start_date']) & (x.date<=params['bt_end_date'])] \
    .reset_index(drop=True)

[2022-05-15 13:36:40] Shape of ret: (1064, 596)
[2022-05-15 13:36:40] Shape of exret: (1064, 596)
[2022-05-15 13:36:40] Shape of 10-K feats: (5219, 19)
[2022-05-15 13:36:40] Shape of 10-Q feats: (13457, 7)
[2022-05-15 13:36:41] Shape of 8-K feats: (1261848, 4)
[2022-05-15 13:36:41] Shape of LGBM Learning-to-Rank feats: (639899, 3)
[2022-05-15 13:36:41] Shape of LGBM binary classifier feats: (861573, 3)


In [5]:
# summary DQ
feat_names = [c for c in feats.columns if 'feat' in c]
log(f'Shape of combined feats: {feats.shape}')
display(feats.head())

[2022-05-15 13:36:46] Shape of combined feats: (647996, 28)


Unnamed: 0,stock,date,feat_ch_full_len_10k,feat_ch_item_1a_len_10k,feat_ch_item_1b_len_10k,feat_ch_item_3_len_10k,feat_full_cos_1gram_10k,feat_full_cos_2gram_10k,feat_full_jac_1gram_10k,feat_full_jac_2gram_10k,feat_item_1a_lev_10k,feat_item_7_lev_10k,feat_lm_postive_10k,feat_lm_uncertainty_10k,feat_lm_litigious_10k,feat_word2vec_10k,feat_sen_enc_10k,feat_item_sentiment_10k,feat_fls_sentiment_10k,feat_ch_full_len_10q,feat_full_cos_1gram_10q,feat_full_jac_1gram_10q,feat_word2vec_10q,feat_lm_postive_10q,feat_cnt_8k,feat_cnt_8k_diff,feat_lgbm_ltr_12m,feat_lgbm_binary_clf_12m
0,A,2014-01-02,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,236.0,0.527291
1,A,2014-01-03,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291
2,A,2014-01-06,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291
3,A,2014-01-07,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291
4,A,2014-01-08,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291


In [6]:
def get_portfolio_ret(signal, f_ret, n_day, div_vol=False):
    ranks = signal.rank(axis=1) - 1 / 2
    weights = 2 * ranks.divide(ranks.count(axis=1), axis=0) - 1
    std = f_ret.shift(n_day).rolling(252, 252//2).std() if div_vol else 1
    port_ret = (weights / std * f_ret).sum(axis=1)
    return port_ret


def get_sharpe(port, n_day):
    return port.mean() * np.sqrt(252/n_day) / port.std()


def gen_metric(signal, ret, exret, n_day):
    # future returns
    f_ret = (1+ret).rolling(n_day).apply(np.prod, raw=True).shift(-n_day) - 1
    f_exret = (1+exret).rolling(n_day).apply(np.prod, raw=True).shift(-n_day) - 1
    f_ret, f_exret = df_drop_na(f_ret), df_drop_na(f_exret)
    signal, f_ret, f_exret = align_index((signal, f_ret, f_exret))
    signal = signal.mask(f_ret.isnull())
    # ranked signal, returns
    signal_rnk, f_ret_rnk, f_exret_rnk = signal.rank(axis=1), f_ret.rank(axis=1), f_exret.rank(axis=1)
    # average correlation between signal and excess return (both ranked)
    avg_rnk_corr = pd.Series(signal_rnk.index).apply(lambda x: pd.concat([signal_rnk.loc[x], f_exret_rnk.loc[x]], axis=1).corr().iloc[0,1]).mean()
    # construct uniform weight portfolio
    port_ret = get_portfolio_ret(signal, f_ret, n_day)
    port_exret = get_portfolio_ret(signal, f_exret, n_day)
    port_ret_vol = get_portfolio_ret(signal, f_ret, n_day, div_vol=True)
    # calculate sharpe ratios
    sharpe_ret = get_sharpe(port_ret, n_day)
    sharpe_exret = get_sharpe(port_exret, n_day)
    sharpe_ret_vol = get_sharpe(port_ret_vol, n_day)
    return avg_rnk_corr, sharpe_exret, sharpe_ret, sharpe_ret_vol


def analyze_feat(feats, ret, exret, selected_feat):
    # selected_feat = 'feat_full_jac_2gram'

    # create signal table as pivot
    signal = feats[['stock', 'date', selected_feat]] \
        .drop_duplicates() \
        .pivot('date', 'stock', selected_feat)

    # forward-fill and align index with returns
    dates = (signal.index | ret.index).sort_values().tolist()
    signal = signal.reindex(index=dates).ffill()
    signal = df_drop_na(signal)
    signal, ret_, exret_ = align_index((signal, ret, exret))

    log(f'Shape of ret: {ret_.shape}')
    log(f'Shape of exret: {exret_.shape}')
    log(f'Shape of signal: {signal.shape}')

    # calculate metrics per investiment horizon
    metric = []
    for h in horizons:
        metric.append([selected_feat, h, horizons[h]] + list(gen_metric(signal, ret, exret, horizons[h])))
    metric = pd.DataFrame(metric, columns=['feat', 'horizon', 'n_day', 'avg_rnk_corr', 'sharpe_exret', 'sharpe_ret', 'sharpe_ret_vol'])
    return metric

In [7]:
# loop through all signals to generate metrics
feat_metric = [analyze_feat(feats, ret, exret, f) for f in feat_names]
feat_metric = pd.concat(feat_metric, axis=0).reset_index(drop=True)

[2022-05-15 13:36:46] Shape of ret: (1064, 557)
[2022-05-15 13:36:46] Shape of exret: (1064, 557)
[2022-05-15 13:36:46] Shape of signal: (1064, 557)
[2022-05-15 13:37:08] Shape of ret: (1064, 554)
[2022-05-15 13:37:08] Shape of exret: (1064, 554)
[2022-05-15 13:37:08] Shape of signal: (1064, 554)
[2022-05-15 13:37:31] Shape of ret: (1064, 550)
[2022-05-15 13:37:31] Shape of exret: (1064, 550)
[2022-05-15 13:37:31] Shape of signal: (1064, 550)
[2022-05-15 13:37:53] Shape of ret: (1064, 554)
[2022-05-15 13:37:53] Shape of exret: (1064, 554)
[2022-05-15 13:37:53] Shape of signal: (1064, 554)
[2022-05-15 13:38:15] Shape of ret: (1064, 557)
[2022-05-15 13:38:15] Shape of exret: (1064, 557)
[2022-05-15 13:38:15] Shape of signal: (1064, 557)
[2022-05-15 13:38:37] Shape of ret: (1064, 557)
[2022-05-15 13:38:37] Shape of exret: (1064, 557)
[2022-05-15 13:38:37] Shape of signal: (1064, 557)
[2022-05-15 13:39:00] Shape of ret: (1064, 557)
[2022-05-15 13:39:00] Shape of exret: (1064, 557)
[2022-05

In [8]:
# based on first round analysis, compute various weighted averages of signals
s_list = [0, 0.4]
k_list = [0.05, 0.10, 0.15, 0.20, -0.05, -0.10, -0.15, -0.20, 0]
t_dict = {'minmax': MinMaxScaler(),
          'uniform': QuantileTransformer(output_distribution='uniform', random_state=0),
          'normal': QuantileTransformer(output_distribution='normal', random_state=0)}

list1 = list(itertools.product(s_list, [-0.1,-0.05, 0, 0.05, 0.1], ['normal']))
list2 = list(itertools.product(s_list, [0,0.5,1,2,3], ['minmax','uniform']))

for s, k, t in list1 + list2:
    weights = feat_metric \
        .loc[lambda x: (x.horizon=='12m') & (x.sharpe_ret>s) & (~x.feat.str.contains('avg')) & (~x.feat.str.contains('lgbm'))] \
        .loc[:, ['feat','sharpe_ret']] \
        .sort_values('sharpe_ret', ascending=False)
    weights['imp'] = np.exp(k * weights['sharpe_ret'])
    weights['weight'] = weights['imp'] / np.sum(weights['imp'])
    df = feats[weights.feat.tolist()]
    df = pd.DataFrame(t_dict[t].fit_transform(df), columns=df.columns)
    df = df.multiply(weights.weight.tolist(), axis=1)
    feats[f'feat_weighted_avg_s{s}_k{k}_{t}'] = df.sum(axis=1)

# summary DQ
feat_names = [c for c in feats.columns if 'feat' in c]
log(f'Shape of combined feats: {feats.shape}')
display(feats.head())

# output combined signal
display(feats[feat_names].sum())
save_pkl(feats, 'feats')

[2022-05-15 13:46:51] Shape of combined feats: (647996, 58)


Unnamed: 0,stock,date,feat_ch_full_len_10k,feat_ch_item_1a_len_10k,feat_ch_item_1b_len_10k,feat_ch_item_3_len_10k,feat_full_cos_1gram_10k,feat_full_cos_2gram_10k,feat_full_jac_1gram_10k,feat_full_jac_2gram_10k,feat_item_1a_lev_10k,feat_item_7_lev_10k,feat_lm_postive_10k,feat_lm_uncertainty_10k,feat_lm_litigious_10k,feat_word2vec_10k,feat_sen_enc_10k,feat_item_sentiment_10k,feat_fls_sentiment_10k,feat_ch_full_len_10q,feat_full_cos_1gram_10q,feat_full_jac_1gram_10q,feat_word2vec_10q,feat_lm_postive_10q,feat_cnt_8k,feat_cnt_8k_diff,feat_lgbm_ltr_12m,feat_lgbm_binary_clf_12m,feat_weighted_avg_s0_k-0.1_normal,feat_weighted_avg_s0_k-0.05_normal,feat_weighted_avg_s0_k0_normal,feat_weighted_avg_s0_k0.05_normal,feat_weighted_avg_s0_k0.1_normal,feat_weighted_avg_s0.4_k-0.1_normal,feat_weighted_avg_s0.4_k-0.05_normal,feat_weighted_avg_s0.4_k0_normal,feat_weighted_avg_s0.4_k0.05_normal,feat_weighted_avg_s0.4_k0.1_normal,feat_weighted_avg_s0_k0_minmax,feat_weighted_avg_s0_k0_uniform,feat_weighted_avg_s0_k0.5_minmax,feat_weighted_avg_s0_k0.5_uniform,feat_weighted_avg_s0_k1_minmax,feat_weighted_avg_s0_k1_uniform,feat_weighted_avg_s0_k2_minmax,feat_weighted_avg_s0_k2_uniform,feat_weighted_avg_s0_k3_minmax,feat_weighted_avg_s0_k3_uniform,feat_weighted_avg_s0.4_k0_minmax,feat_weighted_avg_s0.4_k0_uniform,feat_weighted_avg_s0.4_k0.5_minmax,feat_weighted_avg_s0.4_k0.5_uniform,feat_weighted_avg_s0.4_k1_minmax,feat_weighted_avg_s0.4_k1_uniform,feat_weighted_avg_s0.4_k2_minmax,feat_weighted_avg_s0.4_k2_uniform,feat_weighted_avg_s0.4_k3_minmax,feat_weighted_avg_s0.4_k3_uniform
0,A,2014-01-02,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,236.0,0.527291,-0.199322,-0.190936,-0.182292,-0.173387,-0.164218,0.174803,0.181425,0.188138,0.194938,0.201821,0.660996,0.456432,0.678704,0.488549,0.704022,0.52966,0.766086,0.619521,0.808923,0.676174,0.714022,0.563569,0.730947,0.5901,0.752083,0.618068,0.793892,0.665844,0.818243,0.690923
1,A,2014-01-03,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291,-0.199322,-0.190936,-0.182292,-0.173387,-0.164218,0.174803,0.181425,0.188138,0.194938,0.201821,0.660996,0.456432,0.678704,0.488549,0.704022,0.52966,0.766086,0.619521,0.808923,0.676174,0.714022,0.563569,0.730947,0.5901,0.752083,0.618068,0.793892,0.665844,0.818243,0.690923
2,A,2014-01-06,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291,-0.199322,-0.190936,-0.182292,-0.173387,-0.164218,0.174803,0.181425,0.188138,0.194938,0.201821,0.660996,0.456432,0.678704,0.488549,0.704022,0.52966,0.766086,0.619521,0.808923,0.676174,0.714022,0.563569,0.730947,0.5901,0.752083,0.618068,0.793892,0.665844,0.818243,0.690923
3,A,2014-01-07,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291,-0.199322,-0.190936,-0.182292,-0.173387,-0.164218,0.174803,0.181425,0.188138,0.194938,0.201821,0.660996,0.456432,0.678704,0.488549,0.704022,0.52966,0.766086,0.619521,0.808923,0.676174,0.714022,0.563569,0.730947,0.5901,0.752083,0.618068,0.793892,0.665844,0.818243,0.690923
4,A,2014-01-08,0.003344,-0.120401,-0.0,-1.781048,0.96672,0.920556,0.896534,0.787751,0.820244,0.807044,-0.000257,-0.000106,-9.8e-05,0.999092,0.998004,-0.050531,-0.020592,-0.071944,0.99643,0.832324,0.99801,-0.000593,-10.0,1.0,237.0,0.527291,-0.199322,-0.190936,-0.182292,-0.173387,-0.164218,0.174803,0.181425,0.188138,0.194938,0.201821,0.660996,0.456432,0.678704,0.488549,0.704022,0.52966,0.766086,0.619521,0.808923,0.676174,0.714022,0.563569,0.730947,0.5901,0.752083,0.618068,0.793892,0.665844,0.818243,0.690923


feat_ch_full_len_10k                   -9.995012e+03
feat_ch_item_1a_len_10k                -2.179835e+04
feat_ch_item_1b_len_10k                 4.624800e+03
feat_ch_item_3_len_10k                  5.643059e+04
feat_full_cos_1gram_10k                 5.684832e+05
feat_full_cos_2gram_10k                 5.519919e+05
feat_full_jac_1gram_10k                 5.161512e+05
feat_full_jac_2gram_10k                 4.408573e+05
feat_item_1a_lev_10k                    4.978536e+05
feat_item_7_lev_10k                     4.737510e+05
feat_lm_postive_10k                    -1.236898e+01
feat_lm_uncertainty_10k                -5.436695e+00
feat_lm_litigious_10k                   3.817757e-01
feat_word2vec_10k                       5.952284e+05
feat_sen_enc_10k                        5.935209e+05
feat_item_sentiment_10k                -1.530911e+03
feat_fls_sentiment_10k                 -1.667474e+03
feat_ch_full_len_10q                   -1.133872e+04
feat_full_cos_1gram_10q                 6.0156

In [9]:
# loop through all signals to generate metrics
feat_avg_names = [c for c in feats.columns if 'feat_weighted_avg' in c]
feat_metric_avg = [analyze_feat(feats, ret, exret, f) for f in feat_avg_names]
feat_metric_avg = pd.concat(feat_metric_avg, axis=0).reset_index(drop=True)
feat_metric = pd.concat([feat_metric, feat_metric_avg], axis=0).reset_index(drop=True)
feat_metric.to_csv('feat_metric.csv', index=False)

[2022-05-15 13:46:53] Shape of ret: (1064, 560)
[2022-05-15 13:46:53] Shape of exret: (1064, 560)
[2022-05-15 13:46:53] Shape of signal: (1064, 560)
[2022-05-15 13:47:16] Shape of ret: (1064, 560)
[2022-05-15 13:47:16] Shape of exret: (1064, 560)
[2022-05-15 13:47:16] Shape of signal: (1064, 560)
[2022-05-15 13:47:39] Shape of ret: (1064, 560)
[2022-05-15 13:47:39] Shape of exret: (1064, 560)
[2022-05-15 13:47:39] Shape of signal: (1064, 560)
[2022-05-15 13:48:01] Shape of ret: (1064, 560)
[2022-05-15 13:48:01] Shape of exret: (1064, 560)
[2022-05-15 13:48:01] Shape of signal: (1064, 560)
[2022-05-15 13:48:23] Shape of ret: (1064, 560)
[2022-05-15 13:48:23] Shape of exret: (1064, 560)
[2022-05-15 13:48:23] Shape of signal: (1064, 560)
[2022-05-15 13:48:46] Shape of ret: (1064, 560)
[2022-05-15 13:48:46] Shape of exret: (1064, 560)
[2022-05-15 13:48:46] Shape of signal: (1064, 560)
[2022-05-15 13:49:08] Shape of ret: (1064, 560)
[2022-05-15 13:49:08] Shape of exret: (1064, 560)
[2022-05

In [10]:
# display all signal metric output
feat_names = [c for c in feats.columns if 'feat' in c]
for feat in feat_names:
    display(feat_metric.loc[lambda x: x.feat==feat])

Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
0,feat_ch_full_len_10k,6m,126,0.011788,0.41071,0.515007,0.384209
1,feat_ch_full_len_10k,9m,189,0.027996,0.665152,0.686653,0.733436
2,feat_ch_full_len_10k,12m,252,0.039372,0.827734,0.776643,0.83537


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
3,feat_ch_item_1a_len_10k,6m,126,0.012262,0.417152,0.268661,0.435046
4,feat_ch_item_1a_len_10k,9m,189,0.021461,0.431898,0.290181,0.515214
5,feat_ch_item_1a_len_10k,12m,252,0.032616,0.504538,0.349152,0.87088


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
6,feat_ch_item_1b_len_10k,6m,126,-0.019051,-0.508632,-0.444818,-0.780796
7,feat_ch_item_1b_len_10k,9m,189,-0.015362,-0.451767,-0.368533,-0.673344
8,feat_ch_item_1b_len_10k,12m,252,-0.009736,-0.289043,-0.272941,-0.32277


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
9,feat_ch_item_3_len_10k,6m,126,-0.008015,-0.3843,-0.454176,-0.493154
10,feat_ch_item_3_len_10k,9m,189,-0.003299,-0.301118,-0.454885,-0.566054
11,feat_ch_item_3_len_10k,12m,252,0.000248,-0.114657,-0.246312,-0.118548


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
12,feat_full_cos_1gram_10k,6m,126,-0.0044,-0.038721,-0.142918,0.408696
13,feat_full_cos_1gram_10k,9m,189,-0.007394,-0.151657,-0.245219,0.53506
14,feat_full_cos_1gram_10k,12m,252,-0.009598,-0.224323,-0.305044,0.663217


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
15,feat_full_cos_2gram_10k,6m,126,-0.000673,0.036706,-0.107043,0.22
16,feat_full_cos_2gram_10k,9m,189,-0.001787,-0.066987,-0.184731,0.361802
17,feat_full_cos_2gram_10k,12m,252,-0.005572,-0.187003,-0.282568,0.525939


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
18,feat_full_jac_1gram_10k,6m,126,0.024508,0.510797,0.371989,0.555492
19,feat_full_jac_1gram_10k,9m,189,0.027407,0.458494,0.306863,0.366912
20,feat_full_jac_1gram_10k,12m,252,0.027495,0.367103,0.157476,0.324046


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
21,feat_full_jac_2gram_10k,6m,126,0.022109,0.464897,0.308634,0.529626
22,feat_full_jac_2gram_10k,9m,189,0.027276,0.440475,0.274652,0.383474
23,feat_full_jac_2gram_10k,12m,252,0.030028,0.385059,0.180153,0.380524


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
24,feat_item_1a_lev_10k,6m,126,-0.010372,-0.334835,-0.652151,-0.038572
25,feat_item_1a_lev_10k,9m,189,-0.008037,-0.391028,-0.726726,0.299741
26,feat_item_1a_lev_10k,12m,252,-0.005365,-0.433543,-0.788406,0.363716


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
27,feat_item_7_lev_10k,6m,126,0.012026,0.468035,0.214736,0.347403
28,feat_item_7_lev_10k,9m,189,0.016051,0.588871,0.245309,0.35709
29,feat_item_7_lev_10k,12m,252,0.029349,0.974585,0.400035,0.489612


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
30,feat_lm_postive_10k,6m,126,6.8e-05,-0.08141,-0.163808,0.234737
31,feat_lm_postive_10k,9m,189,0.012058,0.048724,0.008581,0.61718
32,feat_lm_postive_10k,12m,252,0.021105,0.114037,0.095475,0.573362


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
33,feat_lm_uncertainty_10k,6m,126,-0.01511,-0.702888,-0.835882,-0.68531
34,feat_lm_uncertainty_10k,9m,189,-0.021344,-0.791858,-0.795963,-0.367919
35,feat_lm_uncertainty_10k,12m,252,-0.025409,-0.713773,-0.698186,-0.166775


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
36,feat_lm_litigious_10k,6m,126,0.028775,0.422199,0.400606,0.447241
37,feat_lm_litigious_10k,9m,189,0.03952,0.580242,0.572594,0.742904
38,feat_lm_litigious_10k,12m,252,0.052553,0.830855,0.837159,0.778994


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
39,feat_word2vec_10k,6m,126,0.008363,0.249031,0.212833,0.653331
40,feat_word2vec_10k,9m,189,0.013411,0.218972,0.186448,0.55762
41,feat_word2vec_10k,12m,252,0.019685,0.18051,0.128023,0.614434


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
42,feat_sen_enc_10k,6m,126,0.011024,0.301619,0.34115,0.39398
43,feat_sen_enc_10k,9m,189,0.016364,0.314186,0.342085,0.366006
44,feat_sen_enc_10k,12m,252,0.018022,0.276645,0.33242,0.54857


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
45,feat_item_sentiment_10k,6m,126,-0.001109,-0.171257,-0.568685,-0.063716
46,feat_item_sentiment_10k,9m,189,0.013942,0.257978,-0.143547,0.049756
47,feat_item_sentiment_10k,12m,252,0.022481,0.455717,0.163679,0.167466


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
48,feat_fls_sentiment_10k,6m,126,0.002667,-0.040659,-0.044246,0.157756
49,feat_fls_sentiment_10k,9m,189,0.007946,0.114549,0.124019,0.22875
50,feat_fls_sentiment_10k,12m,252,0.014272,0.252551,0.282665,0.371098


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
51,feat_ch_full_len_10q,6m,126,0.017587,0.261035,0.371179,0.16815
52,feat_ch_full_len_10q,9m,189,0.022077,0.45051,0.574732,0.357258
53,feat_ch_full_len_10q,12m,252,0.0265,0.480357,0.623441,0.463231


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
54,feat_full_cos_1gram_10q,6m,126,0.051459,1.091222,1.09653,0.693253
55,feat_full_cos_1gram_10q,9m,189,0.071439,1.257505,1.346055,0.43017
56,feat_full_cos_1gram_10q,12m,252,0.07979,1.22495,1.342652,0.526472


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
57,feat_full_jac_1gram_10q,6m,126,0.063545,1.176574,1.230645,0.807484
58,feat_full_jac_1gram_10q,9m,189,0.08721,1.478095,1.639033,0.516341
59,feat_full_jac_1gram_10q,12m,252,0.099962,1.682216,1.919405,0.614249


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
60,feat_word2vec_10q,6m,126,0.046244,0.753357,0.800742,0.56266
61,feat_word2vec_10q,9m,189,0.063708,0.874911,0.954484,0.404319
62,feat_word2vec_10q,12m,252,0.072492,0.930166,0.974367,0.560177


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
63,feat_lm_postive_10q,6m,126,0.008207,0.043975,0.000982,0.254461
64,feat_lm_postive_10q,9m,189,0.00634,0.147597,0.116404,0.118319
65,feat_lm_postive_10q,12m,252,0.01338,0.377742,0.258223,-0.067167


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
66,feat_cnt_8k,6m,126,0.022928,0.663539,0.588935,0.566888
67,feat_cnt_8k,9m,189,0.028707,0.635441,0.567721,0.40892
68,feat_cnt_8k,12m,252,0.030607,0.586476,0.500547,0.182304


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
69,feat_cnt_8k_diff,6m,126,0.007775,0.457594,0.451822,0.15739
70,feat_cnt_8k_diff,9m,189,0.012991,0.519617,0.544772,0.059314
71,feat_cnt_8k_diff,12m,252,0.013878,0.349575,0.335921,0.198913


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
72,feat_lgbm_ltr_12m,6m,126,0.010878,0.197085,0.093949,0.090191
73,feat_lgbm_ltr_12m,9m,189,0.010937,0.125098,0.028631,-0.155178
74,feat_lgbm_ltr_12m,12m,252,0.008711,0.022523,-0.083482,-0.254724


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
75,feat_lgbm_binary_clf_12m,6m,126,-0.007203,-0.260794,-0.374389,-0.311605
76,feat_lgbm_binary_clf_12m,9m,189,-0.006327,-0.366629,-0.512953,-0.056013
77,feat_lgbm_binary_clf_12m,12m,252,-0.002939,-0.23771,-0.36419,-0.375705


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
78,feat_weighted_avg_s0_k-0.1_normal,6m,126,0.044271,0.936266,0.938626,0.87463
79,feat_weighted_avg_s0_k-0.1_normal,9m,189,0.065743,1.28982,1.340327,0.755592
80,feat_weighted_avg_s0_k-0.1_normal,12m,252,0.085748,1.714781,1.802285,0.793367


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
81,feat_weighted_avg_s0_k-0.05_normal,6m,126,0.045267,0.950759,0.955251,0.877628
82,feat_weighted_avg_s0_k-0.05_normal,9m,189,0.067068,1.307111,1.360246,0.753577
83,feat_weighted_avg_s0_k-0.05_normal,12m,252,0.087059,1.732468,1.821679,0.791371


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
84,feat_weighted_avg_s0_k0_normal,6m,126,0.046262,0.965562,0.97254,0.881023
85,feat_weighted_avg_s0_k0_normal,9m,189,0.068345,1.325312,1.380911,0.751091
86,feat_weighted_avg_s0_k0_normal,12m,252,0.088379,1.753135,1.844972,0.78983


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
87,feat_weighted_avg_s0_k0.05_normal,6m,126,0.047201,0.977986,0.987316,0.882554
88,feat_weighted_avg_s0_k0.05_normal,9m,189,0.069591,1.340965,1.398968,0.748858
89,feat_weighted_avg_s0_k0.05_normal,12m,252,0.089629,1.771566,1.864856,0.78791


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
90,feat_weighted_avg_s0_k0.1_normal,6m,126,0.048206,0.992687,1.004094,0.88596
91,feat_weighted_avg_s0_k0.1_normal,9m,189,0.070892,1.358369,1.418249,0.74725
92,feat_weighted_avg_s0_k0.1_normal,12m,252,0.090989,1.793649,1.887574,0.786837


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
93,feat_weighted_avg_s0.4_k-0.1_normal,6m,126,0.060052,1.374535,1.43854,0.972418
94,feat_weighted_avg_s0.4_k-0.1_normal,9m,189,0.086574,1.664474,1.735806,0.749679
95,feat_weighted_avg_s0.4_k-0.1_normal,12m,252,0.107943,1.839479,1.945714,0.768321


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
96,feat_weighted_avg_s0.4_k-0.05_normal,6m,126,0.060451,1.371038,1.43497,0.965222
97,feat_weighted_avg_s0.4_k-0.05_normal,9m,189,0.087021,1.662069,1.735032,0.739462
98,feat_weighted_avg_s0.4_k-0.05_normal,12m,252,0.108269,1.839196,1.948293,0.764395


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
99,feat_weighted_avg_s0.4_k0_normal,6m,126,0.060902,1.367745,1.431458,0.95976
100,feat_weighted_avg_s0.4_k0_normal,9m,189,0.087548,1.659012,1.733689,0.73024
101,feat_weighted_avg_s0.4_k0_normal,12m,252,0.108663,1.836757,1.947535,0.760031


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
102,feat_weighted_avg_s0.4_k0.05_normal,6m,126,0.061315,1.364787,1.429153,0.954568
103,feat_weighted_avg_s0.4_k0.05_normal,9m,189,0.088143,1.659183,1.735142,0.722493
104,feat_weighted_avg_s0.4_k0.05_normal,12m,252,0.10907,1.838542,1.95062,0.757097


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
105,feat_weighted_avg_s0.4_k0.1_normal,6m,126,0.061772,1.36098,1.425718,0.948497
106,feat_weighted_avg_s0.4_k0.1_normal,9m,189,0.088721,1.660168,1.737854,0.715746
107,feat_weighted_avg_s0.4_k0.1_normal,12m,252,0.109436,1.839969,1.953208,0.75319


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
108,feat_weighted_avg_s0_k0_minmax,6m,126,0.04398,0.985384,0.878269,0.990193
109,feat_weighted_avg_s0_k0_minmax,9m,189,0.059995,1.36516,1.245535,0.755415
110,feat_weighted_avg_s0_k0_minmax,12m,252,0.075018,1.871318,1.748433,0.809243


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
111,feat_weighted_avg_s0_k0_uniform,6m,126,0.045717,0.99812,0.930895,0.923347
112,feat_weighted_avg_s0_k0_uniform,9m,189,0.06687,1.407824,1.345895,0.839742
113,feat_weighted_avg_s0_k0_uniform,12m,252,0.084554,2.037354,2.008238,0.834213


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
114,feat_weighted_avg_s0_k0.5_minmax,6m,126,0.051679,1.089168,1.041225,0.968598
115,feat_weighted_avg_s0_k0.5_minmax,9m,189,0.071704,1.468866,1.451276,0.705075
116,feat_weighted_avg_s0_k0.5_minmax,12m,252,0.0878,1.876939,1.952212,0.765703


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
117,feat_weighted_avg_s0_k0.5_uniform,6m,126,0.054295,1.105516,1.068285,0.926282
118,feat_weighted_avg_s0_k0.5_uniform,9m,189,0.078365,1.541251,1.522187,0.791864
119,feat_weighted_avg_s0_k0.5_uniform,12m,252,0.096214,2.135297,2.168049,0.800629


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
120,feat_weighted_avg_s0_k1_minmax,6m,126,0.057718,1.16232,1.158103,0.936417
121,feat_weighted_avg_s0_k1_minmax,9m,189,0.079958,1.507694,1.549627,0.653181
122,feat_weighted_avg_s0_k1_minmax,12m,252,0.095927,1.832339,2.000243,0.732745


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
123,feat_weighted_avg_s0_k1_uniform,6m,126,0.060106,1.169045,1.155923,0.90384
124,feat_weighted_avg_s0_k1_uniform,9m,189,0.085422,1.585499,1.601783,0.717097
125,feat_weighted_avg_s0_k1_uniform,12m,252,0.102627,2.060769,2.154968,0.75627


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
126,feat_weighted_avg_s0_k2_minmax,6m,126,0.060937,1.18407,1.204703,0.862399
127,feat_weighted_avg_s0_k2_minmax,9m,189,0.083429,1.481401,1.553973,0.573245
128,feat_weighted_avg_s0_k2_minmax,12m,252,0.097219,1.695954,1.887645,0.679068


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
129,feat_weighted_avg_s0_k2_uniform,6m,126,0.063237,1.220966,1.231428,0.858511
130,feat_weighted_avg_s0_k2_uniform,9m,189,0.087129,1.568953,1.634341,0.613033
131,feat_weighted_avg_s0_k2_uniform,12m,252,0.101613,1.84247,2.032256,0.697414


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
132,feat_weighted_avg_s0_k3_minmax,6m,126,0.06031,1.183896,1.203871,0.828753
133,feat_weighted_avg_s0_k3_minmax,9m,189,0.081634,1.462534,1.543538,0.546184
134,feat_weighted_avg_s0_k3_minmax,12m,252,0.094192,1.653114,1.859844,0.658045


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
135,feat_weighted_avg_s0_k3_uniform,6m,126,0.061884,1.209096,1.224655,0.839743
136,feat_weighted_avg_s0_k3_uniform,9m,189,0.084115,1.51063,1.587967,0.57074
137,feat_weighted_avg_s0_k3_uniform,12m,252,0.09718,1.725798,1.936127,0.669685


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
138,feat_weighted_avg_s0.4_k0_minmax,6m,126,0.052446,1.230456,1.194295,1.0436
139,feat_weighted_avg_s0.4_k0_minmax,9m,189,0.071814,1.577204,1.598946,0.775551
140,feat_weighted_avg_s0.4_k0_minmax,12m,252,0.089273,1.942452,2.076843,0.808573


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
141,feat_weighted_avg_s0.4_k0_uniform,6m,126,0.061519,1.372558,1.383918,0.945336
142,feat_weighted_avg_s0.4_k0_uniform,9m,189,0.086951,1.711666,1.756116,0.776223
143,feat_weighted_avg_s0.4_k0_uniform,12m,252,0.105312,1.936971,2.057962,0.776506


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
144,feat_weighted_avg_s0.4_k0.5_minmax,6m,126,0.056887,1.236543,1.23279,1.003322
145,feat_weighted_avg_s0.4_k0.5_minmax,9m,189,0.078365,1.583014,1.638461,0.701703
146,feat_weighted_avg_s0.4_k0.5_minmax,12m,252,0.095242,1.90528,2.108029,0.766921


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
147,feat_weighted_avg_s0.4_k0.5_uniform,6m,126,0.063479,1.33601,1.350893,0.899359
148,feat_weighted_avg_s0.4_k0.5_uniform,9m,189,0.089341,1.683241,1.741716,0.705091
149,feat_weighted_avg_s0.4_k0.5_uniform,12m,252,0.106858,1.885974,2.035761,0.74403


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
150,feat_weighted_avg_s0.4_k1_minmax,6m,126,0.059399,1.209079,1.226106,0.941075
151,feat_weighted_avg_s0.4_k1_minmax,9m,189,0.082034,1.543773,1.618185,0.636823
152,feat_weighted_avg_s0.4_k1_minmax,12m,252,0.097699,1.841065,2.06236,0.729841


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
153,feat_weighted_avg_s0.4_k1_uniform,6m,126,0.064651,1.306861,1.323557,0.872857
154,feat_weighted_avg_s0.4_k1_uniform,9m,189,0.09005,1.645441,1.712431,0.653241
155,feat_weighted_avg_s0.4_k1_uniform,12m,252,0.106225,1.848528,2.019676,0.717011


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
156,feat_weighted_avg_s0.4_k2_minmax,6m,126,0.061071,1.191455,1.215955,0.85435
157,feat_weighted_avg_s0.4_k2_minmax,9m,189,0.083035,1.480369,1.566072,0.562264
158,feat_weighted_avg_s0.4_k2_minmax,12m,252,0.096569,1.694656,1.914352,0.674362


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
159,feat_weighted_avg_s0.4_k2_uniform,6m,126,0.063526,1.245046,1.263638,0.84483
160,feat_weighted_avg_s0.4_k2_uniform,9m,189,0.087131,1.557423,1.636979,0.593139
161,feat_weighted_avg_s0.4_k2_uniform,12m,252,0.101092,1.763872,1.971272,0.6831


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
162,feat_weighted_avg_s0.4_k3_minmax,6m,126,0.060039,1.178564,1.198972,0.823003
163,feat_weighted_avg_s0.4_k3_minmax,9m,189,0.081209,1.45392,1.538295,0.542891
164,feat_weighted_avg_s0.4_k3_minmax,12m,252,0.093816,1.644078,1.859264,0.656731


Unnamed: 0,feat,horizon,n_day,avg_rnk_corr,sharpe_exret,sharpe_ret,sharpe_ret_vol
165,feat_weighted_avg_s0.4_k3_uniform,6m,126,0.061888,1.212381,1.229577,0.836859
166,feat_weighted_avg_s0.4_k3_uniform,9m,189,0.084001,1.505562,1.585987,0.566693
167,feat_weighted_avg_s0.4_k3_uniform,12m,252,0.096825,1.705747,1.919094,0.665988


In [11]:
# signal correlation plot
corr = feats[feat_names].corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,feat_ch_full_len_10k,feat_ch_item_1a_len_10k,feat_ch_item_1b_len_10k,feat_ch_item_3_len_10k,feat_full_cos_1gram_10k,feat_full_cos_2gram_10k,feat_full_jac_1gram_10k,feat_full_jac_2gram_10k,feat_item_1a_lev_10k,feat_item_7_lev_10k,feat_lm_postive_10k,feat_lm_uncertainty_10k,feat_lm_litigious_10k,feat_word2vec_10k,feat_sen_enc_10k,feat_item_sentiment_10k,feat_fls_sentiment_10k,feat_ch_full_len_10q,feat_full_cos_1gram_10q,feat_full_jac_1gram_10q,feat_word2vec_10q,feat_lm_postive_10q,feat_cnt_8k,feat_cnt_8k_diff,feat_lgbm_ltr_12m,feat_lgbm_binary_clf_12m,feat_weighted_avg_s0_k-0.1_normal,feat_weighted_avg_s0_k-0.05_normal,feat_weighted_avg_s0_k0_normal,feat_weighted_avg_s0_k0.05_normal,feat_weighted_avg_s0_k0.1_normal,feat_weighted_avg_s0.4_k-0.1_normal,feat_weighted_avg_s0.4_k-0.05_normal,feat_weighted_avg_s0.4_k0_normal,feat_weighted_avg_s0.4_k0.05_normal,feat_weighted_avg_s0.4_k0.1_normal,feat_weighted_avg_s0_k0_minmax,feat_weighted_avg_s0_k0_uniform,feat_weighted_avg_s0_k0.5_minmax,feat_weighted_avg_s0_k0.5_uniform,feat_weighted_avg_s0_k1_minmax,feat_weighted_avg_s0_k1_uniform,feat_weighted_avg_s0_k2_minmax,feat_weighted_avg_s0_k2_uniform,feat_weighted_avg_s0_k3_minmax,feat_weighted_avg_s0_k3_uniform,feat_weighted_avg_s0.4_k0_minmax,feat_weighted_avg_s0.4_k0_uniform,feat_weighted_avg_s0.4_k0.5_minmax,feat_weighted_avg_s0.4_k0.5_uniform,feat_weighted_avg_s0.4_k1_minmax,feat_weighted_avg_s0.4_k1_uniform,feat_weighted_avg_s0.4_k2_minmax,feat_weighted_avg_s0.4_k2_uniform,feat_weighted_avg_s0.4_k3_minmax,feat_weighted_avg_s0.4_k3_uniform
feat_ch_full_len_10k,1.0,0.236127,0.057164,0.119494,0.089829,0.066416,-0.038341,0.006713,-0.036918,0.040787,0.145966,0.199059,-0.323651,-0.26743,-0.077245,-0.184135,-0.067629,0.137331,0.017321,0.010928,0.062491,0.064683,-0.049795,0.007418,0.003906,0.065206,0.133103,0.133537,0.133883,0.134136,0.134287,0.147473,0.146872,0.146118,0.145208,0.144143,0.07287,0.138651,0.071856,0.131767,0.060435,0.111692,0.031734,0.057879,0.017209,0.026442,0.106443,0.154236,0.087467,0.128333,0.064329,0.098341,0.030988,0.049103,0.016882,0.023966
feat_ch_item_1a_len_10k,0.236127,1.0,0.01762,0.044747,0.021486,0.017516,-0.022813,-0.005144,-0.065097,-0.083034,0.175409,0.163327,-0.03501,-0.13914,-0.047611,-0.109811,-0.036874,0.082566,0.045371,0.04366,0.061639,0.044849,-0.007635,0.004549,-0.006108,-0.021677,0.167593,0.167224,0.166776,0.166244,0.165623,0.063558,0.064791,0.065964,0.067074,0.068119,0.099405,0.152585,0.094453,0.140092,0.081952,0.118019,0.057165,0.069982,0.046701,0.045428,0.013672,0.080069,0.029116,0.075913,0.038619,0.068114,0.044067,0.051279,0.043576,0.040678
feat_ch_item_1b_len_10k,0.057164,0.01762,1.0,0.013936,-0.011899,-0.006902,-0.003332,0.014141,0.001761,-0.018669,0.010657,0.005741,-0.032918,-0.020068,0.001737,-0.039437,-0.091367,0.069908,-0.005259,0.01541,-0.001046,0.013723,-0.004506,0.014695,-0.000402,0.028462,0.022537,0.022577,0.022605,0.022621,0.022624,0.025889,0.025661,0.025417,0.025158,0.024883,0.000409,0.029355,0.004115,0.027127,0.007814,0.023052,0.012133,0.014962,0.013758,0.012096,0.004879,0.03134,0.008079,0.024992,0.01038,0.019401,0.012762,0.013028,0.013885,0.011578
feat_ch_item_3_len_10k,0.119494,0.044747,0.013936,1.0,0.006001,-0.022739,-0.035712,-0.049052,0.012677,-0.003893,0.244701,-0.001561,0.202264,-0.060851,-0.028925,-0.030841,-0.034972,0.13427,0.027884,-0.02724,0.035536,0.130769,-0.011625,0.021238,-0.016915,-0.08758,0.095081,0.094923,0.094689,0.094373,0.093971,0.099345,0.098651,0.097826,0.096869,0.095781,0.051003,0.090984,0.042892,0.082361,0.027863,0.063104,-0.000831,0.014544,-0.015001,-0.014606,0.049113,0.104515,0.036819,0.08214,0.021189,0.055252,-0.003328,0.009121,-0.015574,-0.016062
feat_full_cos_1gram_10k,0.089829,0.021486,-0.011899,0.006001,1.0,0.898812,0.488807,0.475444,0.158964,0.162709,0.033581,0.026576,-0.079196,0.347334,0.372497,0.035192,0.038019,0.079067,0.221054,0.182426,0.160587,0.017523,0.005865,-0.001632,0.101255,0.055206,0.360576,0.359221,0.357771,0.356219,0.354558,0.188334,0.190087,0.191774,0.193386,0.194918,0.381937,0.348666,0.347962,0.326634,0.298028,0.29102,0.219108,0.215079,0.191063,0.176284,0.224146,0.225283,0.22362,0.218609,0.214639,0.207871,0.195844,0.184698,0.186224,0.168881
feat_full_cos_2gram_10k,0.066416,0.017516,-0.006902,-0.022739,0.898812,1.0,0.611757,0.603343,0.187358,0.251318,0.042916,0.020651,-0.085643,0.427132,0.462587,0.025706,0.030582,0.059074,0.266749,0.240111,0.207182,0.002406,-0.028729,-0.017701,0.122182,0.074934,0.431256,0.429836,0.428322,0.426708,0.424987,0.231635,0.234068,0.236433,0.238722,0.240928,0.477716,0.413786,0.435555,0.391271,0.374229,0.354411,0.27857,0.274047,0.245686,0.232228,0.28966,0.271612,0.286158,0.26883,0.273537,0.260996,0.250434,0.24009,0.239849,0.22399
feat_full_jac_1gram_10k,-0.038341,-0.022813,-0.003332,-0.035712,0.488807,0.611757,1.0,0.947173,0.288408,0.379325,0.055461,0.003767,-0.05796,0.646114,0.639239,0.000599,0.028305,-0.010326,0.173427,0.334644,0.179914,-0.013895,0.009641,-0.038806,0.132928,0.04744,0.535286,0.532062,0.52875,0.525348,0.52185,0.207464,0.211183,0.214914,0.21865,0.222379,0.693348,0.514917,0.609328,0.478202,0.503576,0.432019,0.360333,0.354022,0.322387,0.326891,0.366352,0.274277,0.352418,0.285608,0.334393,0.295605,0.314596,0.309183,0.313033,0.3166
feat_full_jac_2gram_10k,0.006713,-0.005144,0.014141,-0.049052,0.475444,0.603343,0.947173,1.0,0.322817,0.42677,0.056366,0.000261,-0.080581,0.501801,0.553485,-0.004481,0.034951,-0.039305,0.213117,0.406526,0.176649,-0.003745,0.022331,-0.060872,0.123937,0.063285,0.573521,0.570744,0.567901,0.564988,0.562001,0.256842,0.260816,0.264819,0.268841,0.272871,0.718773,0.560722,0.646967,0.52762,0.552598,0.485984,0.421433,0.416036,0.387599,0.393834,0.42433,0.326405,0.413183,0.338389,0.396619,0.350902,0.378692,0.371143,0.378851,0.383489
feat_item_1a_lev_10k,-0.036918,-0.065097,0.001761,0.012677,0.158964,0.187358,0.288408,0.322817,1.0,0.211652,0.090978,0.075122,0.045319,0.154256,0.129705,0.069699,0.023598,0.029163,0.096948,0.176261,0.077412,0.006745,0.024827,-0.0214,0.071659,-0.035426,0.258098,0.256927,0.255699,0.254412,0.253063,0.161093,0.16135,0.161579,0.161779,0.161949,0.292187,0.25103,0.267748,0.233792,0.232412,0.20998,0.180075,0.167764,0.165606,0.153109,0.214869,0.166256,0.200341,0.158774,0.184195,0.15281,0.166107,0.147566,0.16271,0.148325
feat_item_7_lev_10k,0.040787,-0.083034,-0.018669,-0.003893,0.162709,0.251318,0.379325,0.42677,0.211652,1.0,0.029408,0.022234,-0.082712,0.208912,0.27425,-0.048432,-0.021894,-0.035442,0.103327,0.189865,0.064251,0.025497,0.120924,-0.003659,0.117746,0.074063,0.367342,0.365823,0.364179,0.362403,0.360492,0.39931,0.394026,0.388643,0.38317,0.377613,0.592469,0.295897,0.514306,0.280309,0.403682,0.256063,0.241232,0.208762,0.190693,0.189872,0.65529,0.304603,0.501965,0.262648,0.367513,0.230092,0.226076,0.195929,0.187406,0.18666
