## Entropy Analysis

This notebook depends on `model_eval.ipynb` and on the raw data files.

In [107]:
import pandas as pd
import numpy as np
import os

In [108]:
# Collect the data files whose name contains '_full'.
# os.listdir returns entries in arbitrary order, so the result is sorted:
# later cells index into this list by position and need a reproducible order.
list_of_file = sorted(i for i in os.listdir('../data/') if '_full' in i)

In [109]:
import antropy as ant
import random
def timothy_entropy(x):
    """Shannon entropy (in bits) of the equal-width histogram of ``x``.

    Missing values (NaN / None) are dropped, the remaining values are binned
    with ``np.histogram`` and the entropy of the bin frequencies is returned.
    The number of bins depends on the number of valid values: 20 for
    >= 10000, 10 for >= 1000, 5 for >= 100, otherwise 3.

    Parameters
    ----------
    x : array-like of numbers
        Sample to analyse (list, NumPy array, pandas Series, ...).

    Returns
    -------
    float
        Entropy in bits, between 0 and ``log2(nbins)``. Returns 0.0 when no
        valid value remains after dropping missing entries.
    """
    # Vectorised NaN removal; converting to float also maps None to NaN.
    values = np.asarray(x, dtype=float)
    values = values[~np.isnan(values)]
    n_values = values.size
    if n_values == 0:
        return 0.0

    if n_values >= 10000:
        nbins = 20
    elif n_values >= 1000:
        nbins = 10
    elif n_values >= 100:
        nbins = 5
    else:
        nbins = 3

    counts, _ = np.histogram(values, bins=nbins)
    # Empty bins contribute nothing to the entropy and must be skipped to
    # avoid evaluating log2(0).
    probs = counts[counts > 0] / n_values
    # "+ 0.0" turns the -0.0 obtained when every value falls in one bin into 0.0.
    return -np.sum(probs * np.log2(probs)) + 0.0

def structual_entropy(x):
    """Shannon entropy (in bits) of the distinct values occurring in ``x``.

    Every distinct value is treated as one symbol whose probability is its
    relative frequency in ``x``. No binning and no NaN filtering is applied
    here; the input is handed to ``np.unique`` as is.

    Parameters
    ----------
    x : array-like
        Sample to analyse.

    Returns
    -------
    float
        Entropy in bits of the value-frequency distribution.
    """
    occurrences = np.unique(x, return_counts=True)[1]
    probabilities = occurrences / occurrences.sum()
    return -np.sum(probabilities * np.log2(probabilities))

# Default set of indicator columns to analyse; individual cells below
# rebind this name to the subset they need.
indicator_col = [
    # RSI variants
    'RSI_20', 'RSI_20_ta',
    'RSI_25', 'RSI_25_ta',
    'DT_RSI_2_20',
    'STO_20_1',
    # moving-average based indicators
    'MADIFF_10_100_0', 'MADIFF_10_100',
    'MACD_10_100_5',
    'LINTRND_10',
    'PR_INT_0', 'PR_INT_20',
    'CMMA_10_252',
    # entropy indicators
    'ENT_2_10', 'ENT_4_16',
    # FTI indicators
    'FTI_LP', 'FTI_BP', 'FTI_BF',
]

def product_entropy_df(df, indicator_col):
    """Build a table of three entropy measures for the given columns of ``df``.

    For every column name in ``indicator_col`` the following are computed:

    * ``timothy``   - histogram entropy from ``timothy_entropy``
    * ``antropy``   - normalised permutation entropy from ``ant.perm_entropy``
    * ``structual`` - distinct-value entropy from ``structual_entropy``

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column listed in ``indicator_col``.
    indicator_col : iterable of str
        Names of the columns to analyse.

    Returns
    -------
    pandas.DataFrame
        One row per column with the fields ``col``, ``timothy``, ``antropy``
        and ``structual``, sorted by ``antropy`` in descending order. The
        index keeps each row's position in ``indicator_col``.
    """
    entropy_list = []
    for col_name in indicator_col:
        column = df[col_name]
        # NOTE(review): NaNs are dropped only inside timothy_entropy; the
        # other two measures receive the column unfiltered - confirm intended.
        entropy_list.append({
            'col': col_name,
            'timothy': timothy_entropy(column),
            'antropy': ant.perm_entropy(column, normalize=True),
            'structual': structual_entropy(column),
        })

    # Explicit columns keep the frame well-formed even for an empty selection.
    df_entropy = pd.DataFrame(
        entropy_list, columns=['col', 'timothy', 'antropy', 'structual']
    )
    # sort_values returns a new frame; the result has to be kept, otherwise
    # the sort has no effect.
    return df_entropy.sort_values(['antropy'], ascending=False)




## RSI entropy

In [110]:
# Imported once for the whole cell instead of on every loop iteration.
import talib as ta

# Columns compared in this section: the RSI columns read from the files
# (RSI_20 / RSI_25) and the TA-Lib RSI computed below (RSI_20_ta / RSI_25_ta).
# The selection is the same for every file, so it is defined outside the loop.
indicator_col = ['RSI_20','RSI_25','RSI_20_ta','RSI_25_ta']

# One entropy table per entry of list_of_file, in the same order.
entropy_df_list = []

for file in list_of_file:
    df = pd.read_csv(f'../data/{file}')
    df['Date'] = pd.to_datetime(df['Date'], format = '%Y%m%d')
    df = df.set_index('Date')

    # Open-to-open returns over 1..29 periods (for trading with open).
    for n in range(1, 30):
        name = 'ret' + str(n)
        df[name] = df['Open'].pct_change(periods=n)

    # Next-period open-to-open return; the last row has no successor and is set to 0.
    df['retFut1'] = df['Open'].pct_change(1).shift(-1).fillna(0)

    # TA-Lib RSI on the Open price.
    open_prices = np.array(df['Open'])
    df['RSI_20_ta'] = ta.RSI(open_prices, timeperiod = 20)
    df['RSI_25_ta'] = ta.RSI(open_prices, timeperiod = 25)

    df_entropy = product_entropy_df(df, indicator_col)
    # Rank 1 = largest entropy for the respective measure.
    for measure in ('timothy', 'antropy', 'structual'):
        df_entropy[f'{measure}_rank'] = df_entropy[measure].rank(ascending=False)
    entropy_df_list.append(df_entropy)

In [111]:
# Look the RUT file up by name instead of by a fixed position: the order of
# list_of_file comes from the directory listing and is not guaranteed.
# entropy_df_list holds one table per file in the same order as list_of_file.
rut_idx = list_of_file.index('RUT_full.csv')
print(list_of_file[rut_idx])
entropy_df_list[rut_idx]

RUT_full.csv


Unnamed: 0,col,timothy,antropy,structual,timothy_rank,antropy_rank,structual_rank
0,RSI_20,2.552644,0.938986,10.833812,3.0,4.0,3.0
1,RSI_25,2.508748,0.93909,10.83315,4.0,3.0,4.0
2,RSI_20_ta,2.709189,0.961339,11.52955,1.0,2.0,1.0
3,RSI_25_ta,2.669462,0.961476,11.520395,2.0,1.0,2.0


In [112]:
# Print every per-file table in which RSI_20 or RSI_25 holds rank 1
# on at least one of the three entropy measures.
rank_keys = ('timothy_rank', 'antropy_rank', 'structual_rank')

for ent in entropy_df_list:
    is_file_rsi = ent['col'].isin(['RSI_20','RSI_25'])
    temp = ent[is_file_rsi].to_dict('series')

    if any(1 in temp[key].values for key in rank_keys):
        print(ent)

         col   timothy   antropy  structual  timothy_rank  antropy_rank  \
0     RSI_20  2.552644  0.938986  10.833812           1.0           4.0   
1     RSI_25  2.508748  0.939090  10.833150           2.0           3.0   
2  RSI_20_ta  2.500119  0.958062  11.529550           3.0           2.0   
3  RSI_25_ta  2.456400  0.958279  11.517746           4.0           1.0   

   structual_rank  
0             3.0  
1             4.0  
2             1.0  
3             2.0  


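Across all files, only one table ranks a Timothy RSI column (`RSI_20` / `RSI_25`) first on any of the entropy measures; in every other file the TA-Lib RSI columns (`RSI_20_ta` / `RSI_25_ta`) have the larger entropy.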

# Entropy on one file RUT

In [114]:
import talib as ta

# Indicator columns analysed for the RUT file.
indicator_col = [
    'RSI_25', 'RSI_20_ta', 'CMMA_10_252', 'MACD_10_100_5',
    'MADIFF_10_100', 'MADIFF_10_100_0', 'LINTRND_10',
    'FTI_BP', 'FTI_LP', 'RSI_20', 'RSI_25_ta', 'STO_20_1', 'FTI_BF', 'PR_INT_0',
    'DT_RSI_2_20', 'ENT_4_16', 'ENT_2_10', 'PR_INT_20',
]

# Load the file and index it by trading date.
df = pd.read_csv('../data/RUT_full.csv')
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d')
df = df.set_index('Date')

# Open-to-open returns over 1..29 periods (for trading with open).
for n in range(1, 30):
    name = f'ret{n}'
    df[name] = df['Open'].pct_change(periods=n)

# Next-period open-to-open return; the last row has no successor and is set to 0.
df['retFut1'] = df['Open'].pct_change(1).shift(-1).fillna(0)

# TA-Lib RSI on the Open price.
open_prices = np.array(df['Open'])
df['RSI_20_ta'] = ta.RSI(open_prices, timeperiod=20)
df['RSI_25_ta'] = ta.RSI(open_prices, timeperiod=25)

# Entropy table plus one rank column per measure (rank 1 = largest entropy).
df_entropy = product_entropy_df(df, indicator_col)
for measure in ('timothy', 'antropy', 'structual'):
    df_entropy[f'{measure}_rank'] = df_entropy[measure].rank(ascending=False)

df_entropy.sort_values(['timothy'], ascending=False)



Unnamed: 0,col,timothy,antropy,structual,timothy_rank,antropy_rank,structual_rank
13,PR_INT_0,3.303025,0.991735,10.828016,1.0,1.0,13.0
16,ENT_2_10,3.269491,0.724362,5.715922,2.0,14.0,17.0
11,STO_20_1,3.032049,0.820284,10.835136,3.0,12.0,7.0
15,ENT_4_16,3.028723,0.824092,10.489674,4.0,11.0,16.0
5,MADIFF_10_100_0,2.955412,0.656678,10.835798,5.0,16.0,4.5
8,FTI_LP,2.95487,0.714244,10.835798,6.0,15.0,4.5
17,PR_INT_20,2.954636,0.95554,10.835798,7.0,4.0,4.5
4,MADIFF_10_100,2.934715,0.651896,10.835798,8.0,17.0,4.5
12,FTI_BF,2.858774,0.854505,10.809367,9.0,9.0,15.0
14,DT_RSI_2_20,2.791886,0.93606,10.834474,10.0,8.0,8.0
