# Ch04 4.5.3 Numerical Example

In [21]:
import numpy as np 
import pandas as pd 

import matplotlib as mpl
import matplotlib.pyplot as plt

import datetime as dt
from adv_finance import bars, labeling, utils, sampling

from matplotlib.patches import Rectangle
import  matplotlib.dates as mdates

from tqdm import tqdm
from scipy.sparse import coo_matrix, csr_matrix

In [66]:
def get_ind_matrix(bar_ix, t1, progress=True):
    """Build the sparse bar-by-label indicator matrix.

    Column ``i`` belongs to the ``i``-th entry of ``t1`` and holds a 1 in every
    row from that entry's index (start) through its value (end), inclusive;
    every other cell is 0.  Start and end are treated as positional row
    numbers with ordinary Python slice semantics, so a span that runs past the
    last row is clipped to the matrix height.

    Parameters
    ----------
    bar_ix : sized collection
        Only ``len(bar_ix)`` is used; it fixes the number of rows.
    t1 : pandas.Series
        One entry per label: index = first row of the span, value = last row
        of the span (inclusive).
    progress : bool, default True
        When True the iteration over ``t1`` is wrapped in a ``tqdm`` progress
        bar.  Pass False to build the matrix silently.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix of shape ``(len(bar_ix), len(t1))`` with dtype int8.

    Raises
    ------
    Whatever the underlying operations raise (e.g. ``TypeError`` for inputs
    without a length).  Errors are deliberately not swallowed, so a failure
    can never yield a partially filled matrix.
    """
    n_row = len(bar_ix)
    n_col = len(t1)

    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    spans = t1.items()
    if progress:
        spans = tqdm(spans, total=n_col, position=0)

    # Collect coordinates and build the matrix once: assigning into a
    # csr_matrix cell by cell changes its sparsity structure on every write.
    all_rows = range(n_row)
    row_idx, col_idx = [], []
    for col, (start, end) in enumerate(spans):
        rows = all_rows[start:end + 1]  # same clipping rules as mat[start:end+1]
        row_idx.extend(rows)
        col_idx.extend([col] * len(rows))

    data = np.ones(len(row_idx), dtype='b')
    coords = (np.asarray(row_idx, dtype=np.intp),
              np.asarray(col_idx, dtype=np.intp))
    return coo_matrix((data, coords), shape=(n_row, n_col), dtype='b').tocsr()


def get_avg_uniqueness(ind_mat):
    """Average uniqueness of the last column of a sparse indicator matrix.

    The last column is the candidate.  For every row, the concurrency is the
    row sum of all preceding columns plus one (the candidate itself).  The
    candidate's uniqueness in a row is its indicator divided by that
    concurrency, and the result is the sum of those uniqueness values divided
    by the number of rows the candidate occupies.

    Parameters
    ----------
    ind_mat : scipy.sparse matrix
        0/1 indicator matrix with at least one column.

    Returns
    -------
    float
        Average uniqueness of the last column, or NaN when the last column is
        entirely zero (the average is undefined for an empty span).

    Raises
    ------
    ValueError
        If ``ind_mat`` has no columns.
    """
    n_rows, n_cols = ind_mat.shape
    if n_cols == 0:
        raise ValueError("ind_mat must have at least one column")

    candidate = ind_mat[:, -1]
    n_active = candidate.sum()  # number of rows spanned by the candidate
    if n_active == 0:
        return np.nan

    # Row sums of the other columns, taken directly on the sparse matrix so
    # the full reduced matrix is never densified.
    if n_cols > 1:
        others = np.asarray(ind_mat[:, :-1].sum(axis=1)).ravel()
    else:
        others = np.zeros(n_rows, dtype=int)
    concurrency = others + 1

    # Only the single candidate column is densified; rows it does not cover
    # contribute 0 to the sum.
    flags = np.asarray(candidate.todense()).ravel()
    return (flags / concurrency).sum() / n_active




In [67]:
# Toy labels for the numerical example: each entry maps a label's first bar
# (index) to its last bar (value).
t1 = pd.Series({0: 2, 2: 3, 4: 5})
barIx = range(1 + t1.max())  # one bar per row, from 0 up to the latest label end
indM = get_ind_matrix(bar_ix=barIx, t1=t1)
indM

  if __name__ == '__main__':
3it [00:00, 713.68it/s]


<6x3 sparse matrix of type '<class 'numpy.int8'>'
	with 7 stored elements in Compressed Sparse Row format>

## phi = [1]

In [64]:
phi = [1]        # columns drawn so far: phi = [1] corresponds to column 1, feature 2

# Adapted from SNIPPET 4.5 - function seqBootstrap(indM,sLength=None):
# append each candidate column i to the columns already drawn and measure the
# candidate's average uniqueness within that reduced matrix.
# The Series is built in one step with an explicit dtype; a dtype-less
# pd.Series() grown through .loc is deprecated and needlessly slow.
avg_u = pd.Series(
    [get_avg_uniqueness(indM[:, phi + [i]]) for i in range(indM.shape[1])],
    dtype=float,
)

print('Average Uniqueness: \n',avg_u)

# Normalise the uniqueness values so they sum to one.
prob2 = avg_u/avg_u.sum()
print('Feature draw probabilities: \n', prob2)


Average Uniqueness: 
 0    0.833333
1    0.500000
2    1.000000
dtype: float64
Feature draw probabilities: 
 0    0.357143
1    0.214286
2    0.428571
dtype: float64


## phi = [1, 2]

In [65]:
phi = [1,2]        # columns drawn so far: phi = [1,2] corresponds to columns 1 and 2

# Adapted from SNIPPET 4.5 - function seqBootstrap(indM,sLength=None):
# append each candidate column i to the columns already drawn and measure the
# candidate's average uniqueness within that reduced matrix.
# The Series is built in one step with an explicit dtype; a dtype-less
# pd.Series() grown through .loc is deprecated and needlessly slow.
avg_u = pd.Series(
    [get_avg_uniqueness(indM[:, phi + [i]]) for i in range(indM.shape[1])],
    dtype=float,
)

print('Average Uniqueness: \n',avg_u)

# Normalise the uniqueness values so they sum to one.
prob2 = avg_u/avg_u.sum()
print('Feature draw probabilities: \n', prob2)


Average Uniqueness: 
 0    0.833333
1    0.500000
2    0.500000
dtype: float64
Feature draw probabilities: 
 0    0.454545
1    0.272727
2    0.272727
dtype: float64


# APPENDIX