In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import yfinance as yf

from mlfinlab.labeling.matrix_flags import MatrixFlagLabels

In [2]:
# Build a small sample series for the walkthrough: MSFT daily closing prices for early 2020.
msft = yf.Ticker("MSFT")
# NOTE(review): the latest date in the outputs further down is 2020-02-19, so `end`
# appears to be exclusive — confirm against the yfinance documentation.
hist = msft.history(start='2020-1-1', end='2020-2-20')
# Keep only the 'Close' column; this Series is the input passed to MatrixFlagLabels below.
data1 = hist['Close']
data1.head()

Date
2020-01-02    159.74
2020-01-03    157.75
2020-01-06    158.16
2020-01-07    156.71
2020-01-08    159.21
Name: Close, dtype: float64

In [3]:
# Number of observations in the series. The labeling window used below must not
# exceed this length (a too-large window is rejected, as demonstrated further down).
data1.shape

(33,)

The papers use a 60- or 120-day window. I use a 30-day window here for easier readability. The default template is the one in the paper, though users may define their own template with the `set_template()` method.

In [4]:
# Typical usage: build the labeler from a price series and a window length,
# then call apply_labeling_matrix() to get one weight per labeled date.
# With 33 observations and window=30, only 3 dates receive a label.
Flags = MatrixFlagLabels(data1, window=30)
labels = Flags.apply_labeling_matrix()
labels

Date
2020-02-14   -1.666667
2020-02-18   -1.166667
2020-02-19   -1.833333
dtype: float64

Now let's look at this step by step.

In [5]:
# Re-create the labeler so the step-by-step cells start from a known object.
Flags = MatrixFlagLabels(data1, window=30)

# _transform_data builds the weights matrix for a single day, using that day and the
# preceding `window` days of prices.
matrix_day30 = Flags._transform_data(row_num=30)
matrix_day31 = Flags._transform_data(row_num=31)

# In the matrix printed for row 30, every column sums to 1: each tenth of the 30-day
# window holds 3 days, so each day appears to contribute 1/3 to its column.
# One print call with a three-newline separator emits the same text as printing the
# first matrix, then '\n', then the second matrix.
print(matrix_day30, matrix_day31, sep='\n\n\n')

          0         1         2         3         4         5    6    7  \
0  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.0  0.0   
1  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.0  0.0   
2  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.0  1.0   
3  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  1.0  0.0   
4  0.000000  0.000000  0.000000  0.333333  0.666667  0.000000  0.0  0.0   
5  0.000000  0.000000  0.000000  0.333333  0.333333  0.333333  0.0  0.0   
6  0.000000  0.000000  0.333333  0.333333  0.000000  0.333333  0.0  0.0   
7  0.000000  0.333333  0.333333  0.000000  0.000000  0.333333  0.0  0.0   
8  0.333333  0.333333  0.333333  0.000000  0.000000  0.000000  0.0  0.0   
9  0.666667  0.333333  0.000000  0.000000  0.000000  0.000000  0.0  0.0   

          8         9  
0  0.333333  0.666667  
1  0.666667  0.333333  
2  0.000000  0.000000  
3  0.000000  0.000000  
4  0.000000  0.000000  
5  0.000000  0.000000  
6  0.0

In [6]:
# Score one day's matrix against the template. Per the original author's notes: multiply the
# matrix element-wise by the template, sum each column, then sum the column totals to get the
# overall weight for the day. A more positive weight means a better match with the bull flag template.
# The value obtained here for row 30 equals the first label (2020-02-14) returned by
# apply_labeling_matrix() earlier in the notebook.

Flags._apply_template_to_matrix(matrix_day30, Flags.template)

-1.666666666666667

Finally, the `apply_labeling_matrix` method, which is what the user would actually call, combines the two steps above and iterates
down the entire series, starting from the first possible point, the `self.window`-th index.

In [7]:
# Error handling: a window longer than the series is rejected with an AssertionError.
# The exception is the point of this cell, so it is caught and displayed instead of
# being left uncaught, which would make the notebook fail under "Restart & Run All".
short_series = data1.iloc[:20]  # explicit positional slice: the first 20 observations

try:
    # Both calls are inside the try block because the visible output does not show
    # whether the check fires in the constructor or in apply_labeling_matrix().
    test = MatrixFlagLabels(short_series, window=77)
    test.apply_labeling_matrix()
except AssertionError as err:
    print(f"{type(err).__name__}: {err}")

AssertionError: Window cannot be greater than length of data.