In [None]:
import warnings, os
import datetime as dt
import itertools as it
from numpy import nan
import numpy as np
import pandas as pd

# Silence every warning for the whole session.
# NOTE(review): this also hides numpy/pandas RuntimeWarnings (e.g. invalid
# NaN-to-int casts) — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
# Display options: no fixed print width and no cap on the number of columns shown.
pd.set_option('display.width',       None)
pd.set_option('display.max_columns', None)

# Load

In [None]:
# Read the raw CSV (path is relative to the notebook's working directory);
# the 'datetime' column is parsed into timestamps on load.
Raw = pd.read_csv('Storage/Raw.csv', parse_dates=['datetime'])
Raw.head()

In [None]:
def QUERY_SELECT(Raw, SYMBOLS, TFRAMES):
    """Keep the rows of ``Raw`` whose symbol AND timeframe are both selected.

    Parameters
    ----------
    Raw : pd.DataFrame
        Frame with at least the columns 'symbol' and 'tf'.
    SYMBOLS : list-like
        Accepted values for the 'symbol' column.
    TFRAMES : list-like
        Accepted values for the 'tf' column.

    Returns
    -------
    pd.DataFrame
        The matching rows, renumbered from 0 (the previous index is dropped).
    """
    symbol_ok = Raw['symbol'].isin(SYMBOLS)
    tframe_ok = Raw['tf'].isin(TFRAMES)
    selected = Raw.loc[symbol_ok & tframe_ok]
    return selected.reset_index(drop=True)

# Working set for the rest of the notebook: symbol 'WIN$' on the 'D1' timeframe
# (presumably daily bars — confirm against the data source).
Query = QUERY_SELECT(Raw, SYMBOLS=['WIN$'], TFRAMES=['D1'])
Query.head()

# Data

In [None]:
def FORMAT_SOURCE(Query):
    """Build the working frame ``Src`` from a raw query result.

    The raw lower-case columns are copied under their presentation names,
    then four derived columns are appended:

    * 'Change'    -- Close minus Open
    * 'Hilo'      -- High minus Low
    * 'Sign Bull' -- +1 where Change >= 0, NaN otherwise
    * 'Sign Bear' -- -1 where Change <  0, NaN otherwise

    Parameters
    ----------
    Query : pd.DataFrame
        Must provide 'a', 'z', 'symbol', 'tf', 'datetime', 'tick_volume',
        'real_volume', 'open', 'high', 'low' and 'close'.

    Returns
    -------
    pd.DataFrame
        New frame sharing ``Query``'s index; ``Query`` itself is left as is.
    """
    # Output column -> source column, listed in output order.
    # 'close' is deliberately exposed twice, as 'Close' and as 'Price'.
    column_sources = {
        'A':        'a',
        'Z':        'z',
        'Symbol':   'symbol',
        'TF':       'tf',
        'Datetime': 'datetime',
        'Ticks':    'tick_volume',
        'Volume':   'real_volume',
        'Open':     'open',
        'High':     'high',
        'Low':      'low',
        'Close':    'close',
        'Price':    'close',
    }

    def _bull_flag(change):
        # NaN compares False, so a missing change is flagged NaN as well.
        if change >= 0:
            return +1
        return nan

    def _bear_flag(change):
        if change < 0:
            return -1
        return nan

    Src = pd.DataFrame()
    for target, source in column_sources.items():
        Src[target] = Query[source]

    Src['Change'] = Src['Close'] - Src['Open']
    Src['Hilo'] = Src['High'] - Src['Low']

    Src['Sign Bull'] = Src['Change'].apply(_bull_flag)
    Src['Sign Bear'] = Src['Change'].apply(_bear_flag)
    return Src

# Build the working frame from the selected rows and preview it.
Src = FORMAT_SOURCE(Query)
Src.head()

In [None]:
def CALCULATIONS(Src, bins_chg=None, bins_hl=None, futs=(1, 2)):
    """Derive magnitude, threshold-flag and look-ahead columns from ``Src``.

    Parameters
    ----------
    Src : pd.DataFrame
        Needs the columns 'Change', 'Hilo', 'Sign Bull' and 'Sign Bear'.
        It is not modified.
    bins_chg : iterable of numbers, optional
        Thresholds for the boolean ``'Chg Abs < {x}'`` flags.
        Defaults to the levels 100 ... 5500 listed below.
    bins_hl : iterable of numbers, optional
        Thresholds for the boolean ``'Hilo < {x}'`` flags.
        Defaults to 1000 ... 8000 in steps of 500.
    futs : iterable of int, optional
        Look-ahead distances, in rows, for the ``'... +{x}'`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``Src`` with the new columns appended in this order:
        'Chg Abs', 'Chg Pos', 'Chg Neg', 'HL Pos', 'HL Neg', every
        'Chg Abs < x' flag, every 'Hilo < x' flag, then the look-ahead
        columns. The flag columns are kept contiguous and ordered, so a
        label slice such as ``Calc.loc[:, 'Chg Abs < 100':'Chg Abs < 5500']``
        selects exactly the flags in between.

    Notes
    -----
    Look-ahead columns are produced with ``shift(-x)`` over the whole frame:
    the last ``x`` rows are NaN.
    NOTE(review): if ``Src`` ever holds more than one symbol/timeframe, the
    shift would pair rows across series — group before shifting in that case.
    """
    if bins_chg is None:
        bins_chg = (100, 200, 500, 800, 1000, 1200, 1500, 1800, 2000, 2200,
                    2500, 2800, 3000, 3200, 3500, 3800, 4000, 4200, 4500,
                    4800, 5000, 5200, 5500)
    if bins_hl is None:
        bins_hl = (1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000,
                   5500, 6000, 6500, 7000, 7500, 8000)

    # Work on an independent copy. pd.DataFrame(Src) does not guarantee one:
    # depending on the pandas version it can share storage with Src, so the
    # columns added below could leak back into the caller's frame.
    Calc = Src.copy()

    Calc['Chg Abs'] = Calc['Change'].abs()
    # The sign columns hold +/-1 or NaN, so these keep the magnitude on
    # matching rows (negated for the bear side) and NaN elsewhere.
    Calc['Chg Pos'] = Calc['Chg Abs'] * Calc['Sign Bull']
    Calc['Chg Neg'] = Calc['Chg Abs'] * Calc['Sign Bear']

    Calc['HL Pos'] = Calc['Hilo'] * Calc['Sign Bull']
    Calc['HL Neg'] = Calc['Hilo'] * Calc['Sign Bear']

    for x in bins_chg:
        Calc[f'Chg Abs < {x}'] = (Calc['Chg Abs'] < x)
    for x in bins_hl:
        Calc[f'Hilo < {x}'] = (Calc['Hilo'] < x)

    # (output prefix, source column) — 'Change' is exposed as 'Chg +x'.
    lookahead_sources = [
        ('Chg',     'Change'),
        ('Chg Abs', 'Chg Abs'),
        ('Chg Pos', 'Chg Pos'),
        ('Chg Neg', 'Chg Neg'),
        ('Hilo',    'Hilo'),
        ('HL Pos',  'HL Pos'),
        ('HL Neg',  'HL Neg'),
    ]
    futs = list(futs)
    for prefix, source in lookahead_sources:
        for x in futs:
            Calc[f'{prefix} +{x}'] = Calc[source].shift(-x)
    return Calc

# Add the derived columns (magnitudes, threshold flags, next-row values) and preview.
Calc = CALCULATIONS(Src)
Calc.head()

# Stats

In [98]:
def MARKOV_CHAIN(Calc, when, _next):
    """Summarise ``_next`` columns on the rows where each ``when`` flag holds.

    Despite the name, no transition matrix is built: for every flag column
    ``x`` in ``when`` the rows with ``Calc[x] == 1`` are selected, and for
    every column ``y`` in ``_next`` the distribution of ``y`` on those rows
    is described.

    Parameters
    ----------
    Calc : pd.DataFrame
        Frame holding both the flag columns and the target columns.
    when : iterable of column labels
        Boolean / 0-1 condition columns.
    _next : iterable of column labels
        Numeric columns to summarise.

    Returns
    -------
    pd.DataFrame
        One row per (flag, target) pair with the columns 'Previous', 'Next',
        'Count' (non-missing values), 'Avg', 'Max', 'Q3', 'Med', 'Q1', 'Min'.
        Statistics are rounded to the nearest integer. When a selection has
        no non-missing values its statistics are NaN ('Count' is 0).
    """
    columns = ['Previous', 'Next', 'Count', 'Avg', 'Max', 'Q3', 'Med', 'Q1', 'Min']

    def _rounded(value):
        # Casting NaN to int does not fail: it yields an arbitrary integer
        # (typically the most negative int64). Keep NaN so an empty
        # selection is visible in the table instead of looking like data.
        if pd.isna(value):
            return np.nan
        return int(np.round(value, 0))

    pipe = []
    for x in when:
        Df = Calc[Calc[x] == 1]

        for y in _next:
            values = Df[y]
            pipe.append({
                'Previous': x,
                'Next':     y,
                'Count':    values.count(),
                'Avg':      _rounded(values.mean()),
                'Max':      _rounded(values.max()),
                'Q3':       _rounded(values.quantile(.75)),
                'Med':      _rounded(values.median()),
                'Q1':       _rounded(values.quantile(.25)),
                'Min':      _rounded(values.min()),
            })
    # Passing the column list keeps the header even when nothing was requested.
    return pd.DataFrame(pipe, columns=columns)

In [99]:
# Next-row |Change| ('Chg Abs +1') on rows whose own |Change| is under each threshold.
# The label slice relies on the 'Chg Abs < x' columns being contiguous and ordered in Calc.
MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Chg Abs < {100}':f'Chg Abs < {5500}'].columns, _next=['Chg Abs +1'])

Unnamed: 0,Previous,Next,Count,Avg,Max,Q3,Med,Q1,Min
0,Chg Abs < 100,Chg Abs +1,61,1207,3536,1774,1030,333,74
1,Chg Abs < 200,Chg Abs +1,130,1247,5178,1691,1102,437,6
2,Chg Abs < 500,Chg Abs +1,307,1315,5678,1772,1100,550,6
3,Chg Abs < 800,Chg Abs +1,465,1331,5935,1840,1104,540,0
4,Chg Abs < 1000,Chg Abs +1,597,1308,5935,1804,1075,507,0
5,Chg Abs < 1200,Chg Abs +1,688,1286,5935,1800,1043,498,0
6,Chg Abs < 1500,Chg Abs +1,815,1311,5935,1829,1045,506,0
7,Chg Abs < 1800,Chg Abs +1,919,1301,5935,1836,1045,500,0
8,Chg Abs < 2000,Chg Abs +1,974,1304,5935,1831,1047,503,0
9,Chg Abs < 2200,Chg Abs +1,1015,1293,5935,1814,1045,496,0


In [100]:
# Next-row high-low range ('Hilo +1') on rows whose own |Change| is under each threshold.
# The label slice relies on the 'Chg Abs < x' columns being contiguous and ordered in Calc.
MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Chg Abs < {100}':f'Chg Abs < {5500}'].columns, _next=['Hilo +1'])

Unnamed: 0,Previous,Next,Count,Avg,Max,Q3,Med,Q1,Min
0,Chg Abs < 100,Hilo +1,61,2271,5258,2829,1989,1646,1006
1,Chg Abs < 200,Hilo +1,130,2406,7479,2874,2182,1663,1006
2,Chg Abs < 500,Hilo +1,307,2420,7479,2873,2160,1660,752
3,Chg Abs < 800,Hilo +1,465,2440,7479,2980,2215,1662,752
4,Chg Abs < 1000,Hilo +1,597,2418,7479,2902,2219,1660,655
5,Chg Abs < 1200,Hilo +1,688,2415,7479,2916,2216,1645,655
6,Chg Abs < 1500,Hilo +1,815,2447,7864,2983,2225,1661,593
7,Chg Abs < 1800,Hilo +1,919,2454,7864,2994,2240,1665,593
8,Chg Abs < 2000,Hilo +1,974,2447,7864,2988,2234,1660,593
9,Chg Abs < 2200,Hilo +1,1015,2441,7864,2977,2231,1658,593


In [101]:
# Next-row |Change| ('Chg Abs +1') on rows whose own high-low range is under each threshold.
# The label slice relies on the 'Hilo < x' columns being contiguous and ordered in Calc.
MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Hilo < {1000}':f'Hilo < {8000}'].columns, _next=['Chg Abs +1'])

Unnamed: 0,Previous,Next,Count,Avg,Max,Q3,Med,Q1,Min
0,Hilo < 1000,Chg Abs +1,29,1051,3536,1475,1030,333,17
1,Hilo < 1500,Chg Abs +1,200,1039,5678,1488,908,327,0
2,Hilo < 2000,Chg Abs +1,466,1138,5678,1619,964,436,0
3,Hilo < 2500,Chg Abs +1,703,1189,5935,1666,980,446,0
4,Hilo < 3000,Chg Abs +1,894,1228,5935,1689,1000,472,0
5,Hilo < 3500,Chg Abs +1,1032,1251,5935,1730,1006,476,0
6,Hilo < 4000,Chg Abs +1,1101,1258,5935,1751,1006,480,0
7,Hilo < 4500,Chg Abs +1,1154,1287,5935,1816,1036,488,0
8,Hilo < 5000,Chg Abs +1,1195,1301,7641,1818,1043,491,0
9,Hilo < 5500,Chg Abs +1,1219,1311,7641,1828,1045,500,0


In [102]:
# Next-row high-low range ('Hilo +1') on rows whose own high-low range is under each threshold.
# The label slice relies on the 'Hilo < x' columns being contiguous and ordered in Calc.
MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Hilo < {1000}':f'Hilo < {8000}'].columns, _next=['Hilo +1'])

Unnamed: 0,Previous,Next,Count,Avg,Max,Q3,Med,Q1,Min
0,Hilo < 1000,Hilo +1,29,1902,3969,2135,1833,1376,752
1,Hilo < 1500,Hilo +1,200,1929,5976,2301,1719,1379,655
2,Hilo < 2000,Hilo +1,466,2106,5976,2582,1900,1492,593
3,Hilo < 2500,Hilo +1,703,2207,7278,2664,2038,1550,593
4,Hilo < 3000,Hilo +1,894,2290,7278,2748,2109,1600,593
5,Hilo < 3500,Hilo +1,1032,2362,7731,2871,2190,1641,593
6,Hilo < 4000,Hilo +1,1101,2393,7864,2897,2211,1657,593
7,Hilo < 4500,Hilo +1,1154,2442,7864,2987,2242,1671,593
8,Hilo < 5000,Hilo +1,1195,2482,8487,3024,2259,1688,593
9,Hilo < 5500,Hilo +1,1219,2504,8487,3057,2289,1694,593


# Snippets

In [103]:
def FORMAT_SOURCE(Query):
    """Build the working frame ``Src`` from a raw query result.

    This redefinition replaces any earlier ``FORMAT_SOURCE`` in the kernel,
    so it has to be complete: the raw columns are copied from ``Query``
    before the derived columns are computed (reading 'Close'/'Open' from a
    freshly created empty frame raises ``KeyError``).

    Derived columns:

    * 'Change'    -- Close minus Open
    * 'Hilo'      -- High minus Low
    * 'Sign Bull' -- +1 where Change >= 0, NaN otherwise
    * 'Sign Bear' -- -1 where Change <  0, NaN otherwise

    Parameters
    ----------
    Query : pd.DataFrame
        Must provide 'a', 'z', 'symbol', 'tf', 'datetime', 'tick_volume',
        'real_volume', 'open', 'high', 'low' and 'close'.

    Returns
    -------
    pd.DataFrame
        New frame sharing ``Query``'s index; ``Query`` is not modified.
    """
    Src = pd.DataFrame()
    Src[['A','Z']]                      = Query[['a','z']]
    Src[['Symbol','TF']]                = Query[['symbol','tf']]
    Src['Datetime']                     = Query['datetime']
    Src[['Ticks','Volume']]             = Query[['tick_volume','real_volume']]
    Src[['Open','High','Low','Close']]  = Query[['open','high','low','close']]
    # 'close' is deliberately exposed a second time under the name 'Price'.
    Src[['Price']]                      = Query[['close']]

    Src['Change']       = (Src['Close'] - Src['Open'])
    Src['Hilo']         = (Src['High']  - Src['Low'])
    # NaN compares False on both sides, so a missing change is NaN in both flags.
    Src['Sign Bull']    = Src['Change'].apply(lambda x: +1 if x >= 0 else nan)
    Src['Sign Bear']    = Src['Change'].apply(lambda x: -1 if x <  0 else nan)
    return Src


def CALCULATIONS(Src, bins_chg=None, bins_hl=None, futs=(1, 2)):
    """Derive magnitude, threshold-flag and look-ahead columns from ``Src``.

    This redefinition replaces any earlier ``CALCULATIONS`` in the kernel,
    so the bin lists are spelled out in full: a literal ``...`` inside a list
    is the ``Ellipsis`` object, and comparing a numeric column with it raises
    ``TypeError``.

    Parameters
    ----------
    Src : pd.DataFrame
        Needs the columns 'Change', 'Hilo', 'Sign Bull' and 'Sign Bear'.
        It is not modified.
    bins_chg : iterable of numbers, optional
        Thresholds for the boolean ``'Chg Abs < {x}'`` flags.
        Defaults to the levels 100 ... 5500 listed below.
    bins_hl : iterable of numbers, optional
        Thresholds for the boolean ``'Hilo < {x}'`` flags.
        Defaults to 1000 ... 8000 in steps of 500.
    futs : iterable of int, optional
        Look-ahead distances, in rows, for the ``'... +{x}'`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``Src`` with the new columns appended in this order:
        'Chg Abs', 'Chg Pos', 'Chg Neg', 'HL Pos', 'HL Neg', every
        'Chg Abs < x' flag, every 'Hilo < x' flag, then the look-ahead
        columns (last ``x`` rows NaN, since they come from ``shift(-x)``).
    """
    if bins_chg is None:
        bins_chg = (100, 200, 500, 800, 1000, 1200, 1500, 1800, 2000, 2200,
                    2500, 2800, 3000, 3200, 3500, 3800, 4000, 4200, 4500,
                    4800, 5000, 5200, 5500)
    if bins_hl is None:
        bins_hl = (1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000,
                   5500, 6000, 6500, 7000, 7500, 8000)

    # Work on an independent copy. pd.DataFrame(Src) does not guarantee one:
    # depending on the pandas version it can share storage with Src, so the
    # columns added below could leak back into the caller's frame.
    Calc = Src.copy()

    Calc['Chg Abs'] = Calc['Change'].abs()
    # The sign columns hold +/-1 or NaN, so these keep the magnitude on
    # matching rows (negated for the bear side) and NaN elsewhere.
    Calc['Chg Pos'] = Calc['Chg Abs'] * Calc['Sign Bull']
    Calc['Chg Neg'] = Calc['Chg Abs'] * Calc['Sign Bear']

    Calc['HL Pos'] = Calc['Hilo'] * Calc['Sign Bull']
    Calc['HL Neg'] = Calc['Hilo'] * Calc['Sign Bear']

    for x in bins_chg:
        Calc[f'Chg Abs < {x}'] = (Calc['Chg Abs'] < x)
    for x in bins_hl:
        Calc[f'Hilo < {x}'] = (Calc['Hilo'] < x)

    # (output prefix, source column) — 'Change' is exposed as 'Chg +x'.
    lookahead_sources = [
        ('Chg',     'Change'),
        ('Chg Abs', 'Chg Abs'),
        ('Chg Pos', 'Chg Pos'),
        ('Chg Neg', 'Chg Neg'),
        ('Hilo',    'Hilo'),
        ('HL Pos',  'HL Pos'),
        ('HL Neg',  'HL Neg'),
    ]
    futs = list(futs)
    for prefix, source in lookahead_sources:
        for x in futs:
            Calc[f'{prefix} +{x}'] = Calc[source].shift(-x)
    return Calc

In [None]:
def MARKOV_CHAIN(Calc, when, _next):
    """Summarise ``_next`` columns on the rows where each ``when`` flag holds.

    Despite the name, no transition matrix is built: for every flag column
    ``x`` in ``when`` the rows with ``Calc[x] == 1`` are selected, and for
    every column ``y`` in ``_next`` the distribution of ``y`` on those rows
    is described.

    Parameters
    ----------
    Calc : pd.DataFrame
        Frame holding both the flag columns and the target columns.
    when : iterable of column labels
        Boolean / 0-1 condition columns.
    _next : iterable of column labels
        Numeric columns to summarise.

    Returns
    -------
    pd.DataFrame
        One row per (flag, target) pair with the columns 'Previous', 'Next',
        'Count' (non-missing values), 'Avg', 'Max', 'Q3', 'Med', 'Q1', 'Min'.
        Statistics are rounded to the nearest integer. When a selection has
        no non-missing values its statistics are NaN ('Count' is 0).
    """
    columns = ['Previous', 'Next', 'Count', 'Avg', 'Max', 'Q3', 'Med', 'Q1', 'Min']

    def _rounded(value):
        # Casting NaN to int does not fail: it yields an arbitrary integer
        # (typically the most negative int64). Keep NaN so an empty
        # selection is visible in the table instead of looking like data.
        if pd.isna(value):
            return np.nan
        return int(np.round(value, 0))

    pipe = []
    for x in when:
        Df = Calc[Calc[x] == 1]

        for y in _next:
            values = Df[y]
            pipe.append({
                'Previous': x,
                'Next':     y,
                'Count':    values.count(),
                'Avg':      _rounded(values.mean()),
                'Max':      _rounded(values.max()),
                'Q3':       _rounded(values.quantile(.75)),
                'Med':      _rounded(values.median()),
                'Q1':       _rounded(values.quantile(.25)),
                'Min':      _rounded(values.min()),
            })
    # Passing the column list keeps the header even when nothing was requested.
    return pd.DataFrame(pipe, columns=columns)


# A notebook cell renders only its last expression, so four bare calls would
# show just the final table and silently discard the other three. Stack the
# four summaries into one frame instead; the 'Previous' and 'Next' columns
# identify which condition/target pair each row belongs to.
pd.concat([
    MARKOV_CHAIN(Calc, when=Calc.loc[:,    f'Hilo < {1000}':   f'Hilo < {8000}'].columns, _next=['Hilo +1']),
    MARKOV_CHAIN(Calc, when=Calc.loc[:,    f'Hilo < {1000}':   f'Hilo < {8000}'].columns, _next=['Chg Abs +1']),
    MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Chg Abs < {100}' :f'Chg Abs < {5500}'].columns, _next=['Hilo +1']),
    MARKOV_CHAIN(Calc, when=Calc.loc[:, f'Chg Abs < {100}' :f'Chg Abs < {5500}'].columns, _next=['Chg Abs +1']),
], ignore_index=True)