In [None]:
import warnings, os
import datetime as dt
import itertools as it
from numpy import nan
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides pandas/numpy deprecation notices.
warnings.filterwarnings('ignore')
# Show all columns when displaying DataFrames (no max_columns cap) and
# leave the display width unset so pandas does not force a fixed width.
pd.set_option('display.width',       None)
pd.set_option('display.max_columns', None)

# Load

In [None]:
# Read the raw table from a path relative to the notebook's working
# directory; the 'datetime' column is parsed into timestamps on load.
Raw = pd.read_csv('Storage/Raw.csv', parse_dates=['datetime'])
Raw.head()

In [None]:
def QUERY_SELECT(Raw, SYMBOLS, TFRAMES):
    """Return the rows of `Raw` matching the requested symbols and timeframes.

    Parameters
    ----------
    Raw : DataFrame with at least the columns 'symbol' and 'tf'.
    SYMBOLS : collection of values accepted in the 'symbol' column.
    TFRAMES : collection of values accepted in the 'tf' column.

    Returns
    -------
    A new DataFrame holding only the rows where both columns match,
    renumbered with a fresh 0..n-1 index (the old index is discarded).
    """
    symbol_match = Raw['symbol'].isin(SYMBOLS)
    tframe_match = Raw['tf'].isin(TFRAMES)
    selected = Raw.loc[symbol_match & tframe_match]
    return selected.reset_index(drop=True)

# Restrict the raw table to the WIN$ symbol and the listed timeframes,
# then preview the first rows of the selection.
Query = QUERY_SELECT(Raw, SYMBOLS=['WIN$'], TFRAMES=['M5','M15','H1','H3','D1','W1'])
Query.head()

In [None]:
def FORMAT_SOURCE(Raw):
    """Build the analysis frame from the raw lower-case columns.

    Columns are copied from `Raw` under capitalised names, the timestamp is
    additionally split into separate date and time columns, and two derived
    columns are appended:

    * 'Change' = Close - Open
    * 'HL'     = High  - Low

    'Price' and 'Close' are both taken from the raw 'close' column.
    The input frame is not modified; a new DataFrame is returned.
    """
    stamp = Raw['datetime']

    # Insertion order of the mapping fixes the column order of the result.
    Src = pd.DataFrame({
        'A':        Raw['a'],
        'Z':        Raw['z'],
        'Symbol':   Raw['symbol'],
        'TF':       Raw['tf'],
        'Datetime': stamp,
        'Date':     stamp.dt.date,
        'Time':     stamp.dt.time,
        'Ticks':    Raw['tick_volume'],
        'Volume':   Raw['real_volume'],
        'Price':    Raw['close'],
        'Open':     Raw['open'],
        'High':     Raw['high'],
        'Low':      Raw['low'],
        'Close':    Raw['close'],
    })

    # Derived measures are computed from the freshly renamed columns.
    Src['Change'] = Src['Close'] - Src['Open']
    Src['HL']     = Src['High']  - Src['Low']
    return Src

# Convert the filtered selection into the renamed/derived analysis frame
# and preview it.
Src = FORMAT_SOURCE(Raw=Query)
Src.head()

In [None]:
def CALCULATIONS(Src):
    """Return a copy of `Src` extended with three views of the 'Change' column.

    Added columns
    -------------
    'Change Abs' : absolute value of 'Change'.
    'Change Pos' : 'Change' where it is >= 0, NaN elsewhere.
    'Change Neg' : 'Change' where it is <  0, NaN elsewhere.

    Parameters
    ----------
    Src : DataFrame containing a numeric 'Change' column.

    Returns
    -------
    A new DataFrame; `Src` itself is left untouched.
    """
    change = Src['Change']

    # Take a real copy. Wrapping with pd.DataFrame(Src) does not copy and,
    # depending on the pandas version, can share the underlying data with
    # the input so that the columns added below also appear on `Src`.
    Calc = Src.copy()

    Calc['Change Abs'] = change.abs()
    # Series.where keeps values where the condition holds and fills NaN
    # elsewhere (a NaN change fails both comparisons and stays NaN).
    Calc['Change Pos'] = change.where(change >= 0)
    Calc['Change Neg'] = change.where(change <  0)
    return Calc

# Add the absolute / non-negative / negative change columns and preview.
Calc = CALCULATIONS(Src)
Calc.head()

# Stats

In [29]:
# Summary statistics of the signed candle change per (Symbol, TF),
# keeping groups in order of first appearance and showing whole numbers.
(
    Calc
    .groupby(['Symbol', 'TF'], sort=False)[['Change']]
    .describe()
    .round(0)
    .astype(int)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Change,Change,Change,Change,Change,Change,Change,Change
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Symbol,TF,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
WIN$,M5,99999,0,142,-2670,-67,0,68,3070
WIN$,M15,46083,0,273,-2962,-129,0,128,5312
WIN$,H1,11857,-2,540,-4266,-261,5,269,5561
WIN$,D1,1245,-19,1709,-7641,-1083,65,1020,5768


In [30]:
# Summary statistics of the absolute candle change per (Symbol, TF),
# keeping groups in order of first appearance and showing whole numbers.
(
    Calc
    .groupby(['Symbol', 'TF'], sort=False)[['Change Abs']]
    .describe()
    .round(0)
    .astype(int)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Change Abs,Change Abs,Change Abs,Change Abs,Change Abs,Change Abs,Change Abs,Change Abs
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Symbol,TF,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
WIN$,M5,99999,98,103,0,30,68,130,3070
WIN$,M15,46083,188,197,0,58,129,252,5312
WIN$,H1,11857,379,386,0,114,265,508,5561
WIN$,D1,1245,1326,1079,0,505,1049,1843,7641


In [31]:
# Summary statistics of the non-negative candle changes per (Symbol, TF);
# NaN entries (negative candles) are excluded by describe().
(
    Calc
    .groupby(['Symbol', 'TF'], sort=False)[['Change Pos']]
    .describe()
    .round(0)
    .astype(int)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Change Pos,Change Pos,Change Pos,Change Pos,Change Pos,Change Pos,Change Pos,Change Pos
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Symbol,TF,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
WIN$,M5,51535,95,104,0,28,65,126,3070
WIN$,M15,23425,185,199,0,54,126,248,5312
WIN$,H1,6020,371,382,0,110,264,499,5561
WIN$,D1,645,1261,1022,0,453,983,1840,5768


In [32]:
# Summary statistics of the negative candle changes per (Symbol, TF);
# NaN entries (non-negative candles) are excluded by describe().
(
    Calc
    .groupby(['Symbol', 'TF'], sort=False)[['Change Neg']]
    .describe()
    .round(0)
    .astype(int)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Change Neg,Change Neg,Change Neg,Change Neg,Change Neg,Change Neg,Change Neg,Change Neg
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Symbol,TF,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
WIN$,M5,48464,-101,103,-2670,-134,-70,-33,-5
WIN$,M15,22658,-192,196,-2962,-255,-132,-60,-5
WIN$,H1,5837,-387,389,-4266,-518,-266,-118,-5
WIN$,D1,600,-1396,1133,-7641,-1854,-1115,-576,-6


In [33]:
# Summary statistics of the High - Low range per (Symbol, TF),
# keeping groups in order of first appearance and showing whole numbers.
(
    Calc
    .groupby(['Symbol', 'TF'], sort=False)[['HL']]
    .describe()
    .round(0)
    .astype(int)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,HL,HL,HL,HL,HL,HL,HL,HL
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Symbol,TF,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
WIN$,M5,99999,202,131,0,115,169,252,3565
WIN$,M15,46083,392,251,0,221,331,494,6275
WIN$,H1,11857,773,482,75,439,660,989,7753
WIN$,D1,1245,2547,1155,593,1708,2325,3123,8487


In [None]:
# Deliberate hard stop: raising here aborts a "Run All" at this cell, so
# the cells below only execute when run manually.
# NOTE(review): presumably a guard for the chart section — confirm intent.
raise Exception('STOP')

# Chart

In [None]:
# One figure per symbol: overlaid step histograms of the candle change
# ('Change' column) for each timeframe, drawn on a log-scaled count axis.
for SYMBOL, Asset in Src.groupby('Symbol', sort=False):

    # Explicit figure/axes instead of the implicit pyplot state machine.
    fig, ax = plt.subplots()
    ax.set_title(f'{SYMBOL} - Candle Size Distribution', fontweight='bold')

    for TF, Data in Asset.groupby('TF', sort=False):
        # Drop NaN first: a NaN value makes the automatic bin range
        # non-finite and the histogram call fails.
        ax.hist(Data['Change'].dropna(), bins=100, label=TF, histtype='step')

    ax.set_ylabel('Frequency')
    ax.set_xlabel('Candle Size')

    # Log scale keeps the rare large candles visible; ScalarFormatter prints
    # tick labels as plain numbers instead of powers of ten.
    ax.set_yscale('log', base=10)
    ax.yaxis.set_major_formatter(mpl.ticker.ScalarFormatter())

    ax.legend()
    fig.tight_layout()
    plt.show()