In [1]:
# This allow jupiter to upload in real time externally modified code
%load_ext autoreload
%autoreload 2 

import sys
sys.path.append("..")
import os

import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
pd.options.display.float_format = '{:.5f}'.format




import numpy as np
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
from x_CLASSES.download_data import DownloadData
from tabulate import tabulate

# Classify price movements based on candlesticks statistics

This reseach comes from [this](https://www.forexfactory.com/thread/post/14707863#post14707863) post on ForexFactory. 

### Step 1: Calculations

#### Step 1.1: Higher price frame (1k ticks)

In [2]:
class PriceFrame():

    def __init__(self, start_date, end_date, timeframe, price_frame, csv_file_path):    
        self.timeframe = timeframe
        self.price_frame = price_frame
        self.csv_file_path = csv_file_path

        self.start_date = start_date
        self.end_date = end_date

        if self.start_date > self.end_date:
            raise SystemExit(f"Start date can't be after end date.")
        
        self.dataframe, self.avg_oc, self.avg_wk = self.getDataframe()

    def getDataframe(self):
        df = pd.DataFrame()

        if os.path.exists(self.csv_file_path):
            df = pd.read_csv(self.csv_file_path)
        else:
            df = DownloadData('GBP/USD', self.start_date,self.end_date,self.timeframe).getData()
            
            data_folder_path = f"{os.getcwd()}/x_DATA"
            if not os.path.exists(data_folder_path):
                os.makedirs(data_folder_path)

            df.to_csv(self.csv_file_path)

        if "Ask" in df.columns:
            df = df.drop("Ask", axis=1)

        df["Close"] = df["Bid"].copy()
        df["Open"] = df["Bid"].shift(self.price_frame)  # Shift the "Bid" values 1000 rows back
        df["High"] = df["Bid"].rolling(window=self.price_frame).max()  # Calculate the rolling max over the last 1000 rows
        df["Low"] = df["Bid"].rolling(window=self.price_frame).min()  # Calculate the rolling min over the last 1000 rows

        # Calculate the index of the maximum value in the rolling window for "High time"
        df['High Time'] =  self.price_frame - ( df.index.values - df['Bid'].rolling(window=self.price_frame).agg(lambda x: x.index.values[np.argmax(x.values)]) ) 
        df["Low Time"] = self.price_frame  - ( df.index.values - df['Bid'].rolling(window=self.price_frame).agg(lambda x: x.index.values[np.argmin(x.values)]) )

        df['High First'] = df['High Time'] < df["Low Time"]

        df = df.dropna() #Drop initial NaN values

        # DIRECTIONAL BIAS
        df['Open - Close'] = (df['Open'] - df['Close']).abs()
        df['Upper Wick'] = (df['High'] - df[['Open', 'Close']].max(axis=1))
        df['Lower Wick'] = (df[['Open', 'Close']].min(axis=1) - df['Low'])

        df['Bias'] = np.where(df['Open'] > df['Close'], 'Bearish', np.where(df['Close'] > df['Open'], 'Bullish', 'Doji'))
        
        # Filter rows where Bias is "Bullish"
        bullish_rows = df[df['Bias'] == 'Bullish']
        bearish_rows = df[df['Bias'] == 'Bearish']

        # average directional OC
        average_bullish_oc = bullish_rows['Open - Close'].mean()
        average_bearish_oc = bearish_rows['Open - Close'].mean()
        avg_oc = ( average_bullish_oc + average_bearish_oc ) / 2
        
        # average bullish and bearish candle Wick (relative to close)
        average_bullish_upper = bullish_rows['Upper Wick'].mean()
        average_bearish_lower = bearish_rows['Lower Wick'].mean()
        avg_wk = avg_oc + ( average_bullish_upper + average_bearish_lower ) / 2
        print("\033[91mRemember that if you call average wick from PriceFrame object you'll get avg.OC + avg.WK\033[0m")

        # assign strenght bias
        df['Strength'] = np.where(df['Open - Close'] > avg_wk, 'Strong',
                                np.where((df['Open - Close'] > avg_oc) & (df['Open - Close'] <= avg_wk), 'Medium', 'Weak'))
        return df, avg_oc, avg_wk
    
    def printCategories(self):
    
        categories = ['Weak','Medium','Strong']

        thresholds_list = {
            'Thresholds' : categories,
            'Value' : [f'0 - {round(self.avg_oc, 5)}',f'{round(self.avg_oc, 5)} - {round(self.avg_wk, 5)}', f'>{round(self.avg_wk, 5)}']
        }

        print_ = f'<p style="font-size: 14px;">Categories {self.price_frame}ticks price-frame:</p>'
        display(HTML(print_))
        display(HTML(tabulate(thresholds_list, headers='keys', tablefmt='html', showindex=False, floatfmt=".5f", numalign="center", stralign="center")))

In [3]:
start_date = "01-12-2023"
end_date = "05-01-2024"
timeframe = 'tick'
price_frame = 1000

csv_file_path = f"{os.path.dirname(os.getcwd())}/x_DATA/{start_date}_{end_date}  {timeframe}.csv"

master_dimension = PriceFrame(start_date,end_date,timeframe,price_frame,csv_file_path) # Master dimension = highest one (1000 ticks)

df1k = master_dimension.dataframe
display(df1k)
master_dimension.printCategories()

[91mRemember that if you call average wick from PriceFrame object you'll get avg.OC + avg.WK[0m


Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High Time,Low Time,High First,Open - Close,Upper Wick,Lower Wick,Bias,Strength
1000,2023-12-01 00:21:58.140000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,820.00000,14.00000,False,0.00073,0.00019,0.00007,Bullish,Strong
1001,2023-12-01 00:22:00.557000+00:00,1.26377,1.26377,1.26306,1.26397,1.26298,819.00000,13.00000,False,0.00071,0.00020,0.00008,Bullish,Strong
1002,2023-12-01 00:22:00.912000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,818.00000,12.00000,False,0.00073,0.00019,0.00007,Bullish,Strong
1003,2023-12-01 00:22:01.166000+00:00,1.26378,1.26378,1.26306,1.26397,1.26298,817.00000,11.00000,False,0.00072,0.00019,0.00008,Bullish,Strong
1004,2023-12-01 00:22:01.317000+00:00,1.26378,1.26378,1.26304,1.26397,1.26298,816.00000,10.00000,False,0.00074,0.00019,0.00006,Bullish,Strong
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795,255.00000,119.00000,False,0.00024,0.00022,0.00015,Bearish,Weak
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795,254.00000,118.00000,False,0.00025,0.00023,0.00013,Bearish,Weak
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795,253.00000,117.00000,False,0.00027,0.00021,0.00013,Bearish,Weak
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795,252.00000,116.00000,False,0.00035,0.00016,0.00010,Bearish,Weak


Thresholds,Value
Weak,0 - 0.00046
Medium,0.00046 - 0.00067
Strong,>0.00067


#### Step 1.2: smaller price frame (.25*1k ticks)

In [4]:
price_frame25 = int(1000 * 0.25)

wick_dimension = PriceFrame(start_date,end_date,timeframe,price_frame25,csv_file_path) # Wick dimension = intermediate (1000 * 0.25 ticks)

df25 = wick_dimension.dataframe
display(df25)
wick_dimension.printCategories()

[91mRemember that if you call average wick from PriceFrame object you'll get avg.OC + avg.WK[0m


Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High Time,Low Time,High First,Open - Close,Upper Wick,Lower Wick,Bias,Strength
250,2023-12-01 00:06:10.974000+00:00,1.26347,1.26347,1.26305,1.26348,1.26298,238.00000,14.00000,False,0.00042,0.00001,0.00007,Bullish,Strong
251,2023-12-01 00:06:11.280000+00:00,1.26347,1.26347,1.26306,1.26348,1.26298,237.00000,13.00000,False,0.00041,0.00001,0.00008,Bullish,Strong
252,2023-12-01 00:06:11.383000+00:00,1.26345,1.26345,1.26305,1.26348,1.26298,236.00000,12.00000,False,0.00040,0.00003,0.00007,Bullish,Strong
253,2023-12-01 00:06:11.537000+00:00,1.26346,1.26346,1.26306,1.26348,1.26298,235.00000,11.00000,False,0.00040,0.00002,0.00008,Bullish,Strong
254,2023-12-01 00:06:11.992000+00:00,1.26345,1.26345,1.26304,1.26348,1.26298,234.00000,10.00000,False,0.00041,0.00003,0.00006,Bullish,Strong
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26815,1.26834,1.26801,207.00000,24.00000,False,0.00005,0.00019,0.00009,Bearish,Weak
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26815,1.26834,1.26801,206.00000,23.00000,False,0.00007,0.00019,0.00007,Bearish,Weak
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26815,1.26834,1.26801,205.00000,22.00000,False,0.00007,0.00019,0.00007,Bearish,Weak
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26816,1.26834,1.26801,204.00000,21.00000,False,0.00011,0.00018,0.00004,Bearish,Weak


Thresholds,Value
Weak,0 - 0.00023
Medium,0.00023 - 0.00033
Strong,>0.00033


### Step 3: Statistics

In [5]:
def getStatistics(df,price_frame):
    #pref_ = ""#'<div>'
    #suf_ =  ""#'</div>'
    html = ""
    
    # Possible values in each column
    unique_biases = ['Bullish','Bearish','Doji'] 
    unique_strengthes = ['All','Weak','Medium','Strong']
    unique_high_first_values = ['All',True,False]

    #pd.options.display.float_format = '{:.5f}'.format
    biases_data = [
        #("sample size"),
        ("percent"),
        ("average body"),
        ("average upper wick"),
        ("average lower wick"),
        ("average upper wick time"),
        ("average upper wick time percentage"),
        ("average lower wick time"),
        ("average lower wick time percentage"),
    ]

    html+= f'<p style="font-size: 24px;">{price_frame} tick dimension</p>'

    for bias in unique_biases:
        html+= "<hr>"
        html+= "<hr>"
        html+= f'<p style="color:{"green" if bias == "Bullish" else "red" if bias == "Bearish" else "orange"}; font-size: 24px;">{bias}</p>'
        #display(HTML(print_))
        
        biases_df = pd.DataFrame(biases_data, columns=[f'{bias}'])

        for high_first in unique_high_first_values:
            html+= "<hr>"
            info_high = 'Irrespective of High being before or after Low' if high_first == 'All' else 'High before Low' if high_first == True else 'High after Low'
            html+=  f'<p style="color:{"green" if bias == "Bullish" else "red" if bias == "Bearish" else "orange"}; font-size: 16px;">{info_high}</p>' 
            #display(HTML(print_))

            for strength in unique_strengthes:
            
                if strength == 'All' and high_first == 'All':
                    subset = df[df['Bias'] == bias]

                elif strength == 'All' and high_first != 'All':
                    subset = df[(df['Bias'] == bias) & (df['High First'] == high_first)]

                elif strength != 'All' and high_first == 'All':
                    subset = df[(df['Bias'] == bias) & (df['Strength'] == strength)]

                elif strength != 'All' and high_first != 'All':
                    subset = df[(df['Bias'] == bias) & (df['High First'] == high_first) & (df['Strength'] == strength)]
                
                sample = ( subset.size / df.size ) *100 # This is the % of occurrences

                # Average OC
                mean_oc = subset['Open - Close'].mean()
                # Average wick
                mean_uw = subset['Upper Wick'].mean()
                mean_lw = subset['Lower Wick'].mean()
                
                mean_uwt = subset['High Time'].mean()
                mean_lwt = subset['Low Time'].mean()

                mean_uwt_pct = ( subset['High Time'].mean() / price_frame ) *100
                mean_lwt_pct = ( subset['Low Time'].mean() / price_frame ) *100 
                
                biases_df[strength] = [
                    f'{int(sample)}%' if not np.isnan(sample) else 0,
                    round(mean_oc, 5) if not np.isnan(mean_oc) else 0,
                    round(mean_uw, 5) if not np.isnan(mean_uw) else 0,
                    round(mean_lw, 5) if not np.isnan(mean_lw) else 0,
                    int(mean_uwt) if not np.isnan(mean_uwt) else 0,
                    f'{int(mean_uwt_pct)}%' if not np.isnan(mean_uwt_pct) else 0,
                    int(mean_lwt) if not np.isnan(mean_lwt) else 0,
                    f'{int(mean_lwt_pct)}%' if not np.isnan(mean_lwt_pct) else 0
                ]
        
            html+=  tabulate(biases_df, headers='keys', tablefmt='html', floatfmt=".6f",showindex=False, numalign="center", stralign="right")
    
    return html


# Two sets of HTML outputs
first_html = f'<div style="margin-right:20px;">{getStatistics(df1k,master_dimension.price_frame) }</div>'  # Replace '...' with your first HTML
second_html = f'<div style="margin-right:20px;">{getStatistics(df25,wick_dimension.price_frame)}</div>'  # Replace '...' with your second HTML

# Concatenate the HTML strings and add the clear:both style
all_html = f'<div style="float:left;">{first_html}</div><div style="float:left;">{second_html}</div><div style="clear:both;"></div>'

# Display the concatenated HTML strings
display(HTML(all_html))

Bullish,All,Weak,Medium,Strong
percent,49%,29%,8%,11%
average body,0.00046,0.00021,0.00056,0.00105
average upper wick,0.00021,0.00024,0.00018,0.00016
average lower wick,0.00021,0.00024,0.00018,0.00016
average upper wick time,729,651,810,878
average upper wick time percentage,72%,65%,81%,87%
average lower wick time,242,310,165,119
average lower wick time percentage,24%,31%,16%,11%

Bullish,All,Weak,Medium,Strong
percent,5%,5%,0%,0%
average body,0.00014,0.00013,0.00052,0.00117
average upper wick,0.00027,0.00027,0.00026,0.0003
average lower wick,0.00027,0.00027,0.00019,0.0004
average upper wick time,297,296,318,461
average upper wick time percentage,29%,29%,31%,46%
average lower wick time,677,677,673,622
average lower wick time percentage,67%,67%,67%,62%

Bullish,All,Weak,Medium,Strong
percent,43%,24%,8%,11%
average body,0.0005,0.00023,0.00056,0.00105
average upper wick,0.00021,0.00024,0.00018,0.00016
average lower wick,0.0002,0.00023,0.00018,0.00016
average upper wick time,785,732,816,879
average upper wick time percentage,78%,73%,81%,87%
average lower wick time,185,225,159,117
average lower wick time percentage,18%,22%,15%,11%

Bearish,All,Weak,Medium,Strong
percent,49%,29%,8%,11%
average body,0.00046,0.00021,0.00056,0.00104
average upper wick,0.00021,0.00024,0.00018,0.00015
average lower wick,0.00022,0.00025,0.0002,0.00016
average upper wick time,245,315,172,115
average upper wick time percentage,24%,31%,17%,11%
average lower wick time,729,652,805,876
average lower wick time percentage,72%,65%,80%,87%

Bearish,All,Weak,Medium,Strong
percent,43%,24%,8%,11%
average body,0.0005,0.00023,0.00056,0.00104
average upper wick,0.0002,0.00023,0.00018,0.00015
average lower wick,0.00021,0.00024,0.0002,0.00016
average upper wick time,185,226,165,114
average upper wick time percentage,18%,22%,16%,11%
average lower wick time,788,738,811,877
average lower wick time percentage,78%,73%,81%,87%

Bearish,All,Weak,Medium,Strong
percent,6%,5%,0%,0%
average body,0.00014,0.00013,0.00053,0.00078
average upper wick,0.00027,0.00027,0.00021,0.00038
average lower wick,0.00028,0.00028,0.0002,0.00021
average upper wick time,679,679,684,647
average upper wick time percentage,67%,67%,68%,64%
average lower wick time,299,298,353,395
average lower wick time percentage,29%,29%,35%,39%

Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00031,0.00031,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,476,476,0,0
average upper wick time percentage,47%,47%,0,0
average lower wick time,479,479,0,0
average lower wick time percentage,47%,47%,0,0

Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.0003,0.0003,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,265,265,0,0
average upper wick time percentage,26%,26%,0,0
average lower wick time,680,680,0,0
average lower wick time percentage,68%,68%,0,0

Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00031,0.00031,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,686,686,0,0
average upper wick time percentage,68%,68%,0,0
average lower wick time,279,279,0,0
average lower wick time percentage,27%,27%,0,0

Bullish,All,Weak,Medium,Strong
percent,49%,30%,7%,11%
average body,0.00023,0.00011,0.00028,0.00052
average upper wick,0.0001,0.00011,8e-05,7e-05
average lower wick,9e-05,0.00011,8e-05,7e-05
average upper wick time,181,162,200,216
average upper wick time percentage,72%,65%,80%,86%
average lower wick time,57,72,41,27
average lower wick time percentage,22%,29%,16%,11%

Bullish,All,Weak,Medium,Strong
percent,5%,5%,0%,0%
average body,7e-05,6e-05,0.00027,0.00044
average upper wick,0.00012,0.00012,0.00012,0.00033
average lower wick,0.00013,0.00013,0.0001,0.00023
average upper wick time,69,69,76,66
average upper wick time percentage,27%,27%,30%,26%
average lower wick time,166,166,164,146
average lower wick time percentage,66%,66%,65%,58%

Bullish,All,Weak,Medium,Strong
percent,43%,24%,7%,11%
average body,0.00025,0.00012,0.00028,0.00052
average upper wick,9e-05,0.00011,8e-05,7e-05
average lower wick,9e-05,0.0001,8e-05,7e-05
average upper wick time,194,182,202,216
average upper wick time percentage,77%,72%,80%,86%
average lower wick time,44,53,39,27
average lower wick time percentage,17%,21%,15%,11%

Bearish,All,Weak,Medium,Strong
percent,49%,30%,7%,11%
average body,0.00023,0.00011,0.00028,0.00052
average upper wick,9e-05,0.00011,8e-05,7e-05
average lower wick,0.0001,0.00011,9e-05,8e-05
average upper wick time,57,72,40,27
average upper wick time percentage,22%,29%,16%,11%
average lower wick time,180,162,200,216
average lower wick time percentage,72%,64%,80%,86%

Bearish,All,Weak,Medium,Strong
percent,43%,25%,7%,11%
average body,0.00025,0.00012,0.00028,0.00052
average upper wick,9e-05,0.0001,8e-05,7e-05
average lower wick,0.0001,0.00011,8e-05,8e-05
average upper wick time,44,53,38,27
average upper wick time percentage,17%,21%,15%,10%
average lower wick time,194,181,201,216
average lower wick time percentage,77%,72%,80%,86%

Bearish,All,Weak,Medium,Strong
percent,5%,5%,0%,0%
average body,7e-05,6e-05,0.00027,0.00042
average upper wick,0.00013,0.00013,0.0001,0.00027
average lower wick,0.00012,0.00012,0.00012,0.00029
average upper wick time,164,164,165,142
average upper wick time percentage,65%,65%,66%,56%
average lower wick time,69,68,77,83
average lower wick time percentage,27%,27%,31%,33%

Doji,All,Weak,Medium,Strong
percent,1%,1%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00014,0.00014,0,0
average lower wick,0.00014,0.00014,0,0
average upper wick time,114,114,0,0
average upper wick time percentage,45%,45%,0,0
average lower wick time,114,114,0,0
average lower wick time percentage,45%,45%,0,0

Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00014,0.00014,0,0
average lower wick,0.00014,0.00014,0,0
average upper wick time,63,63,0,0
average upper wick time percentage,25%,25%,0,0
average lower wick time,166,166,0,0
average lower wick time percentage,66%,66%,0,0

Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00014,0.00014,0,0
average lower wick,0.00014,0.00014,0,0
average upper wick time,165,165,0,0
average upper wick time percentage,66%,66%,0,0
average lower wick time,62,62,0,0
average lower wick time percentage,24%,24%,0,0
