# Classify price movements based on candlesticks statistics

This reseach comes from [this](https://www.forexfactory.com/thread/post/14707863#post14707863) post on ForexFactory. 

### Step 1: Calculations

In [89]:
# This allow jupiter to upload in real time externally modified code
%load_ext autoreload
%autoreload 2 


import sys
sys.path.append("..")
import os
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
from IPython.display import display, HTML
from x_CLASSES.download_data import DownloadData
pd.options.display.float_format = '{:.5f}'.format


start_date = "01-12-2023"
end_date = "05-01-2024"
timeframe = 'tick'
price_frame = 1000

csv_file_path = f"{os.path.dirname(os.getcwd())}/x_DATA/{start_date}_{end_date}  {timeframe}.csv"

df = pd.DataFrame()


if os.path.exists(csv_file_path):
    df = pd.read_csv(csv_file_path)
else:
    df = DownloadData('GBP/USD', start_date,end_date,timeframe).getData()
    
    data_folder_path = f"{os.getcwd()}/x_DATA"
    if not os.path.exists(data_folder_path):
        os.makedirs(data_folder_path)

    df.to_csv(csv_file_path)

if "Ask" in df.columns:
    df = df.drop("Ask", axis=1)

df["Close"] = df["Bid"].copy()
df["Open"] = df["Bid"].shift(price_frame)  # Shift the "Bid" values 1000 rows back
df["High"] = df["Bid"].rolling(window=price_frame).max()  # Calculate the rolling max over the last 1000 rows
df["Low"] = df["Bid"].rolling(window=price_frame).min()  # Calculate the rolling min over the last 1000 rows

# Calculate the index of the maximum value in the rolling window for "High time"
df['High Time'] =  price_frame - ( df.index.values - df['Bid'].rolling(window=price_frame).agg(lambda x: x.index.values[np.argmax(x.values)]) ) 
df["Low Time"] = price_frame  - ( df.index.values - df['Bid'].rolling(window=price_frame).agg(lambda x: x.index.values[np.argmin(x.values)]) )

df['High First'] = df['High Time'] < df["Low Time"]

df = df.dropna() #Drop initial NaN values

# DIRECTIONAL BIAS
df['Open - Close'] = (df['Open'] - df['Close']).abs()
df['Upper Wick'] = (df['High'] - df[['Open', 'Close']].max(axis=1))
df['Lower Wick'] = (df[['Open', 'Close']].min(axis=1) - df['Low'])

df['Bias'] = np.where(df['Open'] > df['Close'], 'Bearish', np.where(df['Close'] > df['Open'], 'Bullish', 'Doji'))

# STRENGHT
# Filter rows where Bias is "Bullish"
bullish_rows = df[df['Bias'] == 'Bullish']
bearish_rows = df[df['Bias'] == 'Bearish']

# average directional OC
average_bullish_oc = bullish_rows['Open - Close'].mean()
average_bearish_oc = bearish_rows['Open - Close'].mean()
avg_oc = ( average_bullish_oc + average_bearish_oc ) / 2

# average bullish and bearish candle Wick (relative to close)
average_bullish_upper = bullish_rows['Upper Wick'].mean()
average_bearish_lower = bearish_rows['Lower Wick'].mean()
avg_wk = avg_oc + ( average_bullish_upper + average_bearish_lower ) / 2

# assign strenght bias
df['Strength'] = np.where(df['Open - Close'] > avg_wk, 'Strong',
                          np.where((df['Open - Close'] > avg_oc) & (df['Open - Close'] <= avg_wk), 'Medium', 'Weak'))


df

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High Time,Low Time,High First,Open - Close,Upper Wick,Lower Wick,Bias,Strength
1000,2023-12-01 00:21:58.140000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,820.00000,14.00000,False,0.00073,0.00019,0.00007,Bullish,Strong
1001,2023-12-01 00:22:00.557000+00:00,1.26377,1.26377,1.26306,1.26397,1.26298,819.00000,13.00000,False,0.00071,0.00020,0.00008,Bullish,Strong
1002,2023-12-01 00:22:00.912000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,818.00000,12.00000,False,0.00073,0.00019,0.00007,Bullish,Strong
1003,2023-12-01 00:22:01.166000+00:00,1.26378,1.26378,1.26306,1.26397,1.26298,817.00000,11.00000,False,0.00072,0.00019,0.00008,Bullish,Strong
1004,2023-12-01 00:22:01.317000+00:00,1.26378,1.26378,1.26304,1.26397,1.26298,816.00000,10.00000,False,0.00074,0.00019,0.00006,Bullish,Strong
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795,255.00000,119.00000,False,0.00024,0.00022,0.00015,Bearish,Weak
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795,254.00000,118.00000,False,0.00025,0.00023,0.00013,Bearish,Weak
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795,253.00000,117.00000,False,0.00027,0.00021,0.00013,Bearish,Weak
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795,252.00000,116.00000,False,0.00035,0.00016,0.00010,Bearish,Weak


### Step 2: Categories

In [90]:
unique_strengthes = ['Weak','Medium','Strong']

thresholds_list = {
    'Thresholds' : thresholds ,
    'Value' : [f'0 - {round(avg_oc, 5)}',f'{round(avg_oc, 5)} - {round(avg_wk, 5)}', f'>{round(avg_wk, 5)}']
}

print_ = f'<p style="font-size: 18px;">CATEGORIES:</p>'
display(HTML(print_))
display(HTML(tabulate(thresholds_list, headers='keys', tablefmt='html', showindex=False, floatfmt=".5f", numalign="center", stralign="center")))

Thresholds,Value
Weak,0 - 0.00046
Medium,0.00046 - 0.00067
Strong,>0.00067


### Step 3: Statistics

In [123]:
# Possible values in each column
unique_biases = ['Bullish','Bearish','Doji'] 
unique_strengthes = ['All','Weak','Medium','Strong']
unique_high_first_values = ['All',True,False]


# Create a list to store the results
result_data = []

#pd.options.display.float_format = '{:.5f}'.format

biases_data = [
    #("sample size"),
    ("percent"),
    ("average body"),
    ("average upper wick"),
    ("average lower wick"),
    ("average upper wick time"),
    ("average upper wick time percentage"),
    ("average lower wick time"),
    ("average lower wick time percentage"),
]


for bias in unique_biases:
    display(HTML("<hr>"))
    display(HTML("<hr>"))
    print_ = f'<p style="color:{"green" if bias == "Bullish" else "red" if bias == "Bearish" else "orange"}; font-size: 24px;">{bias}</p>'
    display(HTML(print_))
    
    biases_df = pd.DataFrame(biases_data, columns=[f'{bias}'])

    for high_first in unique_high_first_values:
        display(HTML("<hr>"))
        info_high = 'Irrespective of High being before or after Low' if high_first == 'All' else 'High before Low' if high_first == True else 'High after Low'
        print_ = f'<p style="color:{"green" if bias == "Bullish" else "red" if bias == "Bearish" else "orange"}; font-size: 16px;">{info_high}</p>'
        display(HTML(print_))

        for strength in unique_strengthes:
        
            if strength == 'All' and high_first == 'All':
                subset = df[df['Bias'] == bias]

            elif strength == 'All' and high_first != 'All':
                subset = df[(df['Bias'] == bias) & (df['High First'] == high_first)]

            elif strength != 'All' and high_first == 'All':
                subset = df[(df['Bias'] == bias) & (df['Strength'] == strength)]

            elif strength != 'All' and high_first != 'All':
                subset = df[(df['Bias'] == bias) & (df['High First'] == high_first) & (df['Strength'] == strength)]
            
            sample = ( subset.size / df.size ) *100 # This is the % of occurrences

            # Average OC
            mean_oc = subset['Open - Close'].mean()
            # Average wick
            mean_uw = subset['Upper Wick'].mean()
            mean_lw = subset['Lower Wick'].mean()
            
            mean_uwt = subset['High Time'].mean()
            mean_lwt = subset['Low Time'].mean()

            mean_uwt_pct = ( subset['High Time'].mean() / price_frame ) *100
            mean_lwt_pct = ( subset['Low Time'].mean() / price_frame ) *100 
            
            biases_df[strength] = [
                f'{int(sample)}%' if not np.isnan(sample) else 0,
                round(mean_oc, 5) if not np.isnan(mean_oc) else 0,
                round(mean_uw, 5) if not np.isnan(mean_uw) else 0,
                round(mean_lw, 5) if not np.isnan(mean_lw) else 0,
                int(mean_uwt) if not np.isnan(mean_uwt) else 0,
                f'{int(mean_uwt_pct)}%' if not np.isnan(mean_uwt_pct) else 0,
                int(mean_lwt) if not np.isnan(mean_lwt) else 0,
                f'{int(mean_lwt_pct)}%' if not np.isnan(mean_lwt_pct) else 0
            ]
    
        display(HTML(tabulate(biases_df, headers='keys', tablefmt='html', showindex=False, floatfmt=".5f", numalign="center", stralign="right")))




Bullish,All,Weak,Medium,Strong
percent,49%,29%,8%,11%
average body,0.00046,0.00021,0.00056,0.00105
average upper wick,0.00021,0.00024,0.00018,0.00016
average lower wick,0.00021,0.00024,0.00018,0.00016
average upper wick time,729,651,810,878
average upper wick time percentage,72%,65%,81%,87%
average lower wick time,242,310,165,119
average lower wick time percentage,24%,31%,16%,11%


Bullish,All,Weak,Medium,Strong
percent,5%,5%,0%,0%
average body,0.00014,0.00013,0.00052,0.00117
average upper wick,0.00027,0.00027,0.00026,0.0003
average lower wick,0.00027,0.00027,0.00019,0.0004
average upper wick time,297,296,318,461
average upper wick time percentage,29%,29%,31%,46%
average lower wick time,677,677,673,622
average lower wick time percentage,67%,67%,67%,62%


Bullish,All,Weak,Medium,Strong
percent,43%,24%,8%,11%
average body,0.0005,0.00023,0.00056,0.00105
average upper wick,0.00021,0.00024,0.00018,0.00016
average lower wick,0.0002,0.00023,0.00018,0.00016
average upper wick time,785,732,816,879
average upper wick time percentage,78%,73%,81%,87%
average lower wick time,185,225,159,117
average lower wick time percentage,18%,22%,15%,11%


Bearish,All,Weak,Medium,Strong
percent,49%,29%,8%,11%
average body,0.00046,0.00021,0.00056,0.00104
average upper wick,0.00021,0.00024,0.00018,0.00015
average lower wick,0.00022,0.00025,0.0002,0.00016
average upper wick time,245,315,172,115
average upper wick time percentage,24%,31%,17%,11%
average lower wick time,729,652,805,876
average lower wick time percentage,72%,65%,80%,87%


Bearish,All,Weak,Medium,Strong
percent,43%,24%,8%,11%
average body,0.0005,0.00023,0.00056,0.00104
average upper wick,0.0002,0.00023,0.00018,0.00015
average lower wick,0.00021,0.00024,0.0002,0.00016
average upper wick time,185,226,165,114
average upper wick time percentage,18%,22%,16%,11%
average lower wick time,788,738,811,877
average lower wick time percentage,78%,73%,81%,87%


Bearish,All,Weak,Medium,Strong
percent,6%,5%,0%,0%
average body,0.00014,0.00013,0.00053,0.00078
average upper wick,0.00027,0.00027,0.00021,0.00038
average lower wick,0.00028,0.00028,0.0002,0.00021
average upper wick time,679,679,684,647
average upper wick time percentage,67%,67%,68%,64%
average lower wick time,299,298,353,395
average lower wick time percentage,29%,29%,35%,39%


Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00031,0.00031,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,476,476,0,0
average upper wick time percentage,47%,47%,0,0
average lower wick time,479,479,0,0
average lower wick time percentage,47%,47%,0,0


Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.0003,0.0003,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,265,265,0,0
average upper wick time percentage,26%,26%,0,0
average lower wick time,680,680,0,0
average lower wick time percentage,68%,68%,0,0


Doji,All,Weak,Medium,Strong
percent,0%,0%,0%,0%
average body,0.0,0.0,0,0
average upper wick,0.00031,0.00031,0,0
average lower wick,0.00031,0.00031,0,0
average upper wick time,686,686,0,0
average upper wick time percentage,68%,68%,0,0
average lower wick time,279,279,0,0
average lower wick time percentage,27%,27%,0,0
