# Classify price movements based on candlesticks statistics

This reseach comes from [this](https://www.forexfactory.com/thread/post/14707863#post14707863) post on ForexFactory. 

### Step 1: gather data and create a rolling TF

In [1]:
# This allow jupiter to upload in real time externally modified code
%load_ext autoreload
%autoreload 2 


import sys
sys.path.append("..")
import os
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
from x_CLASSES.download_data import DownloadData


In [2]:
start_date = "01-12-2023"
end_date = "05-01-2024"
timeframe = 'tick'
price_frame = 1000

csv_file_path = f"{os.path.dirname(os.getcwd())}/x_DATA/{start_date}_{end_date}  {timeframe}.csv"

df = pd.DataFrame()


if os.path.exists(csv_file_path):
    df = pd.read_csv(csv_file_path)
else:
    df = DownloadData('GBP/USD', start_date,end_date,timeframe).getData()
    
    data_folder_path = f"{os.getcwd()}/x_DATA"
    if not os.path.exists(data_folder_path):
        os.makedirs(data_folder_path)

    df.to_csv(csv_file_path)

df

Unnamed: 0,Timestamp,Bid,Ask
0,2023-12-01 00:00:02.033000+00:00,1.26305,1.26316
1,2023-12-01 00:00:05.974000+00:00,1.26306,1.26316
2,2023-12-01 00:00:07.587000+00:00,1.26305,1.26316
3,2023-12-01 00:00:07.689000+00:00,1.26306,1.26316
4,2023-12-01 00:00:07.792000+00:00,1.26304,1.26317
...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26820
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26821
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26820
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26819


In [3]:
if "Ask" in df.columns:
    df = df.drop("Ask", axis=1)

df["Close"] = df["Bid"].copy()
df["Open"] = df["Bid"].shift(price_frame)  # Shift the "Bid" values 1000 rows back
df["High"] = df["Bid"].rolling(window=price_frame).max()  # Calculate the rolling max over the last 1000 rows
df["Low"] = df["Bid"].rolling(window=price_frame).min()  # Calculate the rolling min over the last 1000 rows

df

Unnamed: 0,Timestamp,Bid,Close,Open,High,Low
0,2023-12-01 00:00:02.033000+00:00,1.26305,1.26305,,,
1,2023-12-01 00:00:05.974000+00:00,1.26306,1.26306,,,
2,2023-12-01 00:00:07.587000+00:00,1.26305,1.26305,,,
3,2023-12-01 00:00:07.689000+00:00,1.26306,1.26306,,,
4,2023-12-01 00:00:07.792000+00:00,1.26304,1.26304,,,
...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795


#### Time of high and time of low
I'm adding a column that returns the high and low times.

In [4]:
# Calculate the index of the maximum value in the rolling window for "High time"
df['High time'] =  price_frame - ( df.index.values - df['Bid'].rolling(window=price_frame).agg(lambda x: x.index.values[np.argmax(x.values)]) ) 
df["Low time"] = price_frame  - ( df.index.values - df['Bid'].rolling(window=price_frame).agg(lambda x: x.index.values[np.argmin(x.values)]) )
df['High time2'] = df['Bid'].rolling(window=price_frame).rank(method='min')

df['High first'] = df['High time'] < df["Low time"]

df = df.dropna() #Drop initial NaN values

df

Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High time,Low time,High time2,High first
1000,2023-12-01 00:21:58.140000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,820.0,14.0,690.0,False
1001,2023-12-01 00:22:00.557000+00:00,1.26377,1.26377,1.26306,1.26397,1.26298,819.0,13.0,672.0,False
1002,2023-12-01 00:22:00.912000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,818.0,12.0,689.0,False
1003,2023-12-01 00:22:01.166000+00:00,1.26378,1.26378,1.26306,1.26397,1.26298,817.0,11.0,688.0,False
1004,2023-12-01 00:22:01.317000+00:00,1.26378,1.26378,1.26304,1.26397,1.26298,816.0,10.0,687.0,False
...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795,255.0,119.0,120.0,False
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795,254.0,118.0,88.0,False
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795,253.0,117.0,88.0,False
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795,252.0,116.0,56.0,False


#### How to interpret 'High time' and 'Low time' columns
e.g. 
- High time = 746 and Low time = 200
- That means: the high was made on tick number 746 of the current 1000 ticks candlestick

Here's a draw so that you can fully understand

![picture1](ORcChmJ.png)

### Step 2: Directional bias

In [5]:
df['Open - Close'] = (df['Open'] - df['Close']).abs()
df['Upper Wick'] = (df['High'] - df[['Open', 'Close']].max(axis=1))
df['Lower Wick'] = (df[['Open', 'Close']].min(axis=1) - df['Low'])

df['Bias'] = np.where(df['Open'] > df['Close'], 'Bearish', np.where(df['Close'] > df['Open'], 'Bullish', 'Doji'))

df


Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High time,Low time,High time2,High first,Open - Close,Upper Wick,Lower Wick,Bias
1000,2023-12-01 00:21:58.140000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,820.0,14.0,690.0,False,0.00073,0.00019,0.00007,Bullish
1001,2023-12-01 00:22:00.557000+00:00,1.26377,1.26377,1.26306,1.26397,1.26298,819.0,13.0,672.0,False,0.00071,0.00020,0.00008,Bullish
1002,2023-12-01 00:22:00.912000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,818.0,12.0,689.0,False,0.00073,0.00019,0.00007,Bullish
1003,2023-12-01 00:22:01.166000+00:00,1.26378,1.26378,1.26306,1.26397,1.26298,817.0,11.0,688.0,False,0.00072,0.00019,0.00008,Bullish
1004,2023-12-01 00:22:01.317000+00:00,1.26378,1.26378,1.26304,1.26397,1.26298,816.0,10.0,687.0,False,0.00074,0.00019,0.00006,Bullish
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795,255.0,119.0,120.0,False,0.00024,0.00022,0.00015,Bearish
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795,254.0,118.0,88.0,False,0.00025,0.00023,0.00013,Bearish
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795,253.0,117.0,88.0,False,0.00027,0.00021,0.00013,Bearish
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795,252.0,116.0,56.0,False,0.00035,0.00016,0.00010,Bearish


### Step 3: Strength of directional bias
On this version boundaries will be calculated trough average directional OC and average directional wick height. It can be:
- weak: $0<$ OC $<=$ Average OC
- medium ("normal"): Average OC $<$ OC $<=$ Average wick height
- strong: Average wick height $<$ OC

Here's a better rappresentation of the possible categories:

![picture2](EnRJAK1.png)

In [6]:
# Filter rows where Bias is "Bullish"
bullish_rows = df[df['Bias'] == 'Bullish']
bearish_rows = df[df['Bias'] == 'Bearish']

# average directional OC
average_bullish_oc = bullish_rows['Open - Close'].mean()
average_bearish_oc = bearish_rows['Open - Close'].mean()
avg_oc = ( average_bullish_oc + average_bearish_oc ) / 2

# average bullish and bearish candle Wick (relative to close)
average_bullish_upper = bullish_rows['Upper Wick'].mean()
average_bearish_lower = bearish_rows['Lower Wick'].mean()
avg_wk = avg_oc + ( average_bullish_upper + average_bearish_lower ) / 2

# assign strenght bias
df['Strength'] = np.where(df['Open - Close'] > avg_wk, 'Strong',
                          np.where((df['Open - Close'] > avg_oc) & (df['Open - Close'] <= avg_wk), 'Medium', 'Weak'))

del avg_oc, avg_wk
df


Unnamed: 0,Timestamp,Bid,Close,Open,High,Low,High time,Low time,High time2,High first,Open - Close,Upper Wick,Lower Wick,Bias,Strength
1000,2023-12-01 00:21:58.140000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,820.0,14.0,690.0,False,0.00073,0.00019,0.00007,Bullish,Strong
1001,2023-12-01 00:22:00.557000+00:00,1.26377,1.26377,1.26306,1.26397,1.26298,819.0,13.0,672.0,False,0.00071,0.00020,0.00008,Bullish,Strong
1002,2023-12-01 00:22:00.912000+00:00,1.26378,1.26378,1.26305,1.26397,1.26298,818.0,12.0,689.0,False,0.00073,0.00019,0.00007,Bullish,Strong
1003,2023-12-01 00:22:01.166000+00:00,1.26378,1.26378,1.26306,1.26397,1.26298,817.0,11.0,688.0,False,0.00072,0.00019,0.00008,Bullish,Strong
1004,2023-12-01 00:22:01.317000+00:00,1.26378,1.26378,1.26304,1.26397,1.26298,816.0,10.0,687.0,False,0.00074,0.00019,0.00006,Bullish,Strong
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2383418,2024-01-04 23:59:36.774000+00:00,1.26810,1.26810,1.26834,1.26856,1.26795,255.0,119.0,120.0,False,0.00024,0.00022,0.00015,Bearish,Weak
2383419,2024-01-04 23:59:41.153000+00:00,1.26808,1.26808,1.26833,1.26856,1.26795,254.0,118.0,88.0,False,0.00025,0.00023,0.00013,Bearish,Weak
2383420,2024-01-04 23:59:41.256000+00:00,1.26808,1.26808,1.26835,1.26856,1.26795,253.0,117.0,88.0,False,0.00027,0.00021,0.00013,Bearish,Weak
2383421,2024-01-04 23:59:55.951000+00:00,1.26805,1.26805,1.26840,1.26856,1.26795,252.0,116.0,56.0,False,0.00035,0.00016,0.00010,Bearish,Weak


### Step 4: print results

In [7]:
from tabulate import tabulate

# Possible values in each column
unique_biases = ['Bullish','Doji','Bearish']
unique_strengthes = ['Strong','Medium','Weak']
unique_high_first_values = df['High first'].unique()

# Create a list to store the results
result_data = []

pd.options.display.float_format = '{:.5f}'.format

# Iterate over combinations and calculate mean
for bias in unique_biases:
    for strength in unique_strengthes:
        for high_first in unique_high_first_values:
            subset = df[(df['Bias'] == bias) & (df['Strength'] == strength) & (df['High first'] == high_first)]
            sample = ( subset.size / df.size ) *100

            if sample >1: # I do not consider configurations that happenes less than 1% of times
                
                # Average OC
                mean_oc = subset['Open - Close'].mean()
                mean_oc = mean_oc
                # Average Higher and Lower wick
                mean_uwk = subset['Upper Wick'].mean()
                mean_lwk = subset['Lower Wick'].mean()
                # Average time of High and time of Low
                mean_thigh = int(round(subset['High time'].mean(),0))
                mean_tlow = int(round(subset['Low time'].mean(),0))
                time_ = f'{mean_thigh} - {mean_tlow}'

                result_data.append({'Sampple':f'{round(sample,2)}%','Bias': bias, 'Strength': strength, 'High First': high_first, 'Avg. OC': mean_oc,
                                    'Avg. Upper Wick': mean_uwk, 'Avg. Lower Wick':mean_lwk,'Avg. Time High - Low': time_})

# Convert the list of dictionaries to a DataFrame
result_df = pd.DataFrame(result_data)

# Print the formatted table
#print(tabulate(result_df, headers='keys', tablefmt='simple_outline', showindex=False, floatfmt=".5f", numalign="center", stralign="center"))
result_df


Unnamed: 0,Sampple,Bias,Strength,High First,Avg. OC,Avg. Upper Wick,Avg. Lower Wick,Avg. Time High - Low
0,11.16%,Bullish,Strong,False,0.00105,0.00016,0.00016,879 - 118
1,8.38%,Bullish,Medium,False,0.00056,0.00018,0.00018,816 - 159
2,24.36%,Bullish,Weak,False,0.00023,0.00024,0.00023,733 - 226
3,5.59%,Bullish,Weak,True,0.00013,0.00027,0.00027,296 - 678
4,11.4%,Bearish,Strong,True,0.00104,0.00015,0.00016,115 - 877
5,8.03%,Bearish,Medium,True,0.00056,0.00018,0.0002,165 - 812
6,5.87%,Bearish,Weak,False,0.00013,0.00027,0.00028,680 - 298
7,24.08%,Bearish,Weak,True,0.00023,0.00023,0.00024,226 - 739
