# EXPERIMENT : MINING CANDLESTICK USING PERMUTATION ENTROPY
In this experiment, we try to expand on the pattern probabilistic-effects experiment from TradingView (the `Significant Move` indicator).

Procedure: 
- Encode all possible arrangements of the OHLC values, then filter out illogical or redundant possibilities

In [85]:
# Import Libraries
import itertools

In [2]:
# Define the possible components of the comparison rules
series_options = ['open', 'high', 'low', 'close']
comparisons = ['>', '<']

# Function to generate all possible comparison rules
def generate_comparison_rules(maximum_lag):
    """Enumerate every ``"<series>[<lag>] <op> <series>[<lag>]"`` rule string.

    Operands are drawn from ``series_options`` with lags ``0 .. maximum_lag - 1``
    and joined by each operator in ``comparisons``. A rule that would compare
    an operand with itself (same series and same lag) is left out.

    Args:
        maximum_lag: Number of lags to cover; passed straight to ``range``.

    Returns:
        list[str]: Rules in ``itertools.product`` order (the comparison
        operator varies fastest, the left-hand series slowest).
    """
    lag_range = range(maximum_lag)
    candidates = itertools.product(
        series_options, series_options, lag_range, lag_range, comparisons
    )
    return [
        f"{lhs}[{lhs_lag}] {op} {rhs}[{rhs_lag}]"
        for lhs, rhs, lhs_lag, rhs_lag, op in candidates
        if (lhs, lhs_lag) != (rhs, rhs_lag)
    ]

# Generate all possible patterns with a given pattern size
def generate_all_patterns(pattern_size, maximum_lag):
    """Return every unordered combination of `pattern_size` comparison rules.

    Args:
        pattern_size: Number of rules per pattern.
        maximum_lag: Forwarded to ``generate_comparison_rules``.

    Returns:
        list[tuple[str, ...]]: All ``itertools.combinations`` of the generated
        rules, fully materialised as a list.
    """
    return list(
        itertools.combinations(generate_comparison_rules(maximum_lag), pattern_size)
    )

def _split_operand(operand):
    """Split an operand such as ``"high[12]"`` into ``("high", "12")``."""
    series, _, lag = operand.partition('[')
    return series, lag.rstrip(']')


# Function to check for logical consistency in a pattern
def is_logical(pattern, exclude_constraints:list[str]):
    """Return True when `pattern` has no impossible, excluded, or repeated comparison.

    Args:
        pattern: Iterable of rule strings shaped like ``"high[0] > low[1]"``
            (left operand, comparison, right operand, whitespace separated).
        exclude_constraints: Constraint strings. ``"a/b"`` rejects a rule when
            both ``a`` and ``b`` occur (as substrings) among its two operands.
            A constraint without ``/`` rejects a rule when the term occurs in
            either operand. Spaces are ignored; constraints that are empty
            after removing spaces and slashes are skipped.

    Returns:
        bool: False as soon as a rule
        (1) compares two high/low series at the same lag,
        (2) matches an exclude constraint, or
        (3) relates a pair of operands that an earlier rule of the pattern
            already relates (any direction, any comparison), i.e. it is a
            duplicate or a contradiction.
        True otherwise.

    Raises:
        ValueError: If a rule does not split into exactly three tokens.
    """
    fixed_series = ('high', 'low')

    # Parse the constraints once instead of once per rule.
    constraint_terms = []
    for constraint in exclude_constraints:
        parts = constraint.replace(' ', '').split('/')
        if not any(parts):
            continue
        first = parts[0]
        # A single-term constraint is matched against either operand.
        second = parts[1] if len(parts) > 1 else first
        constraint_terms.append((first, second))

    # Unordered operand pairs already used by an earlier rule. Any second rule
    # on the same pair is either a restatement or a contradiction of the first.
    seen_pairs = set()

    for rule in pattern:
        left, _comp, right = rule.split()

        # Parse on the bracket so multi-digit lags ("high[10]") are handled.
        left_series, left_lag = _split_operand(left)
        right_series, right_lag = _split_operand(right)

        # Check for illogical high/low comparisons
        if left_series in fixed_series and right_series in fixed_series and left_lag == right_lag:
            return False

        # Apply constraints
        operands = (left, right)
        for first, second in constraint_terms:
            if any(first in x for x in operands) and any(second in x for x in operands):
                return False

        # Check for duplicates or contradictory rules
        pair = frozenset(operands)
        if pair in seen_pairs:
            return False
        seen_pairs.add(pair)

    return True

# Function to filter patterns based on constraints and logical consistency
def filter_patterns(patterns, include_constraints, exclude_constraints):
    """Keep only the patterns that satisfy the constraints and pass ``is_logical``.

    A pattern survives when every one of its rules contains at least one
    ``include_constraints`` term and none of the ``exclude_constraints`` terms
    (plain substring tests on the rule text), and ``is_logical`` accepts the
    pattern. Only the exclude constraints containing ``/`` are forwarded to
    ``is_logical``.

    Args:
        patterns: Iterable of patterns, each an iterable of rule strings.
        include_constraints: Substrings of which each rule must contain one.
        exclude_constraints: Substrings no rule may contain; entries with a
            ``/`` are additionally handed to ``is_logical``.

    Returns:
        list: The surviving patterns, in their original order.
    """
    pair_constraints = [term for term in exclude_constraints if '/' in term]

    kept = []
    for candidate in patterns:
        every_rule_allowed = all(
            any(term in rule for term in include_constraints)
            and not any(term in rule for term in exclude_constraints)
            for rule in candidate
        )
        if every_rule_allowed and is_logical(candidate, pair_constraints):
            kept.append(candidate)
    return kept


# pattern_size = 3
# max_lag = 2
# all_patterns = generate_all_patterns(pattern_size, max_lag)

In [3]:
# include_constraints = ['high', 'low', 'close']
# exclude_constraints = ['open', 'high/low']

# filtered_patterns = filter_patterns(all_patterns, include_constraints, exclude_constraints)

# len(filtered_patterns)

10560

## CANDLE STICK TARGET STATISTICS


- engulfing = down_candle[1] and up_candle[0] and (close[0] > max(open[1], close[1]))  # Reversal
- engulfing_strict_1 = down_candle[1] and up_candle[0] and new_low[1] and (open[0] <= close[0]) and (close[0] > max(open[1], close[1])) and (high[0] > high[1])  # Reversal
- engulfing_strict_2 = down_candle[1] and up_candle[0] and new_low[1] and (low[0] < low[1]) and (open[0] <= close[0]) and (close[0] > high[1])  # Reversal

- doji = engulfing[1] and (abs(open - close) < threshold) # Reversal
- doji_strict = engulfing[1] and (abs(open - close) < threshold) and (high[0] < high[1]) and (low[0] > low[1]) # Reversal

- piercing = engulfing but with >= 50% engulfing
- piercing_strict = down_candle[1] and strong_candle[1] and (open[0] < close[0]) and (close[0] > hl2[1])
- piercing_strict_2 = engulfing and down_candle[1] and (open[0] < close[0]) and (close[0] > hl2[1])
    
- inside = engulfing[1] and (high[0] > high[1]) # Reversal
- kicker = down_candle[1] and (open[0] > close[1]) and (high[0] > high[1])

In [32]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import yfinance as yf

In [171]:
# Define ticker symbols
# yf_df = yf.download('EURUSD=X') # EURUSD
yf_df = yf.download('ES=F') # NOTE(review): 'ES=F' is presumably Yahoo Finance's E-mini S&P 500 futures symbol itself rather than a proxy — confirm

[*********************100%%**********************]  1 of 1 completed


In [172]:
# Tidy the download without touching yf_df: move the index into a column,
# lower-case the column names, index by 'date' and keep only the OHLC columns.
raw_df = yf_df.reset_index()
raw_df.columns = raw_df.columns.str.lower()
raw_df = raw_df.set_index('date').drop(columns=['adj close', 'volume'])

raw_df.head()

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-09-18,1485.25,1489.75,1462.25,1467.5
2000-09-19,1467.0,1482.75,1466.75,1478.5
2000-09-20,1478.75,1480.5,1450.25,1469.5
2000-09-21,1470.25,1474.0,1455.5,1469.5
2000-09-22,1454.75,1471.0,1436.75,1468.5


In [173]:
# Working copy of the OHLC data plus the candle mid-point (high + low) / 2.
df = raw_df.copy()
df['hl2'] = (df['high'] + df['low']) / 2

# Thresholds: minimum body size of a "strong" candle, and the maximum body
# size of a doji expressed as 5% of the 14-period ATR.
threshold_strong_move = 1
atr = ta.atr(df['high'], df['low'], df['close'], 14)
doji_atr_limit = 0.05 * atr
body_size = (df['open'] - df['close']).abs()

# Lagged copies: df_1 is the previous candle, df_2 the one before it.
df_1 = df.shift(periods=1)
df_2 = df.shift(periods=2)

# Per-candle boolean features.
strong_move = body_size >= threshold_strong_move
up_candle = df['close'] > df['open']
dn_candle = df['close'] < df['open']
new_high = df['high'] > df_1['high']
new_low = df['low'] < df_1['low']
df

Unnamed: 0_level_0,open,high,low,close,hl2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-09-18,1485.25,1489.75,1462.25,1467.50,1476.000
2000-09-19,1467.00,1482.75,1466.75,1478.50,1474.750
2000-09-20,1478.75,1480.50,1450.25,1469.50,1465.375
2000-09-21,1470.25,1474.00,1455.50,1469.50,1464.750
2000-09-22,1454.75,1471.00,1436.75,1468.50,1453.875
...,...,...,...,...,...
2024-05-13,5240.50,5264.00,5233.25,5245.50,5248.625
2024-05-14,5241.75,5274.25,5216.75,5269.50,5245.500
2024-05-15,5268.50,5337.25,5266.25,5333.00,5301.750
2024-05-16,5331.50,5349.00,5315.50,5320.25,5332.250


In [174]:
# Bullish pattern masks. Every mask is built from per-candle boolean Series;
# `x.shift(1)` and `df_1[...]` both refer to the previous candle, so a mask is
# set on the *current* (last) candle of the pattern.

# Previous candle down, current candle up and closing above the previous body.
engulfing = dn_candle.shift(1) & up_candle & (df['close'] > np.maximum(df_1['open'], df_1['close']))  # Reversal
# NOTE(review): the markdown spec writes (open[0] <= close[0]) for this pattern;
# the code compares the open with the *previous* close — confirm which is intended.
engulfing_strict_1 = dn_candle.shift(1) & up_candle & new_low.shift(1) & (df['open'] <= df_1['close']) & (df['close'] > np.maximum(df_1['open'], df_1['close'])) & (df['high'] > df_1['high']) # Reversal
# NOTE(review): the markdown spec lists new_low[1] here, but the code uses
# new_high.shift(1) (strict_1 above uses new_low.shift(1)) — confirm before
# relying on the recorded statistics. (df['open'] <= df['close']) is already
# implied by up_candle.
engulfing_strict_2 = dn_candle.shift(1) & up_candle & new_high.shift(1) & (df['low'] < df_1['low']) & (df['open'] <= df['close']) & (df['close'] > df_1['high'])  # Reversal

# Candle right after a bullish engulfing whose body is below 5% of ATR(14);
# the strict variant also requires it to stay inside the previous high/low range.
doji = engulfing.shift(1) & (body_size < doji_atr_limit) # Reversal
doji_strict = engulfing.shift(1) & (body_size < doji_atr_limit) & (df['high'] < df_1['high']) & (df['low'] > df_1['low']) # Reversal

# NOTE(review): despite the name, `inside` requires a *higher* high than the
# previous candle on the candle after an engulfing — confirm the naming.
inside = engulfing.shift(1) & (df['high'] > df_1['high']) # Reversal
# Previous candle down, current open above the previous close and a higher high.
kicker = dn_candle.shift(1) & (df['open'] > df_1['close']) & (df['high'] > df_1['high'])


# Piercing pattern definitions
# Close recovers at least half of the previous down body but stays below the
# previous open, so `piercing` and `engulfing` cannot both be set on one candle.
piercing = dn_candle.shift(1) & up_candle & (df['close'] >= df_1['close'] + 0.5 * (df_1['open'] - df_1['close'])) & (df['close'] < df_1['open'])  # >= 50% engulfing
# NOTE(review): the markdown spec also requires strong_candle[1]; `strong_move`
# is not applied here — confirm whether that is intentional.
piercing_strict = dn_candle.shift(1) & up_candle & (df['open'] < df_1['close']) & (df['close'] > df_1['hl2'])  # Strong piercing
# dn_candle.shift(1) is already part of `engulfing`; it is repeated here for readability only.
piercing_strict_2 = engulfing & dn_candle.shift(1) & (df['open'] < df_1['close']) & (df['close'] > df_1['hl2'])  # Engulfing and strong piercing



# BEARS

# Bearish patterns
# Each bearish mask mirrors the bullish mask of the same name as currently
# coded: up/down candles, highs/lows and comparison directions are swapped.
bearish_engulfing = up_candle.shift(1) & dn_candle & (df['close'] < np.minimum(df_1['open'], df_1['close']))  # Reversal
bearish_engulfing_strict_1 = up_candle.shift(1) & dn_candle & new_high.shift(1) & (df['open'] >= df_1['close']) & (df['close'] < np.minimum(df_1['open'], df_1['close'])) & (df['low'] < df_1['low'])  # Reversal
# NOTE(review): mirrors engulfing_strict_2 as coded (new_low.shift(1)); if the
# bullish version is changed to follow the markdown spec, this one must be
# changed to new_high.shift(1) as well.
bearish_engulfing_strict_2 = up_candle.shift(1) & dn_candle & new_low.shift(1) & (df['high'] > df_1['high']) & (df['open'] >= df['close']) & (df['close'] < df_1['low'])  # Reversal

bearish_doji = bearish_engulfing.shift(1) & (body_size < doji_atr_limit)  # Reversal
bearish_doji_strict = bearish_engulfing.shift(1) & (body_size < doji_atr_limit) & (df['high'] < df_1['high']) & (df['low'] > df_1['low'])  # Reversal

bearish_inside = bearish_engulfing.shift(1) & (df['low'] < df_1['low'])  # Reversal
bearish_kicker = up_candle.shift(1) & (df['open'] < df_1['close']) & (df['low'] < df_1['low'])

# Dark cloud cover (bearish analog of piercing)
bearish_piercing = up_candle.shift(1) & dn_candle & (df['close'] <= df_1['close'] - 0.5 * (df_1['close'] - df_1['open'])) & (df['close'] > df_1['open'])  # >= 50% engulfing
bearish_piercing_strict = up_candle.shift(1) & dn_candle & (df['open'] > df_1['close']) & (df['close'] < df_1['hl2'])  # Strong dark cloud
bearish_piercing_strict_2 = bearish_engulfing & up_candle.shift(1) & (df['open'] > df_1['close']) & (df['close'] < df_1['hl2'])  # Engulfing and strong dark cloud


In [175]:
def _hit_by_next_candle(next_candle, level):
    """Flag rows whose following candle trades through `level` (low <= level <= high)."""
    return (next_candle['high'] >= level) & (next_candle['low'] <= level)


def analyse_pattern(pattern_df:pd.Series, original_df:pd.DataFrame):
    """Show how often the candle after a pattern touches a set of price levels.

    For every row where `pattern_df` is True, the next candle's [low, high]
    range is tested against seven levels: the pattern candle's open/high/low
    (``target_open``, ``target_high``, ``target_low``) and the preceding
    candle's open/high/low/close (``target_*_1``).

    Args:
        pattern_df: Mask of pattern occurrences. It is assigned as a column of
            a copy of `original_df`, so a Series is aligned on the index.
            Missing values (e.g. introduced by ``shift``) count as
            "no pattern".
        original_df: Frame with ``open``, ``high``, ``low`` and ``close``
            columns; it is not modified.

    Returns:
        None. The statistics table (``target``, ``total``, ``success``,
        ``probability`` in percent) is shown with IPython's ``display`` when
        that name is available, otherwise printed.

    Notes:
        A pattern on the last row has no following candle; its comparisons
        against NaN are False, so it counts towards ``total`` but never
        towards ``success``. When the pattern never occurs, ``probability``
        is NaN instead of raising ``ZeroDivisionError``.
    """
    # Possible Targets :
    # - open[0]
    # - high[0]
    # - low[0]
    # - ohlc[1]

    data = original_df.copy()
    next_candle = data.shift(-1)
    prev_candle = data.shift(1)

    # Price level tested by each target column, in reporting order.
    levels = {
        'target_open': data['open'],
        'target_high': data['high'],
        'target_low': data['low'],
        'target_open_1': prev_candle['open'],
        'target_high_1': prev_candle['high'],
        'target_low_1': prev_candle['low'],
        'target_close_1': prev_candle['close'],
    }
    for name, level in levels.items():
        data[name] = _hit_by_next_candle(next_candle, level)

    data['pattern'] = pattern_df
    # `.eq(True)` turns NaN/None into False, so masks that went through
    # shift() or index alignment can be used for boolean indexing.
    matched = data['pattern'].eq(True)

    # The selection does not depend on the target, so do it once.
    selected = data[matched]
    count = len(selected)

    # Get the statistics for each target
    stats = {
        'target' : [],
        'total' : [],
        'success' : [],
        'probability' : []
    }

    for name in levels:
        success = int(selected[name].sum())
        probability = round(100 * success / count, 2) if count else float('nan')

        stats['target'].append(name)
        stats['total'].append(count)
        stats['success'].append(success)
        stats['probability'].append(probability)

    try:
        show = display  # available without import inside IPython/Jupyter
    except NameError:
        show = print  # plain-Python fallback; also returns None

    return show(pd.DataFrame(stats))
    

In [176]:
# Bullish vs. bearish engulfing; each call displays its own table and the cell echoes the tuple of return values.
analyse_pattern(engulfing, df), analyse_pattern(bearish_engulfing, df)

Unnamed: 0,target,total,success,probability
0,target_open,790,257,32.53
1,target_high,790,590,74.68
2,target_low,790,168,21.27
3,target_open_1,790,392,49.62
4,target_high_1,790,469,59.37
5,target_low_1,790,143,18.1
6,target_close_1,790,255,32.28


Unnamed: 0,target,total,success,probability
0,target_open,709,247,34.84
1,target_high,709,163,22.99
2,target_low,709,500,70.52
3,target_open_1,709,387,54.58
4,target_high_1,709,155,21.86
5,target_low_1,709,443,62.48
6,target_close_1,709,238,33.57


(None, None)

In [177]:
# Bullish vs. bearish strict engulfing (variant 1); each call displays its own table.
analyse_pattern(engulfing_strict_1, df), analyse_pattern(bearish_engulfing_strict_1, df)

Unnamed: 0,target,total,success,probability
0,target_open,170,43,25.29
1,target_high,170,130,76.47
2,target_low,170,31,18.24
3,target_open_1,170,78,45.88
4,target_high_1,170,103,60.59
5,target_low_1,170,24,14.12
6,target_close_1,170,49,28.82


Unnamed: 0,target,total,success,probability
0,target_open,167,42,25.15
1,target_high,167,30,17.96
2,target_low,167,122,73.05
3,target_open_1,167,83,49.7
4,target_high_1,167,28,16.77
5,target_low_1,167,105,62.87
6,target_close_1,167,45,26.95


(None, None)

In [178]:
# Bullish vs. bearish strict engulfing (variant 2); each call displays its own table.
analyse_pattern(engulfing_strict_2, df), analyse_pattern(bearish_engulfing_strict_2, df)

Unnamed: 0,target,total,success,probability
0,target_open,37,5,13.51
1,target_high,37,33,89.19
2,target_low,37,3,8.11
3,target_open_1,37,10,27.03
4,target_high_1,37,26,70.27
5,target_low_1,37,5,13.51
6,target_close_1,37,7,18.92


Unnamed: 0,target,total,success,probability
0,target_open,36,4,11.11
1,target_high,36,2,5.56
2,target_low,36,30,83.33
3,target_open_1,36,7,19.44
4,target_high_1,36,2,5.56
5,target_low_1,36,20,55.56
6,target_close_1,36,4,11.11


(None, None)

In [179]:
# Doji following a bullish vs. a bearish engulfing; each call displays its own table.
analyse_pattern(doji, df), analyse_pattern(bearish_doji, df)


Unnamed: 0,target,total,success,probability
0,target_open,65,58,89.23
1,target_high,65,32,49.23
2,target_low,65,39,60.0
3,target_open_1,65,23,35.38
4,target_high_1,65,40,61.54
5,target_low_1,65,18,27.69
6,target_close_1,65,56,86.15


Unnamed: 0,target,total,success,probability
0,target_open,46,43,93.48
1,target_high,46,28,60.87
2,target_low,46,25,54.35
3,target_open_1,46,17,36.96
4,target_high_1,46,11,23.91
5,target_low_1,46,29,63.04
6,target_close_1,46,42,91.3


(None, None)

In [180]:
# Strict doji (bullish vs. bearish); each call displays its own table.
analyse_pattern(doji_strict, df), analyse_pattern(bearish_doji_strict, df)

Unnamed: 0,target,total,success,probability
0,target_open,13,12,92.31
1,target_high,13,7,53.85
2,target_low,13,9,69.23
3,target_open_1,13,6,46.15
4,target_high_1,13,4,30.77
5,target_low_1,13,4,30.77
6,target_close_1,13,11,84.62


Unnamed: 0,target,total,success,probability
0,target_open,4,4,100.0
1,target_high,4,4,100.0
2,target_low,4,3,75.0
3,target_open_1,4,2,50.0
4,target_high_1,4,1,25.0
5,target_low_1,4,1,25.0
6,target_close_1,4,4,100.0


(None, None)

In [181]:
# `inside` vs. `bearish_inside`; each call displays its own table.
analyse_pattern(inside, df), analyse_pattern(bearish_inside, df)

Unnamed: 0,target,total,success,probability
0,target_open,584,285,48.8
1,target_high,584,343,58.73
2,target_low,584,256,43.84
3,target_open_1,584,143,24.49
4,target_high_1,584,310,53.08
5,target_low_1,584,100,17.12
6,target_close_1,584,286,48.97


Unnamed: 0,target,total,success,probability
0,target_open,501,251,50.1
1,target_high,501,254,50.7
2,target_low,501,242,48.3
3,target_open_1,501,141,28.14
4,target_high_1,501,99,19.76
5,target_low_1,501,261,52.1
6,target_close_1,501,250,49.9


(None, None)

In [182]:
# `kicker` vs. `bearish_kicker`; each call displays its own table.
analyse_pattern(kicker, df), analyse_pattern(bearish_kicker, df)

Unnamed: 0,target,total,success,probability
0,target_open,446,189,42.38
1,target_high,446,279,62.56
2,target_low,446,146,32.74
3,target_open_1,446,220,49.33
4,target_high_1,446,239,53.59
5,target_low_1,446,122,27.35
6,target_close_1,446,174,39.01


Unnamed: 0,target,total,success,probability
0,target_open,472,217,45.97
1,target_high,472,203,43.01
2,target_low,472,260,55.08
3,target_open_1,472,251,53.18
4,target_high_1,472,152,32.2
5,target_low_1,472,265,56.14
6,target_close_1,472,204,43.22


(None, None)

In [183]:
# Piercing vs. its bearish analog (dark cloud cover); each call displays its own table.
analyse_pattern(piercing, df), analyse_pattern(bearish_piercing, df)

Unnamed: 0,target,total,success,probability
0,target_open,280,141,50.36
1,target_high,280,207,73.93
2,target_low,280,79,28.21
3,target_open_1,280,219,78.21
4,target_high_1,280,146,52.14
5,target_low_1,280,93,33.21
6,target_close_1,280,138,49.29


Unnamed: 0,target,total,success,probability
0,target_open,263,138,52.47
1,target_high,263,97,36.88
2,target_low,263,181,68.82
3,target_open_1,263,223,84.79
4,target_high_1,263,98,37.26
5,target_low_1,263,150,57.03
6,target_close_1,263,136,51.71


(None, None)

In [184]:
# Strict piercing vs. strict dark cloud; each call displays its own table.
analyse_pattern(piercing_strict, df), analyse_pattern(bearish_piercing_strict, df)

Unnamed: 0,target,total,success,probability
0,target_open,441,150,34.01
1,target_high,441,326,73.92
2,target_low,441,97,22.0
3,target_open_1,441,236,53.51
4,target_high_1,441,260,58.96
5,target_low_1,441,96,21.77
6,target_close_1,441,177,40.14


Unnamed: 0,target,total,success,probability
0,target_open,364,144,39.56
1,target_high,364,92,25.27
2,target_low,364,254,69.78
3,target_open_1,364,225,61.81
4,target_high_1,364,95,26.1
5,target_low_1,364,236,64.84
6,target_close_1,364,158,43.41


(None, None)

In [185]:
# Engulfing-based strict piercing vs. its bearish analog; each call displays its own table.
analyse_pattern(piercing_strict_2, df), analyse_pattern(bearish_piercing_strict_2, df)


Unnamed: 0,target,total,success,probability
0,target_open,307,84,27.36
1,target_high,307,236,76.87
2,target_low,307,57,18.57
3,target_open_1,307,147,47.88
4,target_high_1,307,193,62.87
5,target_low_1,307,52,16.94
6,target_close_1,307,101,32.9


Unnamed: 0,target,total,success,probability
0,target_open,266,91,34.21
1,target_high,266,58,21.8
2,target_low,266,189,71.05
3,target_open_1,266,150,56.39
4,target_high_1,266,61,22.93
5,target_low_1,266,173,65.04
6,target_close_1,266,99,37.22


(None, None)