# Candlestick Pattern Detection
by Chee-Foong 


## Summary 
This analysis follows the steps found in the article "[Recognizing over 50 Candlestick Patterns with Python](https://medium.com/analytics-vidhya/recognizing-over-50-candlestick-patterns-with-python-4f02a1822cb5)" by **Caner Irfanoglu**.  I thank him for making his work public.

Candlestick patterns are one of many features that can be fed into machine learning models for predicting time-series returns of investable assets such as shares, currencies, and cryptocurrencies.


## References
1. https://medium.com/analytics-vidhya/recognizing-over-50-candlestick-patterns-with-python-4f02a1822cb5
2. https://github.com/mrjbq7/ta-lib
3. https://en.wikipedia.org/wiki/Candlestick_pattern
4. https://www.youtube.com/watch?v=sJCgnSOcTPE

In [1]:
# !pip3 install TA-Lib

## Initialisation

In [2]:
import warnings
# Suppress warnings for the whole notebook session.
warnings.filterwarnings("ignore", category=FutureWarning)
# NOTE(review): Warning is the base class of every warning category, so this
# single filter hides all warnings, including the two categories that are
# also filtered explicitly here.
warnings.filterwarnings("ignore", category=Warning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# IMPORTS
import pandas as pd
import numpy as np

import time
import math
import os.path

# from tqdm import tnrange, notebook
from tqdm.notebook import tqdm

from datetime import timedelta, datetime
from dateutil import parser

# Import the plotting library
import matplotlib.pyplot as plt
# %matplotlib inline

import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
from matplotlib.dates import MonthLocator

import seaborn as sns
sns.set()

plt.rcParams.update({'figure.figsize':(15,7), 'figure.dpi':120})
# plt.style.use('ggplot')

## Load Data

In [3]:
# Load the raw ETH/USDT bars (1-minute data, going by the file name); the path
# is resolved relative to the current working directory.
# NOTE(review): parse_dates=True only asks pandas to parse the index; no
# index_col is given, so the 'timestamp' column is presumably still read as
# text. It is converted with pd.to_datetime inside cleanPx.
crypto = pd.read_csv('../data/ETHUSDT-1m-data.csv', parse_dates=True)

def cleanPx(prices, freq='1H'):
    """Resample raw price bars into OHLCV candles of a coarser frequency.

    Parameters
    ----------
    prices : pandas.DataFrame
        Must contain the columns ``timestamp``, ``open``, ``high``, ``low``,
        ``close`` and ``volume``. ``timestamp`` is parsed with
        ``pd.to_datetime``. The frame may carry any index and is not modified.
    freq : str, default '1H'
        Resampling rule passed to ``DataFrame.resample``.

    Returns
    -------
    pandas.DataFrame
        Columns ``open`` (first), ``high`` (max), ``low`` (min), ``close``
        (last) and ``volume`` (sum) per bin, indexed by bin start time.
        Naive timestamps are interpreted as UTC and the index is converted
        to Asia/Singapore. Bins with any missing value are dropped.
    """
    # Keep the last row seen for every timestamp. De-duplicating on the column
    # itself (instead of feeding index labels to iloc) gives the right rows
    # whatever index the input frame carries.
    deduped = prices.drop_duplicates(subset='timestamp', keep='last')
    deduped = deduped.assign(
        timestamp=pd.to_datetime(deduped['timestamp'])
    ).set_index('timestamp')

    candles = deduped[['open', 'high', 'low', 'close']].resample(freq).agg(
        {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'}
    )
    volume = deduped[['volume']].resample(freq).sum()

    result = pd.concat([candles, volume], axis=1)
    # Binning happens before the conversion, so bin edges follow the original
    # (UTC) clock and are only re-expressed in Singapore time here.
    result.index = result.index.tz_localize('UTC').tz_convert('Asia/Singapore')

    # Empty bins have NaN prices (their volume sums to 0); drop them.
    return result.dropna()

# Collapse the raw bars into daily candles, name the time index 'Date' and
# switch the price/volume columns to capitalised labels.
crypto = cleanPx(crypto, '1D').rename_axis('Date')
crypto.columns = ['Open', 'High', 'Low', 'Close', 'Volume']

crypto

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-06 08:00:00+08:00,1579.01,1671.58,1513.03,1650.35,770062.5
2021-03-07 08:00:00+08:00,1650.68,1734.16,1630.35,1726.16,848470.8
2021-03-08 08:00:00+08:00,1726.46,1843.59,1664.96,1833.18,1149711.0
2021-03-09 08:00:00+08:00,1833.22,1870.77,1798.2,1870.51,857229.5
2021-03-10 08:00:00+08:00,1870.5,1877.69,1757.0,1795.12,937230.6
2021-03-11 08:00:00+08:00,1795.28,1846.7,1725.0,1825.78,909011.5
2021-03-12 08:00:00+08:00,1825.78,1841.88,1719.72,1766.1,869564.6
2021-03-13 08:00:00+08:00,1766.13,1944.3,1727.75,1922.35,926283.2
2021-03-14 08:00:00+08:00,1922.35,1933.1,1836.0,1848.1,637963.6
2021-03-15 08:00:00+08:00,1848.1,1891.76,1730.0,1793.48,1025327.0


## Patterns for Detection

In [4]:
import talib

# Pattern functions excluded from detection. None of them has an entry in
# `candle_rankings`, so they could not be ranked against other matches.
removed = ['CDLCOUNTERATTACK', 'CDLLONGLINE', 'CDLSHORTLINE',
           'CDLSTALLEDPATTERN', 'CDLKICKINGBYLENGTH']

# Every TA-Lib 'Pattern Recognition' function name except the excluded ones,
# in the order TA-Lib reports them.
candle_names = list(
    filter(
        lambda name: name not in removed,
        talib.get_function_groups()['Pattern Recognition'],
    )
)

In [5]:
# interested = ['CDLDOJI', 'CDLDOJISTAR','CDLENGULFING']
# candle_names = [name for name in candle_names if name in interested]

# len(candle_names)

In [6]:
# Display the remaining pattern names as a single comma-separated string.
', '.join(candle_names)

'CDL2CROWS, CDL3BLACKCROWS, CDL3INSIDE, CDL3LINESTRIKE, CDL3OUTSIDE, CDL3STARSINSOUTH, CDL3WHITESOLDIERS, CDLABANDONEDBABY, CDLADVANCEBLOCK, CDLBELTHOLD, CDLBREAKAWAY, CDLCLOSINGMARUBOZU, CDLCONCEALBABYSWALL, CDLDARKCLOUDCOVER, CDLDOJI, CDLDOJISTAR, CDLDRAGONFLYDOJI, CDLENGULFING, CDLEVENINGDOJISTAR, CDLEVENINGSTAR, CDLGAPSIDESIDEWHITE, CDLGRAVESTONEDOJI, CDLHAMMER, CDLHANGINGMAN, CDLHARAMI, CDLHARAMICROSS, CDLHIGHWAVE, CDLHIKKAKE, CDLHIKKAKEMOD, CDLHOMINGPIGEON, CDLIDENTICAL3CROWS, CDLINNECK, CDLINVERTEDHAMMER, CDLKICKING, CDLLADDERBOTTOM, CDLLONGLEGGEDDOJI, CDLMARUBOZU, CDLMATCHINGLOW, CDLMATHOLD, CDLMORNINGDOJISTAR, CDLMORNINGSTAR, CDLONNECK, CDLPIERCING, CDLRICKSHAWMAN, CDLRISEFALL3METHODS, CDLSEPARATINGLINES, CDLSHOOTINGSTAR, CDLSPINNINGTOP, CDLSTICKSANDWICH, CDLTAKURI, CDLTASUKIGAP, CDLTHRUSTING, CDLTRISTAR, CDLUNIQUE3RIVER, CDLUPSIDEGAP2CROWS, CDLXSIDEGAP3METHODS'

In [7]:
# Turn the 'Date' index back into a column and keep only the time and price
# fields (Volume is dropped here).
# .copy() makes the selection an independent frame, so the pattern columns
# added in later cells are not written into a slice of another frame.
# The rename mapping ties each new name to its source column explicitly
# instead of relying on column position.
crypto = (
    crypto.reset_index()[['Date', 'Open', 'High', 'Low', 'Close']]
    .copy()
    .rename(columns={'Date': 'time', 'Open': 'open', 'High': 'high',
                     'Low': 'low', 'Close': 'close'})
)

## Ranking of Patterns
Each candlestick pattern is assigned a rank (a lower number means a more important pattern) so that, when several patterns match the same candle, the most important one is selected for that candle.

In [8]:
# Priority of every "<TA-Lib pattern function>_Bull" / "<...>_Bear" label.
# A lower number means a higher priority: when several patterns fire on the
# same candle, the naming step keeps the label with the smallest value here.
# The bullish and bearish variants of a pattern may share a rank or differ,
# and the ranks are not contiguous.
# NOTE(review): the numbers appear to come from the article referenced at the
# top of this notebook; their original source is not shown here -- confirm
# before editing any value.
candle_rankings = {
        "CDL3LINESTRIKE_Bull": 1,
        "CDL3LINESTRIKE_Bear": 2,
        "CDL3BLACKCROWS_Bull": 3,
        "CDL3BLACKCROWS_Bear": 3,
        "CDLEVENINGSTAR_Bull": 4,
        "CDLEVENINGSTAR_Bear": 4,
        "CDLTASUKIGAP_Bull": 5,
        "CDLTASUKIGAP_Bear": 5,
        "CDLINVERTEDHAMMER_Bull": 6,
        "CDLINVERTEDHAMMER_Bear": 6,
        "CDLMATCHINGLOW_Bull": 7,
        "CDLMATCHINGLOW_Bear": 7,
        "CDLABANDONEDBABY_Bull": 8,
        "CDLABANDONEDBABY_Bear": 8,
        "CDLBREAKAWAY_Bull": 10,
        "CDLBREAKAWAY_Bear": 10,
        "CDLMORNINGSTAR_Bull": 12,
        "CDLMORNINGSTAR_Bear": 12,
        "CDLPIERCING_Bull": 13,
        "CDLPIERCING_Bear": 13,
        "CDLSTICKSANDWICH_Bull": 14,
        "CDLSTICKSANDWICH_Bear": 14,
        "CDLTHRUSTING_Bull": 15,
        "CDLTHRUSTING_Bear": 15,
        "CDLINNECK_Bull": 17,
        "CDLINNECK_Bear": 17,
        "CDL3INSIDE_Bull": 20,
        "CDL3INSIDE_Bear": 56,
        "CDLHOMINGPIGEON_Bull": 21,
        "CDLHOMINGPIGEON_Bear": 21,
        "CDLDARKCLOUDCOVER_Bull": 22,
        "CDLDARKCLOUDCOVER_Bear": 22,
        "CDLIDENTICAL3CROWS_Bull": 24,
        "CDLIDENTICAL3CROWS_Bear": 24,
        "CDLMORNINGDOJISTAR_Bull": 25,
        "CDLMORNINGDOJISTAR_Bear": 25,
        "CDLXSIDEGAP3METHODS_Bull": 27,
        "CDLXSIDEGAP3METHODS_Bear": 26,
        "CDLTRISTAR_Bull": 28,
        "CDLTRISTAR_Bear": 76,
        "CDLGAPSIDESIDEWHITE_Bull": 46,
        "CDLGAPSIDESIDEWHITE_Bear": 29,
        "CDLEVENINGDOJISTAR_Bull": 30,
        "CDLEVENINGDOJISTAR_Bear": 30,
        "CDL3WHITESOLDIERS_Bull": 32,
        "CDL3WHITESOLDIERS_Bear": 32,
        "CDLONNECK_Bull": 33,
        "CDLONNECK_Bear": 33,
        "CDL3OUTSIDE_Bull": 34,
        "CDL3OUTSIDE_Bear": 39,
        "CDLRICKSHAWMAN_Bull": 35,
        "CDLRICKSHAWMAN_Bear": 35,
        "CDLSEPARATINGLINES_Bull": 36,
        "CDLSEPARATINGLINES_Bear": 40,
        "CDLLONGLEGGEDDOJI_Bull": 37,
        "CDLLONGLEGGEDDOJI_Bear": 37,
        "CDLHARAMI_Bull": 38,
        "CDLHARAMI_Bear": 72,
        "CDLLADDERBOTTOM_Bull": 41,
        "CDLLADDERBOTTOM_Bear": 41,
        "CDLCLOSINGMARUBOZU_Bull": 70,
        "CDLCLOSINGMARUBOZU_Bear": 43,
        "CDLTAKURI_Bull": 47,
        "CDLTAKURI_Bear": 47,
        "CDLDOJISTAR_Bull": 49,
        "CDLDOJISTAR_Bear": 51,
        "CDLHARAMICROSS_Bull": 50,
        "CDLHARAMICROSS_Bear": 80,
        "CDLADVANCEBLOCK_Bull": 54,
        "CDLADVANCEBLOCK_Bear": 54,
        "CDLSHOOTINGSTAR_Bull": 55,
        "CDLSHOOTINGSTAR_Bear": 55,
        "CDLMARUBOZU_Bull": 71,
        "CDLMARUBOZU_Bear": 57,
        "CDLUNIQUE3RIVER_Bull": 60,
        "CDLUNIQUE3RIVER_Bear": 60,
        "CDL2CROWS_Bull": 61,
        "CDL2CROWS_Bear": 61,
        "CDLBELTHOLD_Bull": 62,
        "CDLBELTHOLD_Bear": 63,
        "CDLHAMMER_Bull": 65,
        "CDLHAMMER_Bear": 65,
        "CDLHIGHWAVE_Bull": 67,
        "CDLHIGHWAVE_Bear": 67,
        "CDLSPINNINGTOP_Bull": 69,
        "CDLSPINNINGTOP_Bear": 73,
        "CDLUPSIDEGAP2CROWS_Bull": 74,
        "CDLUPSIDEGAP2CROWS_Bear": 74,
        "CDLGRAVESTONEDOJI_Bull": 77,
        "CDLGRAVESTONEDOJI_Bear": 77,
        "CDLHIKKAKEMOD_Bull": 82,
        "CDLHIKKAKEMOD_Bear": 81,
        "CDLHIKKAKE_Bull": 85,
        "CDLHIKKAKE_Bear": 83,
        "CDLENGULFING_Bull": 84,
        "CDLENGULFING_Bear": 91,
        "CDLMATHOLD_Bull": 86,
        "CDLMATHOLD_Bear": 86,
        "CDLHANGINGMAN_Bull": 87,
        "CDLHANGINGMAN_Bear": 87,
        "CDLRISEFALL3METHODS_Bull": 94,
        "CDLRISEFALL3METHODS_Bear": 89,
        "CDLKICKING_Bull": 96,
        "CDLKICKING_Bear": 102,
        "CDLDRAGONFLYDOJI_Bull": 98,
        "CDLDRAGONFLYDOJI_Bear": 98,
        "CDLCONCEALBABYSWALL_Bull": 101,
        "CDLCONCEALBABYSWALL_Bear": 101,
        "CDL3STARSINSOUTH_Bull": 103,
        "CDL3STARSINSOUTH_Bear": 103,
        "CDLDOJI_Bull": 104,
        "CDLDOJI_Bear": 104
    }

## Detection of Patterns

In [9]:
# extract OHLC 
op = crypto['open']
hi = crypto['high']
lo = crypto['low']
cl = crypto['close']

# create columns for each pattern
for candle in candle_names:
    # below is same as;
    # df["CDL3LINESTRIKE"] = talib.CDL3LINESTRIKE(op, hi, lo, cl)
    crypto[candle] = getattr(talib, candle)(op, hi, lo, cl)

## Naming of Patterns

In [10]:
def _best_pattern(signals, rankings):
    """Pick the highest-priority pattern among one candle's detector outputs.

    Parameters
    ----------
    signals : mapping of str -> number
        Detector output per pattern name for a single candle. 0 means the
        pattern did not fire, a positive value is labelled bullish and any
        other non-zero value bearish.
    rankings : mapping of str -> number
        Priority per ``<pattern>_Bull`` / ``<pattern>_Bear`` label; the
        lowest value wins.

    Returns
    -------
    tuple of (str, int)
        The winning label (``'NO_PATTERN'`` when nothing fired) and the number
        of patterns that fired on the candle.
    """
    labels = [
        name + ('_Bull' if value > 0 else '_Bear')
        for name, value in signals.items()
        if value != 0
    ]
    if not labels:
        return 'NO_PATTERN', 0

    # A label without a rank sorts last instead of raising KeyError part-way
    # through the frame. It is still reported when it is the only match.
    # min() returns the first label among ties, i.e. candle_names order.
    unranked = float('inf')
    best = min(labels, key=lambda label: rankings.get(label, unranked))
    return best, len(labels)


# One pass over the detector columns; both summary columns are built in full
# and assigned once, so the text column never starts life as a float column.
matches = [
    _best_pattern(signals, candle_rankings)
    for signals in crypto[candle_names].to_dict('records')
]
crypto['candlestick_pattern'] = np.array(
    [label for label, _ in matches], dtype=object
)
# Stored as float so the saved CSV keeps the same number format as before.
crypto['candlestick_match_count'] = np.array(
    [count for _, count in matches], dtype=float
)


In [11]:
# clean up candle columns
try:
    crypto.drop(candle_names, axis = 1, inplace = True)
except:
    pass

crypto.loc[crypto.candlestick_pattern == 'NO_PATTERN', 'candlestick_pattern'] = ''
crypto.candlestick_pattern = crypto.candlestick_pattern.apply(lambda x: x[3:])

## Saving the output

In [12]:
OUTPUT_FOLDER = '../output/'

# Create the folder if it is missing so the write does not fail on a fresh
# checkout (`os` is in scope through the `import os.path` in the imports cell).
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# index=False leaves the row index out of the file; the candle time is
# written from the 'time' column.
crypto.to_csv(os.path.join(OUTPUT_FOLDER, 'ethusd.csv'), index=False)

## Visualisation
See the visualisation on Tableau here: [Candlestick Patterns](https://public.tableau.com/profile/edsicage#!/vizhome/MachineTrading/CandlestickPattern)

---
# END