In [176]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import talib
import pandas as pd
from datetime import datetime
import plotly.graph_objects as go
from tqdm import tqdm
from glob import glob

import plotly
import plotly.offline as py
import plotly.graph_objs as go

# My libs
from src.utils.data_util import DataUtil
from src.strategies.candle_stick import CandleStick

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.expand_frame_repr', False)

In [177]:
list_cdls = talib.get_function_groups()['Pattern Recognition']

In [178]:
# list_cdls = ['CDLENGULFING', 'CDL3OUTSIDE', 'CDL3INSIDE', 'CDLHARAMI', 'CDLDRAGONFLYDOJI']

```json
'CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD', 'CDLBREAKAWAY', 'CDLCLOSINGMARUBOZU', 'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK', 'CDLDARKCLOUDCOVER', 'CDLDOJI', 'CDLDOJISTAR', 'CDLDRAGONFLYDOJI', 'CDLENGULFING', 'CDLEVENINGDOJISTAR', 'CDLEVENINGSTAR', 'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHANGINGMAN', 'CDLHARAMI', 'CDLHARAMICROSS', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLHIKKAKEMOD', 'CDLHOMINGPIGEON', 'CDLIDENTICAL3CROWS', 'CDLINNECK', 'CDLINVERTEDHAMMER', 'CDLKICKING', 'CDLKICKINGBYLENGTH', 'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU', 'CDLMATCHINGLOW', 'CDLMATHOLD', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR', 'CDLONNECK', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS', 'CDLSEPARATINGLINES', 'CDLSHOOTINGSTAR', 'CDLSHORTLINE', 'CDLSPINNINGTOP', 'CDLSTALLEDPATTERN', 'CDLSTICKSANDWICH', 'CDLTAKURI', 'CDLTASUKIGAP', 'CDLTHRUSTING', 'CDLTRISTAR', 'CDLUNIQUE3RIVER', 'CDLUPSIDEGAP2CROWS', 'CDLXSIDEGAP3METHODS'
```

In [179]:
data_util = DataUtil()
# Selected candlestick patterns initiation
cdl_pattern = CandleStick(list_cdls)

## Important patterns
* CDL3OUTSIDE : This may not work in resistance and support levels. such as ema, vwap
* CDLGAPSIDESIDEWHITE: Very nice move 2 times.
* CDL3INSIDE
* CDLHARAMI: Bearish days Harami (-) has pretty strong move on down side
* DRAGONFLYDOJI: Looks quite a nice reversal pattern. It has a huge moves

# Data Collection

In [180]:
def sing_day_data(file_path):
    """
    Load single data of data with pre processing (epoch, date string)
    """
    df = pd.read_csv(file_path)
    df = data_util.pre_data_process(df)
    return df

In [181]:
# Load all the TF files
all_files = glob('data/STK/1_min/TSLA/*')

source_lst = []
for path in tqdm(all_files):
    source_lst.append(sing_day_data(path))

source = pd.concat(source_lst).reset_index(drop=True)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 568/568 [00:30<00:00, 18.63it/s]


# Analysis

In [305]:
df = source.copy()

In [306]:
# Find all the dates 
dates = df['date_str'].unique()

In [307]:
def find_direction(t1, t2, t3, t4):
    """
    Find the next candle move up or down based on the identified pattern
    """
    if (t1 > 0) and (t2 > 0):
        return 2
    elif (t1 < 0)  and (t2 < 0):
        return 1
    else:
        return 0

"""
Find the next candles moves based on daily basis. Because If we don't process this by day. Then the next day candles may
create bias in the process
"""
generated_direction_by_day = []
for date in tqdm(dates):
    tmp_data = df[df['date_str'] == date].copy()
    tmp_data = tmp_data.sort_values(['date'])
    
    # Find the close price on another n time stamps, the timestamp may vary based on time frame (e.g. 1min, 2 min, 5 min)
    # This will help to understand the price movement up or down and by price
    # Change in price + or -
    tmp_data['t+1'] = tmp_data['close'].shift(-1) - tmp_data['close']
    
    # Find the direction based on 3 candles
#     tmp_data['price_direction'] = tmp_data.apply(lambda x: find_direction(x['t+1'], x['t+2'], x['t+3'], x['t+4']), axis=1)
    
    generated_direction_by_day.append(tmp_data)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 392/392 [00:02<00:00, 155.97it/s]


In [308]:
# Combine processed data
df  = pd.concat(generated_direction_by_day).reset_index(drop=True)

In [309]:
# Generate candle stick patterns for given input
df = cdl_pattern.generate_pattern(df)

In [310]:
# df = df[df['t+1'] != 0].copy()

In [282]:
import plotly.express as px
fig = px.histogram(df, x="t+1")
fig.show()

### Move of more than $1, Long Side

In [316]:
long_position = df[df['t+1'] >= 1].copy()

In [317]:
all_position = df.copy()

In [318]:
def generte_order(in_data):
    # Move the columns to rows
    in_data = in_data.drop(['open', 'high', 'low', 'close', 'volume', 'barCount', 'average','time', 'date_str', 'date_epoch', '9EMA', '20EMA', 't+1',], axis=1)
    in_data = in_data.melt(id_vars=['date'], var_name="cdl_pattern", value_name="pattern_check")
    
    in_data = in_data[in_data['pattern_check'] != 0]
    
    in_data = in_data.groupby(['date'])['cdl_pattern'].agg(lambda x: ' | '.join(x)).reset_index()
    in_data = in_data.groupby(['cdl_pattern'])['date'].count().reset_index().sort_values('date', ascending=False)
    
    return in_data

In [319]:
long_position = generte_order(long_position)

In [320]:
all_position = generte_order(all_position)

In [321]:
combined = all_position.merge(long_position, on=['cdl_pattern'], suffixes=('_total', '_move'))

In [322]:
combined['win_rate'] = (combined['date_move']/combined['date_total'])*100

In [323]:
combined.sort_values(['win_rate'], ascending=False)

Unnamed: 0,cdl_pattern,date_total,date_move,win_rate
385,CDLBELTHOLD | CDLLONGLINE | CDLMATCHINGLOW,1,22,2200.0
379,CDLLONGLINE | CDLMATCHINGLOW,1,19,1900.0
305,CDLBELTHOLD | CDLMATCHINGLOW,5,59,1180.0
318,CDLCLOSINGMARUBOZU | CDLMATCHINGLOW,4,26,650.0
361,CDLHANGINGMAN | CDLMATCHINGLOW,2,9,450.0
357,CDLDOJI | CDLDRAGONFLYDOJI | CDLHANGINGMAN | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLTAKURI,2,4,200.0
57,CDLMATCHINGLOW,123,236,191.869919
168,CDLBELTHOLD | CDLCLOSINGMARUBOZU | CDLLONGLINE | CDLMARUBOZU | CDLMATCHINGLOW,18,31,172.222222
356,CDLDOJI | CDLGRAVESTONEDOJI | CDLHIGHWAVE | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLSPINNINGTOP,2,3,150.0
118,CDLMATCHINGLOW | CDLSPINNINGTOP,31,35,112.903226


In [275]:
long_df_oneday

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA
2080,2020-01-23 22:53:00,115.06,115.37,114.54,115.24,5123,584,114.9234,22:53:00,2020-01-23,1579834380,1.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,114.559226,113.825110
2357,2020-01-27 22:32:00,108.58,108.80,108.04,108.57,2656,336,108.4194,22:32:00,2020-01-27,1580178720,1.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,-100,-100,-100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,108.489392,108.495895
2798,2020-01-30 22:27:00,125.86,126.00,125.80,125.87,349,39,125.8920,22:27:00,2020-01-30,1580437620,1.13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,-100,-100,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,125.594379,125.652279
2803,2020-01-30 22:32:00,124.56,124.58,123.60,124.05,10937,1210,124.0896,22:32:00,2020-01-30,1580437920,1.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125.196766,125.467163
2804,2020-01-30 22:33:00,124.14,125.21,124.00,125.21,7467,865,124.6410,22:33:00,2020-01-30,1580437980,1.11,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125.199413,125.442671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61140,2021-07-22 23:01:00,650.15,650.15,649.46,649.47,197,121,649.8580,23:01:00,2021-07-22,1627009260,1.13,0,0,0,0,0,0,0,0,0,-100,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,650.351375,651.411332
61166,2021-07-22 23:27:00,647.46,647.46,646.88,646.88,319,147,647.0280,23:27:00,2021-07-22,1627010820,1.18,0,0,0,0,0,0,0,0,0,-100,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,647.574782,648.103738
61213,2021-07-23 21:35:00,642.20,643.27,641.56,642.42,542,271,642.2920,21:35:00,2021-07-23,1627090500,1.29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,-100,0,0,0,0,644.313866,645.639351
61218,2021-07-23 21:40:00,643.27,644.37,642.89,643.22,651,279,643.5760,21:40:00,2021-07-23,1627090800,1.58,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,643.502611,644.649852


In [274]:
df

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA
0,2020-01-03 21:15:00,85.20,85.20,85.19,85.19,35,3,85.1966,21:15:00,2020-01-03,1578100500,0.57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
1,2020-01-03 21:16:00,85.19,86.06,85.16,85.76,766,87,85.4698,21:16:00,2020-01-03,1578100560,-0.28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
2,2020-01-03 21:17:00,85.73,85.76,85.45,85.48,66,11,85.5576,21:17:00,2020-01-03,1578100620,0.69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
3,2020-01-03 21:18:00,85.49,86.30,85.41,86.17,628,66,85.9854,21:18:00,2020-01-03,1578100680,0.06,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
4,2020-01-03 21:19:00,86.19,86.44,85.92,86.23,676,79,86.2232,21:19:00,2020-01-03,1578100740,0.07,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61347,2021-07-23 23:55:00,639.41,640.21,639.36,639.80,179,89,639.7720,23:55:00,2021-07-23,1627098900,0.34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,639.614960,639.901831
61348,2021-07-23 23:56:00,639.91,640.18,639.87,640.14,87,51,640.0350,23:56:00,2021-07-23,1627098960,-0.33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,639.719968,639.924513
61349,2021-07-23 23:57:00,640.10,640.29,639.69,639.81,85,57,640.0040,23:57:00,2021-07-23,1627099020,-0.18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,639.737974,639.913607
61350,2021-07-23 23:58:00,639.80,640.01,639.58,639.63,88,69,639.8170,23:58:00,2021-07-23,1627099080,-0.27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,639.716380,639.886597


In [273]:
data_all

Unnamed: 0,cdl_pattern,date
0,CDL2CROWS | CDL3BLACKCROWS | CDL3INSIDE | CDL3LINESTRIKE | CDL3OUTSIDE | CDL3STARSINSOUTH | CDL3WHITESOLDIERS | CDLABANDONEDBABY | CDLADVANCEBLOCK | CDLBELTHOLD | CDLBREAKAWAY | CDLCLOSINGMARUBOZU | CDLCONCEALBABYSWALL | CDLCOUNTERATTACK | CDLDARKCLOUDCOVER | CDLDOJI | CDLDOJISTAR | CDLDRAGONFLYDOJI | CDLENGULFING | CDLEVENINGDOJISTAR | CDLEVENINGSTAR | CDLGAPSIDESIDEWHITE | CDLGRAVESTONEDOJI | CDLHAMMER | CDLHANGINGMAN | CDLHARAMI | CDLHARAMICROSS | CDLHIGHWAVE | CDLHIKKAKE | CDLHIKKAKEMOD | CDLHOMINGPIGEON | CDLIDENTICAL3CROWS | CDLINNECK | CDLINVERTEDHAMMER | CDLKICKING | CDLKICKINGBYLENGTH | CDLLADDERBOTTOM | CDLLONGLEGGEDDOJI | CDLLONGLINE | CDLMARUBOZU | CDLMATCHINGLOW | CDLMATHOLD | CDLMORNINGDOJISTAR | CDLMORNINGSTAR | CDLONNECK | CDLPIERCING | CDLRICKSHAWMAN | CDLRISEFALL3METHODS | CDLSEPARATINGLINES | CDLSHOOTINGSTAR | CDLSHORTLINE | CDLSPINNINGTOP | CDLSTALLEDPATTERN | CDLSTICKSANDWICH | CDLTAKURI | CDLTASUKIGAP | CDLTHRUSTING | CDLTRISTAR | CDLUNIQUE3RIVER | CDLUPSIDEGAP2CROWS | CDLXSIDEGAP3METHODS,61352


In [259]:
all_candles = df.copy()

In [260]:
all_candles

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA
0,2020-01-03 21:15:00,85.20,85.20,85.19,85.19,35,3,85.1966,21:15:00,2020-01-03,1578100500,0.57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
1,2020-01-03 21:16:00,85.19,86.06,85.16,85.76,766,87,85.4698,21:16:00,2020-01-03,1578100560,-0.28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
2,2020-01-03 21:17:00,85.73,85.76,85.45,85.48,66,11,85.5576,21:17:00,2020-01-03,1578100620,0.69,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
3,2020-01-03 21:18:00,85.49,86.30,85.41,86.17,628,66,85.9854,21:18:00,2020-01-03,1578100680,0.06,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
4,2020-01-03 21:19:00,86.19,86.44,85.92,86.23,676,79,86.2232,21:19:00,2020-01-03,1578100740,0.07,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61347,2021-07-23 23:55:00,639.41,640.21,639.36,639.80,179,89,639.7720,23:55:00,2021-07-23,1627098900,0.34,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,639.614960,639.901831
61348,2021-07-23 23:56:00,639.91,640.18,639.87,640.14,87,51,640.0350,23:56:00,2021-07-23,1627098960,-0.33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,639.719968,639.924513
61349,2021-07-23 23:57:00,640.10,640.29,639.69,639.81,85,57,640.0040,23:57:00,2021-07-23,1627099020,-0.18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,639.737974,639.913607
61350,2021-07-23 23:58:00,639.80,640.01,639.58,639.63,88,69,639.8170,23:58:00,2021-07-23,1627099080,-0.27,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,639.716380,639.886597


In [218]:
long_df

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA
2080,2020-01-23 22:53:00,115.06,115.37,114.54,115.24,5123,584,114.9234,22:53:00,2020-01-23,1579834380,1.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,114.559226,113.825110
2357,2020-01-27 22:32:00,108.58,108.80,108.04,108.57,2656,336,108.4194,22:32:00,2020-01-27,1580178720,1.22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,-100,-100,-100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,108.489392,108.495895
2798,2020-01-30 22:27:00,125.86,126.00,125.80,125.87,349,39,125.8920,22:27:00,2020-01-30,1580437620,1.13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,-100,-100,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,125.594379,125.652279
2803,2020-01-30 22:32:00,124.56,124.58,123.60,124.05,10937,1210,124.0896,22:32:00,2020-01-30,1580437920,1.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125.196766,125.467163
2804,2020-01-30 22:33:00,124.14,125.21,124.00,125.21,7467,865,124.6410,22:33:00,2020-01-30,1580437980,1.11,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,125.199413,125.442671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61140,2021-07-22 23:01:00,650.15,650.15,649.46,649.47,197,121,649.8580,23:01:00,2021-07-22,1627009260,1.13,0,0,0,0,0,0,0,0,0,-100,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,650.351375,651.411332
61166,2021-07-22 23:27:00,647.46,647.46,646.88,646.88,319,147,647.0280,23:27:00,2021-07-22,1627010820,1.18,0,0,0,0,0,0,0,0,0,-100,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,647.574782,648.103738
61213,2021-07-23 21:35:00,642.20,643.27,641.56,642.42,542,271,642.2920,21:35:00,2021-07-23,1627090500,1.29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,-100,0,0,0,0,644.313866,645.639351
61218,2021-07-23 21:40:00,643.27,644.37,642.89,643.22,651,279,643.5760,21:40:00,2021-07-23,1627090800,1.58,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,643.502611,644.649852


In [209]:
import plotly.express as px
fig = px.histogram(long_df, x="t+1")
fig.show()

In [138]:
data = data_util.pre_data_process(data)

In [139]:
# Generate candle stick patterns for given input
data = cdl_pattern.generate_pattern(data)

In [129]:
data

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA
0,2021-07-26 21:15:00,649.0,649.0,648.5,648.5,6,12,648.878,21:15:00,2021-07-26,1627348500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
1,2021-07-26 21:16:00,649.45,649.45,649.2,649.2,5,9,649.252,21:16:00,2021-07-26,1627348560,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
2,2021-07-26 21:17:00,649.2,649.2,649.2,649.2,0,0,649.2,21:17:00,2021-07-26,1627348620,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
3,2021-07-26 21:18:00,649.3,649.5,649.3,649.5,4,4,649.438,21:18:00,2021-07-26,1627348680,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
4,2021-07-26 21:19:00,649.96,649.96,649.75,649.75,4,4,649.908,21:19:00,2021-07-26,1627348740,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
5,2021-07-26 21:20:00,649.99,649.99,649.77,649.78,3,4,649.872,21:20:00,2021-07-26,1627348800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
6,2021-07-26 21:21:00,649.92,650.46,649.92,650.03,8,12,650.08,21:21:00,2021-07-26,1627348860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
7,2021-07-26 21:22:00,649.98,650.46,649.52,649.52,10,10,650.143,21:22:00,2021-07-26,1627348920,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,
8,2021-07-26 21:23:00,650.2,650.2,650.2,650.2,1,1,650.2,21:23:00,2021-07-26,1627348980,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,649.52,
9,2021-07-26 21:24:00,649.99,649.99,649.99,649.99,1,1,649.99,21:24:00,2021-07-26,1627349040,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,649.614,


In [159]:
data['rule'] = (data['CDLENGULFING'] >= 100) & (data['CDLMORNINGSTAR'] >= 100) & (data['CDLMORNINGDOJISTAR'] >= 100)

In [160]:
data[data['rule']]

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA,spintop_check,gravestonedoji_check,gravestonedoji_check - 1,rule
15694,2020-04-07 21:34:00,107.2,107.88,107.2,107.79,3736,461,107.5064,21:34:00,2020-04-07,1586309640,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107.877275,107.16904,False,False,False,True
16777,2020-04-13 23:07:00,119.75,120.15,119.75,120.05,1383,141,119.9904,23:07:00,2020-04-13,1586833620,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,120.069436,120.021203,False,False,False,True
21848,2020-05-14 22:23:00,153.9,154.4,153.8,154.27,1051,105,154.123,22:23:00,2020-05-14,1589509380,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,100,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,154.397906,154.628105,False,False,False,True
25239,2020-06-03 23:54:00,177.46,177.7,177.46,177.63,490,53,177.5996,23:54:00,2020-06-03,1591242840,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,177.748752,177.913727,False,False,True,True
27649,2020-06-18 22:49:00,201.37,201.8,201.33,201.77,882,114,201.6162,22:49:00,2020-06-18,1592534940,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,201.272375,201.114861,False,False,False,True
33260,2020-07-22 22:50:00,314.58,315.35,314.05,315.25,1126,112,314.863,22:50:00,2020-07-22,1595472600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,316.078397,317.022614,False,False,False,True
37727,2020-08-18 23:02:00,380.56,381.22,380.56,381.22,871,112,380.9246,23:02:00,2020-08-18,1597806120,0,0,0,0,0,0,0,0,0,100,0,100,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,381.39911,380.870715,False,False,False,True
41167,2020-09-08 22:37:00,356.76,359.22,356.07,358.77,2319,1055,358.132,22:37:00,2020-09-08,1599619020,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,360.8747,361.128092,False,False,False,True
44757,2020-09-30 21:57:00,428.05,429.34,428.01,428.82,1676,729,428.776,21:57:00,2020-09-30,1601517420,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,427.53082,426.462422,False,False,False,True
44931,2020-10-01 22:06:00,437.27,438.0,437.19,437.81,819,372,437.713,22:06:00,2020-10-01,1601604360,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,437.564883,437.712968,False,False,False,True


In [140]:
data['spintop_check'] = data['CDLSPINNINGTOP'] >= 100
data['gravestonedoji_check'] = data['CDLGRAVESTONEDOJI'] >= 100

In [141]:
data['gravestonedoji_check  - 1'] = data['gravestonedoji_check'].shift(1)

In [142]:
data[data[['spintop_check', 'gravestonedoji_check  - 1']].all(axis='columns')]

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,9EMA,20EMA,spintop_check,gravestonedoji_check,gravestonedoji_check - 1
136,2020-01-03 23:31:00,89.87,89.9,89.8,89.88,1732,187,89.8464,23:31:00,2020-01-03,1578108660,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,89.789299,89.651408,True,False,True
301,2020-01-03 23:31:00,89.87,89.9,89.8,89.88,1732,187,89.8464,23:31:00,2020-01-03,1578108660,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,89.789299,89.651414,True,False,True
466,2020-01-03 23:31:00,89.87,89.9,89.8,89.88,1732,187,89.8464,23:31:00,2020-01-03,1578108660,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,89.789299,89.651414,True,False,True
909,2020-01-08 22:39:00,94.33,94.46,94.2,94.36,3837,452,94.3454,22:39:00,2020-01-08,1578537540,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,94.53958,94.653769,True,False,True
1830,2020-01-14 21:30:00,108.2,108.22,108.16,108.2,96,13,108.2058,21:30:00,2020-01-14,1579051800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,100,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,100,0,0,0,0,100,0,0,0,0,0,0,0,0,0,108.037536,106.784408,True,False,True
2188,2020-01-16 21:58:00,99.4,99.42,99.31,99.4,409,41,99.4032,21:58:00,2020-01-16,1579226280,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,99.375958,99.500184,True,False,True
3014,2020-01-21 21:59:00,104.6,104.65,104.58,104.61,104,13,104.6068,21:59:00,2020-01-21,1579658340,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,104.629426,104.619275,True,False,True
3613,2020-01-24 23:43:00,112.57,112.74,112.54,112.59,831,69,112.664,23:43:00,2020-01-24,1579923780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,112.70135,112.794424,True,False,True
3778,2020-01-24 23:43:00,112.57,112.74,112.54,112.59,831,69,112.664,23:43:00,2020-01-24,1579923780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,112.70135,112.794423,True,False,True
3943,2020-01-24 23:43:00,112.57,112.74,112.54,112.59,831,69,112.664,23:43:00,2020-01-24,1579923780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,112.70135,112.794423,True,False,True


## Find the most frequest pattern

In [122]:
# Move the columns to rows
candles = data.drop(['open', 'high', 'low', 'close', 'volume', 'barCount', 'average','time', 'date_str', 'date_epoch'], axis=1)
candles = candles.melt(id_vars=['date'],
                       var_name="cdl_pattern",
                       value_name="pattern_check")

In [123]:
candles['spintop_check'] = (candles['cdl_pattern'] == 'CDLSPINNINGTOP') & (candles['pattern_check'] >= 100)

In [128]:
candles.head(300)

Unnamed: 0,date,cdl_pattern,pattern_check,spintop_check,gravestonedoji_check,gravestonedoji_check - 1
0,2021-07-26 21:15:00,CDL2CROWS,0.0,False,False,
1,2021-07-26 21:16:00,CDL2CROWS,0.0,False,False,False
2,2021-07-26 21:17:00,CDL2CROWS,0.0,False,False,False
3,2021-07-26 21:18:00,CDL2CROWS,0.0,False,False,False
4,2021-07-26 21:19:00,CDL2CROWS,0.0,False,False,False
5,2021-07-26 21:20:00,CDL2CROWS,0.0,False,False,False
6,2021-07-26 21:21:00,CDL2CROWS,0.0,False,False,False
7,2021-07-26 21:22:00,CDL2CROWS,0.0,False,False,False
8,2021-07-26 21:23:00,CDL2CROWS,0.0,False,False,False
9,2021-07-26 21:24:00,CDL2CROWS,0.0,False,False,False


In [126]:
candles[candles['spintop_check']]

Unnamed: 0,date,cdl_pattern,pattern_check,spintop_check,gravestonedoji_check,gravestonedoji_check - 1
8434,2021-07-26 21:34:00,CDLSPINNINGTOP,100.0,True,False,False
8445,2021-07-26 21:45:00,CDLSPINNINGTOP,100.0,True,False,False
8465,2021-07-26 22:05:00,CDLSPINNINGTOP,100.0,True,False,False
8470,2021-07-26 22:10:00,CDLSPINNINGTOP,100.0,True,False,False
8479,2021-07-26 22:19:00,CDLSPINNINGTOP,100.0,True,False,False
8480,2021-07-26 22:20:00,CDLSPINNINGTOP,100.0,True,False,False
8494,2021-07-26 22:34:00,CDLSPINNINGTOP,100.0,True,False,False
8498,2021-07-26 22:38:00,CDLSPINNINGTOP,100.0,True,False,False
8539,2021-07-26 23:19:00,CDLSPINNINGTOP,100.0,True,False,False
8547,2021-07-26 23:27:00,CDLSPINNINGTOP,100.0,True,False,False


In [124]:
candles['gravestonedoji_check'] = (candles['cdl_pattern'] == 'CDLGRAVESTONEDOJI') & (candles['pattern_check'] >= 100)

In [118]:
candles[candles['gravestonedoji_check']]

Unnamed: 0,date,cdl_pattern,pattern_check,spintop_check,gravestonedoji_check,gravestonedoji_check - 1,gravestonedoji_check - 2
3654,2021-07-26 21:39:00,CDLGRAVESTONEDOJI,100.0,False,True,False,False
3679,2021-07-26 22:04:00,CDLGRAVESTONEDOJI,100.0,False,True,False,False
3716,2021-07-26 22:41:00,CDLGRAVESTONEDOJI,100.0,False,True,False,False
3785,2021-07-26 23:50:00,CDLGRAVESTONEDOJI,100.0,False,True,False,False


In [125]:
candles['gravestonedoji_check  - 1'] = candles['gravestonedoji_check'].shift(1)

In [117]:
candles[candles[['spintop_check', 'gravestonedoji_check  - 1']].all(axis='columns')]

Unnamed: 0,date,cdl_pattern,pattern_check,spintop_check,gravestonedoji_check,gravestonedoji_check - 1,gravestonedoji_check - 2


In [12]:
candles = candles[candles['pattern_check'] != 0]

In [11]:
candles = candles.groupby(['cdl_pattern'])['date'].nunique().reset_index(name='pattern_count').sort_values(['pattern_count'], ascending=False)

In [12]:
candles.to_csv('insights/tsla_candle_patterns.csv', index=False)

## Find all trading days that has specific pattern

In [166]:
data_for_visual = source.copy()
cdl_pat = 'CDLENGULFING'
list_cdls = ['CDLENGULFING', 'CDLMORNINGSTAR', 'CDLMORNINGDOJISTAR']
cdl_pattern = CandleStick(list_cdls)
data_for_visual = cdl_pattern.generate_pattern(data_for_visual)

In [167]:
# Find the days which has selected pattern by order
days_with_patterns = data_for_visual[data_for_visual[list_cdls].all(axis='columns')]['date_str']
days_with_patterns = days_with_patterns.value_counts().reset_index(name='count')

In [168]:
days_with_patterns.sort_values(['index'], ascending=False)

Unnamed: 0,index,count
24,2021-07-13,1
0,2021-07-02,4
2,2021-06-25,3
9,2021-06-16,1
25,2021-06-01,1
7,2021-05-26,1
1,2021-05-07,3
8,2021-05-04,1
6,2021-04-27,1
11,2021-03-29,1


In [169]:
day = days_with_patterns.iloc[1]['index']

In [170]:
date = '2021-07-13'

In [171]:
# Filter data by day
df = data_for_visual[data_for_visual['date_str'] == date]

In [172]:
# Filter data for annotation
filtered = df[df[cdl_pat].astype(bool)]

In [173]:
matched_times = df[df[cdl_pat].astype(bool)]['time']

In [174]:
def find_cld_name(x):
    return str(df[df['time'] == x].iloc[0][cdl_pat])

def find_y_axis(x):
    return df[df['time'] == x].iloc[0]['high']



# Draw the lines for reference
shapes_list = []
annotation_list = []
for time in matched_times:
    annotation_list.append(go.layout.Annotation(x=time, y=find_y_axis(time), 
                                                showarrow=True,  arrowhead=1, 
                                                arrowcolor="purple", arrowsize=2, arrowwidth=2, text=find_cld_name(time)))

In [175]:
fig = go.Figure()

fig.add_trace(go.Candlestick(x=df['time'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
        text=df['close']))

fig.update_layout(
    title=f'Analysis on {date}',
    yaxis_title='Price',
    annotations=annotation_list,
    xaxis=go.layout.XAxis(rangeslider=dict (visible = False))
)

import plotly.io as pio
pio.renderers.default = 'browser'
pio.show(fig)
# fig.show()

In [82]:
data_for_visual.head()

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDLBELTHOLD
0,2020-01-03 21:00:00,84.88,85.33,84.55,85.2,579,60,84.9156,21:00:00,2020-01-03,1578099600,0
1,2020-01-03 21:01:00,85.15,85.19,85.15,85.16,45,7,85.1598,21:01:00,2020-01-03,1578099660,0
2,2020-01-03 21:02:00,85.16,85.16,85.16,85.16,20,3,85.16,21:02:00,2020-01-03,1578099720,0
3,2020-01-03 21:03:00,85.12,85.15,85.1,85.1,20,4,85.121,21:03:00,2020-01-03,1578099780,0
4,2020-01-03 21:04:00,85.11,85.11,85.11,85.11,10,2,85.11,21:04:00,2020-01-03,1578099840,0


In [11]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch
0,2020-01-03 21:00:00,84.88,85.33,84.55,85.2,579,60,84.9156,21:00:00,2020-01-03,1578099600
1,2020-01-03 21:01:00,85.15,85.19,85.15,85.16,45,7,85.1598,21:01:00,2020-01-03,1578099660
2,2020-01-03 21:02:00,85.16,85.16,85.16,85.16,20,3,85.16,21:02:00,2020-01-03,1578099720
3,2020-01-03 21:03:00,85.12,85.15,85.1,85.1,20,4,85.121,21:03:00,2020-01-03,1578099780
4,2020-01-03 21:04:00,85.11,85.11,85.11,85.11,10,2,85.11,21:04:00,2020-01-03,1578099840


In [12]:
# Find all the dates 
dates = df['date_str'].unique()

In [13]:
def find_direction(t1, t2, t3, t4):
    """
    Find the next candle move up or down based on the identified pattern
    """
    if (t1 > 0) and (t2 > 0):
        return 2
    elif (t1 < 0)  and (t2 < 0):
        return 1
    else:
        return 0

"""
Find the next candles moves based on daily basis. Because If we don't process this by day. Then the next day candles may
create bias in the process
"""
generated_direction_by_day = []
for date in tqdm(dates):
    tmp_data = df[df['date_str'] == date].copy()
    tmp_data = tmp_data.sort_values(['date'])
    
    # Find the close price on another n time stamps, the timestamp may vary based on time frame (e.g. 1min, 2 min, 5 min)
    # This will help to understand the price movement up or down and by price
    # Change in price + or -
    tmp_data['t+1'] = tmp_data['close'].shift(-1) - tmp_data['close']
    tmp_data['t+2'] = tmp_data['close'].shift(-2) - tmp_data['close'].shift(-1)
    tmp_data['t+3'] = tmp_data['close'].shift(-3) - tmp_data['close'].shift(-2)
    tmp_data['t+4'] = tmp_data['close'].shift(-4) - tmp_data['close'].shift(-3)
    tmp_data['t+5'] = tmp_data['close'].shift(-5) - tmp_data['close'].shift(-4)
    
    # Find the direction based on 3 candles
    tmp_data['price_direction'] = tmp_data.apply(lambda x: find_direction(x['t+1'], x['t+2'], x['t+3'], x['t+4']), axis=1)
    
    generated_direction_by_day.append(tmp_data)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 386/386 [00:05<00:00, 67.67it/s]


In [14]:
# Combine processed data
df  = pd.concat(generated_direction_by_day).reset_index(drop=True)

In [15]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,t+2,t+3,t+4,t+5,price_direction
0,2020-01-03 21:00:00,84.88,85.33,84.55,85.2,579,60,84.9156,21:00:00,2020-01-03,1578099600,0.0,0.0,-0.04,0.0,0.0,0
1,2020-01-03 21:00:00,84.88,85.33,84.55,85.2,579,60,84.9156,21:00:00,2020-01-03,1578099600,0.0,-0.04,0.0,0.0,0.0,0
2,2020-01-03 21:00:00,84.88,85.33,84.55,85.2,579,60,84.9156,21:00:00,2020-01-03,1578099600,-0.04,0.0,0.0,0.0,0.0,0
3,2020-01-03 21:01:00,85.15,85.19,85.15,85.16,45,7,85.1598,21:01:00,2020-01-03,1578099660,0.0,0.0,0.0,0.0,0.0,0
4,2020-01-03 21:01:00,85.15,85.19,85.15,85.16,45,7,85.1598,21:01:00,2020-01-03,1578099660,0.0,0.0,0.0,0.0,-0.06,0


In [16]:
# Generate candle stick patterns for given input
patterns = cdl_pattern.generate_pattern(df)

In [17]:
# Drop column which are not relevatn to the Prediction
patterns.drop(['date','open', 'high', 'low', 'close', 'volume', 'barCount', 'average',
       'time', 'date_str', 'date_epoch', 't+1', 't+2', 't+3', 't+4', 't+5'], axis=1, inplace=True)

In [18]:
# Filter by rows which contains atleast 1 found pattern
patterns['contains'] = patterns[list_cdls].any(axis='columns')
patterns = patterns[patterns['contains']].copy()
patterns.drop(['contains'], axis=1, inplace=True)

In [19]:
valid_classes = patterns[patterns['price_direction'].isin([1, 2])].copy()

In [20]:
other = patterns[patterns['price_direction'] == 0].copy()

In [21]:
other = other.sample(2800)

In [22]:
patterns= pd.concat([valid_classes, other]).reset_index(drop=True)

In [23]:
patterns['price_direction'].value_counts()

1    10115
2     9950
0     2800
Name: price_direction, dtype: int64

## Prepare model

In [24]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier

In [25]:
from matplotlib import pyplot

In [26]:
X = patterns.drop('price_direction', axis=1).values
y = patterns['price_direction'].values

In [27]:
scalar = preprocessing.StandardScaler().fit(X)
with open('models/scalar.pickle', 'wb') as handle:
    pickle.dump(scalar, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [28]:
X_scaled = scalar.transform(X)

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

In [30]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [31]:
"""
Model Grid Search
"""
param_grid = {'max_features': ['auto', 'sqrt', 'log2'],
              'ccp_alpha': [0.1, .01, .001],
              'max_depth' : [5, 6, 7, 8, 9],
              'criterion' :['gini', 'entropy']
             }

tree_clas = DecisionTreeClassifier(random_state=1024)

grid_search = GridSearchCV(estimator=tree_clas, param_grid=param_grid, cv=5, verbose=True)

grid_search.fit(X_train, y_train)

final_model = grid_search.best_estimator_
final_model

Fitting 5 folds for each of 90 candidates, totalling 450 fits


DecisionTreeClassifier(ccp_alpha=0.001, criterion='entropy', max_depth=7,
                       max_features='auto', random_state=1024)

In [36]:
tree_clas = DecisionTreeClassifier(ccp_alpha=0.001, class_weight=None, criterion='entropy',
                       max_depth=7, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, random_state=1024, splitter='best')
tree_clas.fit(X_train, y_train)
y_predict = tree_clas.predict(X_test)

In [37]:
list(tree_clas.predict(X_train)).count(1)

13382

In [38]:
with open('models/decision_tree.pickle', 'wb') as handle:
    pickle.dump(tree_clas, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Model performance

In [39]:
accuracy_score(y_test, y_predict)

0.4550755367081898

# Creation of New Predictions

In [40]:
import pandas as pd

In [87]:
df = sing_day_data('data/STK/1_min/TSLA/20210503.csv')

In [41]:
df = pd.read_csv(f'realtime_data/data.csv', names=['date', 'open', 'high', 'low', 'close', 'volume', 'barCount', 'average'])
df = data_util.pre_data_process(df)

In [42]:
data_patterns = cdl_pattern.generate_pattern(df)

In [43]:
input_data = data_patterns[list_cdls]

In [44]:
input_data = input_data[input_data.any(axis='columns')]

In [45]:
input_data = input_data.values

In [46]:
input_data = scalar.transform(input_data)

In [47]:
output = tree_clas.predict(input_data)

In [48]:
from src.api_call.simulate_api_call import Simulator

In [49]:
sim_obj = Simulator()

In [50]:
sim_obj.get_prediction_data()

array([[0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.13140676, 0.38604144, 0.4825518 ],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.12188789, 0.44531847, 0.43279365],
       [0.

In [105]:
X_new = patterns.iloc[0]

In [108]:
X_new = X_new.drop('price_direction')

In [110]:
X_new = X_new.values

In [112]:
X_new_scale = scaler.transform([X_new])

In [115]:
tree_clas.predict_proba(X_new_scale)

array([[0.89303661, 0.06389088, 0.04307251]])

In [51]:
importance = tree.coef_[0]
# summarize feature importance
for i,v in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (i,v))
# plot feature importance
pyplot.bar([x for x in range(len(importance))], importance)
pyplot.show()

AttributeError: 'DecisionTreeClassifier' object has no attribute 'coef_'

In [None]:
score = pd.DataFrame({'name': list_cdls, 'score': importance})

In [None]:
score.sort_values(['score'], ascending=False, inplace=True)

In [None]:
list_cdls = list(score[score['score'] > 0.1]['name'].values)

In [None]:
patterns.drop_duplicates().shape

In [None]:
# Move the columns to rows
candles = patterns.melt(id_vars=['date', 'price_direction'],
                       var_name="cdl_pattern",
                       value_name="pattern_check")

In [None]:
candles.head()

In [None]:
candles_selected = candles[candles['pattern_check'] != 0]

In [None]:
candles_selected = candles_selected.groupby(['date', 'price_direction'])['cdl_pattern'].unique().reset_index()

In [None]:
candles_selected['cdl_pattern'] = candles_selected['cdl_pattern'].apply(lambda x: ' | '.join(sorted(x)))

In [None]:
candles_selected = candles_selected.groupby(['cdl_pattern', 'price_direction'])['date'].nunique().reset_index()

In [None]:
candles_selected.sort_values(['date'], ascending=False, inplace=True)

In [None]:
candles_selected[candles_selected['price_direction'] != -1]

In [None]:
candles

In [None]:
one_pattern = candles[(candles['cdl_pattern'] == 'CDLDRAGONFLYDOJI') & (candles['pattern_check'] != 0)]

In [None]:
one_pattern = one_pattern['price_direction'].value_counts().reset_index()

In [None]:
total = one_pattern['price_direction'].sum()

In [None]:
one_pattern['frac'] = one_pattern['price_direction']/total

In [None]:
one_pattern

In [None]:
# Find the close price on another n time stamps, the timestamp may vary based on time frame (e.g. 1min, 2 min, 5 min)
# This will help to understand the price movement up or down and by price
# Change in price + or -
# Calculate the close price difference and find the percentage of price change
# df['t+1'] = (((df['close'].shift(-1) - df['close'])/df['close']) * 100).round(2)
# df['t+2'] = (((df['close'].shift(-2) - df['close'])/df['close']) * 100).round(2)
# df['t+3'] = (((df['close'].shift(-3) - df['close'])/df['close']) * 100).round(2)
# df['t+4'] = (((df['close'].shift(-4) - df['close'])/df['close']) * 100).round(2)

# Generate Candle Names

In [None]:
import talib

In [None]:
# This provide the previous day night and current day mid morning day
day = '20210601'
path = f'data/STK/1_min/TSLA/{day}.csv'
df = sing_day_data(path)

In [None]:
all_cdl_patterns = talib.get_function_groups()['Pattern Recognition']

In [None]:
# patterns = ['CDLSHORTLINE']

patterns = ['CDLDRAGONFLYDOJI']

In [None]:
for pattern in patterns:
    df[pattern] = getattr(talib, pattern)(df['open'], df['high'], df['low'], df['close'])

In [None]:
df[df[patterns].any(axis='columns')]

In [None]:
filtered = df[df['CDLDRAGONFLYDOJI'].astype(bool)]

In [None]:
matched_times = df[df['CDLDRAGONFLYDOJI'].astype(bool)]['time']

In [None]:
# Draw the lines for reference
shapes_list = []
annotation_list = []
for time in matched_times:
    shapes_list.append(dict(x0=time, x1=time, y0=0, y1=1, xref='x', yref='paper', line_width=1))
    annotation_list.append(dict(x=time, y=0.05, xref='x', yref='paper', showarrow=False, xanchor='left', text='CDLSHORTLINE'))

# Candle Chart
Draw for single day regarless of dates

In [None]:
fig = go.Figure()

fig.add_trace(go.Candlestick(x=df['time'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
        text=df['close']))

fig.update_layout(
    title=f'Analysis on {day}',
    yaxis_title='Price',
    shapes = shapes_list,
    annotations=annotation_list
)

import plotly.io as pio
pio.renderers.default = 'browser'
pio.show(fig)

# Line Chart

In [None]:
line_data = df[['date', 'time' ,'average', 'open']].copy()

In [None]:
# Find the min open price
min_open_price = line_data['open'].min()

In [None]:
# To bring all open price to 0, subtract open price from average
line_data['avg_nor'] = line_data['average'] - line_data['open']

In [None]:
dates = sorted(line_data['date'].unique())

In [None]:
fig = go.Figure()

for date in dates:
    data = line_data[line_data['date'] == date]
    fig.add_trace(go.Scatter(x=data['time'], y=data['avg_nor'], name = date))

import plotly.io as pio
pio.renderers.default = 'browser'
pio.show(fig)