In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import talib
import pandas as pd
from datetime import datetime
import plotly.graph_objects as go
from tqdm import tqdm
from glob import glob

import plotly
import plotly.offline as py
import plotly.graph_objs as go

# My libs
from src.utils.data_util import DataUtil
from src.strategies.candle_stick import CandleStick

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.expand_frame_repr', False)

In [2]:
data_util = DataUtil()
# Pass all patterns
cdl_pattern = CandleStick(talib.get_function_groups()['Pattern Recognition'])

## Important patterns
* CDL3OUTSIDE : This may not work in resistance and support levels. such as ema, vwap
* CDLGAPSIDESIDEWHITE: Very nice move 2 times.
* CDL3INSIDE
* CDLHARAMI: Bearish days Harami (-) has pretty strong move on down side
* DRAGONFLYDOJI: Looks quite a nice reversal pattern. It has a huge moves

In [3]:
print(talib.get_function_groups()['Pattern Recognition'])

['CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD', 'CDLBREAKAWAY', 'CDLCLOSINGMARUBOZU', 'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK', 'CDLDARKCLOUDCOVER', 'CDLDOJI', 'CDLDOJISTAR', 'CDLDRAGONFLYDOJI', 'CDLENGULFING', 'CDLEVENINGDOJISTAR', 'CDLEVENINGSTAR', 'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHANGINGMAN', 'CDLHARAMI', 'CDLHARAMICROSS', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLHIKKAKEMOD', 'CDLHOMINGPIGEON', 'CDLIDENTICAL3CROWS', 'CDLINNECK', 'CDLINVERTEDHAMMER', 'CDLKICKING', 'CDLKICKINGBYLENGTH', 'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU', 'CDLMATCHINGLOW', 'CDLMATHOLD', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR', 'CDLONNECK', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS', 'CDLSEPARATINGLINES', 'CDLSHOOTINGSTAR', 'CDLSHORTLINE', 'CDLSPINNINGTOP', 'CDLSTALLEDPATTERN', 'CDLSTICKSANDWICH', 'CDLTAKURI', 'CDLTASUKIGAP', 'CDL

# Single Day Analysis

In [146]:
def sing_day_data(file_path):
    df = pd.read_csv(file_path)
    df = data_util.pre_data_process(df)
    return df

In [147]:
source = sing_day_data(path)

In [148]:
# Load all the TF files
all_files = glob('data/STK/1_min/TSLA/*')

source_lst = []
for path in tqdm(all_files):
    source_lst.append(sing_day_data(path))

source = pd.concat(source_lst).reset_index(drop=True)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:04<00:00, 16.93it/s]


In [149]:
df = source.copy()

In [150]:
# Find the close price on another n time stamps, the timestamp may vary based on time frame (e.g. 1min, 2 min, 5 min)
# This will help to understand the price movement up or down and by price
# Change in price + or -
df['t+1'] = df['close'].shift(-1) - df['close']
df['t+2'] = df['close'].shift(-2) - df['close'].shift(-1)
df['t+3'] = df['close'].shift(-3) - df['close'].shift(-2)
df['t+4'] = df['close'].shift(-4) - df['close'].shift(-3)
df['t+5'] = df['close'].shift(-5) - df['close'].shift(-4)

In [151]:
def find_direction(t1, t2, t3, t4):
    if (t1 > 0):
        return 1
    elif (t1 < 0):
        return 0
    else:
        return -1

In [152]:
# Find the direction based on 3 candles
df['price_direction'] = df.apply(lambda x: find_direction(x['t+1'], x['t+2'], x['t+3'], x['t+4']), axis=1)

In [153]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,t+1,t+2,t+3,t+4,t+5,price_direction
0,2021-04-30 21:00:00,664.7,664.7,664.11,664.11,14,8,664.399,21:00:00,2021-04-30,1619830800,0.39,0.6,-0.1,0.0,-0.44,1
1,2021-04-30 21:01:00,664.5,664.5,664.5,664.5,1,1,664.5,21:01:00,2021-04-30,1619830860,0.6,-0.1,0.0,-0.44,-0.16,1
2,2021-04-30 21:02:00,664.95,665.1,664.3,665.1,14,8,664.829,21:02:00,2021-04-30,1619830920,-0.1,0.0,-0.44,-0.16,0.05,0
3,2021-04-30 21:03:00,665.03,665.35,665.0,665.0,10,6,665.053,21:03:00,2021-04-30,1619830980,0.0,-0.44,-0.16,0.05,-0.2,-1
4,2021-04-30 21:04:00,664.5,665.18,664.5,665.0,3,3,664.895,21:04:00,2021-04-30,1619831040,-0.44,-0.16,0.05,-0.2,-0.25,0


In [154]:
patterns = cdl_pattern.generate_pattern(df)

In [155]:
patterns.drop(['open', 'high', 'low', 'close', 'volume', 'barCount', 'average',
       'time', 'date_str', 'date_epoch', 't+1', 't+2', 't+3', 't+4', 't+5'], axis=1, inplace=True)

In [156]:
patterns.head()

Unnamed: 0,date,price_direction,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS
0,2021-04-30 21:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2021-04-30 21:01:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2021-04-30 21:02:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2021-04-30 21:03:00,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2021-04-30 21:04:00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [157]:
# Move the columns to rows
candles = patterns.melt(id_vars=['date', 'price_direction'],
                       var_name="cdl_pattern",
                       value_name="pattern_check")

In [158]:
candles.head()

Unnamed: 0,date,price_direction,cdl_pattern,pattern_check
0,2021-04-30 21:00:00,1,CDL2CROWS,0
1,2021-04-30 21:01:00,1,CDL2CROWS,0
2,2021-04-30 21:02:00,0,CDL2CROWS,0
3,2021-04-30 21:03:00,-1,CDL2CROWS,0
4,2021-04-30 21:04:00,0,CDL2CROWS,0


In [159]:
candles_selected = candles[candles['pattern_check'] != 0]

In [160]:
candles_selected = candles_selected.groupby(['date', 'price_direction'])['cdl_pattern'].unique().reset_index()

In [161]:
candles_selected['cdl_pattern'] = candles_selected['cdl_pattern'].apply(lambda x: ' | '.join(sorted(x)))

In [162]:
candles_selected = candles_selected.groupby(['cdl_pattern', 'price_direction'])['date'].nunique().reset_index()

In [163]:
candles_selected.sort_values(['date'], ascending=False, inplace=True)

In [164]:
candles_selected

Unnamed: 0,cdl_pattern,price_direction,date
866,CDLSHORTLINE,0,329
867,CDLSHORTLINE,1,313
198,CDLBELTHOLD,0,206
229,CDLBELTHOLD | CDLCLOSINGMARUBOZU | CDLLONGLINE | CDLMARUBOZU,1,179
228,CDLBELTHOLD | CDLCLOSINGMARUBOZU | CDLLONGLINE | CDLMARUBOZU,0,172
...,...,...,...
555,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLSPINNINGTOP,0,1
554,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSPINNINGTOP,1,1
553,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSPINNINGTOP,0,1
552,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSHORTLINE | CDLSPINNINGTOP,0,1


In [165]:
candles_selected[candles_selected['price_direction'] != -1]

Unnamed: 0,cdl_pattern,price_direction,date
866,CDLSHORTLINE,0,329
867,CDLSHORTLINE,1,313
198,CDLBELTHOLD,0,206
229,CDLBELTHOLD | CDLCLOSINGMARUBOZU | CDLLONGLINE | CDLMARUBOZU,1,179
228,CDLBELTHOLD | CDLCLOSINGMARUBOZU | CDLLONGLINE | CDLMARUBOZU,0,172
...,...,...,...
555,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLSPINNINGTOP,0,1
554,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSPINNINGTOP,1,1
553,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSPINNINGTOP,0,1
552,CDLDOJI | CDLGRAVESTONEDOJI | CDLLONGLEGGEDDOJI | CDLMATCHINGLOW | CDLRICKSHAWMAN | CDLSHORTLINE | CDLSPINNINGTOP,0,1


In [166]:
candles

Unnamed: 0,date,price_direction,cdl_pattern,pattern_check
0,2021-04-30 21:00:00,1,CDL2CROWS,0
1,2021-04-30 21:01:00,1,CDL2CROWS,0
2,2021-04-30 21:02:00,0,CDL2CROWS,0
3,2021-04-30 21:03:00,-1,CDL2CROWS,0
4,2021-04-30 21:04:00,0,CDL2CROWS,0
...,...,...,...,...
834475,2021-07-15 23:55:00,0,CDLXSIDEGAP3METHODS,0
834476,2021-07-15 23:56:00,1,CDLXSIDEGAP3METHODS,0
834477,2021-07-15 23:57:00,0,CDLXSIDEGAP3METHODS,0
834478,2021-07-15 23:58:00,1,CDLXSIDEGAP3METHODS,0


In [167]:
one_pattern = candles[(candles['cdl_pattern'] == 'CDLDRAGONFLYDOJI') & (candles['pattern_check'] != 0)]

In [168]:
one_pattern = one_pattern['price_direction'].value_counts().reset_index()

In [169]:
total = one_pattern['price_direction'].sum()

In [170]:
one_pattern['frac'] = one_pattern['price_direction']/total

In [171]:
one_pattern

Unnamed: 0,index,price_direction,frac
0,0,144,0.549618
1,1,104,0.396947
2,-1,14,0.053435


In [172]:
candles [candles['cdl_pattern'] == 'CDLDOJI | CDLHIGHWAVE | CDLLONGLEGGEDDOJI | CDLRICKSHAWMAN | CDLSPINNINGTOP']

Unnamed: 0,date,price_direction,cdl_pattern,pattern_check


In [173]:
# Find the days with abobe pattern
candles

Unnamed: 0,date,price_direction,cdl_pattern,pattern_check
0,2021-04-30 21:00:00,1,CDL2CROWS,0
1,2021-04-30 21:01:00,1,CDL2CROWS,0
2,2021-04-30 21:02:00,0,CDL2CROWS,0
3,2021-04-30 21:03:00,-1,CDL2CROWS,0
4,2021-04-30 21:04:00,0,CDL2CROWS,0
...,...,...,...,...
834475,2021-07-15 23:55:00,0,CDLXSIDEGAP3METHODS,0
834476,2021-07-15 23:56:00,1,CDLXSIDEGAP3METHODS,0
834477,2021-07-15 23:57:00,0,CDLXSIDEGAP3METHODS,0
834478,2021-07-15 23:58:00,1,CDLXSIDEGAP3METHODS,0


In [174]:
# Find the close price on another n time stamps, the timestamp may vary based on time frame (e.g. 1min, 2 min, 5 min)
# This will help to understand the price movement up or down and by price
# Change in price + or -
# Calculate the close price difference and find the percentage of price change
# df['t+1'] = (((df['close'].shift(-1) - df['close'])/df['close']) * 100).round(2)
# df['t+2'] = (((df['close'].shift(-2) - df['close'])/df['close']) * 100).round(2)
# df['t+3'] = (((df['close'].shift(-3) - df['close'])/df['close']) * 100).round(2)
# df['t+4'] = (((df['close'].shift(-4) - df['close'])/df['close']) * 100).round(2)

# Generate Candle Names

In [175]:
import talib

In [176]:
# This provide the previous day night and current day mid morning day
day = '20210601'
path = f'data/STK/1_min/TSLA/{day}.csv'
df = sing_day_data(path)

In [177]:
all_cdl_patterns = talib.get_function_groups()['Pattern Recognition']

In [178]:
# patterns = ['CDLSHORTLINE']

patterns = ['CDLDRAGONFLYDOJI']

In [179]:
for pattern in patterns:
    df[pattern] = getattr(talib, pattern)(df['open'], df['high'], df['low'], df['close'])

In [180]:
df[df[patterns].any(axis='columns')]

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDLDRAGONFLYDOJI
17,2021-06-01 21:17:00,628.22,628.22,628.0,628.21,7,4,628.16,21:17:00,2021-06-01,1622596620,100
67,2021-06-01 22:07:00,629.3,629.39,628.53,629.22,364,106,629.001,22:07:00,2021-06-01,1622599620,100
114,2021-06-01 22:54:00,624.0,624.09,623.5,624.02,266,135,623.763,22:54:00,2021-06-01,1622602440,100
173,2021-06-01 23:53:00,625.88,625.93,625.5,625.85,156,71,625.703,23:53:00,2021-06-01,1622605980,100


In [181]:
df = pd.DataFrame({"A": [0, 2], "B": [0, 2], "C": [0, 0]})

In [182]:
df

Unnamed: 0,A,B,C
0,0,0,0
1,2,2,0


In [183]:
df.any(axis='columns')

0    False
1     True
dtype: bool

In [141]:
filtered = df[df['CDLDRAGONFLYDOJI'].astype(bool)]

In [132]:
filtered

Unnamed: 0,date,open,high,low,close,volume,barCount,average,time,date_str,date_epoch,CDLDRAGONFLYDOJI
103,2021-04-30 22:43:00,694.87,695.0,693.93,694.93,613,424,694.653,22:43:00,2021-04-30,1619836980,100
130,2021-04-30 23:10:00,696.13,696.27,695.63,696.07,551,302,695.967,23:10:00,2021-04-30,1619838600,100
154,2021-04-30 23:34:00,701.26,701.38,700.39,701.38,770,407,700.813,23:34:00,2021-04-30,1619840040,100
166,2021-04-30 23:46:00,702.82,703.0,702.33,702.9,355,206,702.777,23:46:00,2021-04-30,1619840760,100


In [142]:
matched_times = df[df['CDLDRAGONFLYDOJI'].astype(bool)]['time']

In [143]:
# Draw the lines for reference
shapes_list = []
annotation_list = []
for time in matched_times:
    shapes_list.append(dict(x0=time, x1=time, y0=0, y1=1, xref='x', yref='paper', line_width=1))
    annotation_list.append(dict(x=time, y=0.05, xref='x', yref='paper', showarrow=False, xanchor='left', text='CDLSHORTLINE'))

# Candle Chart
Draw for single day regarless of dates

In [205]:
patterns = ['CDLDOJI', 'CDLHIGHWAVE', 'CDLLONGLEGGEDDOJI', 'CDLRICKSHAWMAN', 'CDLSPINNINGTOP']

In [None]:
df = 

In [145]:
fig = go.Figure()

fig.add_trace(go.Candlestick(x=df['time'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
        text=df['close']))

fig.update_layout(
    title=f'Analysis on {day}',
    yaxis_title='Price',
    shapes = shapes_list,
    annotations=annotation_list
)

import plotly.io as pio
pio.renderers.default = 'browser'
pio.show(fig)

# Line Chart

In [13]:
line_data = df[['date', 'time' ,'average', 'open']].copy()

In [14]:
# Find the min open price
min_open_price = line_data['open'].min()

In [15]:
# To bring all open price to 0, subtract open price from average
line_data['avg_nor'] = line_data['average'] - line_data['open']

In [16]:
dates = sorted(line_data['date'].unique())

In [17]:
fig = go.Figure()

for date in dates:
    data = line_data[line_data['date'] == date]
    fig.add_trace(go.Scatter(x=data['time'], y=data['avg_nor'], name = date))

import plotly.io as pio
pio.renderers.default = 'browser'
pio.show(fig)