# Candlesticks Engulfing patterns and statistical analysis
# Loading and preparing data

In [3]:
import pandas as pd
df = pd.read_csv("TSLA_last_30_days.csv")
df.tail()

Unnamed: 0,timestamp,open,high,low,close,volume
17095,2024-12-23 04:04:00,433.59,433.6,432.61,433.32,18694
17096,2024-12-23 04:03:00,433.41,434.0,433.29,433.5,18590
17097,2024-12-23 04:02:00,434.59,435.0,433.0,433.35,27983
17098,2024-12-23 04:01:00,434.0,435.13,433.5,434.59,45713
17099,2024-12-23 04:00:00,430.83,434.28,429.3,434.19,31438


In [5]:
#Check if NA values are in data
df.isna().sum()

timestamp    0
open         0
high         0
low          0
close        0
volume       0
dtype: int64

# Identifying the Engulfing candle patterns

In [6]:
#Engulfing pattern signals
import random
def Revsignal1(df1):
    length = len(df1)
    high = list(df1['high'])
    low = list(df1['low'])
    close = list(df1['close'])
    open = list(df1['open'])
    signal = [0] * length
    bodydiff = [0] * length

    for row in range(1, length):
        bodydiff[row] = abs(open[row]-close[row])
        bodydiffmin = 0.003
        if (bodydiff[row]>bodydiffmin and bodydiff[row-1]>bodydiffmin and
            open[row-1]<close[row-1] and
            open[row]>close[row] and 
            #open[row]>=close[row-1] and close[row]<open[row-1]):
            (open[row]-close[row-1])>=+0e-5 and close[row]<open[row-1]):
            signal[row] = 1
        elif (bodydiff[row]>bodydiffmin and bodydiff[row-1]>bodydiffmin and
            open[row-1]>close[row-1] and
            open[row]<close[row] and 
            #open[row]<=close[row-1] and close[row]>open[row-1]):
            (open[row]-close[row-1])<=-0e-5 and close[row]>open[row-1]):
            signal[row] = 2
        else:
            signal[row] = 0
        #signal[row]=random.choice([0, 1, 2])
        #signal[row]=1
    return signal
df['signal1'] = Revsignal1(df)
df[df['signal1']==1].count()

timestamp    824
open         824
high         824
low          824
close        824
volume       824
signal1      824
dtype: int64

In [7]:
#Target
def mytarget(df1, barsfront):
    length = len(df1)
    high = list(df1['high'])
    low = list(df1['low'])
    close = list(df1['close'])
    open = list(df1['open'])
    trendcat = [None] * length
    
    piplim = 300e-5
    for line in range (0, length-1-barsfront):
        for i in range(1,barsfront+1):
            if ((high[line+i]-max(close[line],open[line]))>piplim) and ((min(close[line],open[line])-low[line+i])>piplim):
                trendcat[line] = 3 # no trend
            elif (min(close[line],open[line])-low[line+i])>piplim:
                trendcat[line] = 1 #-1 downtrend
                break
            elif (high[line+i]-max(close[line],open[line]))>piplim:
                trendcat[line] = 2 # uptrend
                break
            else:
                trendcat[line] = 0 # no clear trend  
    return trendcat

df['Trend'] = mytarget(df,3)
#df.head(30)


In [8]:
import numpy as np
conditions = [(df['Trend'] == 1) & (df['signal1'] == 1),(df['Trend'] == 2) & (df['signal1'] == 2)]
values = [1, 2]
df['result'] = np.select(conditions, values)

trendId=2
print(df[df['result']==trendId].result.count()/df[df['signal1']==trendId].signal1.count())
df[ (df['Trend']!=trendId) & (df['signal1']==trendId) ] # false positives

0.18478260869565216


Unnamed: 0,timestamp,open,high,low,close,volume,signal1,Trend,result
59,2025-01-21 19:00:00,421.79,421.98,421.65,421.98,1082,2,1.0,0
89,2025-01-21 18:30:00,421.17,421.67,421.10,421.67,5159,2,1.0,0
97,2025-01-21 18:22:00,420.50,421.15,420.40,421.15,5435,2,1.0,0
161,2025-01-21 17:18:00,424.67,425.01,424.60,425.00,3135,2,1.0,0
171,2025-01-21 17:08:00,425.10,425.29,425.00,425.18,13257,2,1.0,0
...,...,...,...,...,...,...,...,...,...
17037,2024-12-23 05:02:00,434.15,434.39,433.89,434.32,3780,2,1.0,0
17057,2024-12-23 04:42:00,435.02,435.55,435.02,435.50,4342,2,1.0,0
17061,2024-12-23 04:38:00,435.58,435.93,435.55,435.86,2106,2,1.0,0
17088,2024-12-23 04:11:00,433.46,433.76,433.45,433.69,9139,2,1.0,0


In [4]:
import pandas as pd
df = pd.read_csv("TSLA_last_30_days.csv")

dfpl = df[:]
print(dfpl.head())
print(len(dfpl))
import plotly.graph_objects as go
from datetime import datetime

fig = go.Figure(data=[go.Candlestick(x=dfpl.index,
                open=dfpl['open'],
                high=dfpl['high'],
                low=dfpl['low'],
                close=dfpl['close'])])

fig.show()

             timestamp      open    high       low   close  volume
0  2025-01-21 19:59:00  420.2100  420.40  420.1500  420.30   10620
1  2025-01-21 19:58:00  420.5984  420.60  420.1799  420.23   15004
2  2025-01-21 19:57:00  420.6000  420.68  420.5500  420.56    7386
3  2025-01-21 19:56:00  420.6700  420.69  420.5500  420.63    3530
4  2025-01-21 19:55:00  420.7500  420.75  420.6500  420.68    3202
17100
