In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm


In [5]:
# Directory containing the M15 OHLC exports. The USDJPY file path is derived
# from it so that changing `path` cannot leave `usdjpy` pointing elsewhere.
path = './../Data/'
usdjpy = os.path.join(path, 'USDJPY_M15_202010070430_202410140915.csv')
os.listdir(path)

['GBPUSD_M15_202010071130_202410140915.csv',
 'NZDUSD_M15_202010010000_202410140915.csv',
 'USDJPY_M15_202010070430_202410140915.csv',
 'AUDNZD_M15_202010010000_202410140915.csv',
 'NZDJPY_M15_202010010000_202410140915.csv',
 'EURGBP_M15_202010010000_202410140915.csv',
 'AUDUSD_M15_202010010000_202410140915.csv',
 'GBPJPY_M15_202010010000_202410140915.csv',
 'EURUSD_M15_202010070145_202410140915.csv',
 'EURJPY_M15_202010010000_202410140915.csv',
 'CADJPY_M15_202010010000_202410140915.csv',
 'USDSGD_M15_202010010000_202410140915.csv']

In [19]:
def preprocess_data(df: pd.DataFrame, atr_length=14)->pd.DataFrame:
    """Add candle-body columns to an OHLC frame.

    Two columns are written onto ``df`` itself (the frame is mutated in place
    and the same object is returned):

    * ``'<BODY_LENGTH>'`` - absolute difference between ``'<CLOSE>'`` and
      ``'<OPEN>'``.
    * ``'<ATR>'`` - rolling mean of ``'<BODY_LENGTH>'`` over ``atr_length``
      rows. Note this averages the candle body, not the high/low true range.

    Args:
        df: Frame holding at least the ``'<OPEN>'`` and ``'<CLOSE>'`` columns.
        atr_length: Window size, in rows, of the rolling mean.

    Returns:
        The same ``df`` object with the two columns added.
    """
    body = (df['<CLOSE>'] - df['<OPEN>']).abs()
    df['<BODY_LENGTH>'] = body
    df['<ATR>'] = body.rolling(window=atr_length).mean()
    return df

def load_data(path: str)->pd.DataFrame:
    """Read a tab-separated OHLC export and add the derived candle columns.

    Args:
        path: Location of the tab-delimited CSV file to read.

    Returns:
        The frame produced by ``preprocess_data`` for the parsed file.
    """
    # Read the file named by the `path` argument (not a module-level default),
    # so the same loader works for every currency-pair export.
    df = pd.read_csv(path, delimiter='\t')
    return preprocess_data(df)

# Load the USDJPY export (load_data also adds the derived columns) and
# preview the first rows.
df = load_data(usdjpy)
df.head(20)

Unnamed: 0,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<TICKVOL>,<VOL>,<SPREAD>,<BODY_LENGTH>,<ATR>
0,2020.10.07,04:30:00,105.643,105.665,105.633,105.633,277,0,1,0.01,
1,2020.10.07,04:45:00,105.637,105.663,105.631,105.662,453,0,1,0.025,
2,2020.10.07,05:00:00,105.661,105.686,105.658,105.668,442,0,0,0.007,
3,2020.10.07,05:15:00,105.668,105.686,105.65,105.67,331,0,1,0.002,
4,2020.10.07,05:30:00,105.67,105.684,105.666,105.667,184,0,1,0.003,
5,2020.10.07,05:45:00,105.667,105.672,105.648,105.657,173,0,1,0.01,
6,2020.10.07,06:00:00,105.657,105.67,105.655,105.668,137,0,1,0.011,
7,2020.10.07,06:15:00,105.668,105.677,105.661,105.675,252,0,1,0.007,
8,2020.10.07,06:30:00,105.676,105.683,105.673,105.681,157,0,1,0.005,
9,2020.10.07,06:45:00,105.681,105.695,105.677,105.693,282,0,1,0.012,


In [9]:
# Summarise column dtypes and non-null counts.
# NOTE(review): the stored output below lists only 9 columns, i.e. it predates
# the '<BODY_LENGTH>' / '<ATR>' columns added by preprocess_data, and the cell
# execution counts are out of order - re-run the notebook top to bottom.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100026 entries, 0 to 100025
Data columns (total 9 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   <DATE>     100026 non-null  object 
 1   <TIME>     100026 non-null  object 
 2   <OPEN>     100026 non-null  float64
 3   <HIGH>     100026 non-null  float64
 4   <LOW>      100026 non-null  float64
 5   <CLOSE>    100026 non-null  float64
 6   <TICKVOL>  100026 non-null  int64  
 7   <VOL>      100026 non-null  int64  
 8   <SPREAD>   100026 non-null  int64  
dtypes: float64(4), int64(3), object(2)
memory usage: 6.9+ MB


In [None]:
'''
G = Bull Candle = A
R = Bear Candle = B
N = No Color Candle = C

E = Candle == ATR (ratio within 1 +/- eps) = D
SSS = Candle <<< ATR = E
SS = Candle << ATR = F
S = Candle < ATR = G
BBB = Candle >>> ATR = H
BB = Candle >> ATR = I
B = Candle > ATR = J

M = Top Wick == Bot Wick = K
LLL = Bot Wick >>> TopWick = L
LL = Bot Wick >> TopWick = M 
L = Bot Wick > TopWick = N

UUU = Top Wick >>> Bot Wick = O
UU = Top Wick >> Bot Wick = P 
U = Top Wick > Bot Wick = Q
'''

def isbull(open: float, close: float)-> bool:
    """Return True when the candle closed strictly above its open."""
    return open < close

def bear_bull(open: float, close: float) -> str:
    """Encode the candle colour as a single character.

    Returns:
        'A' when close is above open (bull), 'B' when close is below open
        (bear), and 'C' otherwise (open equals close).
    """
    if close > open:
        return 'A'  # bull candle
    if close < open:
        return 'B'  # bear candle
    # neither strictly above nor below: the rare open == close candle
    return 'C'
    
def candle_to_atr_ratio(open: float, close: float, atr: float, eps: float = 0.05) -> str:
    """Encode the candle body size relative to ATR as a single character.

    The ratio ``abs(open - close) / atr`` is bucketed as follows (checked in
    this order):

    * 'D' - ratio strictly inside ``(1 - eps, 1 + eps)``: body ~= ATR
    * 'E' - ratio < 0.25: body <<< ATR
    * 'F' - ratio < 0.5:  body << ATR
    * 'G' - ratio < 1:    body < ATR
    * 'H' - ratio > 1.75: body >>> ATR
    * 'I' - ratio > 1.5:  body >> ATR
    * 'J' - ratio > 1:    body > ATR

    The 'G' and 'J' buckets extend up/down to the edge of the 'D' band so that
    every finite ratio maps to a character (ratios such as 0.8 or 1.1 are
    classified instead of raising).

    Args:
        open: Candle open price.
        close: Candle close price.
        atr: Average candle size used as the reference; must be > 0.
        eps: Half-width of the "equal to ATR" band around a ratio of 1.

    Returns:
        One of 'D', 'E', 'F', 'G', 'H', 'I', 'J'.

    Raises:
        ValueError: If ``atr`` is zero, negative or NaN, or if the ratio is
            NaN (e.g. a NaN price).
    """
    # `not atr > 0` is also true for NaN, which a plain `atr <= 0` would miss.
    if not atr > 0:
        raise ValueError(f"atr must be a positive number, got {atr!r}")

    candle_height = abs(open - close)
    ratio = candle_height / atr

    if 1 - eps < ratio < 1 + eps:
        return 'D'

    if ratio < 0.25:
        return "E"

    if ratio < 0.5:
        return "F"

    if ratio < 1:
        return "G"

    if ratio > 1.75:
        return "H"

    if ratio > 1.50:
        return "I"

    if ratio > 1:
        return "J"

    if ratio == 1:
        # Only reachable when eps <= 0 leaves the 'D' band empty.
        return 'D'

    # Every comparison was false, so the ratio is NaN.
    raise ValueError("Sanity Check")

def top_bot_wick_ratio(high: float, low: float, open: float, close: float, eps: float=0.05)-> str:
    """Encode the top-wick / bottom-wick size ratio as a single character.

    The top wick is measured from the upper edge of the body to ``high`` and
    the bottom wick from ``low`` to the lower edge of the body. The ratio
    ``top_wick / bot_wick`` is bucketed as follows (checked in this order):

    * 'K' - ratio strictly inside ``(1 - eps, 1 + eps)``: wicks ~= equal
    * 'L' - ratio < 0.25: bottom wick >>> top wick
    * 'M' - ratio < 0.5:  bottom wick >> top wick
    * 'N' - ratio < 1:    bottom wick > top wick
    * 'O' - ratio > 1.75: top wick >>> bottom wick
    * 'P' - ratio > 1.5:  top wick >> bottom wick
    * 'Q' - ratio > 1:    top wick > bottom wick

    A candle with no bottom wick is handled without dividing by zero: it is
    'K' when the top wick is also absent, otherwise 'O'. The 'N' and 'Q'
    buckets extend to the edge of the 'K' band so ratios such as 0.8 or 1.2
    are classified instead of raising.

    Args:
        high: Candle high price.
        low: Candle low price.
        open: Candle open price.
        close: Candle close price.
        eps: Half-width of the "equal wicks" band around a ratio of 1.

    Returns:
        One of 'K', 'L', 'M', 'N', 'O', 'P', 'Q'.

    Raises:
        ValueError: If the ratio is NaN (e.g. a NaN price).
    """
    # Same bull test as isbull(): the body's upper edge is the close for a
    # bull candle and the open otherwise.
    if close > open:
        top_wick = high - close
        bot_wick = open - low
    else:
        top_wick = high - open
        bot_wick = close - low

    if bot_wick == 0:
        # No bottom wick: the ratio is undefined (0/0) or unbounded (x/0).
        return 'K' if top_wick == 0 else 'O'

    ratio = top_wick / bot_wick
    if 1 - eps < ratio < 1 + eps:
        return 'K'

    if ratio < 0.25:
        return "L"

    if ratio < 0.5:
        return "M"

    if ratio < 1:
        return "N"

    if ratio > 1.75:
        return "O"

    if ratio > 1.50:
        return "P"

    if ratio > 1:
        return "Q"

    if ratio == 1:
        # Only reachable when eps <= 0 leaves the 'K' band empty.
        return 'K'

    # Every comparison was false, so the ratio is NaN.
    raise ValueError("Sanity Check")
    

def candle2char(open:float, close: float, low: float,
                 high: float, vol: float, atr: float) -> str:
    """Encode one candle as a three-character string.

    The characters are, in order: the colour from ``bear_bull``, the
    body-to-ATR bucket from ``candle_to_atr_ratio`` and the wick-ratio bucket
    from ``top_bot_wick_ratio`` (each called with its default ``eps``).

    Args:
        open: Candle open price.
        close: Candle close price.
        low: Candle low price.
        high: Candle high price.
        vol: Candle volume; accepted for interface stability but not used by
            the encoding.
        atr: Average candle size passed to ``candle_to_atr_ratio``.

    Returns:
        The three-character code, e.g. a colour char followed by two bucket
        chars.

    Raises:
        Any exception raised by the three classifier helpers propagates
        unchanged.
    """
    candle = ''
    candle += bear_bull(open, close)

    # candle body relative to ATR
    candle += candle_to_atr_ratio(open, close, atr)

    # top wick to bottom wick ratio division
    candle += top_bot_wick_ratio(high, low, open, close)

    return candle
