In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [30]:
from pathlib import Path

# Configurable parts
SYMBOL = "USDJPY"
SAMPLE_TYPE = "time"
MINUTES = 5

START_DATE = "20240101"
END_DATE = "20241231"

# Build base name
RESAMPLED_NAME = f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"

# Base directories
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR / "resampled"
PROCESSED_DIR = BASE_DIR / "processed"
EVENTS_DIR = BASE_DIR / "interm/events"
SIDES_DIR = BASE_DIR / "interm/sides"

# Final paths
RESAMPLED_FILE_PATH = RESAMPLED_DIR / f"{RESAMPLED_NAME}.pkl"
PROCESSED_FILE_PATH = PROCESSED_DIR / f"{RESAMPLED_NAME}_processed.pkl"

## Loading Data

In [3]:
%%time
df = pd.read_pickle(PROCESSED_FILE_PATH)

CPU times: user 397 μs, sys: 1.15 ms, total: 1.54 ms
Wall time: 2.59 ms


In [4]:
df.shape

(72013, 10)

In [5]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,spread,return,log_return,fd_return,log_fd_return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-15 06:35:00,1.09644,1.096615,1.096425,1.096595,969750000000.0,2.7e-05,0.00016,0.000146,0.113814,0.009814
2024-01-15 06:40:00,1.0966,1.09662,1.09651,1.096595,544860000000.0,2.5e-05,0.0,0.0,0.113773,0.009777
2024-01-15 06:45:00,1.0966,1.096605,1.096445,1.096525,728700000000.0,2.5e-05,-7e-05,-6.4e-05,0.113687,0.009698
2024-01-15 06:50:00,1.09653,1.096625,1.096485,1.096545,823070000000.0,2.7e-05,2e-05,1.8e-05,0.113715,0.009724
2024-01-15 06:55:00,1.096535,1.096545,1.09634,1.0964,932760000000.0,2.6e-05,-0.000145,-0.000132,0.113565,0.009587


## Calculating EMA values

In [7]:
FAST = 9 #(45 min)
SLOW = 36 #(180 min)

In [23]:
SIDE_NAME = f"EMACross_{FAST}_{SLOW}"

In [24]:
import pandas_ta as ta

In [25]:
df.ta.ema(length=FAST, append=True)
df.ta.ema(length=SLOW, append=True)

timestamp
2024-01-15 06:35:00         NaN
2024-01-15 06:40:00         NaN
2024-01-15 06:45:00         NaN
2024-01-15 06:50:00         NaN
2024-01-15 06:55:00         NaN
                         ...   
2024-12-30 23:35:00    1.040267
2024-12-30 23:40:00    1.040280
2024-12-30 23:45:00    1.040289
2024-12-30 23:50:00    1.040299
2024-12-30 23:55:00    1.040314
Name: EMA_36, Length: 72013, dtype: float64

## Finding Cross Points

標記出快線與慢線的相對位置（ema_cross_flag）並找到跨越的時間點（t_events）

In [26]:
df['ema_cross_flag'] = df[f'EMA_{FAST}'] > df[f'EMA_{SLOW}']
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,spread,return,log_return,fd_return,log_fd_return,EMA_9,EMA_36,ema_cross_flag
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-01-15 06:35:00,1.09644,1.096615,1.096425,1.096595,969750000000.0,2.7e-05,0.00016,0.000146,0.113814,0.009814,,,False
2024-01-15 06:40:00,1.0966,1.09662,1.09651,1.096595,544860000000.0,2.5e-05,0.0,0.0,0.113773,0.009777,,,False
2024-01-15 06:45:00,1.0966,1.096605,1.096445,1.096525,728700000000.0,2.5e-05,-7e-05,-6.4e-05,0.113687,0.009698,,,False
2024-01-15 06:50:00,1.09653,1.096625,1.096485,1.096545,823070000000.0,2.7e-05,2e-05,1.8e-05,0.113715,0.009724,,,False
2024-01-15 06:55:00,1.096535,1.096545,1.09634,1.0964,932760000000.0,2.6e-05,-0.000145,-0.000132,0.113565,0.009587,,,False


In [27]:
t_events = df.index[df['ema_cross_flag'] != df['ema_cross_flag'].shift(1)]
t_events

DatetimeIndex(['2024-01-15 06:35:00', '2024-01-15 12:05:00',
               '2024-01-15 14:05:00', '2024-01-15 15:55:00',
               '2024-01-15 20:30:00', '2024-01-15 20:40:00',
               '2024-01-15 22:00:00', '2024-01-16 07:00:00',
               '2024-01-16 07:05:00', '2024-01-16 07:45:00',
               ...
               '2024-12-30 00:55:00', '2024-12-30 01:40:00',
               '2024-12-30 01:55:00', '2024-12-30 05:15:00',
               '2024-12-30 05:20:00', '2024-12-30 06:00:00',
               '2024-12-30 07:25:00', '2024-12-30 09:50:00',
               '2024-12-30 13:25:00', '2024-12-30 18:05:00'],
              dtype='datetime64[ns]', name='timestamp', length=2558, freq=None)

## Labeling Sides

黃金交叉與死亡交叉

In [33]:
sides = pd.Series(df['ema_cross_flag'].loc[t_events].apply(lambda x: 1 if x else 0), index=t_events)
sides

timestamp
2024-01-15 06:35:00    0
2024-01-15 12:05:00    1
2024-01-15 14:05:00    0
2024-01-15 15:55:00    1
2024-01-15 20:30:00    0
                      ..
2024-12-30 06:00:00    1
2024-12-30 07:25:00    0
2024-12-30 09:50:00    1
2024-12-30 13:25:00    0
2024-12-30 18:05:00    1
Name: ema_cross_flag, Length: 2558, dtype: int64

## Saving to disk

In [31]:
SIDE_FILE_PATH = SIDES_DIR / f"{RESAMPLED_NAME}-{SIDE_NAME}.pkl"
SIDE_FILE_PATH

PosixPath('../data/interm/sides/USDJPY-5m-20240101-20241231-EMACross_9_36.pkl')

In [34]:
sides.to_pickle(SIDE_FILE_PATH)