# Feature Engineering - Technical Indicators

## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from pathlib import Path

# --- Dataset selection: edit these to point the notebook at another file ---
# NOTE(review): the df.head() output further down shows prices around 1.10,
# which looks unusual for USDJPY - confirm the symbol matches the file contents.
SYMBOL = "USDJPY"
SAMPLE_TYPE = "time"
MINUTES = 1

START_DATE = "20240101"
END_DATE = "20241231"

# File stem shared by the input (resampled) and output (processed) files.
RESAMPLED_NAME = f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"

# --- Directory layout, relative to the notebook's working directory ---
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR.joinpath("resampled")
PROCESSED_DIR = BASE_DIR.joinpath("processed")
EVENTS_DIR = BASE_DIR.joinpath("events")

# --- Concrete files: input bars and the indicator-enriched ("-TA") output ---
RESAMPLED_FILE_PATH = RESAMPLED_DIR.joinpath(f"{RESAMPLED_NAME}.pkl")
PROCESSED_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}-TA.pkl")

## Load Data

In [3]:
%%time
# Load the resampled frame from the pickle configured in the setup cell.
# NOTE: unpickling can execute arbitrary code - only load files you produced or trust.
df = pd.read_pickle(RESAMPLED_FILE_PATH)

CPU times: user 766 μs, sys: 2.39 ms, total: 3.15 ms
Wall time: 2.88 ms


In [4]:
# Sanity check: (rows, columns) of the loaded frame.
df.shape

(371455, 7)

In [5]:
# Peek at the first rows to confirm which columns were loaded.
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,spread
0,2024-01-01 22:00:00,1.10454,1.104545,1.104505,1.104545,11800000000.0,0.000531
1,2024-01-01 22:01:00,1.10453,1.10454,1.10453,1.10454,34200000000.0,0.000491
2,2024-01-01 22:02:00,1.104535,1.10454,1.10453,1.10454,37800000000.0,0.000494
3,2024-01-01 22:03:00,1.104535,1.104535,1.10452,1.104525,27900000000.0,0.000518
4,2024-01-01 22:04:00,1.10452,1.104555,1.10451,1.104525,29700000000.0,0.000529


In [6]:
# Promote the `timestamp` column to a DatetimeIndex.
# Guarded so the cell can be re-run: after the first run `timestamp` is the
# index rather than a column, and an unguarded re-run would raise KeyError.
if "timestamp" in df.columns:
    timestamps = df["timestamp"]
    if pd.api.types.is_numeric_dtype(timestamps):
        # Numeric values are interpreted as epoch milliseconds.
        timestamps = pd.to_datetime(timestamps, unit="ms")
    else:
        # `unit` only applies to numeric input; datetime/string values are
        # converted without it.
        timestamps = pd.to_datetime(timestamps)
    df = df.assign(timestamp=timestamps).set_index("timestamp")

## Technical Indicators

In [7]:
import pandas_ta as ta

In [8]:
# `indicators()` prints the listing itself; wrapping it in print() would also
# emit its None return value after the list.
df.ta.indicators()

Pandas TA - Technical Analysis Indicators - v0.4.71b0

Indicators and Utilities [154]:
    aberration, accbands, ad, adosc, adx, alligator, alma, alphatrend, amat, ao, aobv, apo, aroon, atr, atrts, bbands, bias, bop, brar, cci, cdl_pattern, cdl_z, cfo, cg, chandelier_exit, chop, cksp, cmf, cmo, coppock, crsi, cti, decay, decreasing, dema, dm, donchian, dpo, ebsw, efi, ema, entropy, eom, er, eri, exhc, fisher, fwma, ha, hilo, hl2, hlc3, hma, ht_trendline, hwc, hwma, ichimoku, increasing, inertia, jma, kama, kc, kdj, kst, kurtosis, kvo, linreg, log_return, long_run, macd, mad, mama, massi, mcgd, median, mfi, midpoint, midprice, mom, natr, nvi, obv, ohlc4, pdist, percent_return, pgo, pivots, ppo, psar, psl, pvi, pvo, pvol, pvr, pvt, pwma, qqe, qstick, quantile, reflex, rma, roc, rsi, rsx, rvgi, rvi, rwi, short_run, sinwma, skew, slope, sma, smc, smi, smma, squeeze, squeeze_pro, ssf, ssf3, stc, stdev, stoch, stochf, stochrsi, supertrend, swma, t3, tema, thermo, tmo, tos_stdevall, trendflex

#### SMA

In [9]:
# Simple moving averages at four look-back lengths.
# append=True writes each result into df as an SMA_<length> column.
for window in (5, 15, 30, 60):
    df.ta.sma(length=window, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956


#### EMA

In [10]:
# Exponential moving averages at the same four lengths used for the SMAs.
# append=True writes each result into df as an EMA_<length> column.
for window in (5, 15, 30, 60):
    df.ta.ema(length=window, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,EMA_5,EMA_15,EMA_30,EMA_60
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,1.103721,1.103748,1.103808,1.103964
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,1.103712,1.103741,1.103801,1.103955
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,1.103705,1.103735,1.103793,1.103946
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,1.103703,1.10373,1.103787,1.103938
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,1.103684,1.10372,1.103778,1.103929


#### MACD

In [11]:
# MACD with the library defaults; the appended columns are
# MACD_12_26_9, MACDh_12_26_9 and MACDs_12_26_9.
df.ta.macd(append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,EMA_5,EMA_15,EMA_30,EMA_60,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,1.103721,1.103748,1.103808,1.103964,-4.8e-05,9e-06,-5.6e-05
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,1.103712,1.103741,1.103801,1.103955,-4.8e-05,7e-06,-5.5e-05
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,1.103705,1.103735,1.103793,1.103946,-4.8e-05,5e-06,-5.3e-05
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,1.103703,1.10373,1.103787,1.103938,-4.6e-05,5e-06,-5.2e-05
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,1.103684,1.10372,1.103778,1.103929,-4.9e-05,2e-06,-5.1e-05


#### RSI

In [12]:
# RSI at three look-back lengths; each call appends an RSI_<length> column.
for window in (5, 14, 20):
    df.ta.rsi(length=window, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,EMA_5,EMA_15,EMA_30,EMA_60,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_5,RSI_14,RSI_20
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,1.103721,1.103748,1.103808,1.103964,-4.8e-05,9e-06,-5.6e-05,33.195305,38.171739,36.806737
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,1.103712,1.103741,1.103801,1.103955,-4.8e-05,7e-06,-5.5e-05,35.637346,38.873166,37.310776
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,1.103705,1.103735,1.103793,1.103946,-4.8e-05,5e-06,-5.3e-05,34.080102,38.403968,37.000125
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,1.103703,1.10373,1.103787,1.103938,-4.6e-05,5e-06,-5.2e-05,40.572147,39.964698,38.085397
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,1.103684,1.10372,1.103778,1.103929,-4.9e-05,2e-06,-5.1e-05,24.192186,34.749498,34.6315


#### ADX

In [13]:
# ADX at four look-back lengths. Each call appends several columns
# (ADX_<n>, ADXR_<n>_2, DMP_<n>, DMN_<n>), as the preview below shows.
for window in (5, 15, 30, 60):
    df.ta.adx(length=window, append=True)

# Preview five rows by position; ADX_60 is still NaN at these rows.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,...,DMP_15,DMN_15,ADX_30,ADXR_30_2,DMP_30,DMN_30,ADX_60,ADXR_60_2,DMP_60,DMN_60
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,...,0.000141,0.000284,35.886577,35.947243,0.000264,0.000537,,,0.000465,0.000959
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,...,0.000132,0.000295,35.910848,35.928531,0.000255,0.000549,,,0.000457,0.000973
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,...,0.000123,0.000275,35.93431,35.910444,0.000246,0.000531,,,0.000449,0.000957
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,...,0.000145,0.000257,35.782079,35.846463,0.000268,0.000513,,,0.000472,0.000941
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,...,0.000135,0.00028,35.750026,35.842168,0.000259,0.000536,,,0.000464,0.000965


#### Bollinger Bands (length 15)

In [14]:
# Bollinger Bands over 15 bars; the appended columns are the
# BBL/BBM/BBU/BBB/BBP_15_2.0_2.0 set shown in the preview.
df.ta.bbands(length=15, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,...,DMN_30,ADX_60,ADXR_60_2,DMP_60,DMN_60,BBL_15_2.0_2.0,BBM_15_2.0_2.0,BBU_15_2.0_2.0,BBB_15_2.0_2.0,BBP_15_2.0_2.0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,...,0.000537,,,0.000465,0.000959,1.103642,1.103741,1.10384,0.018026,0.243664
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,...,0.000549,,,0.000457,0.000973,1.103635,1.103736,1.103837,0.018243,0.29638
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,...,0.000531,,,0.000449,0.000957,1.10363,1.10373,1.103829,0.018029,0.300663
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,...,0.000513,,,0.000472,0.000941,1.103628,1.103729,1.103829,0.018186,0.357186
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,...,0.000536,,,0.000464,0.000965,1.103627,1.103728,1.10383,0.01838,0.089225


#### ATR

In [15]:
# ATR at four look-back lengths; each call appends an ATRr_<length> column.
for window in (5, 10, 15, 20):
    df.ta.atr(length=window, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,...,DMN_60,BBL_15_2.0_2.0,BBM_15_2.0_2.0,BBU_15_2.0_2.0,BBB_15_2.0_2.0,BBP_15_2.0_2.0,ATRr_5,ATRr_10,ATRr_15,ATRr_20
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,...,0.000959,1.103642,1.103741,1.10384,0.018026,0.243664,5.6e-05,5.7e-05,5.7e-05,5.6e-05
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,...,0.000973,1.103635,1.103736,1.103837,0.018243,0.29638,6.4e-05,6.1e-05,5.9e-05,5.8e-05
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,...,0.000957,1.10363,1.10373,1.103829,0.018029,0.300663,5.4e-05,5.6e-05,5.6e-05,5.5e-05
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,...,0.000941,1.103628,1.103729,1.103829,0.018186,0.357186,5.2e-05,5.5e-05,5.6e-05,5.5e-05
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,...,0.000965,1.103627,1.103728,1.10383,0.01838,0.089225,6e-05,5.9e-05,5.8e-05,5.7e-05


#### Bollinger Bands (lengths 5 and 14)

In [16]:
# Two more Bollinger Band sets (5 and 14 bars); each call appends its own
# BBL/BBM/BBU/BBB/BBP_<length>_2.0_2.0 columns.
for window in (5, 14):
    df.ta.bbands(length=window, append=True)

# Preview five rows by position.
df.iloc[100:105]

Unnamed: 0_level_0,open,high,low,close,volume,spread,SMA_5,SMA_15,SMA_30,SMA_60,...,BBL_5_2.0_2.0,BBM_5_2.0_2.0,BBU_5_2.0_2.0,BBB_5_2.0_2.0,BBP_5_2.0_2.0,BBL_14_2.0_2.0,BBM_14_2.0_2.0,BBU_14_2.0_2.0,BBB_14_2.0_2.0,BBP_14_2.0_2.0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 23:44:00,1.103685,1.10372,1.103685,1.10369,73800000000.0,3.2e-05,1.103726,1.103741,1.103757,1.104001,...,1.103655,1.103726,1.103797,0.012834,0.245848,1.103637,1.103739,1.103841,0.018431,0.259476
2024-01-01 23:45:00,1.103685,1.10375,1.103655,1.103695,186470000000.0,3.7e-05,1.103707,1.103736,1.103753,1.10399,...,1.103674,1.103707,1.10374,0.005911,0.316058,1.103632,1.103732,1.103833,0.018233,0.313661
2024-01-01 23:46:00,1.1037,1.1037,1.103685,1.10369,110700000000.0,3.8e-05,1.103702,1.10373,1.10375,1.103979,...,1.103668,1.103702,1.103736,0.006129,0.322606,1.103628,1.103731,1.103833,0.018604,0.30172
2024-01-01 23:47:00,1.103685,1.10373,1.103685,1.1037,125780000000.0,3.5e-05,1.103695,1.103729,1.103747,1.103968,...,1.103686,1.103695,1.103704,0.001621,0.779507,1.10364,1.103734,1.103829,0.017093,0.318265
2024-01-01 23:48:00,1.10371,1.103735,1.103645,1.103645,76500000000.0,2.8e-05,1.103684,1.103728,1.103744,1.103956,...,1.103644,1.103684,1.103724,0.007194,0.008802,1.103631,1.103732,1.103833,0.018258,0.067582


### Remove NaN Values

In [17]:
# The indicator columns contain NaNs (e.g. ADX_60 in the previews above), so
# drop every row that has at least one missing value.
# Rebinding instead of inplace=True keeps the step an explicit transform.
rows_before = len(df)
df = df.dropna()

# Guard: a column that is entirely NaN would remove every row, and the save
# step would then silently write an empty feature file.
assert not df.empty, "dropna() removed every row - check for an all-NaN column"
print(f"Dropped {rows_before - len(df):,} of {rows_before:,} rows containing NaN")

### Save to Disk

In [18]:
# to_pickle does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
PROCESSED_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_pickle(PROCESSED_FILE_PATH)