In [1]:
import pandas as pd
import numpy as np
import joblib
import os

In [2]:
# Location of the `grouped_by_date` data folder and the entries it contains.
DATA_PATH = os.path.join("..", "data", "grouped_by_date")
FILES = list(os.listdir(DATA_PATH))

In [3]:
# Load the entry at position 5 of the directory listing.
grouped_file_path = os.path.join(DATA_PATH, FILES[5])
grouped_data = joblib.load(grouped_file_path)

In [13]:
# Technical-indicator tests: configuration.
# NOTE: this rebinds DATA_PATH and FILES, which an earlier cell pointed at
# the `grouped_by_date` folder.

OFFSET = 20  # minutes from start

# `imputed_1min` folder and its directory listing
DATA_PATH = os.path.join("..", "data", "imputed_1min")
FILES = os.listdir(DATA_PATH)

# `grouped_by_date` folder and its directory listing
GROUPED_BY_DATE_PATH = os.path.join("..", "data", "grouped_by_date")
FILES_GROUPED_BY_DATE = os.listdir(GROUPED_BY_DATE_PATH)


def get_technical_indicator_features(daily_data, OFFSET=20):
    """Compute rolling technical-indicator columns for one block of bars.

    For every row ``i >= OFFSET`` the indicators are computed from the
    ``OFFSET`` rows strictly before ``i`` (rows ``i - OFFSET`` .. ``i - 1``);
    the current row itself never contributes. The first ``OFFSET`` rows are
    warm-up rows: they are flagged with ``keep_row = False`` and carry
    placeholder zeros (except the EMA seed stored at row ``OFFSET - 1``).

    Parameters
    ----------
    daily_data : mapping of column name -> object with ``.to_numpy()``
        Must provide ``"close"``, ``"high"`` and ``"low"`` (e.g. a pandas
        DataFrame). No other column is read.
    OFFSET : int, default 20
        Look-back window length in rows. Must be >= 1.

    Returns
    -------
    dict
        Columns, all of the same length as the input:

        * ``keep_row``            - False for warm-up rows, True afterwards
        * ``typical_price``       - mean of close, high and low per row
        * ``20min_sma``           - mean of the previous OFFSET closes
        * ``20min_tsma``          - mean of the previous OFFSET typical prices
        * ``20min_sd_tp``         - ``np.std`` of the previous OFFSET typical prices
        * ``20min_ema``           - EMA of close with multiplier 2 / (OFFSET + 1)
        * ``20min_diff_sma_ema``  - ``20min_sma - 20min_ema``
        * ``bbu`` / ``bbl``       - ``20min_tsma`` plus / minus two ``20min_sd_tp``

        The ``20min_`` prefix is fixed and does not change with ``OFFSET``.

    Raises
    ------
    ValueError
        If ``OFFSET`` is smaller than 1.
    """
    if OFFSET < 1:
        raise ValueError("OFFSET must be >= 1, got {!r}".format(OFFSET))

    close = daily_data["close"].to_numpy()
    high = daily_data["high"].to_numpy()
    low = daily_data["low"].to_numpy()
    typical_price = np.average([close, high, low], axis=0)
    ema_offset_multiplier = 2 / (OFFSET + 1)

    n_rows = len(close)
    # Never pad beyond the input length, otherwise the columns end up with
    # different lengths and cannot be assembled into a DataFrame.
    warmup = min(OFFSET, n_rows)

    keep_row = [False] * warmup  # warm-up rows, meant to be dropped later
    sma = [0.0] * warmup
    tsma = [0.0] * warmup
    sd_tp = [0.0] * warmup

    if n_rows >= OFFSET:
        # The seed sits on the last warm-up row and, like every other value,
        # only looks at the rows strictly before it.
        seed_window = close[:OFFSET - 1]
        # OFFSET == 1 leaves nothing before the seed row; the multiplier is
        # then 1, so the seed value is irrelevant and 0.0 avoids a NaN mean.
        seed = np.mean(seed_window) if len(seed_window) else 0.0
        ema = [0.0] * (OFFSET - 1) + [seed]
    else:
        ema = [0.0] * n_rows

    for i in range(OFFSET, n_rows):
        window_close = close[i - OFFSET:i]
        window_typical_price = typical_price[i - OFFSET:i]

        keep_row.append(True)
        sma.append(np.mean(window_close))
        tsma.append(np.mean(window_typical_price))
        sd_tp.append(np.std(window_typical_price))

        # Standard EMA recursion, fed with the most recent close in the window
        # (the close of row i - 1).
        prev_ema = ema[i - 1]
        ema.append(((window_close[-1] - prev_ema) * ema_offset_multiplier) + prev_ema)

    tsma_arr = np.array(tsma)
    band_width = 2 * np.array(sd_tp)

    return {
        "keep_row": keep_row,
        "typical_price": typical_price,
        "20min_sma": sma,
        "20min_tsma": tsma,
        "20min_sd_tp": sd_tp,
        "20min_ema": ema,
        "20min_diff_sma_ema": np.array(sma) - np.array(ema),
        "bbu": tsma_arr + band_width,
        "bbl": tsma_arr - band_width,
    }


In [9]:
# Pick the entry stored under the "2017-01-02" key and compute its indicators
# with the function's default window.
random_day_data = grouped_data["2017-01-02"]
features = get_technical_indicator_features(daily_data=random_day_data)

In [10]:
# One DataFrame column per indicator key.
features_df = pd.DataFrame(features)

In [11]:
# Show the first 50 rows; rows flagged keep_row == False hold placeholder zeros.
features_df.head(50)

Unnamed: 0,keep_row,typical_price,20min_sma,20min_tsma,20min_sd_tp,20min_ema,20min_diff_sma_ema,bbu,bbl
0,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,False,2.637577,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Same day, 14-row look-back window instead of the default.
features = get_technical_indicator_features(random_day_data, OFFSET=14)

In [31]:
# Recompute with a 14-row window and inspect the result.
# NOTE: the function hard-codes its column names, so they still read "20min_*".
features = get_technical_indicator_features(random_day_data, OFFSET=14)
features_df14 = pd.DataFrame(features)
features_df14.head(50)

Unnamed: 0,keep_row,typical_price,20min_sma,20min_tsma,20min_sd_tp,20min_ema,20min_diff_sma_ema,bbu,bbl
0,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,False,2.652412,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,False,2.649445,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,False,2.646478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,False,2.637577,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# 14-row window once more: build the indicator frame and show its first 50 rows.
features = get_technical_indicator_features(daily_data=random_day_data, OFFSET=14)
features_df14 = pd.DataFrame(features)
features_df14.head(50)