In [4]:
import pandas as pd
import numpy as np


merdeg_data_path = "/Users/apple/Desktop/time-series/informer-validation/data/btc_with_macro.csv"

merged_df = pd.read_csv(merdeg_data_path, parse_dates=["datetime"])

# Hourly price data
df_5min  = merged_df.resample('5min', on='datetime').first().reset_index()
df_5min

Unnamed: 0,datetime,close,high,low,open,volume,fng_value,vix_value,fed_funds_rate
0,2023-01-01 00:00:00,16543.67,16544.76,16538.45,16541.77,83.08143,26.0,21.67,4.33
1,2023-01-01 00:05:00,16536.70,16537.80,16533.94,16534.91,53.58957,26.0,21.67,4.33
2,2023-01-01 00:10:00,16525.34,16530.87,16522.55,16526.67,96.60421,26.0,21.67,4.33
3,2023-01-01 00:15:00,16521.00,16526.84,16519.40,16521.26,114.31289,26.0,21.67,4.33
4,2023-01-01 00:20:00,16536.61,16537.82,16534.15,16534.94,37.63557,26.0,21.67,4.33
...,...,...,...,...,...,...,...,...,...
230683,2025-03-11 23:35:00,82699.67,82699.93,82622.63,82627.82,4.78086,24.0,26.92,4.33
230684,2025-03-11 23:40:00,82731.14,82884.81,82731.14,82880.90,19.86220,24.0,26.92,4.33
230685,2025-03-11 23:45:00,82818.44,82818.45,82732.01,82786.94,11.97104,24.0,26.92,4.33
230686,2025-03-11 23:50:00,82699.99,82700.00,82663.93,82663.93,2.94964,24.0,26.92,4.33


In [9]:
def create_features(df: pd.DataFrame) -> pd.DataFrame: 
    df = df.copy()
    # ensure datetime index
    df["datetime"] = pd.to_datetime(df["datetime"])
    df = df.set_index("datetime").sort_index()
    df = df.interpolate(method='time')

    close = df["close"]
    volume = df["volume"]

    # 1) Basic real features
    df["returns"]        = close.pct_change()
    df["fear_and_greed"] = df["fng_value"]
    df["vix"]            = df["vix_value"]
    df["fed_rates"]      = df["fed_funds_rate"]
    df["open_to_close"]  = close - df["open"]
    df["high_to_close"]  = df["high"] - close
    df["low_to_close"]   = df["low"] - close

    # 2) Volume over windows (in minutes)
    df["vol_1h"] = volume.rolling(window=60).sum()
    df["vol_1d"] = volume.rolling(window=1440).sum()
    df["vol_7d"] = volume.rolling(window=1440*7).sum()

    # 3) Simple Moving Averages on close
    df["sma_1h"] = close.rolling(window=60).mean()
    df["sma_1d"] = close.rolling(window=1440).mean()
    df["sma_7d"] = close.rolling(window=1440*7).mean()

    # 4) Exponential Moving Averages on close
    df["ema_1h"] = close.ewm(span=60, adjust=False).mean()
    df["ema_1d"] = close.ewm(span=1440, adjust=False).mean()
    df["ema_7d"] = close.ewm(span=1440*7, adjust=False).mean()

    # 5) MACD and signal line
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df["macd"]        = ema12 - ema26
    df["macd_signal"] = df["macd"].ewm(span=9, adjust=False).mean()

    # 6) RSI (14-period)
    delta = close.diff()
    gain  = delta.clip(lower=0)
    loss  = -delta.clip(upper=0)
    roll_gain = gain.ewm(com=13, adjust=False).mean()
    roll_loss = loss.ewm(com=13, adjust=False).mean()
    rs = roll_gain / roll_loss
    df["rsi"] = 100 - (100 / (1 + rs))

    # 7) Bollinger Bands (20-period SMA ± 2×STD)
    mid = close.rolling(window=20).mean()
    std = close.rolling(window=20).std()
    df["bb_mid"] = mid
    df["bb_up"]  = mid + 2 * std
    df["bb_low"] = mid - 2 * std

    # 8) Categorical time features
    df["hour"]    = df.index.hour
    df["weekday"] = df.index.weekday

    # 9) Drop rows with any NaN (from initial lags, rolls)
    return df.dropna()


df_features = create_features(df_5min)

df_features.to_csv("/Users/apple/Desktop/time-series/informer-validation/data/btc_features_5min.csv", index=True)
df_features


Unnamed: 0_level_0,close,high,low,open,volume,fng_value,vix_value,fed_funds_rate,returns,fear_and_greed,...,ema_1d,ema_7d,macd,macd_signal,rsi,bb_mid,bb_up,bb_low,hour,weekday
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-02-04 23:55:00,23311.98,23316.70,23309.95,23314.64,86.66435,58.0,18.33,4.57,0.000409,58.0,...,23385.524763,21107.558883,-23.064157,-10.829820,28.631769,23392.8865,23476.769453,23309.003547,23,5
2023-02-05 00:00:00,23316.76,23328.24,23316.51,23327.66,185.16841,58.0,18.33,4.57,0.000205,58.0,...,23385.429323,21107.997173,-25.018012,-13.667459,30.531903,23388.1380,23478.064126,23298.211874,0,6
2023-02-05 00:05:00,23337.24,23338.45,23331.42,23336.91,87.94456,58.0,18.33,4.57,0.000878,58.0,...,23385.362440,21108.439440,-24.629972,-15.859961,38.132203,23384.6540,23476.885906,23292.422094,0,6
2023-02-05 00:10:00,23349.61,23355.70,23344.76,23353.32,97.64175,58.0,18.33,4.57,0.000530,58.0,...,23385.312818,21108.884072,-23.058488,-17.299667,42.242543,23381.4700,23473.935920,23289.004080,0,6
2023-02-05 00:15:00,23322.29,23336.77,23320.25,23335.37,120.10147,58.0,18.33,4.57,-0.001170,58.0,...,23385.225347,21109.323196,-23.743867,-18.588507,36.478287,23376.5635,23470.725667,23282.401333,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-11 23:35:00,82699.67,82699.93,82622.63,82627.82,4.78086,24.0,26.92,4.33,0.000303,24.0,...,83526.020989,90938.471561,-85.039185,-30.226108,39.582644,82991.4145,83381.709333,82601.119667,23,1
2025-03-11 23:40:00,82731.14,82884.81,82731.14,82880.90,19.86220,24.0,26.92,4.33,0.000381,24.0,...,83524.917754,90936.843283,-87.898704,-41.760627,41.241269,82968.4720,83363.533938,82573.410062,23,1
2025-03-11 23:45:00,82818.44,82818.45,82732.01,82786.94,11.97104,24.0,26.92,4.33,0.001055,24.0,...,83523.937216,90935.232649,-82.173270,-49.843156,45.695041,82955.3745,83352.178644,82558.570356,23,1
2025-03-11 23:50:00,82699.99,82700.00,82663.93,82663.93,2.94964,24.0,26.92,4.33,-0.001430,24.0,...,83522.793639,90933.598834,-86.200086,-57.114542,41.138743,82929.4955,83321.785557,82537.205443,23,1


In [14]:
df_features_5_min = pd.read_csv("/Users/apple/Desktop/time-series/informer-validation/data/btc_features_5min.csv", parse_dates=["datetime"])

df_features_5_min = df_features_5_min.set_index("datetime")
df_features_5_min

Unnamed: 0_level_0,close,high,low,open,volume,fng_value,vix_value,fed_funds_rate,returns,fear_and_greed,...,ema_1d,ema_7d,macd,macd_signal,rsi,bb_mid,bb_up,bb_low,hour,weekday
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-02-04 23:55:00,23311.98,23316.70,23309.95,23314.64,86.66435,58.0,18.33,4.57,0.000409,58.0,...,23385.524763,21107.558883,-23.064157,-10.829820,28.631769,23392.8865,23476.769453,23309.003547,23,5
2023-02-05 00:00:00,23316.76,23328.24,23316.51,23327.66,185.16841,58.0,18.33,4.57,0.000205,58.0,...,23385.429323,21107.997173,-25.018012,-13.667459,30.531903,23388.1380,23478.064126,23298.211874,0,6
2023-02-05 00:05:00,23337.24,23338.45,23331.42,23336.91,87.94456,58.0,18.33,4.57,0.000878,58.0,...,23385.362440,21108.439440,-24.629972,-15.859961,38.132203,23384.6540,23476.885906,23292.422094,0,6
2023-02-05 00:10:00,23349.61,23355.70,23344.76,23353.32,97.64175,58.0,18.33,4.57,0.000530,58.0,...,23385.312818,21108.884072,-23.058488,-17.299667,42.242543,23381.4700,23473.935920,23289.004080,0,6
2023-02-05 00:15:00,23322.29,23336.77,23320.25,23335.37,120.10147,58.0,18.33,4.57,-0.001170,58.0,...,23385.225347,21109.323196,-23.743867,-18.588507,36.478287,23376.5635,23470.725667,23282.401333,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-11 23:35:00,82699.67,82699.93,82622.63,82627.82,4.78086,24.0,26.92,4.33,0.000303,24.0,...,83526.020989,90938.471561,-85.039185,-30.226108,39.582644,82991.4145,83381.709333,82601.119667,23,1
2025-03-11 23:40:00,82731.14,82884.81,82731.14,82880.90,19.86220,24.0,26.92,4.33,0.000381,24.0,...,83524.917754,90936.843283,-87.898704,-41.760627,41.241269,82968.4720,83363.533938,82573.410062,23,1
2025-03-11 23:45:00,82818.44,82818.45,82732.01,82786.94,11.97104,24.0,26.92,4.33,0.001055,24.0,...,83523.937216,90935.232649,-82.173270,-49.843156,45.695041,82955.3745,83352.178644,82558.570356,23,1
2025-03-11 23:50:00,82699.99,82700.00,82663.93,82663.93,2.94964,24.0,26.92,4.33,-0.001430,24.0,...,83522.793639,90933.598834,-86.200086,-57.114542,41.138743,82929.4955,83321.785557,82537.205443,23,1
