In [1]:
import ccxt
import time
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone

# --- Fetch 5-minute BTC/USDT candles from Binance (via ccxt) into `df` ---
binance = ccxt.binance()

symbol = 'BTC/USDT'
timeframe = '5m'
limit = 1000  # candles requested per API call

# Look-back window: 4*3 + 3 = 15 days. The extra 3 days are padding so that,
# when a weekday-only index such as DXY is joined later and rows without a DXY
# value are removed, enough data remains.
# NOTE(review): the original comment called this "4 weeks", but 4*3 + 3 is
# 15 days; use 4*7 + 3 if four weeks is what is intended.
since_dt = datetime.now(timezone.utc) - timedelta(days=4*3 + 3)
since = int(since_dt.timestamp() * 1000)  # epoch seconds -> milliseconds

all_candles = []

# Page forward through history: each request starts 1 ms after the last candle
# received by the previous request.
while True:
    candles = binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)  # binance api from ccxt
    if not candles:
        break

    last_open_ms = candles[-1][0]
    if last_open_ms < since:
        # The response did not move past the requested start time; stop instead
        # of re-requesting the same window forever.
        break

    all_candles += candles
    since = last_open_ms + 1  # next page starts right after the last candle

    if len(candles) < limit:
        # A short page means there is nothing further to fetch, so skip the
        # extra request (and sleep) that would only return an empty list.
        break

    time.sleep(0.5)  # avoid hitting rate limits with api

if not all_candles:
    # Fail here with a clear message rather than later on an empty frame.
    raise RuntimeError(f"No OHLCV candles returned for {symbol} ({timeframe})")

# Convert to DataFrame (table); 'timestamp' is converted from epoch milliseconds.
df = pd.DataFrame(all_candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

In [2]:
import yfinance as yf

# --- Join the daily DXY close onto the intraday BTC candles ---
# Download daily bars for the candle date range, padded by 3 days on each side
# (to avoid weekends at the edges).
dxy_daily = yf.download(
    'DX-Y.NYB',
    start=df['timestamp'].min().date() - pd.Timedelta(days=3),
    end=df['timestamp'].max().date() + pd.Timedelta(days=3),
    interval='1d',
    progress=False,
    auto_adjust=False
)

if dxy_daily is None or dxy_daily.empty:
    # Without this check an empty download would make every dxy value NaN and
    # the dropna below would silently remove every row of df.
    raise RuntimeError("yfinance returned no data for 'DX-Y.NYB'")

# Reset index to move 'Date' to a column
dxy_daily = dxy_daily.reset_index(drop=False)

# Because its multilevel, make it one level (like df)
if isinstance(dxy_daily.columns, pd.MultiIndex):
    dxy_daily.columns = [col[0] for col in dxy_daily.columns]

dxy_daily.rename(columns={'Date': 'date', 'Close': 'dxy_close'}, inplace=True)

# Both frames get a plain calendar-date key to merge on.
dxy_daily["date"] = pd.to_datetime(dxy_daily["date"]).dt.date
df["date"] = pd.to_datetime(df["timestamp"]).dt.date

# Drop any 'dxy' / 'dxy_close' left over from a previous run of this cell so
# that re-running it does not create duplicate columns.
# NOTE(review): every intraday row receives the daily close of its own calendar
# date; if that close is only known at end of day, earlier rows of the same day
# see a future value. Confirm this is acceptable, or shift dxy_close by one day.
df = pd.merge(
    df.drop(columns=['dxy', 'dxy_close'], errors='ignore'),
    dxy_daily[["date", "dxy_close"]],
    on="date",
    how="left",
)

# Dates with no daily bar (e.g. weekends) take the last known value; leading
# rows that still have no value are removed.
df['dxy_close'] = df['dxy_close'].ffill()
df = df.dropna(subset=['dxy_close']).reset_index(drop=True)
df.rename(columns={'dxy_close': 'dxy'}, inplace=True)

print(dxy_daily)

          date  Adj Close  dxy_close       High        Low       Open  Volume
0   2025-09-11  97.540001  97.540001  98.089996  97.470001  97.790001       0
1   2025-09-12  97.550003  97.550003  97.860001  97.489998  97.519997       0
2   2025-09-15  97.300003  97.300003  97.699997  97.269997  97.629997       0
3   2025-09-16  96.629997  96.629997  97.389999  96.559998  97.339996       0
4   2025-09-17  96.870003  96.870003  97.070000  96.220001  96.620003       0
5   2025-09-18  97.349998  97.349998  97.599998  96.839996  96.930000       0
6   2025-09-19  97.639999  97.639999  97.809998  97.269997  97.339996       0
7   2025-09-22  97.330002  97.330002  97.820000  97.309998  97.650002       0
8   2025-09-23  97.260002  97.260002  97.459999  97.199997  97.320000       0
9   2025-09-24  97.870003  97.870003  97.919998  97.220001  97.239998       0
10  2025-09-25  98.550003  98.550003  98.610001  97.739998  97.820000       0
11  2025-09-26  98.150002  98.150002  98.529999  98.129997  98.5

In [3]:
import ta
from ta.momentum import RSIIndicator
from ta.trend import MACD

# Momentum features derived from the close price.
close_series = df['close']

# MACD with slow=26, fast=12 and signal=9 periods.
macd_indicator = MACD(close=close_series, window_slow=26, window_fast=12, window_sign=9)

indicator_columns = {
    'rsi_6': RSIIndicator(close_series, window=6).rsi(),    # short-term: 6 x 5m candles (~30min)
    'rsi_12': RSIIndicator(close_series, window=12).rsi(),  # classic: 12 x 5m candles (~1hour)
    'macd': macd_indicator.macd(),                          # MACD line
    'macd_signal': macd_indicator.macd_signal(),            # signal line
    'macd_hist': macd_indicator.macd_diff(),                # histogram
}

# Attach the columns in the order listed above.
for column_name, column_values in indicator_columns.items():
    df[column_name] = column_values



In [4]:

# df['ema_9'] = df['close'].ewm(span=9, adjust=False).mean()

# Trend features: 21-period exponential and 50-period simple moving averages
# of the close price.
close_prices = df['close']
df['ema_21'] = close_prices.ewm(span=21, adjust=False).mean()
df['sma_50'] = close_prices.rolling(window=50).mean()

# Remove rows where either average is missing (the rolling mean has no value
# until its 50-row window is full).
ma_columns = ['ema_21', 'sma_50']
df.dropna(subset=ma_columns, inplace=True)

# potential indicator to experiment with
# ema_short - sma_long for crossovers
# close / sma_50 - 1 for capturing trends

In [5]:
from utils import buy_col, sell_col, define_activity

# --- Build the target columns from the next candle's high/low ---
# Next candle's high/low on the current row; the final row has no next candle,
# so its values are NaN.
df['next_high'] = df['high'].shift(-1)
df['next_low'] = df['low'].shift(-1)

# Row-wise scores computed by the project helpers in utils.
df['buy_index'] = df.apply(buy_col, axis=1)
df['sell_index'] = df.apply(sell_col, axis=1)

df['signal'] = df['buy_index'] - df['sell_index']

# Scale by the median absolute signal before squashing with tanh.
scale = df['signal'].abs().median()
if not np.isfinite(scale) or scale == 0:
    # A zero (or NaN, on an empty frame) median would turn the division below
    # into 0/0 -> NaN and x/0 -> +/-inf; fall back to no rescaling instead.
    scale = 1.0

# Map the signal into (0, 1); a signal of 0 maps to 0.5.
df['signal_scaled'] = 0.5 * (np.tanh(df['signal'] / scale) + 1)

# NOTE(review): define_activity runs before rows with a missing buy/sell index
# are removed below, so it also sees those rows — confirm it tolerates NaN.
df['activity'] = df.apply(define_activity, axis=1)

df.dropna(subset=['buy_index','sell_index'], inplace=True)


In [6]:
print(df)

# export_df = df[['dxy', 'rsi_6', 'rsi_12', 'ema_21', 'sma_50', 'activity']]
export_df = df.drop(columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'date', 'next_high', 'next_low', 'buy_index', 'sell_index', 'signal', 'signal_scaled'])
print(export_df)

%store export_df

               timestamp       open       high        low      close  \
49   2025-09-15 04:05:00  115436.98  115436.99  115400.00  115421.75   
50   2025-09-15 04:10:00  115421.75  115506.85  115405.81  115497.99   
51   2025-09-15 04:15:00  115498.00  115627.91  115498.00  115627.91   
52   2025-09-15 04:20:00  115627.91  116168.26  115627.90  115930.59   
53   2025-09-15 04:25:00  115930.59  116052.00  115930.59  115962.44   
...                  ...        ...        ...        ...        ...   
4197 2025-09-29 13:45:00  113299.99  113490.00  113197.50  113334.50   
4198 2025-09-29 13:50:00  113334.51  113401.90  113079.23  113217.01   
4199 2025-09-29 13:55:00  113217.01  113350.01  113100.00  113227.29   
4200 2025-09-29 14:00:00  113227.29  113437.04  113220.21  113419.01   
4201 2025-09-29 14:05:00  113419.01  113520.00  113326.72  113519.99   

         volume        date        dxy      rsi_6     rsi_12  ...   macd_hist  \
49     10.38610  2025-09-15  97.300003  42.429678  51.