In [70]:
import ccxt
import time
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone

# Binance client from ccxt, created with default options.
binance = ccxt.binance()

symbol = 'BTC/USDT'
timeframe = '5m'
limit = 1000  # maximum number of candles requested per API call

# Look back 3 weeks plus 3 spare days. The spare days are a buffer for series
# that have no weekend data (e.g. the DXY index): rows without a value for
# such a series can be discarded later and still leave about 3 weeks of data.
since_dt = datetime.now(timezone.utc) - timedelta(weeks=3, days=3)
since = int(since_dt.timestamp() * 1000)  # epoch time in milliseconds

all_candles = []

# Page forward through history; an empty (falsy) batch ends the loop.
while (candles := binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)):
    all_candles.extend(candles)

    # The next request starts 1 ms after the last candle already received.
    since = candles[-1][0] + 1
    time.sleep(0.5)  # pause between requests to stay under the API rate limit

# Put the collected candles into a table, one row per candle.
df = pd.DataFrame(
    all_candles,
    columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
)
# Millisecond epoch integers -> pandas datetimes.
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

In [72]:
import yfinance as yf

# Daily DXY (ticker 'DX-Y.NYB') bars from Yahoo Finance. The requested window
# is padded by 3 days on both sides so that a candle falling on a day without
# a DXY bar (e.g. a weekend) still has an earlier bar to forward-fill from.
dxy_daily = yf.download(
    'DX-Y.NYB',
    start=df['timestamp'].min().date() - pd.Timedelta(days=3),
    end=df['timestamp'].max().date() + pd.Timedelta(days=3),
    interval='1d',
    progress=False,
    auto_adjust=False
)

# Fail loudly on an empty download: otherwise the dropna() further down would
# silently remove every row of df.
if dxy_daily.empty:
    raise RuntimeError("No DXY data returned for 'DX-Y.NYB'; cannot merge dxy into df")

# Move the 'Date' index into a regular column.
dxy_daily = dxy_daily.reset_index(drop=False)

# The download can come back with multi-level columns; keep only the first
# level so the frame has flat column names like df.
if isinstance(dxy_daily.columns, pd.MultiIndex):
    dxy_daily.columns = [col[0] for col in dxy_daily.columns]

dxy_daily = dxy_daily.rename(columns={'Date': 'date', 'Close': 'dxy_close'})
dxy_daily["date"] = pd.to_datetime(dxy_daily["date"]).dt.date

# Make the cell safe to re-run: remove DXY columns left by a previous run so
# the merge + rename below cannot produce a duplicated 'dxy' column.
df = df.drop(columns=['dxy', 'dxy_close'], errors='ignore')
df["date"] = pd.to_datetime(df["timestamp"]).dt.date

print(dxy_daily)

# NOTE(review): every candle receives the daily close of its own calendar
# date, including candles stamped earlier in that day than the close was
# known. Confirm this is acceptable if 'dxy' is used as a predictive feature.
df = pd.merge(df, dxy_daily[["date", "dxy_close"]], on="date", how="left")

# Days without a DXY bar inherit the most recent earlier value; leading rows
# that have nothing to inherit from are dropped.
df['dxy_close'] = df['dxy_close'].ffill()
df = df.dropna(subset=['dxy_close']).reset_index(drop=True)
df = df.rename(columns={'dxy_close': 'dxy'})

2025-08-28
          date  Adj Close  dxy_close       High        Low       Open  Volume
0   2025-08-25  98.430000  98.430000  98.540001  97.709999  97.730003       0
1   2025-08-26  98.230003  98.230003  98.559998  98.089996  98.470001       0
2   2025-08-27  98.230003  98.230003  98.730003  98.160004  98.250000       0
3   2025-08-28  97.809998  97.809998  98.230003  97.739998  98.120003       0
4   2025-08-29  97.769997  97.769997  98.129997  97.690002  97.870003       0
5   2025-09-02  98.400002  98.400002  98.599998  97.620003  97.650002       0
6   2025-09-03  98.139999  98.139999  98.639999  98.010002  98.400002       0
7   2025-09-04  98.349998  98.349998  98.440002  98.080002  98.150002       0
8   2025-09-05  97.769997  97.769997  98.260002  97.430000  98.239998       0
9   2025-09-08  97.449997  97.449997  97.940002  97.419998  97.790001       0
10  2025-09-09  97.790001  97.790001  97.820000  97.250000  97.379997       0
11  2025-09-10  97.779999  97.779999  97.930000  97.5

In [73]:
def buy_col_ranged(row):
    """Grade the next bar against the current bar from a buyer's viewpoint.

    `row` must expose `high`, `low`, `next_high` and `next_low`.

    Returns:
        NaN if `next_high` or `next_low` is missing;
        2  if the next low is at or above the current high;
        1  if (otherwise) the next high is above the current high;
        0  if (otherwise) the next high is above the current low;
        -1 in every remaining case.
    """
    upcoming_high, upcoming_low = row.next_high, row.next_low

    # No following bar to compare with.
    if pd.isna(upcoming_high) or pd.isna(upcoming_low):
        return np.nan

    if upcoming_low >= row.high:
        return 2
    if upcoming_high > row.high:
        return 1
    return 0 if upcoming_high > row.low else -1
def sell_col_ranged(row):
    """Grade the next bar against the current bar from a seller's viewpoint.

    `row` must expose `high`, `low`, `next_high` and `next_low`.

    Returns:
        NaN if `next_high` or `next_low` is missing;
        2  if the next high is at or below the current low;
        1  if (otherwise) the next low is below the current low;
        0  if (otherwise) the next low is below the current high;
        -1 in every remaining case.
    """
    upcoming_high, upcoming_low = row.next_high, row.next_low

    # No following bar to compare with.
    if pd.isna(upcoming_high) or pd.isna(upcoming_low):
        return np.nan

    if upcoming_high <= row.low:
        return 2
    if upcoming_low < row.low:
        return 1
    return 0 if upcoming_low < row.high else -1

In [74]:
def buy_col(row):
    """Relative distance from this bar's high to the next bar's high.

    Computed as (next_high - high) / high. Returns NaN when either
    `next_high` or `next_low` is missing on `row`.
    """
    next_bar_present = pd.notna(row.next_high) and pd.notna(row.next_low)
    if not next_bar_present:
        return np.nan

    current_high = row.high
    rise = row.next_high - current_high
    return rise / current_high

def sell_col(row):
    """Relative distance from this bar's low down to the next bar's low.

    Computed as (low - next_low) / low. Returns NaN when either
    `next_high` or `next_low` is missing on `row`.
    """
    next_bar_present = pd.notna(row.next_high) and pd.notna(row.next_low)
    if not next_bar_present:
        return np.nan

    current_low = row.low
    drop = current_low - row.next_low
    return drop / current_low

def define_activity(row):
    """Map `row.signal_scaled` to a five-level trading label.

    Bands (lower bound inclusive, upper bound exclusive):
        below 0.2      -> 'Strong Sell'
        0.2 up to 0.4  -> 'Sell'
        0.4 up to 0.6  -> 'Hold'
        0.6 up to 0.8  -> 'Buy'
        0.8 and above  -> 'Strong Buy'

    Returns NaN when `signal_scaled` is missing. Without this guard a NaN
    fails every `<` comparison and would fall through to 'Strong Buy'.
    """
    score = row.signal_scaled
    if pd.isna(score):
        return np.nan

    if score < 0.2:
        return 'Strong Sell'
    if score < 0.4:
        return 'Sell'
    if score < 0.6:
        return 'Hold'
    if score < 0.8:
        return 'Buy'
    return 'Strong Buy'

In [75]:

# High/low of the following row, aligned onto the current row.
df['next_high'] = df['high'].shift(-1)
df['next_low'] = df['low'].shift(-1)

df['buy_index'] = df.apply(buy_col, axis=1)
df['sell_index'] = df.apply(sell_col, axis=1)

# Rows without a following bar (at least the last one) have no buy/sell index.
# Drop them before scaling and labelling so no label is ever produced from a
# NaN signal. `.copy()` yields an independent frame for the assignments below.
df = df.dropna(subset=['buy_index', 'sell_index']).copy()

df['signal'] = df['buy_index'] - df['sell_index']

# Normalise by the median absolute signal, then squash into (0, 1) with tanh.
scale = df['signal'].abs().median()
if not np.isfinite(scale) or scale == 0:
    # Dividing by a zero/NaN scale would silently fill signal_scaled with NaN/inf.
    raise ValueError(f"Cannot scale signal: median absolute signal is {scale!r}")
df['signal_scaled'] = 0.5 * (np.tanh(df['signal'] / scale) + 1)

df['activity'] = df.apply(define_activity, axis=1)


In [76]:
# Final column layout: identifiers, raw market data, then derived features.
ordered_columns = [
    'date', 'timestamp', 'dxy', 'volume',
    'open', 'close',
    'high', 'next_high',
    'low', 'next_low',
    'buy_index', 'sell_index',
    'signal', 'signal_scaled', 'activity',
]
df = df[ordered_columns]
print(df)

            date           timestamp        dxy    volume       open  \
0     2025-08-28 2025-08-28 10:05:00  97.809998  43.93834  112840.98   
1     2025-08-28 2025-08-28 10:10:00  97.809998  26.39111  112969.92   
2     2025-08-28 2025-08-28 10:15:00  97.809998  14.12757  113019.38   
3     2025-08-28 2025-08-28 10:20:00  97.809998  12.62388  112911.01   
4     2025-08-28 2025-08-28 10:25:00  97.809998  12.10310  112888.01   
...          ...                 ...        ...       ...        ...   
6906  2025-09-21 2025-09-21 09:35:00  97.647003  29.01007  115406.28   
6907  2025-09-21 2025-09-21 09:40:00  97.647003  20.10248  115409.81   
6908  2025-09-21 2025-09-21 09:45:00  97.647003  34.06336  115516.29   
6909  2025-09-21 2025-09-21 09:50:00  97.647003  18.06040  115568.26   
6910  2025-09-21 2025-09-21 09:55:00  97.647003  37.62566  115597.98   

          close       high  next_high        low   next_low  buy_index  \
0     112969.92  112995.96  113037.98  112840.98  112969.91  