In [1]:
import ccxt
import time
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone

# Exchange client used for every OHLCV request (ccxt's Binance wrapper).
binance = ccxt.binance()

# Market, bar size, and maximum number of candles requested per call.
symbol = 'BTC/USDT'
timeframe = '15m'
limit = 1000

# Start of the download window: 560 weeks plus 3 days before "now" (UTC).
# The 3 extra days are padding for joining a daily index such as the DXY,
# which has no weekend quotes; rows where the index is NaN can then be removed.
since_dt = datetime.now(timezone.utc) - timedelta(weeks=560, days=3)
since = int(since_dt.timestamp() * 1000)  # epoch seconds -> epoch milliseconds

all_candles = []

# Page forward through history until the exchange returns an empty batch.
candles = binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)
while candles:
    all_candles.extend(candles)

    # Resume 1 ms after the last candle received so it is not fetched again.
    since = candles[-1][0] + 1
    time.sleep(0.5)  # pause between calls to stay under the API rate limit

    candles = binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)

# Build the working table: one row per candle, timestamp parsed from epoch ms.
ohlcv_columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
df = pd.DataFrame(data=all_candles, columns=ohlcv_columns)
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

In [2]:
# NOTE: the DXY enrichment in this cell (daily 'DX-Y.NYB' quotes downloaded with
# yfinance and left-merged onto the candles by calendar date) is commented out.
# The only live statement is the derivation of the `date` column further down;
# the disabled code is kept for reference.
#
# import yfinance as yf
#
# dxy_daily = yf.download(
#     'DX-Y.NYB',
#     start=df['timestamp'].min().date() - pd.Timedelta(days=3),  # to avoid weekends
#     end=df['timestamp'].max().date() + pd.Timedelta(days=3),
#     interval='1d',
#     progress=False,
#     auto_adjust=False
# )
#
# # Reset index to move 'Date' to a column
# dxy_daily = dxy_daily.reset_index(drop=False)
#
# # Because it is multi-level, flatten the columns to one level (like df)
# if isinstance(dxy_daily.columns, pd.MultiIndex):
#     dxy_daily.columns = [col[0] for col in dxy_daily.columns]
#
# dxy_daily.rename(columns={'Date': 'date'}, inplace=True)
# dxy_daily.rename(columns={'Close': 'dxy_close'}, inplace=True)
#
# dxy_daily["date"] = pd.to_datetime(dxy_daily["date"]).dt.date
# Live code: calendar date of each candle (the key the disabled merge joins on).
df["date"] = pd.to_datetime(df["timestamp"]).dt.date
#
# print(dxy_daily)
#
# df = pd.merge(df, dxy_daily[["date", "dxy_close"]], on="date", how="left")
#
# df['dxy_close'] = df['dxy_close'].ffill()
# df = df.dropna(subset=['dxy_close']).reset_index(drop=True)
# df.rename(columns={'dxy_close': 'dxy'}, inplace=True)
#
# print(dxy_daily)

In [3]:
import ta
from ta.momentum import RSIIndicator
from ta.trend import MACD

# RSI on the close at two look-backs, expressed in bars: 6 (short-term) and
# 12 (classic). With the 15m timeframe used for the download these windows
# span 1.5 hours and 3 hours respectively.
rsi_windows = {'rsi_6': 6, 'rsi_12': 12}
for rsi_column, rsi_window in rsi_windows.items():
    df[rsi_column] = RSIIndicator(df['close'], window=rsi_window).rsi()

# MACD on the close with fast/slow/signal windows of 12/26/9 bars.
macd_indicator = MACD(
    close=df['close'],
    window_fast=12,
    window_slow=26,
    window_sign=9,
)

# Output column -> accessor on the indicator object.
macd_outputs = {
    'macd': macd_indicator.macd,                # MACD line
    'macd_signal': macd_indicator.macd_signal,  # Signal line
    'macd_hist': macd_indicator.macd_diff,      # Histogram
}
for macd_column, compute_series in macd_outputs.items():
    df[macd_column] = compute_series()



In [4]:

# df['ema_9'] = df['close'].ewm(span=9, adjust=False).mean()

# 21-bar exponential and 50-bar simple moving averages of the close.
df['ema_21'] = df['close'].ewm(span=21, adjust=False).mean()
df['sma_50'] = df['close'].rolling(50).mean()

# The SMA is undefined until 50 bars are available; remove that warm-up
# stretch in place so `df` remains the same object.
df.dropna(subset=['ema_21', 'sma_50'], inplace=True)

# Gap between the two averages (positive when the EMA is above the SMA).
df['ema/sma crossover'] = df['ema_21'].sub(df['sma_50'])

# Relative distance of the close from its 50-bar SMA.
df['trends with sma'] = df['close'].div(df['sma_50']).sub(1)

In [5]:
from utils import buy_col, sell_col, define_activity

# Look-ahead columns: the following bar's high and low (NaN on the last row).
df['next_high'] = df['high'].shift(-1)
df['next_low'] = df['low'].shift(-1)

# Log return of the close versus the previous bar; the first row has no
# predecessor, so it is removed.
previous_close = df['close'].shift(1)
df['price change'] = np.log(df['close'].div(previous_close))
df.dropna(subset=['price change'], inplace=True)

# Row-wise scores from the project helpers in `utils`.
df['buy_index'] = df.apply(buy_col, axis=1)
df['sell_index'] = df.apply(sell_col, axis=1)

# Net signal, then squashed into (0, 1) with tanh after normalising by the
# median absolute signal.
df['signal'] = df['buy_index'].sub(df['sell_index'])
scale = df['signal'].abs().median()
df['signal_scaled'] = (np.tanh(df['signal'] / scale) + 1) * 0.5

# Label for each row, produced by the project helper.
df['activity'] = df.apply(define_activity, axis=1)
# df['activity'] = np.where(df['close'].shift(-5) > df['close'], 'Buy', 'Sell')

# Discard rows for which either score could not be computed.
df.dropna(subset=['buy_index', 'sell_index'], inplace=True)


In [6]:
# Add Bollinger Bands (window of 20 bars, band width factor of 2)
from ta.volatility import BollingerBands

bollinger = BollingerBands(close=df['close'], window=20, window_dev=2)
df['bollinger_hband'] = bollinger.bollinger_hband()
df['bollinger_lband'] = bollinger.bollinger_lband()
df['bollinger_mavg'] = bollinger.bollinger_mavg()
df['bollinger_bandwidth'] = bollinger.bollinger_wband()

# Add Average True Range (ATR) over 14 bars
from ta.volatility import AverageTrueRange

atr = AverageTrueRange(high=df['high'], low=df['low'], close=df['close'], window=14)
df['atr'] = atr.average_true_range()

# Add Bitcoin trading volume trends
# pct_change() yields +/-inf whenever the previous bar's volume is 0, and
# dropna() does not treat inf as missing. Convert those values to NaN so the
# affected rows are removed below instead of leaking infinities into the
# exported features.
df['volume_change'] = df['volume'].pct_change().replace([np.inf, -np.inf], np.nan)
df['volume_sma_10'] = df['volume'].rolling(window=10).mean()

# Drop rows with NaN values introduced by rolling calculations (and by the
# inf -> NaN conversion above)
df.dropna(inplace=True)

# Print the updated DataFrame
print(df.head())

             timestamp     open     high      low    close     volume  \
69 2017-08-17 21:15:00  4346.73  4369.69  4335.98  4349.33  18.736552   
70 2017-08-17 21:30:00  4349.33  4369.69  4322.65  4354.35  13.210825   
71 2017-08-17 21:45:00  4354.03  4368.68  4309.23  4333.55  19.269546   
72 2017-08-17 22:00:00  4333.55  4349.63  4322.39  4334.32  11.280595   
73 2017-08-17 22:15:00  4325.24  4326.29  4310.00  4326.29  14.960579   

          date      rsi_6     rsi_12      macd  ...    signal  signal_scaled  \
69  2017-08-17  61.385237  54.148080 -9.825137  ... -0.003074       0.057652   
70  2017-08-17  62.929843  55.036479 -6.462856  ... -0.003336       0.046021   
71  2017-08-17  52.490243  50.604590 -5.414198  ... -0.001307       0.233702   
72  2017-08-17  52.837803  50.764705 -4.469475  ... -0.008232       0.000563   
73  2017-08-17  48.406263  48.959228 -4.318944  ...  0.008995       0.999718   

    activity  bollinger_hband  bollinger_lband  bollinger_mavg  \
69      Sell  

In [7]:
print(df)
df.set_index('timestamp', inplace=True)

# export_df = df[['dxy', 'rsi_6', 'rsi_12', 'ema_21', 'sma_50', 'activity']]
export_df_long = df.drop(columns=['date', 'next_high', 'next_low', 'buy_index', 'sell_index', 'signal', 'signal_scaled'])
print(export_df_long)

%store export_df_long

                 timestamp       open       high        low      close  \
69     2017-08-17 21:15:00    4346.73    4369.69    4335.98    4349.33   
70     2017-08-17 21:30:00    4349.33    4369.69    4322.65    4354.35   
71     2017-08-17 21:45:00    4354.03    4368.68    4309.23    4333.55   
72     2017-08-17 22:00:00    4333.55    4349.63    4322.39    4334.32   
73     2017-08-17 22:15:00    4325.24    4326.29    4310.00    4326.29   
...                    ...        ...        ...        ...        ...   
285440 2025-10-13 09:15:00  114801.34  114995.11  114781.62  114950.00   
285441 2025-10-13 09:30:00  114950.00  115169.64  114928.00  115113.08   
285442 2025-10-13 09:45:00  115113.08  115199.58  115088.67  115127.04   
285443 2025-10-13 10:00:00  115127.05  115162.00  114942.40  114986.55   
285444 2025-10-13 10:15:00  114986.56  115051.50  114823.26  114884.69   

            volume        date      rsi_6     rsi_12       macd  ...  \
69       18.736552  2017-08-17  61.3852