In [1]:
# import packages
import pandas as pd
import numpy as np
import matplotlib as plt
from binance.client import Client 

In [None]:
# Open an unauthenticated client; only public market-data endpoints are used.
client = Client()

# Pull the BTC/USDT funding-rate history (up to 1000 records requested).
funding = client.futures_funding_rate(symbol="BTCUSDT", limit=1000)

# One row per funding event. The event time is floored to its 8-hour bucket so
# it can later be joined on "timestamp", and the rate is cast to float.
funding_df = (
    pd.DataFrame(funding)
    .assign(
        timestamp=lambda d: pd.to_datetime(d["fundingTime"], unit="ms").dt.floor("8h"),
        fundingRate=lambda d: d["fundingRate"].astype(float),
    )
    [["timestamp", "fundingRate"]]
)
funding_df

Unnamed: 0,timestamp,fundingRate
0,2025-11-11 00:00:00,0.000028
1,2025-11-11 08:00:00,-0.000028
2,2025-11-11 16:00:00,0.000058
3,2025-11-12 00:00:00,0.000025
4,2025-11-12 08:00:00,0.000007
...,...,...
195,2026-01-15 00:00:00,-0.000004
196,2026-01-15 08:00:00,0.000017
197,2026-01-15 16:00:00,0.000015
198,2026-01-16 00:00:00,0.000073


In [30]:
# fetches mark price data (OHLC) for BTC/USDT, one row per 8-hour candle
mark_klines = client.futures_mark_price_klines(symbol = "BTCUSDT", interval = "8h", limit = 200)

# creates a mark price df and labels columns
price_df = pd.DataFrame(mark_klines, columns =
                       ["open_time", "open", "high", "low", "close_price",
                        "ignore1", "close_time", "ignore2", "ignore3", "ignore4",
                        "ignore5", "ignore6"])

# keep only candles that have already closed: the newest candle is still
# forming, so its "close" is just the latest mark price and would make the
# last row (and any return computed from it) change on every run
# (close_time is treated as epoch milliseconds, like open_time below)
now_ms = int(pd.Timestamp.now(tz = "UTC").timestamp() * 1000)
is_closed = price_df["close_time"].astype("int64") < now_ms
price_df = price_df.loc[is_closed].copy()

# format column types and define df
# NOTE(review): each row is keyed by the candle's open_time while close_price is
# the price at the END of that candle — confirm this alignment is what the
# later merge on "timestamp" is meant to use
price_df["timestamp"] = pd.to_datetime(price_df["open_time"], unit = "ms")
price_df["close_price"] = price_df["close_price"].astype(float)

price_df = price_df[["timestamp","close_price"]]

price_df

Unnamed: 0,timestamp,close_price
0,2025-11-11 00:00:00,104737.000000
1,2025-11-11 08:00:00,103431.508949
2,2025-11-11 16:00:00,103007.545812
3,2025-11-12 00:00:00,103070.028449
4,2025-11-12 08:00:00,102175.668333
...,...,...
195,2026-01-15 00:00:00,96606.597710
196,2026-01-15 08:00:00,96738.752233
197,2026-01-15 16:00:00,95567.400000
198,2026-01-16 00:00:00,95608.200000


In [34]:
# Join funding events with mark prices on the shared 8-hour timestamp, order the
# rows chronologically, then add the log return of close_price relative to the
# previous row. The first row has no predecessor, so its NaN return is dropped.
BTCUSDT_df = (
    funding_df
    .merge(price_df, how="inner", on="timestamp")
    .sort_values("timestamp")
    .assign(log_returns=lambda d: np.log(d["close_price"] / d["close_price"].shift(1)))
    .dropna()
)

BTCUSDT_df

Unnamed: 0,timestamp,fundingRate,close_price,log_returns
1,2025-11-11 08:00:00,-0.000028,103431.508949,-0.012543
2,2025-11-11 16:00:00,0.000058,103007.545812,-0.004107
3,2025-11-12 00:00:00,0.000025,103070.028449,0.000606
4,2025-11-12 08:00:00,0.000007,102175.668333,-0.008715
5,2025-11-12 16:00:00,0.000058,101605.800000,-0.005593
...,...,...,...,...
195,2026-01-15 00:00:00,-0.000004,96606.597710,-0.003114
196,2026-01-15 08:00:00,0.000017,96738.752233,0.001367
197,2026-01-15 16:00:00,0.000015,95567.400000,-0.012182
198,2026-01-16 00:00:00,0.000073,95608.200000,0.000427


In [None]:
# NOTE(review): `a` is not defined in any cell visible above, so this cell would
# raise NameError on a fresh kernel (Restart & Run All) unless it is defined
# elsewhere — presumably a leftover scratch cell; confirm and delete it.
a