# 0_setup.ipynb

This notebook aligns EEG recordings with the Bitcoin price at 1-minute resolution.

In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import yfinance as yf
import mne
import backtrader as bt
import requests, time
from datetime import datetime, timedelta

# 1. Load & clean EEG
* Convert epoch timestamps to a UTC-aware pandas datetime index
* Fill missing values with forward/backward fill

In [2]:
## load the raw EEG metrics table
EEG_PATH = Path("data") / "waves_metrics.csv"
eeg = pd.read_csv(EEG_PATH)

# discard every column whose name starts with "unnamed" (case-insensitive)
unnamed_cols = [name for name in eeg.columns if name.lower().startswith("unnamed")]
if len(unnamed_cols) > 0:
    eeg = eeg.drop(columns=unnamed_cols)

# trim surrounding whitespace from the headers, then map the known spelling
# variants of each band onto one canonical column name
eeg = eeg.rename(columns=str.strip)
alias_map = {
    variant: canonical
    for canonical, variants in {
        "HighBeta": ("HighBetta", "High-Betta", "High-Beta"),
        "LowBeta": ("LowBetta", "Low-Beta", "Low‑Beta"),
        "LowGamma": ("Low-Gamma", "Low‑Gamma"),
    }.items()
    for variant in variants
}
eeg = eeg.rename(columns=alias_map)

In [3]:
# turn the epoch seconds in "Ts" into a UTC-aware "ts" column, then use it
# as the chronologically sorted index (standardizes the timezone)
eeg = (
    eeg.assign(ts=pd.to_datetime(eeg["Ts"], unit="s", utc=True))
    .set_index("ts")
    .sort_index()
)

In [4]:
# patch gaps: carry the previous value forward, then the next value backward;
# rows that are still incomplete afterwards are discarded
na_tot = eeg.isna().to_numpy().sum()
if na_tot > 0:
    print(f"Found {na_tot:,} NaNs in EEG — applying forward/backward fill …")
    eeg = eeg.ffill()
    eeg = eeg.bfill()
    na_after = eeg.isna().to_numpy().sum()
    if na_after > 0:
        print(f"   {na_after:,} NaNs remain → dropping those rows.")
        eeg = eeg.dropna()

Found 1,076 NaNs in EEG — applying forward/backward fill …


In [5]:
# sanity check: report the dimensions of the cleaned EEG table
eeg_summary = f"Loaded EEG: {len(eeg):,} rows, {eeg.shape[1]} cols"
print(eeg_summary)

Loaded EEG: 19,350 rows, 16 cols


# 2. Download & clean Bitcoin data

In [6]:
# the BTC download window is bounded by the earliest and latest EEG timestamps
EEG_START: datetime
EEG_END: datetime
EEG_START, EEG_END = eeg.index.min(), eeg.index.max()
print(f"Time range: {EEG_START} to {EEG_END}")

Time range: 2022-01-24 05:38:30+00:00 to 2022-02-25 02:30:02+00:00


In [7]:
def fetch_binance_us_1min(symbol: str, start_dt: datetime, end_dt: datetime) -> pd.DataFrame:
    """Download 1-minute klines (candles) for ``symbol`` from the Binance.US REST API.

    The range is walked in successive requests. Each request asks for at most
    1000 candles inside a window of up to 5 days, and the next request starts
    1 ms after the close time of the last candle received. A window that
    returns no candles is skipped; it does not end the download.

    Parameters
    ----------
    symbol : str
        Trading pair passed to the API, e.g. "BTCUSDT".
    start_dt, end_dt : datetime
        Bounds of the requested range; both ends are kept in the result.
        Pass timezone-aware values: after the first chunk the cursor is a
        UTC-aware timestamp, which cannot be compared with a naive ``end_dt``.

    Returns
    -------
    pd.DataFrame
        Float columns Open, High, Low, Close, Volume indexed by each candle's
        UTC ``open_time``; sorted, limited to [start_dt, end_dt], with
        duplicated timestamps removed (first occurrence kept). An empty frame
        with the same columns is returned when no candles were received.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status (``raise_for_status``).
    """
    url = "https://api.binance.us/api/v3/klines"
    price_cols = ["Open", "High", "Low", "Close", "Volume"]

    def to_ms(dt: datetime) -> int:
        # the request parameters are epoch milliseconds
        return int(dt.timestamp() * 1000)

    frames = []
    cur = start_dt
    while cur < end_dt:
        # A response holds at most 1000 one-minute candles (~16.7 hours), so the
        # 5-day window is only an upper bound; progress is driven by the last
        # candle actually received.
        chunk_end = min(cur + timedelta(days=5), end_dt)

        # request query
        params = dict(
            symbol=symbol,
            interval="1m",
            startTime=to_ms(cur),
            endTime=to_ms(chunk_end),
            limit=1000,
        )
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        raw = r.json()
        time.sleep(0.2)     # public API, stay under 5/s limit

        if not raw:
            # No candles in this window: continue with the next window instead
            # of abandoning the remainder of the range.
            cur = chunk_end
            continue

        # convert JSON into DataFrame
        df = pd.DataFrame(raw, columns=[
            "open_time", "Open", "High", "Low", "Close", "Volume",
            "close_time", "q", "n", "taker_base", "taker_quote", "ignore",
        ])

        # timestamps in UTC datetimes, used as the index
        df["open_time"] = pd.to_datetime(df["open_time"], unit="ms", utc=True)
        df = df.set_index("open_time")

        # keep only the price/volume columns, as floats
        frames.append(df[price_cols].astype(float))

        # advance 1ms past last close_time (element 6 of the last candle)
        cur = pd.to_datetime(raw[-1][6], unit="ms", utc=True) + timedelta(milliseconds=1)

    if not frames:
        # pd.concat([]) raises ValueError; return an empty, correctly shaped frame
        return pd.DataFrame(
            columns=price_cols,
            index=pd.DatetimeIndex([], tz="UTC", name="open_time"),
            dtype=float,
        )

    # combine chunks together, sort, filter window, drop duplicates
    btc_1m = (
        pd.concat(frames).sort_index()
        .loc[start_dt:end_dt]
        .loc[lambda df_: ~df_.index.duplicated()]
    )
    return btc_1m

# download the BTCUSDT candles covering the EEG recording window
btc_1m = fetch_binance_us_1min(symbol="BTCUSDT", start_dt=EEG_START, end_dt=EEG_END)

In [8]:
# sanity check: report the dimensions of the downloaded BTC table
btc_summary = f"Loaded BTC: {len(btc_1m):,} rows, {btc_1m.shape[1]} cols"
print(btc_summary)

Loaded BTC: 45,858 rows, 5 cols


# 3. Align & save

In [9]:
# make sure both indexes are in UTC (localize if naive, convert otherwise)
eeg.index = eeg.index.tz_localize("UTC") if eeg.index.tz is None else eeg.index.tz_convert("UTC")
btc_1m.index = btc_1m.index.tz_localize("UTC") if btc_1m.index.tz is None else btc_1m.index.tz_convert("UTC")

# Upsample the 1-minute closes onto a 1-second grid, forward-filling each close
# for at most 60 consecutive seconds, then look up the EEG timestamps on that grid
# (EEG timestamps that are not on the grid, or not covered by the fill, become NaN).
# "1s" replaces the deprecated "1S" alias, which triggers a pandas FutureWarning.
# NOTE(review): candles are indexed by open_time, so each Close is stamped at the
# start of its minute — confirm this is the intended alignment.
btc_1s = btc_1m["Close"].resample("1s").ffill(limit=60).reindex(eeg.index)

  btc_1s = (btc_1m["Close"].resample("1S").ffill(60).reindex(eeg.index))


In [10]:
## keep only full minute (second==0)
# Rows without a BTC price (NaN after the reindex) are dropped here. Both
# objects are filtered from the same EEG index, so without this step the
# intersection below would be a no-op and unpriced rows would reach the CSVs.
btc_full_min = btc_1s[btc_1s.index.second == 0].dropna()
eeg_full_min = eeg[eeg.index.second == 0]

# restrict both to the timestamps they share, so row i is the same minute in both
common_idx = eeg_full_min.index.intersection(btc_full_min.index)
eeg_full_min = eeg_full_min.loc[common_idx]
btc_full_min = btc_full_min.loc[common_idx]

# invariant relied on by the saved files: identical timestamps, row for row
assert eeg_full_min.index.equals(btc_full_min.index), "EEG and BTC rows are not aligned"

In [11]:
# save files
# to_csv does not create missing folders, so make sure data/raw exists first
# (otherwise a fresh checkout fails here on Restart & Run All)
RAW_DIR = Path("data") / "raw"
RAW_DIR.mkdir(parents=True, exist_ok=True)
btc_full_min.to_csv(RAW_DIR / "btc_1min.csv")
eeg_full_min.to_csv(RAW_DIR / "eeg_1min.csv")