In [4]:
# === STEP 0: Mount Google Drive ===
from google.colab import drive
drive.mount('/content/drive')

# === STEP 1: Set save path ===
import os
import pandas as pd
import requests
import time
from datetime import datetime

save_path = "/content/drive/MyDrive/crypto-data"  # Folder must exist or be named accordingly
os.makedirs(save_path, exist_ok=True)
csv_filename = f"{save_path}/btc_fgi_merged.csv"

# === STEP 2: Fetch Fear & Greed Index (FGI) ===
fgi_url = "https://api.alternative.me/fng/?limit=0&format=json"
response = requests.get(fgi_url).json()
fgi_df = pd.DataFrame(response['data'])

fgi_df['date'] = pd.to_datetime(fgi_df['timestamp'].astype(int), unit='s')
fgi_df['fgi_value'] = fgi_df['value'].astype(int)
fgi_df = fgi_df[['date', 'fgi_value', 'value_classification']]
fgi_df = fgi_df.sort_values('date')

# === STEP 3: Fetch all Binance BTC/USDT daily candles ===
def fetch_binance_ohlcv(symbol="BTCUSDT", interval="1d", start_date="2018-02-01"):
    url = "https://api.binance.com/api/v3/klines"
    limit = 1000
    all_data = []
    start_ts = int(pd.Timestamp(start_date).timestamp() * 1000)

    while True:
        params = {
            "symbol": symbol,
            "interval": interval,
            "startTime": start_ts,
            "limit": limit
        }
        response = requests.get(url, params=params)
        data = response.json()
        if not data or "code" in data:
            break
        all_data.extend(data)
        last_ts = data[-1][0]
        start_ts = last_ts + 24 * 60 * 60 * 1000
        if len(data) < limit:
            break
        time.sleep(0.4)
    return all_data

btc_data = fetch_binance_ohlcv()

btc_df = pd.DataFrame(btc_data, columns=[
    "open_time", "open", "high", "low", "close", "volume",
    "close_time", "quote_asset_volume", "num_trades",
    "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"
])

btc_df['date'] = pd.to_datetime(btc_df['open_time'], unit='ms')
btc_df = btc_df[['date', 'open', 'high', 'low', 'close', 'volume',
                 'quote_asset_volume', 'num_trades',
                 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']]

btc_df[['open', 'high', 'low', 'close', 'volume']] = btc_df[['open', 'high', 'low', 'close', 'volume']].astype(float)

# === STEP 4: Align datasets by shared dates only ===
common_dates = set(btc_df['date']) & set(fgi_df['date'])
btc_df = btc_df[btc_df['date'].isin(common_dates)].reset_index(drop=True)
fgi_df = fgi_df[fgi_df['date'].isin(common_dates)].reset_index(drop=True)

merged_df = pd.merge(btc_df, fgi_df, on='date', how='inner')

# === STEP 5: Save to Drive ===
merged_df.to_csv(csv_filename, index=False)

# === STEP 6: Output Summary ===
print(f"✅ Saved merged dataset to: {csv_filename}")
print(f"✅ Merged dataset shape: {merged_df.shape}")
print("\n📊 Last 5 rows:")
print(merged_df.tail())


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Saved merged dataset to: /content/drive/MyDrive/crypto-data/btc_fgi_merged.csv
✅ Merged dataset shape: (2622, 12)

📊 Last 5 rows:
           date      open      high       low     close       volume  \
2617 2025-04-06  83537.99  83817.63  77153.83  78430.00  27942.71436   
2618 2025-04-07  78430.00  81243.58  74508.00  79163.24  78387.53089   
2619 2025-04-08  79163.24  80867.99  76239.90  76322.42  35317.32063   
2620 2025-04-09  76322.42  83588.00  74620.00  82615.22  75488.28772   
2621 2025-04-10  82615.22  82753.21  80634.26  80977.69  19667.56169   

       quote_asset_volume  num_trades taker_buy_base_asset_volume  \
2617  2239085051.15296450     3948493              12493.84490000   
2618  6084614024.15064720     9736596              39075.84274000   
2619  2775277777.10246080     5512751              15704.73031000   
2620  5980231256.22249600     

In [5]:
merged_df.dtypes


Unnamed: 0,0
date,datetime64[ns]
open,float64
high,float64
low,float64
close,float64
volume,float64
quote_asset_volume,object
num_trades,int64
taker_buy_base_asset_volume,object
taker_buy_quote_asset_volume,object


In [6]:
# Convert problematic object columns to float
cols_to_convert = ['quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']
for col in cols_to_convert:
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')

# Convert sentiment to numeric category
merged_df['sentiment_code'] = merged_df['value_classification'].astype('category').cat.codes
# Example: 'Extreme Fear' → 0, 'Fear' → 1, ..., 'Extreme Greed' → 4

# Drop rows with NaNs (if any from parsing)
merged_df.dropna(inplace=True)

# Drop unused columns for modeling
final_df = merged_df.drop(['date', 'value_classification'], axis=1)

# Confirm cleaned types
print(final_df.dtypes)


open                            float64
high                            float64
low                             float64
close                           float64
volume                          float64
quote_asset_volume              float64
num_trades                        int64
taker_buy_base_asset_volume     float64
taker_buy_quote_asset_volume    float64
fgi_value                         int64
sentiment_code                     int8
dtype: object


In [9]:
print(merged_df.tail())

           date      open      high       low     close       volume  \
2617 2025-04-06  83537.99  83817.63  77153.83  78430.00  27942.71436   
2618 2025-04-07  78430.00  81243.58  74508.00  79163.24  78387.53089   
2619 2025-04-08  79163.24  80867.99  76239.90  76322.42  35317.32063   
2620 2025-04-09  76322.42  83588.00  74620.00  82615.22  75488.28772   
2621 2025-04-10  82615.22  82753.21  80634.26  80977.69  19667.56169   

      quote_asset_volume  num_trades  taker_buy_base_asset_volume  \
2617        2.239085e+09     3948493                  12493.84490   
2618        6.084614e+09     9736596                  39075.84274   
2619        2.775278e+09     5512751                  15704.73031   
2620        5.980231e+09     7741252                  39491.62958   
2621        1.609042e+09     2625820                   9873.92045   

      taker_buy_quote_asset_volume  fgi_value value_classification  \
2617                  1.001460e+09         34                 Fear   
2618        

In [10]:
merged_df.dtypes

Unnamed: 0,0
date,datetime64[ns]
open,float64
high,float64
low,float64
close,float64
volume,float64
quote_asset_volume,float64
num_trades,int64
taker_buy_base_asset_volume,float64
taker_buy_quote_asset_volume,float64
