In [53]:
# imports
import pandas as pd
# import numpy as np
import json
import yfinance as yf
import requests
from datetime import datetime, timedelta
import time

### Data inputs 

Cryptocurrency markets
- BTC Fear/Greed index
- BTC perpetual funding rate
- BTC exchange volume
- BTC daily closing price
- BTC daily exchange value
- BTC on chain active addresses

Macro
- US Dollar Index 
- M2 money supply

## Data Loading

In [None]:
# Binance data fetching functions

# Binance API URLs
# Funding-rate history endpoint; requested by fetch_historical_funding_rates.
funding_rate_url = "https://dapi.binance.com/dapi/v1/fundingRate"
# Kline (candlestick) endpoint; requested by fetch_historical_volume for spot volume.
spot_volume_url = "https://api.binance.com/api/v3/klines"
# Alternative klines endpoint, currently disabled:
# volume_url = "https://fapi.binance.com/fapi/v1/klines"


# Function to fetch funding rates
def fetch_historical_funding_rates(symbol, start_date, end_date):
    """
    Fetch historical funding rates from the Binance funding-rate endpoint.

    Pages forward through ``funding_rate_url`` in batches of up to 1000
    records, restarting each request one millisecond after the last
    ``fundingTime`` received, until the window is exhausted, the API returns
    an empty page, or a non-200 status is received.

    Parameters:
    -----------
    symbol : str
        Contract symbol passed to the API unchanged (e.g., 'BTCUSD_PERP')
    start_date : datetime
        Start of the window (converted to epoch milliseconds)
    end_date : datetime
        End of the window (converted to epoch milliseconds)

    Returns:
    --------
    pandas.DataFrame
        One row per record returned by the API, with the columns exactly as
        delivered (``fundingTime`` is left as the raw API value). Empty when
        nothing was fetched. On a non-200 response the error is printed and
        whatever was collected so far is returned.

    Raises:
    -------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` on network failure or timeout.
    """
    start_timestamp = int(start_date.timestamp() * 1000)
    end_timestamp = int(end_date.timestamp() * 1000)
    all_data = []

    while start_timestamp < end_timestamp:
        params = {
            "symbol": symbol,
            "startTime": start_timestamp,
            "endTime": end_timestamp,
            "limit": 1000
        }
        # A timeout keeps a stalled connection from hanging the notebook forever
        response = requests.get(funding_rate_url, params=params, timeout=30)
        if response.status_code != 200:
            print(f"Error fetching funding rates: {response.status_code}")
            break

        data = response.json()
        if not data:
            print("No more data available.")
            break  # Exit the loop if no more data is available

        all_data.extend(data)

        # Move to the millisecond after the last record received
        next_timestamp = data[-1]["fundingTime"] + 1
        if next_timestamp <= start_timestamp:
            # The cursor did not advance; requesting again would repeat the
            # same page forever, so stop here.
            break
        start_timestamp = next_timestamp

    return pd.DataFrame(all_data)

def fetch_historical_volume(symbol, start_date, end_date):
    """
    Fetch historical daily trading volume from the Binance spot klines endpoint.

    Requests ``1d`` candles from ``spot_volume_url`` in windows of at most
    ``MAX_LIMIT`` days, resuming after the last candle received, and pauses
    briefly between requests.

    Parameters:
    -----------
    symbol : str
        Trading pair (e.g., 'BTCUSDT'); upper-cased before the request
    start_date : datetime
        Start date for data collection
    end_date : datetime
        End date for data collection

    Returns:
    --------
    pandas.DataFrame
        A single float column ``volume`` indexed by ``timestamp`` (the candle
        open time parsed from epoch milliseconds), sorted ascending. When no
        candles are returned, an empty frame with a ``volume`` column.

    Raises:
    -------
    Exception
        When the API answers with HTTP 418 (IP ban).
    requests.exceptions.RequestException
        Any other request failure. HTTP 429 is not raised: the function waits
        60 seconds and retries the same window.
    """

    all_data = []
    current_start_date = start_date

    # Binance API limits
    MAX_LIMIT = 200  # Maximum records per request
    RATE_LIMIT_PAUSE = 0.1  # Seconds to pause between requests

    while current_start_date < end_date:
        # Calculate end time for current batch:
        # at most MAX_LIMIT days of 1-day candles per request
        batch_end_date = min(
            current_start_date + timedelta(days=MAX_LIMIT),
            end_date
        )

        params = {
            "symbol": symbol.upper(),  # Ensure uppercase
            "interval": "1d",
            "startTime": int(current_start_date.timestamp() * 1000),
            "endTime": int(batch_end_date.timestamp() * 1000),
            "limit": MAX_LIMIT
        }

        try:
            # A timeout keeps a stalled connection from hanging the notebook forever
            response = requests.get(spot_volume_url, params=params, timeout=30)
            response.raise_for_status()  # Raise exception for bad status codes

            data = response.json()

            if data:
                all_data.extend(data)
                # Resume just after the open time of the last candle received.
                # Reusing the caller's tzinfo keeps the loop comparison valid
                # for timezone-aware inputs (None yields naive local time).
                last_timestamp = data[-1][0]
                current_start_date = datetime.fromtimestamp(
                    last_timestamp / 1000, tz=current_start_date.tzinfo
                ) + timedelta(hours=1)
            else:
                print(f"No data returned for period: {current_start_date} to {batch_end_date}")
                current_start_date = batch_end_date

            # Respect rate limits
            time.sleep(RATE_LIMIT_PAUSE)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            # Take the status from the exception itself: when requests.get()
            # fails before returning, no response object exists for this
            # attempt. Compare with None explicitly because a Response for a
            # 4xx/5xx status is falsy.
            failed_response = getattr(e, "response", None)
            status_code = (
                failed_response.status_code if failed_response is not None else None
            )
            if status_code == 429:  # Rate limit exceeded
                print("Rate limit exceeded, waiting 60 seconds...")
                time.sleep(60)
                continue
            elif status_code == 418:  # IP ban
                raise Exception("IP banned by Binance API") from e
            else:
                raise

    if not all_data:
        return pd.DataFrame(columns=["volume"])

    # Convert to DataFrame
    df = pd.DataFrame(all_data, columns=[
        "timestamp", "open", "high", "low", "close", "volume", "close_time",
        "quote_asset_volume", "number_of_trades", "taker_buy_base_volume",
        "taker_buy_quote_volume", "ignore"
    ])

    # Convert timestamp to datetime and set as index
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
    df = df.set_index("timestamp")

    # Convert volume to float
    df["volume"] = df["volume"].astype(float)

    return df[["volume"]].sort_index()


In [28]:
# Fear Greed Index

def fetch_fear_greed_history():
    """
    Download the Fear & Greed index history from api.alternative.me.

    Returns:
    --------
    pandas.DataFrame
        Indexed by ``timestamp`` (parsed from epoch seconds), sorted
        ascending and restricted to 2018 onwards. ``value`` is numeric; the
        remaining columns are kept exactly as delivered by the API.

    Raises:
    -------
    requests.exceptions.RequestException
        On network failure, timeout, or a non-success HTTP status.
    """
    url = "https://api.alternative.me/fng/"
    params = {"limit": 0, "format": "json"}

    # Fail loudly on an HTTP error rather than on a confusing KeyError when
    # the payload is indexed below; the timeout prevents an indefinite hang.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    # Create DataFrame from API response
    df = pd.DataFrame(payload["data"])

    # Timestamps are parsed as epoch seconds (not milliseconds)
    df["timestamp"] = pd.to_datetime(df["timestamp"].astype(int), unit="s")

    # Convert value column to numeric
    df["value"] = pd.to_numeric(df["value"])

    # Set timestamp as index and sort chronologically
    df = df.set_index("timestamp").sort_index()

    # Return data from 2018 onwards
    return df.loc["2018":]

# Download the index history, then report the covered period and a preview
df_fear_greed = fetch_fear_greed_history()
for label, bound in (
    ('Start date:', df_fear_greed.index.min()),
    ('End date:', df_fear_greed.index.max()),
):
    print(label, bound)
print(df_fear_greed.head())

Start date: 2018-02-01 00:00:00
End date: 2024-12-03 00:00:00
            value value_classification time_until_update
timestamp                                               
2018-02-01     30                 Fear               NaN
2018-02-02     15         Extreme Fear               NaN
2018-02-03     40                 Fear               NaN
2018-02-04     24         Extreme Fear               NaN
2018-02-05     11         Extreme Fear               NaN


In [63]:
# Binance spot BTCUSDT daily volume, from 2018-01-01 up to the moment the cell runs
start_date, end_date = datetime(2018, 1, 1), datetime.now()
volume_df = fetch_historical_volume("BTCUSDT", start_date, end_date)

volume_df

Unnamed: 0_level_0,volume
timestamp,Unnamed: 1_level_1
2018-01-02,20078.092111
2018-01-03,15905.667639
2018-01-04,21329.649574
2018-01-05,23251.491125
2018-01-06,18571.457508
...,...
2024-11-29,27701.782310
2024-11-30,14503.833060
2024-12-01,16938.604520
2024-12-02,37958.669810


In [15]:
# BTC exchange volume Blockchain.com
filepath = 'data/Exchange Trade Volume.json'

with open(filepath) as f:
    data = json.load(f)

# The Blockchain.com volume gets its own name: `volume_df` already holds the
# Binance spot volume loaded earlier, which the merge section turns into
# `merge_volume_df`. Rebinding it here to a two-column frame breaks that step
# when the notebook is run top to bottom.
blockchain_volume_df = pd.DataFrame(data['trade-volume'])
price_df = pd.DataFrame(data['market-price'])

# Keep observations from 2013-02-11 00:00 UTC onwards
# (1360540800000 milliseconds since the epoch)
EXCHANGE_DATA_START_MS = 1360540800000
blockchain_volume_df = blockchain_volume_df[blockchain_volume_df['x'] >= EXCHANGE_DATA_START_MS].copy()
price_df = price_df[price_df['x'] >= EXCHANGE_DATA_START_MS].copy()

# 'x' holds epoch milliseconds
blockchain_volume_df['x'] = pd.to_datetime(blockchain_volume_df['x'], unit='ms')
price_df['x'] = pd.to_datetime(price_df['x'], unit='ms')

# Positional rename: each frame is expected to have exactly two columns
blockchain_volume_df.columns = ['date', 'volume']
price_df.columns = ['date', 'price']

df_exchange_volume = pd.merge(blockchain_volume_df, price_df, on='date', how='outer')

print('Start date:',df_exchange_volume['date'].min())
print('End date:',df_exchange_volume['date'].max())

df_exchange_volume.tail()

Start date: 2013-02-11 00:00:00
End date: 2024-12-02 00:00:00


Unnamed: 0,date,volume,price
1074,2024-11-16,919109500.0,91063.24
1075,2024-11-20,879056100.0,92369.46
1076,2024-11-24,387213300.0,97776.94
1077,2024-11-28,754305400.0,95956.99
1078,2024-12-02,183654500.0,97273.62


In [74]:
# BTC Binance Perp Funding rates
# Fetch the data in this cell so the notebook runs on a fresh kernel; before,
# `funding_rates_df` only existed as leftover kernel state.
funding_rates_df = fetch_historical_funding_rates(
    "BTCUSD_PERP", datetime(2018, 1, 1), datetime.now()
)
if funding_rates_df.empty:
    raise RuntimeError("No funding rates were returned for BTCUSD_PERP")

# fundingTime arrives as epoch milliseconds
funding_rates_df['fundingTime'] = pd.to_datetime(funding_rates_df['fundingTime'], unit='ms')
funding_rates_df = funding_rates_df.sort_values('fundingTime')

print('Start date:', funding_rates_df['fundingTime'].min())
print('End date:', funding_rates_df['fundingTime'].max())

funding_rates_df.head()

Start date: 2020-08-10 16:00:00
End date: 2024-12-03 16:00:00


Unnamed: 0,symbol,fundingTime,fundingRate,markPrice
0,BTCUSD_PERP,2020-08-10 16:00:00.000,0.0001,
1,BTCUSD_PERP,2020-08-11 00:00:00.000,0.0001,
2,BTCUSD_PERP,2020-08-11 08:00:00.008,0.0001,
3,BTCUSD_PERP,2020-08-11 16:00:00.000,0.0001,
4,BTCUSD_PERP,2020-08-12 00:00:00.000,0.0001,


In [140]:
# M2 money supply (M2SL) loaded from the local CSV, with DATE parsed to datetime
m2_supply_df = pd.read_csv('data/M2SL Data.csv').assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE'])
)
print('Start date:', m2_supply_df['DATE'].min())
print('End date:', m2_supply_df['DATE'].max())
m2_supply_df

Start date: 2017-12-01 00:00:00
End date: 2024-10-01 00:00:00


Unnamed: 0,DATE,M2SL
0,2017-12-01,13860.3
1,2018-01-01,13869.2
2,2018-02-01,13907.3
3,2018-03-01,13966.4
4,2018-04-01,13989.3
...,...,...
78,2024-06-01,21020.1
79,2024-07-01,21039.4
80,2024-08-01,21141.3
81,2024-09-01,21222.7


In [19]:
# Gold futures (GC=F) daily closes from Yahoo Finance, 2018-01-01 to today
gold = (
    yf.download("GC=F", start="2018-01-01", end=pd.Timestamp.today())
    .reset_index()
    .rename(columns={'Date': 'Date', 'Close': 'Gold_Price'})
)
# Keep only the date and the renamed close column
gold = gold[['Date', 'Gold_Price']].copy()

print('Start date:', gold['Date'].min().strftime('%Y-%m-%d'))
print('End date:', gold['Date'].max().strftime('%Y-%m-%d'))
gold.head(3)

[*********************100%***********************]  1 of 1 completed

Start date: 2018-01-02
End date: 2024-12-02





Price,Date,Gold_Price
Ticker,Unnamed: 1_level_1,GC=F
0,2018-01-02,1313.699951
1,2018-01-03,1316.199951
2,2018-01-04,1319.400024


In [66]:
# Gold trust shares (GLD) daily closes from Yahoo Finance, 2018-01-01 to today
gold_etf_spot = (
    yf.download("GLD", start="2018-01-01", end=pd.Timestamp.today())
    .reset_index()
    .rename(columns={'Date': 'Date', 'Close': 'Gold_Price'})
)
# Keep only the date and the renamed close column
gold_etf_spot = gold_etf_spot[['Date', 'Gold_Price']].copy()

print('Start date:', gold_etf_spot['Date'].min().strftime('%Y-%m-%d'))
print('End date:', gold_etf_spot['Date'].max().strftime('%Y-%m-%d'))
gold_etf_spot.head(3)

[*********************100%***********************]  1 of 1 completed

Start date: 2018-01-02
End date: 2024-12-02





Price,Date,Gold_Price
Ticker,Unnamed: 1_level_1,GLD
0,2018-01-02,125.150002
1,2018-01-03,124.82
2,2018-01-04,125.459999


In [111]:
# Bitcoin spot (BTC-USD) daily closes from Yahoo Finance, 2018-01-01 to today
btc_spot = (
    yf.download("BTC-USD", start="2018-01-01", end=pd.Timestamp.today())
    .reset_index()
    .rename(columns={'Date': 'Date', 'Close': 'BTC_Price'})
)
# Keep only the date and the renamed close column
btc_spot = btc_spot[['Date', 'BTC_Price']].copy()

print('Start date:', btc_spot['Date'].min().strftime('%Y-%m-%d'))
print('End date:', btc_spot['Date'].max().strftime('%Y-%m-%d'))
btc_spot.head(3)

[*********************100%***********************]  1 of 1 completed

Start date: 2018-01-01
End date: 2024-12-02





Price,Date,BTC_Price
Ticker,Unnamed: 1_level_1,BTC-USD
0,2018-01-01,13657.200195
1,2018-01-02,14982.099609
2,2018-01-03,15201.0


In [113]:
# US Dollar Index
# Load the dollar-index history from the local CSV export; the raw frame is
# displayed unchanged and cleaned up later in the merging section.
dollar_index_df = pd.read_csv('data/US Dollar Index Historical Data.csv')
dollar_index_df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,12/03/2024,106.09,106.48,106.60,106.09,,-0.33%
1,12/02/2024,106.45,105.78,106.73,105.82,,0.67%
2,11/29/2024,105.74,105.95,106.13,105.61,,-0.38%
3,11/28/2024,106.14,106.16,106.42,106.10,,0.05%
4,11/27/2024,106.08,106.82,106.92,105.86,,-0.87%
...,...,...,...,...,...,...,...
1779,02/07/2018,90.25,89.63,90.40,89.47,,0.75%
1780,02/06/2018,89.58,89.66,90.03,89.37,,0.03%
1781,02/05/2018,89.55,89.35,89.70,89.02,,0.40%
1782,02/02/2018,89.19,88.64,89.42,88.59,,0.59%


# DataFrame merging

In [130]:
# Keep the two fear/greed columns under merge-friendly names
merge_fear_greed_df = df_fear_greed[['value', 'value_classification']].rename(
    columns={
        'value': 'fear_greed_value',
        'value_classification': 'fear_greed_classification',
    }
)
# Re-key on plain calendar dates so every frame shares the same index type
merge_fear_greed_df.index = pd.to_datetime(merge_fear_greed_df.index).date
merge_fear_greed_df.index.name = 'timestamp'
merge_fear_greed_df.head(3)

Unnamed: 0_level_0,fear_greed_value,fear_greed_classification
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-01,30,Fear
2018-02-02,15,Extreme Fear
2018-02-03,40,Fear


In [128]:
# NOTE(review): this expects `volume_df` to be the single-column Binance frame;
# the Blockchain.com cell assigns the same name to a two-column frame — confirm
# which one is live when the notebook is run top to bottom.
merge_volume_df = volume_df.set_axis(['exchange_volume_btc'], axis=1)
# Re-key on plain calendar dates so every frame shares the same index type
merge_volume_df.index = pd.to_datetime(merge_volume_df.index).date
merge_volume_df.index.name = 'timestamp'
merge_volume_df.head(3)

Unnamed: 0_level_0,exchange_volume_btc
timestamp,Unnamed: 1_level_1
2018-01-02,20078.092111
2018-01-03,15905.667639
2018-01-04,21329.649574


In [96]:
# Work on an independent copy of just the two columns needed
merge_funding_rates_df = funding_rates_df[['fundingTime', 'fundingRate']].copy()

# Convert fundingTime to datetime. fetch_historical_funding_rates returns it as
# raw epoch milliseconds; without `unit='ms'` pandas would read those integers
# as nanoseconds and every row would land on 1970-01-01.
funding_times = merge_funding_rates_df['fundingTime']
if pd.api.types.is_numeric_dtype(funding_times):
    funding_times = pd.to_datetime(funding_times, unit='ms')
else:
    funding_times = pd.to_datetime(funding_times)
merge_funding_rates_df['fundingTime'] = funding_times

# Unparseable rates become NaN and are ignored by the daily mean
merge_funding_rates_df['fundingRate'] = pd.to_numeric(merge_funding_rates_df['fundingRate'], errors='coerce')

# Aggregate to one value per calendar day: the mean of that day's funding rates
merge_funding_rates_df = (
    merge_funding_rates_df
    .groupby(merge_funding_rates_df['fundingTime'].dt.date)['fundingRate']
    .mean()
    .reset_index()
)

# Rename the date column to timestamp and set as index
merge_funding_rates_df = merge_funding_rates_df.rename(columns={'fundingTime': 'timestamp', 'fundingRate': 'perp_funding_rate_btc'})
merge_funding_rates_df = merge_funding_rates_df.set_index('timestamp')

merge_funding_rates_df.head(3)

Unnamed: 0_level_0,perp_funding_rate_btc
timestamp,Unnamed: 1_level_1
2020-08-10,0.0001
2020-08-11,0.0001
2020-08-12,7e-05


In [147]:
# NOTE(review): the following cell rebuilds merge_m2_supply_df from scratch with
# the same steps plus more; this cell looks redundant — confirm before removing.
merge_m2_supply_df = (
    m2_supply_df
    .set_axis(['timestamp', 'm2_supply'], axis=1)
    # Reduce the parsed datetimes to plain calendar dates
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']).dt.date)
    .set_index('timestamp')
)
merge_m2_supply_df.tail(3)

Unnamed: 0_level_0,m2_supply
timestamp,Unnamed: 1_level_1
2024-08-01,21141.3
2024-09-01,21222.7
2024-10-01,21311.2


In [148]:
# Monthly M2 series keyed by plain calendar dates
merge_m2_supply_df = m2_supply_df.copy()
merge_m2_supply_df.columns = ['timestamp', 'm2_supply']
merge_m2_supply_df['timestamp'] = pd.to_datetime(merge_m2_supply_df['timestamp']).dt.date
merge_m2_supply_df = merge_m2_supply_df.set_index('timestamp')

# Month-over-month change, expressed in percent
monthly_change = merge_m2_supply_df['m2_supply'].pct_change()
merge_m2_supply_df['m2_supply_month_pct_change'] = monthly_change * 100

# Daily calendar spanning the first to the last monthly observation
date_range = pd.date_range(
    start=merge_m2_supply_df.index.min(),
    end=merge_m2_supply_df.index.max(),
    freq='D',
)

# Expand to daily rows; each day carries the latest monthly figures forward
# (the frame holds only the two M2 columns, so both are filled)
merge_m2_supply_df = merge_m2_supply_df.reindex(date_range).ffill()

# Convert index back to date type
merge_m2_supply_df.index = merge_m2_supply_df.index.date
merge_m2_supply_df.index.name = 'timestamp'

merge_m2_supply_df.tail(3)

Unnamed: 0_level_0,m2_supply,m2_supply_month_pct_change
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-09-29,21222.7,0.385028
2024-09-30,21222.7,0.385028
2024-10-01,21311.2,0.417006


In [157]:
# GLD closes keyed by plain calendar dates
merge_gold_etf_spot_df = (
    gold_etf_spot.copy()
    .set_axis(['timestamp', 'gld_etf_price_gold'], axis=1)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']).dt.date)
    .set_index('timestamp')
)

# Expand to every calendar day and fill the gaps by linear interpolation.
# NOTE(review): interpolation fills a missing day using the next available
# price, whereas the dollar index is forward-filled — confirm this is intended.
date_range = pd.date_range(
    start=merge_gold_etf_spot_df.index.min(),
    end=merge_gold_etf_spot_df.index.max(),
    freq='D',
)
merge_gold_etf_spot_df = merge_gold_etf_spot_df.reindex(date_range).interpolate(method='linear')
merge_gold_etf_spot_df.index = merge_gold_etf_spot_df.index.date
merge_gold_etf_spot_df.index.name = 'timestamp'
merge_gold_etf_spot_df.head(3)

Unnamed: 0_level_0,gld_etf_price_gold
timestamp,Unnamed: 1_level_1
2018-01-02,125.150002
2018-01-03,124.82
2018-01-04,125.459999


In [152]:
# Gold futures closes keyed by plain calendar dates
merge_gold_futures_df = (
    gold.copy()
    .set_axis(['timestamp', 'futures_price_gold'], axis=1)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']).dt.date)
    .set_index('timestamp')
)

# Expand to every calendar day and fill the gaps by linear interpolation.
# NOTE(review): interpolation fills a missing day using the next available
# price, whereas the dollar index is forward-filled — confirm this is intended.
date_range = pd.date_range(
    start=merge_gold_futures_df.index.min(),
    end=merge_gold_futures_df.index.max(),
    freq='D',
)
merge_gold_futures_df = merge_gold_futures_df.reindex(date_range).interpolate(method='linear')
merge_gold_futures_df.index = merge_gold_futures_df.index.date
merge_gold_futures_df.index.name = 'timestamp'
merge_gold_futures_df.head(3)

Unnamed: 0_level_0,futures_price_gold
timestamp,Unnamed: 1_level_1
2018-01-02,1313.699951
2018-01-03,1316.199951
2018-01-04,1319.400024


In [156]:
# BTC-USD closes keyed by plain calendar dates
merge_btc_spot_df = (
    btc_spot.copy()
    .set_axis(['timestamp', 'price_btc'], axis=1)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']).dt.date)
    .set_index('timestamp')
)

# Expand to every calendar day; any missing day is filled by linear interpolation
date_range = pd.date_range(
    start=merge_btc_spot_df.index.min(),
    end=merge_btc_spot_df.index.max(),
    freq='D',
)
merge_btc_spot_df = merge_btc_spot_df.reindex(date_range).interpolate(method='linear')
merge_btc_spot_df.index = merge_btc_spot_df.index.date
merge_btc_spot_df.index.name = 'timestamp'
merge_btc_spot_df.head(3)

Unnamed: 0_level_0,price_btc
timestamp,Unnamed: 1_level_1
2018-01-01,13657.200195
2018-01-02,14982.099609
2018-01-03,15201.0


In [118]:
# Dollar index: keep the date and closing price only
merge_dollar_index_df = dollar_index_df[['Date', 'Price']].copy()

# Parse the date strings and reduce them to plain calendar dates
merge_dollar_index_df['Date'] = pd.to_datetime(merge_dollar_index_df['Date']).dt.date

# Name the price column for the merged frame and index by date
merge_dollar_index_df = (
    merge_dollar_index_df
    .rename(columns={'Price': 'dxy_usd_index'})
    .set_index('Date')
)

# Daily calendar from the first to the last quoted day
date_range = pd.date_range(
    start=merge_dollar_index_df.index.min(),
    end=merge_dollar_index_df.index.max(),
    freq='D',
)

# Days without a quote repeat the most recent earlier value
merge_dollar_index_df = merge_dollar_index_df.reindex(date_range).ffill()

# Convert index back to date (without time component)
merge_dollar_index_df.index = merge_dollar_index_df.index.date
merge_dollar_index_df.index.name = 'timestamp'

merge_dollar_index_df

Unnamed: 0_level_0,dxy_usd_index
timestamp,Unnamed: 1_level_1
2018-02-01,88.67
2018-02-02,89.19
2018-02-03,89.19
2018-02-04,89.19
2018-02-05,89.55
...,...
2024-11-29,105.74
2024-11-30,105.74
2024-12-01,105.74
2024-12-02,106.45


In [None]:
# Data validation
# Prints, for every frame prepared for merging, its index type, name, date
# range and row count; then the extreme start/end dates across all frames; then
# any calendar days missing inside each frame's own range.
# Note: the loops below rebind the module-level names `df` and `date_range`.

# List of all dataframes to check
dfs = {
    'Fear & Greed': merge_fear_greed_df,
    'Volume': merge_volume_df,
    'Funding Rates': merge_funding_rates_df,
    'M2 Supply': merge_m2_supply_df,
    'Gold ETF': merge_gold_etf_spot_df,
    'Gold Futures': merge_gold_futures_df,
    'BTC Spot': merge_btc_spot_df,
    'Dollar Index': merge_dollar_index_df
}

# Check index types and date ranges for each DataFrame
print("Index Type and Date Range Check:")
print("-" * 50)
for name, df in dfs.items():
    print(f"\n{name}:")
    # Type of the first index label only; a mixed-type index would not be detected
    print(f"Index type: {type(df.index[0])}")
    print(f"Index name: {df.index.name}")
    print(f"Date range: {df.index.min()} to {df.index.max()}")
    print(f"Number of rows: {len(df)}")

# Convert all dates to pandas Timestamp for comparison
start_dates = {name: pd.Timestamp(df.index.min()) for name, df in dfs.items()}
end_dates = {name: pd.Timestamp(df.index.max()) for name, df in dfs.items()}

print("\n\nDate Range Overlap Check:")
print("-" * 50)
# Each line prints a (frame name, date) pair selected by date
print("\nEarliest start date:", min(start_dates.items(), key=lambda x: x[1]))
print("Latest start date:", max(start_dates.items(), key=lambda x: x[1]))
print("Earliest end date:", min(end_dates.items(), key=lambda x: x[1]))
print("Latest end date:", max(end_dates.items(), key=lambda x: x[1]))

# Check for any gaps in dates
print("\n\nChecking for gaps in dates:")
print("-" * 50)
for name, df in dfs.items():
    # Full daily calendar between this frame's first and last date
    date_range = pd.date_range(start=pd.Timestamp(df.index.min()), 
                              end=pd.Timestamp(df.index.max()), 
                              freq='D')
    # Calendar days that have no row in the frame
    missing_dates = date_range.difference(pd.to_datetime(df.index))
    if len(missing_dates) > 0:
        print(f"\n{name} has {len(missing_dates)} missing dates")
        print(f"First few missing dates: {missing_dates[:5]}")

Index Type and Date Range Check:
--------------------------------------------------

Fear & Greed:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-02-01 to 2024-12-03
Number of rows: 2494

Volume:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-01-02 to 2024-12-03
Number of rows: 2528

Funding Rates:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2020-08-10 to 2024-12-03
Number of rows: 1577

M2 Supply:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-01-01 to 2024-10-01
Number of rows: 82

Gold ETF:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-01-02 to 2024-12-02
Number of rows: 1741

Gold Futures:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-01-02 to 2024-12-02
Number of rows: 1741

BTC Spot:
Index type: <class 'datetime.date'>
Index name: timestamp
Date range: 2018-01-01 to 2024-12-02
Number of rows: 2528

Dollar Index:
In

In [158]:
# Column-wise outer join of every prepared frame; all of them are expected to
# be indexed by 'timestamp'
frames_to_merge = [
    merge_fear_greed_df,
    merge_volume_df,
    merge_funding_rates_df,
    merge_m2_supply_df,
    merge_gold_etf_spot_df,
    merge_gold_futures_df,
    merge_btc_spot_df,
    merge_dollar_index_df,
]
merged_df = pd.concat(frames_to_merge, axis=1, join='outer')

In [159]:
# Promote the index to datetime so sorting and filtering operate on Timestamps
merged_df.index = pd.to_datetime(merged_df.index)

# Chronological order, then keep rows dated 2018-02-01 or later
cutoff = pd.to_datetime('2018-02-01')
merged_df = merged_df.sort_index(ascending=True)
merged_df = merged_df.loc[merged_df.index >= cutoff]

In [161]:
# Write the merged frame to the working directory; the index is written as
# the first CSV column (to_csv default).
merged_df.to_csv('concat_data.csv')