## 02)Merge_all_results

This part covers merging all previously downloaded data into a single DataFrame, including crypto prices (BTC, ETH), market indices (SP500, NDX), commodities (Gold), and sentiment indicators (Crypto F&G, CBBI index). All time series are converted to daily timeframes where needed.

In [2]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path

import requests
import json

from functions_library.functions import DFinfo

# datetime manipulation for dataframes
from datetime import datetime

In [3]:
def merge_crypto_data():
    """
    Merge BTC and ETH hourly data into daily data
    """
    # Read BTC data
    btc_df = pd.read_csv('Resources/BTCUSD_1H_ccompare.csv')
    btc_df['timestamp'] = pd.to_datetime(btc_df['timestamp'])
    btc_df.set_index('timestamp', inplace=True)
    # Convert to daily data (last price of each day) and standardize time to 00:00
    btc_daily = btc_df['close'].resample('D').last()
    btc_daily.index = btc_daily.index.normalize()  # Set time to midnight
    btc_daily = btc_daily.rename('BTC')

    # Read ETH data
    eth_df = pd.read_csv('Resources/ETHUSD_1H_ccompare.csv')
    eth_df['timestamp'] = pd.to_datetime(eth_df['timestamp'])
    eth_df.set_index('timestamp', inplace=True)
    # Convert to daily data (last price of each day) and standardize time to 00:00
    eth_daily = eth_df['close'].resample('D').last()
    eth_daily.index = eth_daily.index.normalize()  # Set time to midnight
    eth_daily = eth_daily.rename('ETH')

    # Merge both series
    crypto_daily = pd.concat([btc_daily, eth_daily], axis=1)
    
    print("\nMerged crypto data info:")
    print(crypto_daily.info())
    print("\nFirst few rows:")
    print(crypto_daily.head())
    print("\nLast few rows:")
    print(crypto_daily.tail())

    return crypto_daily

# Execute the merge
crypto_data = merge_crypto_data()


Merged crypto data info:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3039 entries, 2016-10-24 00:00:00+00:00 to 2025-02-17 00:00:00+00:00
Freq: D
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   BTC     3039 non-null   float64
 1   ETH     3031 non-null   float64
dtypes: float64(2)
memory usage: 71.2 KB
None

First few rows:
                              BTC  ETH
timestamp                             
2016-10-24 00:00:00+00:00  650.79  NaN
2016-10-25 00:00:00+00:00  650.96  NaN
2016-10-26 00:00:00+00:00  675.87  NaN
2016-10-27 00:00:00+00:00  682.73  NaN
2016-10-28 00:00:00+00:00  687.66  NaN

Last few rows:
                                BTC      ETH
timestamp                                   
2025-02-13 00:00:00+00:00  96632.78  2676.25
2025-02-14 00:00:00+00:00  97508.71  2726.02
2025-02-15 00:00:00+00:00  97596.21  2693.56
2025-02-16 00:00:00+00:00  96132.69  2661.59
2025-02-17 00:00:00+00:00  95787.10  2744.

In [4]:
def merge_crypto_and_indices():
    """
    Merge crypto daily data with SP500 and NDX indices
    """
    # First get the crypto daily data
    crypto_daily = merge_crypto_data()  # Using the previous function
    
    # Read SP500 data and check columns
    sp500_df = pd.read_csv('Resources/GSPC_1D_yfinance.csv')
    print("\nSP500 columns:", sp500_df.columns.tolist())
    sp500_df['Date'] = pd.to_datetime(sp500_df['Date'])
    sp500_df.set_index('Date', inplace=True)
    sp500_df.index = sp500_df.index.normalize()  # Set time to midnight
    sp500_daily = sp500_df['close'].rename('SP500')  # Back to lowercase 'close'

    # Read NDX data and check columns
    ndx_df = pd.read_csv('Resources/NDX_1D_yfinance.csv')
    print("\nNDX columns:", ndx_df.columns.tolist())
    ndx_df['Date'] = pd.to_datetime(ndx_df['Date'])
    ndx_df.set_index('Date', inplace=True)
    ndx_df.index = ndx_df.index.normalize()  # Set time to midnight
    ndx_daily = ndx_df['close'].rename('Ndx100')  # Back to lowercase 'close'

    # Merge all series
    merged_df = pd.concat([crypto_daily, sp500_daily, ndx_daily], axis=1)
    
    # Sort by date
    merged_df.sort_index(inplace=True)

    print("\nMerged data info:")
    print(merged_df.info())
    print("\nFirst few rows:")
    print(merged_df.head())
    print("\nLast few rows:")
    print(merged_df.tail())

    return merged_df

# Execute the merge
market_data = merge_crypto_and_indices()


Merged crypto data info:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3039 entries, 2016-10-24 00:00:00+00:00 to 2025-02-17 00:00:00+00:00
Freq: D
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   BTC     3039 non-null   float64
 1   ETH     3031 non-null   float64
dtypes: float64(2)
memory usage: 71.2 KB
None

First few rows:
                              BTC  ETH
timestamp                             
2016-10-24 00:00:00+00:00  650.79  NaN
2016-10-25 00:00:00+00:00  650.96  NaN
2016-10-26 00:00:00+00:00  675.87  NaN
2016-10-27 00:00:00+00:00  682.73  NaN
2016-10-28 00:00:00+00:00  687.66  NaN

Last few rows:
                                BTC      ETH
timestamp                                   
2025-02-13 00:00:00+00:00  96632.78  2676.25
2025-02-14 00:00:00+00:00  97508.71  2726.02
2025-02-15 00:00:00+00:00  97596.21  2693.56
2025-02-16 00:00:00+00:00  96132.69  2661.59
2025-02-17 00:00:00+00:00  95787.10  2744.

In [9]:
market_data

Unnamed: 0,BTC,ETH,SP500,Ndx100
2016-10-24 00:00:00+00:00,650.79,,2151.330078,4909.970215
2016-10-25 00:00:00+00:00,650.96,,2143.159912,4891.450195
2016-10-26 00:00:00+00:00,675.87,,2139.429932,4860.589844
2016-10-27 00:00:00+00:00,682.73,,2133.040039,4836.450195
2016-10-28 00:00:00+00:00,687.66,,2126.409912,4805.589844
...,...,...,...,...
2025-02-13 00:00:00+00:00,96632.78,2676.25,6115.069824,22030.710938
2025-02-14 00:00:00+00:00,97508.71,2726.02,6114.629883,22114.689453
2025-02-15 00:00:00+00:00,97596.21,2693.56,,
2025-02-16 00:00:00+00:00,96132.69,2661.59,,


In [12]:
# Load CBBI data and convert timestamp
cbbi_df = pd.read_csv('Resources/CBBI_index.csv')

# Convert timestamp to datetime and set as index
cbbi_df['timestamp'] = pd.to_datetime(cbbi_df['timestamp'])
cbbi_df.set_index('timestamp', inplace=True)

# Add UTC timezone if not present
if cbbi_df.index.tz is None:
    cbbi_df.index = cbbi_df.index.tz_localize('UTC')

# Check the converted data
print("\nAfter conversion:")
print("Index type:", type(cbbi_df.index))
print("Timezone info:", cbbi_df.index.tz)
print("\nFirst few rows after conversion:")
print(cbbi_df.head())


After conversion:
Index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Timezone info: UTC

First few rows after conversion:
                           Price  PiCycle    RUPL   RHODL   Puell    2YMA  \
timestamp                                                                   
2011-06-27 00:00:00+00:00  15.59      NaN  0.8242  0.7836  0.7151  0.5954   
2011-06-28 00:00:00+00:00  17.01      NaN  0.8243  0.7828  0.7004  0.6127   
2011-06-29 00:00:00+00:00  16.93      NaN  0.8205  0.7792  0.7145  0.6074   
2011-06-30 00:00:00+00:00  16.88      NaN  0.8061  0.7061  0.7279  0.6028   
2011-07-01 00:00:00+00:00  16.51      NaN  0.7913  0.7013  0.6853  0.5937   

                           Trolololo    MVRV  ReserveRisk  Woobull  Confidence  
timestamp                                                                       
2011-06-27 00:00:00+00:00     0.7479  0.5311          NaN   0.5778      0.6822  
2011-06-28 00:00:00+00:00     0.7740  0.5317       0.8307   0.5977      0.7068 

In [15]:
# Load CBBI data and convert timestamp
fng_df = pd.read_csv('Resources/CNN_Market_FNG_1D.csv')

# # Check original data
# print("Original FNG data types:")
# print(fng_df.dtypes)
# print("\nFirst few rows before conversion:")
# print(fng_df.head())

# Convert timestamp to datetime and set as index
fng_df['timestamp'] = pd.to_datetime(fng_df['timestamp'])
fng_df.set_index('timestamp', inplace=True)

# Add UTC timezone if not present
if fng_df.index.tz is None:
    fng_df.index = fng_df.index.tz_localize('UTC')

# Rename columns
fng_df = fng_df.rename(columns={
    'value': 'FNG',
    'value_classification': 'FNG_class'
})

# Check the converted data
print("\nAfter conversion:")
print("Index type:", type(fng_df.index))
print("Timezone info:", fng_df.index.tz)
print("\nFirst few rows after conversion:")
print(fng_df.head())


After conversion:
Index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Timezone info: UTC

First few rows after conversion:
                            FNG FNG_class
timestamp                                
2021-02-01 00:00:00+00:00  39.0      Fear
2021-02-02 00:00:00+00:00  58.0     Greed
2021-02-03 00:00:00+00:00  50.0     Greed
2021-02-04 00:00:00+00:00  60.0     Greed
2021-02-05 00:00:00+00:00  60.0     Greed


In [16]:
# First check the timezone info of each DataFrame
print("Checking timezone info before merge:")
print("market_data timezone:", market_data.index.tz)
print("fng_df timezone:", fng_df.index.tz)
print("cbbi_df timezone:", cbbi_df.index.tz)

# Merge all DataFrames
merged_df = pd.concat([market_data, fng_df, cbbi_df], axis=1)

# Sort by timestamp
merged_df.sort_index(inplace=True)

# Display info about the merged DataFrame
print("\nMerged data info:")
print(merged_df.info())
print("\nFirst few rows:")
print(merged_df.head())
print("\nLast few rows:")
print(merged_df.tail())

# Save to CSV
merged_df.to_csv('Resources/merged_market_data.csv')
print("\nMerged data saved to Resources/merged_market_data.csv")

Checking timezone info before merge:
market_data timezone: UTC
fng_df timezone: UTC
cbbi_df timezone: UTC

Merged data info:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4988 entries, 2011-06-27 00:00:00+00:00 to 2025-02-17 00:00:00+00:00
Data columns (total 17 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   BTC          3039 non-null   float64
 1   ETH          3031 non-null   float64
 2   SP500        2090 non-null   float64
 3   Ndx100       2090 non-null   float64
 4   FNG          1027 non-null   float64
 5   FNG_class    1027 non-null   object 
 6   Price        4985 non-null   float64
 7   PiCycle      4635 non-null   float64
 8   RUPL         4985 non-null   float64
 9   RHODL        4985 non-null   float64
 10  Puell        4985 non-null   float64
 11  2YMA         4985 non-null   float64
 12  Trolololo    4985 non-null   float64
 13  MVRV         4985 non-null   float64
 14  ReserveRisk  4984 non-null   float64
 15  W

In [17]:
merged_df

Unnamed: 0,BTC,ETH,SP500,Ndx100,FNG,FNG_class,Price,PiCycle,RUPL,RHODL,Puell,2YMA,Trolololo,MVRV,ReserveRisk,Woobull,Confidence
2011-06-27 00:00:00+00:00,,,,,,,15.5900,,0.8242,0.7836,0.7151,0.5954,0.7479,0.5311,,0.5778,0.6822
2011-06-28 00:00:00+00:00,,,,,,,17.0100,,0.8243,0.7828,0.7004,0.6127,0.7740,0.5317,0.8307,0.5977,0.7068
2011-06-29 00:00:00+00:00,,,,,,,16.9300,,0.8205,0.7792,0.7145,0.6074,0.7708,0.5265,0.8364,0.5922,0.7059
2011-06-30 00:00:00+00:00,,,,,,,16.8800,,0.8061,0.7061,0.7279,0.6028,0.7682,0.5088,0.8372,0.5876,0.6931
2011-07-01 00:00:00+00:00,,,,,,,16.5100,,0.7913,0.7013,0.6853,0.5937,0.7594,0.4900,0.8294,0.5780,0.6786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-14 00:00:00+00:00,97508.71,2726.02,6114.629883,22114.689453,44.485714,Fear,96737.0000,0.6420,0.8612,1.0000,0.8515,0.9420,0.5742,0.8169,0.6611,0.7007,0.7833
2025-02-14 23:59:56+00:00,,,,,44.485714,Fear,,,,,,,,,,,
2025-02-15 00:00:00+00:00,97596.21,2693.56,,,,,97544.0000,0.6432,0.8664,1.0000,0.8893,0.9465,0.5785,0.8163,0.6531,0.7057,0.7888
2025-02-16 00:00:00+00:00,96132.69,2661.59,,,,,97603.0000,0.6444,0.8666,1.0000,0.8375,0.9460,0.5784,0.8151,0.6582,0.7059,0.7836
