## 01 - ETL: Fear & Greed and CBBI Index

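This part covers the extraction of the Crypto Fear & Greed (F&G) data, the CNN Market Fear & Greed data, and the CBBI index:

- Crypto F&G: https://api.alternative.me/
- CNN Market F&G: https://production.dataviz.cnn.io/index/fearandgreed/graphdata
- CBBI index: https://colintalkscrypto.com/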



---
### 0. Imports

In [13]:
# Imports
import json
# datetime manipulation for dataframes
from datetime import datetime
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
import requests

from functions_library.functions import DFinfo


---
### 1. Data Loading

#### Get latest "Crypto Fear & Greed" (FNG) data 

In [14]:
# Download Fear & Greed JSON data
url = "https://api.alternative.me/fng/?limit=0&format=json"

# Single definition of the on-disk location, used for both writing and reading
fng_json = Path("resources/fear_and_greed_index.json")

# A timeout keeps the cell from hanging forever if the API stops responding
response = requests.get(url, timeout=30)

if response.status_code == 200:
    fng_json.write_bytes(response.content)
    print("Fear & Greed JSON data downloaded successfully.")
else:
    # Best effort: the code below still reads whatever copy a previous run left on disk
    print("Failed to download FNG data.")

# Read the JSON data back from the file
with open(fng_json) as file:
    data = json.load(file)

# Extract the "data" section from the JSON
data = data['data']

# Convert the data to a DataFrame
fng_df = pd.DataFrame(data)


Fear & Greed JSON data downloaded successfully.


In [15]:
# Clean the Fear & Greed frame in one chain (no in-place mutation):
#   1. cast the epoch values to numbers before converting them to datetimes --
#      calling pd.to_datetime(..., unit='s') on non-numeric input is what produced
#      the deprecation warning recorded in this cell's previous output
#   2. use the converted "timestamp" column as the index
#   3. drop the "time_until_update" column
fng_df = (
    fng_df
    .assign(timestamp=lambda frame: pd.to_datetime(pd.to_numeric(frame['timestamp']), unit='s'))
    .set_index('timestamp')
    .drop(columns='time_until_update')
)

DFinfo(fng_df)


  fng_df['timestamp'] = pd.to_datetime(fng_df['timestamp'], unit='s')


Unnamed: 0_level_0,value,value_classification
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-12-03,76,Extreme Greed
2024-12-02,80,Extreme Greed
2024-12-01,81,Extreme Greed


Unnamed: 0_level_0,value,value_classification
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-02-03,40,Fear
2018-02-02,15,Extreme Fear
2018-02-01,30,Fear


#### Get latest CBBI index from Colin Talks Crypto : https://colintalkscrypto.com/cbbi/

In [16]:
# Download CBBI JSON data
url_CBBI = "https://colintalkscrypto.com/cbbi/data/latest.json"

# Single definition of the on-disk location, used for both writing and reading
CBBI_json = Path("resources/CBBI_index.json")

# An explicit User-Agent header is sent with the request; the timeout keeps the
# cell from hanging forever if the server stops responding
r = requests.get(url_CBBI, headers={"User-Agent": "XY"}, timeout=30)

if r.status_code == 200:
    CBBI_json.write_bytes(r.content)
    print("CBBI JSON data downloaded successfully.")
else:
    # Best effort: the code below still reads whatever copy a previous run left on disk
    print("Failed to download CBBI data.")
    print(r)

# Load the JSON data from the file
CBBI_df = pd.read_json(CBBI_json)

CBBI_df = CBBI_df.reset_index(names='timestamp')

# Convert the 'timestamp' column to a DatetimeIndex
CBBI_df['timestamp'] = pd.to_datetime(CBBI_df['timestamp'], unit='s')
CBBI_df = CBBI_df.set_index('timestamp')

# Resample to hourly frequency and forward fill missing values.
# '1h' replaces the deprecated '1H' alias that triggered the warning in the
# previous output of this cell.
CBBI_hourly_df = CBBI_df.resample('1h').ffill()

# The original cell stored the hourly frame under the misspelled name `CCBI_df`;
# the alias is kept so any later cell that relies on that name keeps working.
# NOTE(review): if the hourly frame was meant to replace CBBI_df itself, reassign it here.
CCBI_df = CBBI_hourly_df

# Show the loaded (not resampled) CBBI DataFrame
DFinfo(CBBI_df)


CBBI JSON data downloaded successfully.


  CCBI_df = CBBI_df.resample('1H').ffill()


Unnamed: 0_level_0,Price,PiCycle,RUPL,RHODL,Puell,2YMA,Trolololo,MVRV,ReserveRisk,Woobull,Confidence
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2011-06-27,15.59,,0.8242,0.7836,0.7151,0.5954,0.7479,0.5311,,0.5778,0.6822
2011-06-28,17.01,,0.8243,0.7828,0.7004,0.6127,0.774,0.5317,0.8307,0.5977,0.7068
2011-06-29,16.93,,0.8205,0.7792,0.7145,0.6074,0.7708,0.5265,0.8364,0.5922,0.7059


Unnamed: 0_level_0,Price,PiCycle,RUPL,RHODL,Puell,2YMA,Trolololo,MVRV,ReserveRisk,Woobull,Confidence
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-12-01,96599.0,0.5284,0.9546,1.0,0.8746,1.0,0.6144,0.8332,0.6351,0.7644,0.8005
2024-12-02,97328.0,0.5304,0.9546,1.0,0.8749,1.0,0.6182,0.8235,0.64,0.7677,0.801
2024-12-03,95869.0145,0.5321,0.9547,1.0,0.8752,1.0,0.6095,0.8529,0.6403,0.7589,0.8026


In [17]:
# def get_cnn_market_fng():
#     """
#     Get CNN Market Fear & Greed Index data from CNN Money
#     Returns a DataFrame with daily values
#     """
#     # CNN Money Fear & Greed API endpoint
#     url = "https://production.dataviz.cnn.io/index/fearandgreed/graphdata"
    
#     # Headers to mimic a browser request
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
#         'Accept': 'application/json',
#         'Accept-Language': 'en-US,en;q=0.9',
#         'Referer': 'https://www.cnn.com/markets/fear-and-greed',
#         'Origin': 'https://www.cnn.com'
#     }
    
#     try:
#         response = requests.get(url, headers=headers)
        
#         if response.status_code == 200:
#             # Save raw data
#             with open("resources/cnn_market_fear_and_greed.json", "wb") as file:
#                 file.write(response.content)
#             print("CNN Market Fear & Greed JSON data downloaded successfully.")
            
#             # Parse the JSON data
#             data = response.json()
            
#             # Extract the fear and greed data
#             fear_greed_data = []
#             for entry in data['fear_and_greed_historical']['data']:
#                 timestamp = pd.to_datetime(entry['x'], unit='ms')
#                 value = entry['y']
                
#                 # Get classification based on value
#                 if value >= 75:
#                     classification = "Extreme Greed"
#                 elif value >= 50:
#                     classification = "Greed"
#                 elif value >= 25:
#                     classification = "Fear"
#                 else:
#                     classification = "Extreme Fear"
                    
#                 fear_greed_data.append({
#                     'timestamp': timestamp,
#                     'value': value,
#                     'value_classification': classification
#                 })
            
#             # Convert to DataFrame
#             df = pd.DataFrame(fear_greed_data)
#             df.set_index('timestamp', inplace=True)
            
#             # Sort by timestamp
#             df = df.sort_index()
            
#             # Save processed data
#             df.to_csv('Resources/CNN_Market_FNG_1D_new.csv')
            
#             return df
            
#         else:
#             print(f"Failed to download CNN Market F&G data. Status code: {response.status_code}")
#             print(f"Response content: {response.text}")
#             return None
            
#     except Exception as e:
#         print(f"Error downloading CNN Market F&G data: {str(e)}")
#         return None

# # Get the data
# cnn_market_fng_df = get_cnn_market_fng()

# if cnn_market_fng_df is not None:
#     print("\nFirst few rows:")
#     print(cnn_market_fng_df.head())
#     print("\nLast few rows:")
#     print(cnn_market_fng_df.tail())

In [18]:
# Endpoint, browser-like request headers and timeout (seconds) for the CNN download
_CNN_FNG_URL = "https://production.dataviz.cnn.io/index/fearandgreed/graphdata"
_CNN_FNG_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'application/json',
    'Referer': 'https://www.cnn.com/markets/fear-and-greed'
}
_CNN_FNG_TIMEOUT = 30


def _classify_market_fng(value):
    """Return the label for an index value using the 75 / 50 / 25 cut-offs."""
    if value >= 75:
        return "Extreme Greed"
    if value >= 50:
        return "Greed"
    if value >= 25:
        return "Fear"
    return "Extreme Fear"


def _fetch_cnn_market_fng():
    """Download the CNN series; return a timestamp-indexed DataFrame, or None on a non-200 reply."""
    response = requests.get(_CNN_FNG_URL, headers=_CNN_FNG_HEADERS, timeout=_CNN_FNG_TIMEOUT)

    if response.status_code != 200:
        print(f"Failed to download CNN Market F&G data. Status code: {response.status_code}")
        return None

    # Save raw data
    with open("Resources/cnn_market_fear_and_greed.json", "wb") as file:
        file.write(response.content)

    # Each entry carries 'x' (epoch milliseconds) and 'y' (index value)
    records = [
        {
            'timestamp': pd.to_datetime(entry['x'], unit='ms'),
            'value': entry['y'],
            'value_classification': _classify_market_fng(entry['y']),
        }
        for entry in response.json()['fear_and_greed_historical']['data']
    ]

    # Explicit columns + datetime cast keep the frame well-formed even with no records
    frame = pd.DataFrame(records, columns=['timestamp', 'value', 'value_classification'])
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame.set_index('timestamp').sort_index()


def download_and_merge_cnn_market_fng():
    """
    Download and merge CNN Market Fear & Greed Index data.

    If 'Resources/CNN_Market_FNG_1D.csv' already exists it is loaded and only
    downloaded entries newer than its latest timestamp are appended; otherwise
    the whole downloaded series is used. The result is written back to the CSV,
    its first and last rows are printed, and it is returned.

    Returns:
        pd.DataFrame indexed by 'timestamp' with the columns 'value' and
        'value_classification'.

    Raises:
        RuntimeError: the download failed and there is no previously saved CSV
            to fall back on (the original code hit an UnboundLocalError here).
    """
    filename = 'CNN_Market_FNG_1D'
    csv_path = f'Resources/{filename}.csv'

    existing_df = None
    if Path(csv_path).is_file():
        # Load existing data
        existing_df = pd.read_csv(csv_path, parse_dates=['timestamp'], index_col='timestamp')

        print("Detecting CNN Market F&G data that was previously downloaded:")
        print(f" > latest timestamp available: {existing_df.index.max()}")
        print(f" > first timestamp available: {existing_df.index.min()}")
        print("")

    fetched_df = _fetch_cnn_market_fng()

    if existing_df is None:
        if fetched_df is None:
            raise RuntimeError(
                "CNN Market F&G download failed and no previously saved data exists at "
                f"{csv_path}"
            )
        # No existing file: keep everything that was downloaded
        result_df = fetched_df
    elif fetched_df is None:
        # Download failed: keep working with the data already on disk
        result_df = existing_df
    else:
        # Only keep entries newer than what is already stored
        new_df = fetched_df[fetched_df.index > existing_df.index.max()]

        if not new_df.empty:
            result_df = pd.concat([existing_df, new_df])

            # Remove duplicates and sort
            result_df = result_df[~result_df.index.duplicated(keep='last')].sort_index()

            print(f"Added {len(new_df)} new entries")
        else:
            result_df = existing_df
            print("No new data to add")

    # Save the updated data
    result_df.to_csv(csv_path)
    print("\nCNN Market F&G data has been saved to disk and is available now =)")
    print("\nFirst few rows:")
    print(result_df.head())
    print("\nLast few rows:")
    print(result_df.tail())

    return result_df

# Endpoint and timeout (seconds) for the CBBI download
_CBBI_URL = "https://colintalkscrypto.com/cbbi/data/latest.json"
_CBBI_TIMEOUT = 30


def _fetch_cbbi():
    """Download the CBBI JSON; return a timestamp-indexed DataFrame, or None on a non-200 reply."""
    response = requests.get(_CBBI_URL, headers={"User-Agent": "XY"}, timeout=_CBBI_TIMEOUT)

    if response.status_code != 200:
        print(f"Failed to download CBBI data. Status code: {response.status_code}")
        return None

    # Save raw data
    with open("Resources/CBBI_index.json", "wb") as file:
        file.write(response.content)

    # Wrap the text in StringIO: handing a literal JSON string to read_json is
    # deprecated and produced the warning seen in this cell's previous output
    frame = pd.read_json(StringIO(response.content.decode('utf-8')))
    frame = frame.reset_index(names='timestamp')
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='s')
    return frame.set_index('timestamp')


def download_and_merge_cbbi():
    """
    Download and merge CBBI (Colin Talks Crypto Bitcoin Bull Run Index) data.

    If 'Resources/CBBI_index.csv' already exists it is loaded and only
    downloaded rows newer than its latest timestamp are appended; otherwise the
    whole downloaded series is used. The result is written back to the CSV, its
    first and last rows are printed, and it is returned.

    Returns:
        pd.DataFrame indexed by 'timestamp', one column per field of the
        downloaded JSON.

    Raises:
        RuntimeError: the download failed and there is no previously saved CSV
            to fall back on (the original code hit an UnboundLocalError here).
    """
    filename = 'CBBI_index'
    csv_path = f'Resources/{filename}.csv'

    existing_df = None
    if Path(csv_path).is_file():
        # Load existing data
        existing_df = pd.read_csv(csv_path, parse_dates=['timestamp'], index_col='timestamp')

        print("Detecting CBBI data that was previously downloaded:")
        print(f" > latest timestamp available: {existing_df.index.max()}")
        print(f" > first timestamp available: {existing_df.index.min()}")
        print("")

    fetched_df = _fetch_cbbi()

    if existing_df is None:
        if fetched_df is None:
            raise RuntimeError(
                f"CBBI download failed and no previously saved data exists at {csv_path}"
            )
        # No existing file: keep everything that was downloaded
        result_df = fetched_df
    elif fetched_df is None:
        # Download failed: keep working with the data already on disk
        result_df = existing_df
    else:
        # Only keep new data
        new_df = fetched_df[fetched_df.index > existing_df.index.max()]

        if not new_df.empty:
            # Combine existing and new data
            result_df = pd.concat([existing_df, new_df])

            # Remove duplicates and sort
            result_df = result_df[~result_df.index.duplicated(keep='last')].sort_index()

            print(f"Added {len(new_df)} new entries")
        else:
            result_df = existing_df
            print("No new data to add")

    # Save the updated data
    result_df.to_csv(csv_path)
    print("\nCBBI data has been saved to disk and is available now =)")
    print("\nFirst few rows:")
    print(result_df.head())
    print("\nLast few rows:")
    print(result_df.tail())

    return result_df

# Download and update both indices.
# Each call writes its CSV under Resources/, prints the first and last rows,
# and returns the resulting DataFrame, which is kept for use in later cells.
cnn_fng_df = download_and_merge_cnn_market_fng()
cbbi_df = download_and_merge_cbbi()

Detecting CNN Market F&G data that was previously downloaded:
 > latest timestamp available: 2024-12-02 23:59:47
 > first timestamp available: 2021-02-01 00:00:00

No new data to add

CNN Market F&G data has been saved to disk and is available now =)

First few rows:
            value value_classification
timestamp                             
2021-02-01   39.0                 Fear
2021-02-02   58.0                Greed
2021-02-03   50.0                Greed
2021-02-04   60.0                Greed
2021-02-05   60.0                Greed

Last few rows:
                         value value_classification
timestamp                                          
2024-11-27 00:00:00  63.885714                Greed
2024-11-28 00:00:00  63.742857                Greed
2024-11-29 00:00:00  67.057143                Greed
2024-12-02 00:00:00  66.571429                Greed
2024-12-02 23:59:47  66.571429                Greed
Detecting CBBI data that was previously downloaded:
 > latest timestamp availab

  new_df = pd.read_json(response.content.decode('utf-8'))
