In [1]:
# Import required libraries and dependencies
import pandas as pd
import hvplot.pandas
import requests
import json
from config import api_key
from pprint import pprint
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
#Multiple time period func
def fetch_data(time_period):
    url = "https://coinranking1.p.rapidapi.com/coins"

    querystring = {"referenceCurrencyUuid":"yhjMzLPhuIDl","timePeriod": time_period,
                   "tiers[0]":"1","orderBy":"marketCap","orderDirection":"desc",
                   "limit":"50","offset":"0"}

    headers = {
        "X-RapidAPI-Key": f"{api_key}",
        "X-RapidAPI-Host": "coinranking1.p.rapidapi.com"
    }
    #API call
    response = requests.get(url, headers=headers, params=querystring)
    #Error formatting
    if response.status_code == 200:
        return pd.DataFrame(response.json()['data']['coins']) 
    else:
        return pd.DataFrame()
    
df_24hr = fetch_data('24h')
df_7d = fetch_data('7d')
df_30d = fetch_data('30d')
df_3m = fetch_data('3m')
df_1y = fetch_data('1y')
df_3y = fetch_data('3y')
df_5y = fetch_data('5y')

### Prepare the Data

In [3]:
def clean_data(df):
    # Drop unnecessary columns
    columns_to_drop = ['iconUrl', 'coinrankingUrl', 'color', 'listedAt', 'tier', 'lowVolume']
    df_cleaned = df.drop(columns=columns_to_drop, errors='ignore')
    
    # Convert sparkline from obj to float
    try:
        df_cleaned['sparkline'] = df_cleaned['sparkline'].apply(lambda x: [float(i) if i is not None else 0.0 for i in x])
    except Exception as e:
        print('Error while processing sparkline data:', e)
    
    return df_cleaned

In [4]:
timeframes = ["24h", "7d", "30d", "3m", "1y", "3y", "5y"]
datasets = {}

for timeframe in timeframes:
    #Fetch data
    data = fetch_data(timeframe)
    #Clean data
    cleaned_data = clean_data(data)
    #Store in dict
    datasets[f'df_cleaned_{timeframe}'] = cleaned_data
    print(f'Data fetched and cleaned for {timeframe}')

Data fetched and cleaned for 24h
Data fetched and cleaned for 7d
Data fetched and cleaned for 30d
Data fetched and cleaned for 3m
Data fetched and cleaned for 1y
Data fetched and cleaned for 3y
Data fetched and cleaned for 5y


In [5]:
def process_all_time_periods(datasets):
    processed_data = {}
    
    for time_period, df in datasets.items():
       # Extract 'symbol' and 'sparkline' columns
        df_sparklines = df[['symbol', 'sparkline']]
        # Explode the 'sparkline' column
        df_sparklines_explode = df_sparklines.explode('sparkline').reset_index(drop=True)
        # Fill NA values with 0
        df_sparklines_explode.fillna(0, inplace=True)
        
        df_sparklines_explode['sparkline'] = df_sparklines_explode['sparkline'].astype(float)
        
        processed_data[time_period] = df_sparklines_explode
        
        return processed_data
        
        

In [17]:
def calculate_pct_change_symbol(df):
    """
    Calculate the percent change for sparkline values grouped by "symbol".
    """
    df['pct_change'] = df.groupby('symbol', )['sparkline'].pct_change() * 100
    
    return df

In [7]:
def compute_SMAs(df):
    """
    Compute Fast and Slow SMAs for a given dataframe.
    """
    # Compute fast & slow SMAs
    def fractional_rollingmean(data, before_frac, after_frac):
        kernel = [before_frac, 1, after_frac]
        return np.convolve(data, kernel, mode='same') / sum(kernel)

    df['fast_SMA'] = df.groupby('symbol')['sparkline'].transform(lambda x: fractional_rollingmean(x, 0.5, 0.5))
    df['slow_SMA'] = df.groupby('symbol')['sparkline'].transform(lambda x: fractional_rollingmean(x, 1.5, 1.5))
    
    return df

In [60]:
def calculate_ROC(df, column='sparkline', period=1):
    """
    Calculate the Rate of Change (ROC) for a given dataframe and column.
    """
    df['ROC'] = df.groupby('symbol', group_keys=False)[column].apply(
        lambda x: ((x - x.shift(period)) / x.shift(period)) * 100
    )
    
    return df

In [62]:
def calculate_momentum(df, column='sparkline', period=1):
    """
    Calculate the Momentum for a given dataframe and column.
    """
    df['momentum'] = df.groupby('symbol',group_keys=False)[column].apply(lambda x: x - x.shift(period))
    
    return df

In [63]:
def calculate_RSI(df, column='sparkline', period=12):
    """
    Calculate the Relative Strength Index (RSI) for a given dataframe and column.
    """
    delta = df.groupby('symbol', group_keys=False)[column].apply(lambda x: x.diff()).reset_index(level=0, drop=True)
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)
    
    avg_gain = gain.rolling(window=period, min_periods=1).mean()
    avg_loss = loss.rolling(window=period, min_periods=1).mean()
    
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100/(1+rs))
    
    return df

In [64]:
def calculate_volatility(df, column='sparkline', period=6):
     """
     Calculate the volatility (standard deviation) for a given dataframe and column.
     """
     df['volatility'] = df.groupby('symbol', group_keys=False)[column].rolling(window=period).std().reset_index(level=0, drop=True)
    
     return df

In [65]:
def compute_signals(df):
    """
    Compute Buy/Sell signals based on Fast and Slow SMAs for a given dataframe.
    """
    # Create a column for the signal (1 for buy, -1 for sell, 0 for hold)
    df['signal'] = 0  # default to hold
    df.loc[df['fast_SMA'] > df['slow_SMA'], 'signal'] = 1
    df.loc[df['fast_SMA'] < df['slow_SMA'], 'signal'] = -1
    
    return df

In [66]:
print(datasets.keys())


dict_keys(['df_cleaned_24h', 'df_cleaned_7d', 'df_cleaned_30d', 'df_cleaned_3m', 'df_cleaned_1y', 'df_cleaned_3y', 'df_cleaned_5y'])


In [67]:
# cleaned_3y = clean_data(datasets['df_cleaned_3y'])

# processed_data = process_all_time_periods({'3y': cleaned_3y})
# processed_3y = processed_data['3y']

# processed_3y_with_pct_change = calculate_pct_change_symbol(processed_3y)

# smas_3y = compute_SMAs(processed_3y_with_pct_change)

# signals_3y = compute_signals(smas_3y)

# new_3y_df = pd.DataFrame(signals_3y)

# new_3y_df

In [68]:
# List of timeframes
timeframes = ['24h', '7d', '30d', '3m', '1y', '3y', '5y']
# Dictionary to store the final processed data for each timeframe
final_datasets = {}
# Iterate over each timeframe
for timeframe in timeframes:
    
    #Clean data
    cleaned_data = clean_data(datasets[f'df_cleaned_{timeframe}'])
    #Process Sparklines
    processed_data_dict = process_all_time_periods({timeframe: cleaned_data})
    processed_data = processed_data_dict[timeframe]
    #Calculate % change
    processed_pct_change = calculate_pct_change_symbol(processed_data)
    #Compute SMAs
    processed_SMAs = compute_SMAs(processed_pct_change)
    #Rate of change
    processed_ROC = calculate_ROC(processed_SMAs)
    #Momentum
    processed_momentum = calculate_momentum(processed_ROC)
    #Relative Strength Index
    processed_RSI = calculate_RSI(processed_momentum)
    #Volatility
    processed_volatility = calculate_volatility(processed_RSI)
    #Signals
    processed_signals = compute_signals(processed_volatility)
    
    final_datasets[f'new_df_{timeframe}'] = processed_signals

In [70]:
new_df_30d = final_datasets['new_df_30d']
new_df_30d.head(25)

Unnamed: 0,symbol,sparkline,pct_change,fast_SMA,slow_SMA,ROC,momentum,RSI,volatility,signal
0,BTC,26357.176581,,19731.722066,16418.994808,,,,,1
1,BTC,26212.535101,-0.548775,26211.477862,26210.949243,-0.548775,-144.64148,0.0,,1
2,BTC,26063.664666,-0.567936,26137.00295,26173.672092,-0.567936,-148.870435,0.0,,-1
3,BTC,26208.147367,0.554345,26212.162738,26214.170424,0.554345,144.482701,32.987324,,-1
4,BTC,26368.691555,0.612574,26272.96195,26225.097148,0.612574,160.544188,50.961924,,1
5,BTC,26146.317325,-0.843327,26200.577611,26227.707755,-0.843327,-222.37423,37.157028,118.970784,-1
6,BTC,26140.984241,-0.020397,26187.788933,26211.191279,-0.020397,-5.333083,36.917195,102.994318,-1
7,BTC,26322.869926,0.695787,26237.758611,26195.202954,0.695787,181.885685,48.298503,116.742672,1
8,BTC,26164.310352,-0.602364,26358.972469,26456.303527,-0.602364,-158.559574,41.73448,97.408778,-1
9,BTC,26784.399244,2.36998,26799.675715,26807.313951,2.36998,620.088892,61.955098,246.774304,-1
