# Preparing the Data
This notebook covers the data preparation stage of the project. It defines the key functions used to extract the data from the required APIs.

In [1]:
# Initialize notebook and define libraries
import pandas as pd
import requests
import json
from datetime import datetime
import pytz
import math
import numpy as np
import time
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import recall_score

%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
import warnings
warnings.filterwarnings('ignore')

## Repeated Functions
These functions are used at various times throughout the data processing phase.

In [2]:
def cooldown(start_time, max_requests_per_minute = 39, cooldown_time = 60):
    """Pause so that consecutive API requests stay within the rate limit.

    Every request is allotted ``cooldown_time / max_requests_per_minute``
    seconds. If less than that has elapsed since ``start_time``, the function
    sleeps for the remainder; otherwise it returns immediately.

    Parameters
    ----------
    start_time : float
        ``time.time()`` value captured before the request was issued.
    max_requests_per_minute : int, default 39
        Number of requests allowed per ``cooldown_time`` window.
    cooldown_time : float, default 60
        Length of the rate-limit window in seconds.
    """
    seconds_per_request = cooldown_time / max_requests_per_minute
    seconds_used = time.time() - start_time

    # Only wait when the request finished faster than its time budget
    if seconds_used < seconds_per_request:
        time.sleep(seconds_per_request - seconds_used)

        
def format_df(data, drop=None):
    """Turn one symbol's API payload into a timestamp-indexed DataFrame.

    Parameters
    ----------
    data : list
        Parsed JSON response; ``data[0]['history']`` holds the records, each
        of which carries a ``'t'`` key.
    drop : list of str, optional
        Column names to remove from the result.

    Returns
    -------
    pandas.DataFrame
        The history records indexed by ``t`` converted to datetimes.
    """
    records = data[0]['history']
    history = pd.DataFrame(records).set_index('t')

    # 't' is interpreted as seconds since the Unix epoch
    history.index = pd.to_datetime(history.index, unit='s')

    if drop is None:
        return history
    return history.drop(columns=drop)



def get_all(func, api_key, symbols=None):
    """Run ``func`` for every symbol and join the results column-wise.

    Parameters
    ----------
    func : callable
        Called as ``func(api_key, symbol)``; expected to return a DataFrame, or
        None when the symbol should be skipped (e.g. not enough data points).
    api_key : str
        API key forwarded to ``func`` (and to ``get_all_symbols``).
    symbols : iterable of str, optional
        Symbols to process. When omitted, the list is fetched with
        ``get_all_symbols(api_key)``. Passing a pre-filtered list (such as
        ``symbols_cv``) avoids spending rate-limited requests on symbols that
        would be discarded anyway.

    Returns
    -------
    pandas.DataFrame
        All per-symbol frames concatenated along the columns; an empty
        DataFrame when no symbol produced data.
    """
    if symbols is None:
        symbols = get_all_symbols(api_key)

    # Collect the frames and concatenate once: growing a DataFrame with
    # pd.concat inside the loop copies everything gathered so far each time.
    frames = []
    for symbol in symbols:
        start_time = time.time()
        df = func(api_key, symbol)
        if df is not None:
            frames.append(df)
        # Wait out the rest of this request's time budget (API rate limit)
        cooldown(start_time)

    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, axis=1)


def interpolate(data, start_date='2022-01-02', end_date='2023-12-02'):
    """Reindex ``data`` onto a daily calendar and interpolate the missing days.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime index; it is not modified.
    start_date, end_date : str or datetime-like
        Inclusive bounds of the daily reference calendar. The defaults keep the
        window used throughout this notebook (2022-01-02 to 2023-12-02).

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly one row per day of the reference calendar.
        Days absent from ``data`` are filled by time-weighted interpolation;
        rows of ``data`` outside the calendar are dropped by the reindex.
    """
    reference_dates = pd.date_range(start=start_date, end=end_date, freq='D')

    return data.reindex(reference_dates).interpolate(method='time')

## Defining Time Limits for Project
For this project we focus on the daily activity of each cryptocurrency token over the time horizon from January 1, 2022 to January 1, 2024.

In [3]:
# Define time intervals and define start and end dates for analysis.
# The bounds are built from explicit UTC instants: time.mktime() interprets a
# struct_time as *local* time, so the request window would otherwise shift with
# the timezone of the machine running the notebook.
interval = 'daily'
start = int(pd.Timestamp('2022-01-01 00:00:00', tz='UTC').timestamp())
end = int(pd.Timestamp('2024-01-01 00:00:00', tz='UTC').timestamp())

## API Key
The API Key is required to extract the cryptocurrency data from the Coinalyze API. The URL to use for the JSON requests is below

API DOCS: https://api.coinalyze.net/v1/doc/

In [4]:
# API key, which changes for each user.
# It is read from the COINALYZE_API_KEY environment variable so the secret is
# not stored in the notebook; a key that was previously committed to a shared
# notebook should be revoked and replaced.
api_key = os.environ.get('COINALYZE_API_KEY', '')
if not api_key:
    print("COINALYZE_API_KEY is not set - export it before running the API cells below.")

## Crypto Token Symbols
Main Suffixes:

- USDT.6: USD tether trading on ".6" exchange
- USDT_PERP.A: USD tether trading perpetually on ".A" exchange
- Misc suffixes for coins trading on other platforms

The coins we are interested in are the ones with the USDT_PERP.A suffix. The `get_all_symbols` function defined below is used to extract the names of all the available tokens from the Coinalyze API.

In [5]:
def get_all_symbols(api_key):
    """Return the Coinalyze future-market symbols ending in ``USDT_PERP.A``.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.

    Returns
    -------
    pandas.Series
        Matching symbol names with a fresh 0..n-1 index. Symbols containing
        ``BTCDOMUSDT`` are excluded.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. invalid key, rate limit).
    """
    url = 'https://api.coinalyze.net/v1/future-markets'
    params = {'api_key': api_key}

    # Perform the JSON request using the url for future markets and the API key.
    # The timeout keeps the cell from hanging forever, and raise_for_status
    # surfaces API errors here rather than as a confusing failure while parsing.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Extract the symbol of every market entry
    names = [market['symbol'] for market in data]

    # Keep only perpetual contracts trading on the ".A" exchange, and filter
    # out the BTCDOMUSDT symbol
    selected = [
        name for name in names
        if name.endswith('USDT_PERP.A') and 'BTCDOMUSDT' not in name
    ]

    # Explicit dtype so an empty result is still a well-formed string Series
    return pd.Series(selected, dtype=object)

Extracting the data from the Coinalyze API is a very tedious process, as a maximum of 40 requests per minute can be made and more than 234 symbols are available. Therefore the following list, `symbols_cv`, contains the symbols that have data for the entire time horizon, minimizing time spent waiting.

In [6]:
# Hard-coded list of symbols that, per the note above, have data for the entire
# time horizon; all entries carry the USDT_PERP.A suffix.
# NOTE(review): get_all() calls get_all_symbols() to obtain its symbols, and nothing
# visible in this notebook reads symbols_cv - confirm whether it is still needed.
symbols_cv = ['RSRUSDT_PERP.A', 'DYDXUSDT_PERP.A', 'LTCUSDT_PERP.A', 'THETAUSDT_PERP.A', '1000SHIBUSDT_PERP.A', 'MASKUSDT_PERP.A', 'BTCUSDT_PERP.A', 'BALUSDT_PERP.A', 'BATUSDT_PERP.A', 'ROSEUSDT_PERP.A', 'CHRUSDT_PERP.A', 'EGLDUSDT_PERP.A', 'STMXUSDT_PERP.A', 'ZILUSDT_PERP.A', 'FLMUSDT_PERP.A', 'KNCUSDT_PERP.A', 'QTUMUSDT_PERP.A', 'TRXUSDT_PERP.A', 'MATICUSDT_PERP.A', 'BAKEUSDT_PERP.A', 'HBARUSDT_PERP.A', 'XEMUSDT_PERP.A', 'KLAYUSDT_PERP.A', 'XLMUSDT_PERP.A', 'SXPUSDT_PERP.A', 'BLZUSDT_PERP.A', 'PEOPLEUSDT_PERP.A', 'ETHUSDT_PERP.A', 'NEOUSDT_PERP.A', 'MANAUSDT_PERP.A', 'CELOUSDT_PERP.A', 'SUSHIUSDT_PERP.A', 'FTMUSDT_PERP.A', 'WAVESUSDT_PERP.A', 'AAVEUSDT_PERP.A', 'ICXUSDT_PERP.A', 'XRPUSDT_PERP.A', 'IOTXUSDT_PERP.A', 'AUDIOUSDT_PERP.A', 'RENUSDT_PERP.A', 'SANDUSDT_PERP.A', 'SFPUSDT_PERP.A', 'ANKRUSDT_PERP.A', '1000XECUSDT_PERP.A', 'UNIUSDT_PERP.A', 'ARPAUSDT_PERP.A', 'SNXUSDT_PERP.A', 'HOTUSDT_PERP.A', 'IOTAUSDT_PERP.A', 'NEARUSDT_PERP.A', 'ALICEUSDT_PERP.A', 'OGNUSDT_PERP.A', 'STORJUSDT_PERP.A', 'COMPUSDT_PERP.A', 'VETUSDT_PERP.A', 'ENJUSDT_PERP.A', 'AXSUSDT_PERP.A', 'BELUSDT_PERP.A', 'ONTUSDT_PERP.A', 'LPTUSDT_PERP.A', 'DASHUSDT_PERP.A', 'C98USDT_PERP.A', 'ETCUSDT_PERP.A', 'DENTUSDT_PERP.A', 'ZRXUSDT_PERP.A', 'KSMUSDT_PERP.A', 'RVNUSDT_PERP.A', 'CRVUSDT_PERP.A', 'LINKUSDT_PERP.A', 'IOSTUSDT_PERP.A', 'ATOMUSDT_PERP.A', 'REEFUSDT_PERP.A', 'SOLUSDT_PERP.A', 'SKLUSDT_PERP.A', 'ANTUSDT_PERP.A', 'OCEANUSDT_PERP.A', 'LINAUSDT_PERP.A', 'ADAUSDT_PERP.A', 'DGBUSDT_PERP.A', 'UNFIUSDT_PERP.A', 'YFIUSDT_PERP.A', 'ENSUSDT_PERP.A', 'ALPHAUSDT_PERP.A', 'CTSIUSDT_PERP.A', 'BNBUSDT_PERP.A', 'KAVAUSDT_PERP.A', 'LRCUSDT_PERP.A', '1INCHUSDT_PERP.A', 'LITUSDT_PERP.A', 'COTIUSDT_PERP.A', 'TRBUSDT_PERP.A', 'GALAUSDT_PERP.A', 'EOSUSDT_PERP.A', 'ATAUSDT_PERP.A', 'OMGUSDT_PERP.A', 'DOTUSDT_PERP.A', 'MTLUSDT_PERP.A', 'BANDUSDT_PERP.A', 'GRTUSDT_PERP.A', 'CELRUSDT_PERP.A', 'DEFIUSDT_PERP.A', 'CHZUSDT_PERP.A', 'AVAXUSDT_PERP.A', 'ARUSDT_PERP.A', 'ALGOUSDT_PERP.A', 'XMRUSDT_PERP.A', 
'MKRUSDT_PERP.A', 'ONEUSDT_PERP.A', 'RLCUSDT_PERP.A', 'NKNUSDT_PERP.A', 'BCHUSDT_PERP.A', 'GTCUSDT_PERP.A', 'ZENUSDT_PERP.A', 'ZECUSDT_PERP.A', 'RUNEUSDT_PERP.A', 'XTZUSDT_PERP.A', 'CTKUSDT_PERP.A', 'DOGEUSDT_PERP.A', 'FILUSDT_PERP.A']

## Close and Volume Data
The following function creates a dataframe with a time series index spanning from early January 2022 to late December 2023. It takes the API key, a symbol, and a minimum number of data points (`threshold`); the daily interval and the start and end timestamps are read from the notebook-level variables defined above. To cover every coin, the function is passed to `get_all`, which obtains the symbol list from `get_all_symbols()`.

Since there is a requests limit, the function ensures that the requests per minute limit is not exceeded. The function creates a dataframe for each coin that has the close and volume information for each day. Then all the dataframes are joined together to return all the data in one dataframe.

In [7]:
def get_symbol_close_volume(api_key, symbol = "BTCUSDT_PERP.A", threshold=700):
    """Download the close and volume history of one symbol.

    The request window and granularity come from the notebook-level
    ``interval``, ``start`` and ``end`` variables.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default "BTCUSDT_PERP.A"
        Market symbol to query.
    threshold : int, default 700
        Minimum number of history records required to keep the symbol.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by timestamp with ``<symbol>_close`` and
        ``<symbol>_volume`` columns, or None when the response is empty or
        holds fewer than ``threshold`` records.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Coinalyze URL for the close and volume data
    url = 'https://api.coinalyze.net/v1/ohlcv-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), o (open), h (high), l (low), c (close),
    #   v (volume), bv (base volume), tx (total trades), btx (buy trades)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": start, "to": end}

    # Perform the JSON request; fail fast on HTTP errors and never hang forever
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Skip symbols with an empty response or with fewer days of data than the
    # threshold, instead of failing on data[0]
    if not data or len(data[0]['history']) < threshold:
        return None

    # Keep only close ('c') and volume ('v')
    drop = ['o', 'h', 'l', 'bv', 'tx', 'btx']
    df = format_df(data, drop)

    return df.rename(columns={'c': f"{data[0]['symbol']}_close", 'v': f"{data[0]['symbol']}_volume"})
    

In [8]:
# Create dataframe containing only the Bitcoin close and volume data
# (the default symbol of get_symbol_close_volume is "BTCUSDT_PERP.A")
close_volume_btc = get_symbol_close_volume(api_key)

# View the first rows of the dataframe
close_volume_btc.head()

Unnamed: 0_level_0,BTCUSDT_PERP.A_close,BTCUSDT_PERP.A_volume
t,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-02,47280.0,184794.653
2022-01-03,46445.81,244781.473999
2022-01-04,45833.42,292139.742999
2022-01-05,43421.69,485460.771998
2022-01-06,43080.01,299507.786999


In [9]:
# Create a dataframe containing the data for all coins:
# get_all calls get_symbol_close_volume once per symbol and joins the results column-wise
close_volume_all = get_all(get_symbol_close_volume,api_key)

In [10]:
# View the dataframe (one <symbol>_close and one <symbol>_volume column per symbol)
close_volume_all

Unnamed: 0_level_0,PEOPLEUSDT_PERP.A_close,PEOPLEUSDT_PERP.A_volume,OMGUSDT_PERP.A_close,OMGUSDT_PERP.A_volume,BALUSDT_PERP.A_close,BALUSDT_PERP.A_volume,RENUSDT_PERP.A_close,RENUSDT_PERP.A_volume,CRVUSDT_PERP.A_close,CRVUSDT_PERP.A_volume,...,1000XECUSDT_PERP.A_close,1000XECUSDT_PERP.A_volume,ENSUSDT_PERP.A_close,ENSUSDT_PERP.A_volume,KLAYUSDT_PERP.A_close,KLAYUSDT_PERP.A_volume,ARPAUSDT_PERP.A_close,ARPAUSDT_PERP.A_volume,ANTUSDT_PERP.A_close,ANTUSDT_PERP.A_volume
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-02,0.12081,366228687,6.2810,7454170.5,19.593,2154143.0,0.56380,69983534,6.038,1.036064e+08,...,0.11298,145318525,41.085,974072.9,1.3920,1.100433e+07,0.09996,161401473,13.054,6.937153e+06
2022-01-03,0.12359,1620448582,6.1890,8643863.6,19.154,1712422.2,0.58140,144762720,6.507,1.369175e+08,...,0.11641,321535079,39.300,845290.1,1.3991,4.737673e+07,0.10030,164729769,11.969,9.752468e+06
2022-01-04,0.11922,592464466,6.0250,7992392.2,19.314,1491750.5,0.57860,75248782,6.077,1.093639e+08,...,0.11271,145264202,38.666,1268892.0,1.4024,1.512157e+07,0.09913,228777805,11.048,6.519602e+06
2022-01-05,0.10148,1091596364,5.5630,9675976.9,17.529,1319813.0,0.51790,91429399,5.394,1.059897e+08,...,0.10421,153789159,33.780,2120905.6,1.3459,3.414819e+07,0.09138,204856943,10.260,7.712332e+06
2022-01-06,0.10189,1627929359,5.6440,9182662.0,17.671,1350058.6,0.51900,63385696,5.398,9.355741e+07,...,0.10317,107552156,33.171,1549395.1,1.3470,2.448704e+07,0.09215,137694411,10.855,1.984296e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-01,0.01242,526097636,0.6438,10348808.5,3.822,1865168.5,0.06040,211717248,0.572,6.693228e+07,...,0.02926,121489288,8.587,1388433.0,0.1907,5.565839e+07,0.05065,150709986,5.301,9.929284e+05
2023-12-02,0.01291,772333755,0.6603,8017030.2,3.947,1266611.3,0.06178,325850198,0.610,8.012060e+07,...,0.03018,485892947,8.977,2718034.7,0.1936,6.579686e+07,0.05144,118965640,5.496,9.119628e+05
2023-12-03,0.01284,913860735,0.6514,10472159.6,4.048,1662038.5,0.06111,267312149,0.604,6.181809e+07,...,0.03085,479349859,8.817,1783621.9,0.1980,7.530923e+07,0.05108,144404370,5.591,9.850237e+05
2023-12-04,0.01359,3368519325,0.6723,27653936.7,4.346,5007642.2,0.06784,1243114590,0.607,1.205764e+08,...,0.03144,669522066,9.233,3562089.7,0.2056,1.075502e+08,0.05118,368412112,5.739,2.137553e+06


In [11]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
close_volume_all.to_csv('close volume.csv')

## Long-Short Ratio
This function returns the long-short ratio for a crypto token (Bitcoin by default). It requests the symbol's history directly from the API and stores it in a dataframe with a time series index.

Long-short ratio definition: represents the amount of a security available for short selling versus the amount actually borrowed and sold; high long-short ratio indicates positive investor expectations.

From: https://www.investopedia.com

In [12]:
def get_symbol_lsr(api_key, symbol = "BTCUSDT_PERP.A", threshold=693):
    """Download the long-short ratio history of one symbol.

    The request window and granularity come from the notebook-level
    ``interval``, ``start`` and ``end`` variables.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default "BTCUSDT_PERP.A"
        Market symbol to query.
    threshold : int, default 693
        Minimum number of history records required to keep the symbol.

    Returns
    -------
    pandas.DataFrame or None
        Frame with a single ``<symbol>_ls_ratio`` column, passed through
        ``interpolate`` (daily calendar, gaps interpolated), or None when the
        response is empty or holds fewer than ``threshold`` records.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url = 'https://api.coinalyze.net/v1/long-short-ratio-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), r (ratio), l (long), s (short)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": start, "to": end}

    # Fail fast on HTTP errors and never hang forever on a stalled connection
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Skip symbols with an empty response or too few records, instead of
    # failing on data[0]
    if not data or len(data[0]['history']) < threshold:
        return None

    # Keep only the ratio column
    df = format_df(data, ['l', 's'])
    df = df.rename(columns={'r': f"{data[0]['symbol']}_ls_ratio"})

    return interpolate(df)

In [13]:
# Call the function to create the Bitcoin long-short ratio DF
# (the default symbol of get_symbol_lsr is "BTCUSDT_PERP.A")
lsr_btc = get_symbol_lsr(api_key)

# View the dataframe
lsr_btc

Unnamed: 0,BTCUSDT_PERP.A_ls_ratio
2022-01-02,2.8432
2022-01-03,2.8911
2022-01-04,4.3677
2022-01-05,4.1680
2022-01-06,3.7824
...,...
2023-11-28,1.2046
2023-11-29,0.6793
2023-11-30,0.8077
2023-12-01,0.9305


In [14]:
# Build the long-short ratio dataframe for all symbols (get_symbol_lsr is called once per symbol)
lsr_all = get_all(get_symbol_lsr,api_key)

In [15]:
# View the dataframe (one <symbol>_ls_ratio column per symbol)
lsr_all

Unnamed: 0,PEOPLEUSDT_PERP.A_ls_ratio,OMGUSDT_PERP.A_ls_ratio,BALUSDT_PERP.A_ls_ratio,RENUSDT_PERP.A_ls_ratio,CRVUSDT_PERP.A_ls_ratio,STORJUSDT_PERP.A_ls_ratio,RLCUSDT_PERP.A_ls_ratio,SXPUSDT_PERP.A_ls_ratio,KAVAUSDT_PERP.A_ls_ratio,RSRUSDT_PERP.A_ls_ratio,...,COTIUSDT_PERP.A_ls_ratio,DENTUSDT_PERP.A_ls_ratio,NKNUSDT_PERP.A_ls_ratio,IOTXUSDT_PERP.A_ls_ratio,CTSIUSDT_PERP.A_ls_ratio,1000XECUSDT_PERP.A_ls_ratio,ENSUSDT_PERP.A_ls_ratio,KLAYUSDT_PERP.A_ls_ratio,ARPAUSDT_PERP.A_ls_ratio,ANTUSDT_PERP.A_ls_ratio
2022-01-02,4.8754,5.2112,2.5174,3.6339,1.1758,3.2337,2.5026,1.1478,0.9324,3.7733,...,3.7778,4.7537,3.4072,3.7259,3.0371,7.6281,5.5833,1.5349,5.2657,0.9260
2022-01-03,5.5359,4.6022,1.7115,3.5893,1.9886,2.9078,2.7793,1.8121,2.5174,3.6882,...,4.3967,5.1958,3.4863,4.2466,3.0967,6.1480,5.3694,1.6096,5.2150,0.7224
2022-01-04,5.8213,5.2696,2.3693,4.7904,1.5265,3.8193,4.0968,3.2644,2.1114,3.8333,...,4.3792,4.3505,4.4259,6.2993,3.2499,5.4144,6.0771,1.7078,5.0060,1.8547
2022-01-05,7.6881,5.6667,2.6792,4.5279,2.1496,4.1813,3.3066,3.9480,1.2795,3.9628,...,5.2933,4.8241,4.2165,5.9204,3.0225,6.3584,6.1174,1.9638,5.7385,2.5920
2022-01-06,5.4851,5.1958,2.7341,3.9727,3.3122,4.0125,3.6468,3.5809,3.0967,4.7241,...,4.5218,4.8241,4.5127,5.1996,3.6948,6.3475,4.1706,1.6048,5.7159,3.2589
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-28,3.0258,3.4763,3.4326,2.7651,3.6860,0.9697,2.1546,2.3400,3.9727,2.4002,...,2.0157,2.0441,2.1666,2.8095,2.1279,2.7495,2.2031,1.1195,2.4400,1.6048
2023-11-29,2.0321,3.2974,3.3630,2.8476,3.4783,0.8443,1.7762,2.2982,3.4643,2.4990,...,2.1606,1.7716,1.9762,2.4483,2.1928,3.0339,1.8257,1.3624,2.6697,1.6103
2023-11-30,2.3422,3.5662,3.1824,2.8491,3.6468,1.0296,1.6738,2.1939,2.9185,2.3223,...,1.8019,1.8952,2.1496,2.0012,2.1878,2.9293,2.7064,1.3663,2.6805,1.8377
2023-12-01,2.8476,3.6104,3.3668,3.5228,3.3975,1.0024,2.0826,2.9417,2.9841,1.8760,...,1.9949,1.9922,2.0618,1.9197,2.3864,3.3764,3.4623,1.8466,2.9651,1.7352


In [16]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
lsr_all.to_csv('long short.csv')

## Liquidations


Short liquidation definition: the liquidation margin is equal to what the trader would owe to purchase the security.

Long liquidation definition: the liquidation margin is equal to what the investor or trader would retain if the position were closed.

From: https://www.investopedia.com

In [17]:
def get_symbol_liquidations(api_key, symbol = "BTCUSDT_PERP.A", threshold=696):
    """Download the long and short liquidation history of one symbol.

    The request window and granularity come from the notebook-level
    ``interval``, ``start`` and ``end`` variables. The number of records
    received is printed for every symbol as a progress log.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default "BTCUSDT_PERP.A"
        Market symbol to query.
    threshold : int, default 696
        Minimum number of history records required to keep the symbol.

    Returns
    -------
    pandas.DataFrame or None
        Frame with ``<symbol>_longliq`` and ``<symbol>_shortliq`` columns,
        passed through ``interpolate`` (daily calendar, gaps interpolated), or
        None when the response is empty or holds fewer than ``threshold``
        records.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url = 'https://api.coinalyze.net/v1/liquidation-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), l (long liquidation), s (short liquidation)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": start, "to": end,
              #"convert_to_usd":'true',
             }

    # Fail fast on HTTP errors and never hang forever on a stalled connection
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # An empty response counts as zero data points rather than failing on data[0]
    n_points = len(data[0]['history']) if data else 0
    print(f"Symbol: {symbol}, Data Points: {n_points}")

    if n_points < threshold:
        return None

    df = format_df(data)
    df = df.rename(columns = {'l':'{}_longliq'.format(data[0]['symbol']),'s':'{}_shortliq'.format(data[0]['symbol'])})

    return interpolate(df)

In [18]:
# Create the Bitcoin liquidations dataframe (default symbol "BTCUSDT_PERP.A")
liq_btc = get_symbol_liquidations(api_key)

# View the dataframe
liq_btc

Symbol: BTCUSDT_PERP.A, Data Points: 703


Unnamed: 0,BTCUSDT_PERP.A_longliq,BTCUSDT_PERP.A_shortliq
2022-01-02,114.778,79.263
2022-01-03,174.321,69.337
2022-01-04,385.271,109.397
2022-01-05,507.792,182.757
2022-01-06,120.827,118.182
...,...,...
2023-11-28,85.946,198.702
2023-11-29,87.249,88.287
2023-11-30,48.229,47.969
2023-12-01,51.462,151.664


In [19]:
# Build the liquidations dataframe for all symbols; get_symbol_liquidations
# prints the number of data points received for every symbol
liq_all = get_all(get_symbol_liquidations,api_key)

Symbol: NMRUSDT_PERP.A, Data Points: 158
Symbol: BIGTIMEUSDT_PERP.A, Data Points: 55
Symbol: POWRUSDT_PERP.A, Data Points: 40
Symbol: MAVUSDT_PERP.A, Data Points: 159
Symbol: AGLDUSDT_PERP.A, Data Points: 128
Symbol: CYBERUSDT_PERP.A, Data Points: 107
Symbol: ARKUSDT_PERP.A, Data Points: 78
Symbol: WLDUSDT_PERP.A, Data Points: 135
Symbol: PENDLEUSDT_PERP.A, Data Points: 130
Symbol: ARKMUSDT_PERP.A, Data Points: 131
Symbol: FRONTUSDT_PERP.A, Data Points: 75
Symbol: BONDUSDT_PERP.A, Data Points: 52
Symbol: MDTUSDT_PERP.A, Data Points: 156
Symbol: SEIUSDT_PERP.A, Data Points: 110
Symbol: XVGUSDT_PERP.A, Data Points: 154
Symbol: BNTUSDT_PERP.A, Data Points: 110
Symbol: YGGUSDT_PERP.A, Data Points: 123
Symbol: DODOXUSDT_PERP.A, Data Points: 118
Symbol: OXTUSDT_PERP.A, Data Points: 113
Symbol: MINAUSDT_PERP.A, Data Points: 294
Symbol: PEOPLEUSDT_PERP.A, Data Points: 702
Symbol: RIFUSDT_PERP.A, Data Points: 46
Symbol: ASTRUSDT_PERP.A, Data Points: 285
Symbol: XVSUSDT_PERP.A, Data Points: 225


Symbol: HOOKUSDT_PERP.A, Data Points: 314
Symbol: QNTUSDT_PERP.A, Data Points: 390
Symbol: ANTUSDT_PERP.A, Data Points: 695
Symbol: API3USDT_PERP.A, Data Points: 641
Symbol: STGUSDT_PERP.A, Data Points: 460
Symbol: INJUSDT_PERP.A, Data Points: 470
Symbol: MBLUSDT_PERP.A, Data Points: 22
Symbol: GALUSDT_PERP.A, Data Points: 573
Symbol: APTUSDT_PERP.A, Data Points: 413
Symbol: RADUSDT_PERP.A, Data Points: 198
Symbol: UMAUSDT_PERP.A, Data Points: 191
Symbol: BLUEBIRDUSDT_PERP.A, Data Points: 304
Symbol: DARUSDT_PERP.A, Data Points: 562
Symbol: ICPUSDT_PERP.A, Data Points: 587
Symbol: KASUSDT_PERP.A, Data Points: 19
Symbol: MEMEUSDT_PERP.A, Data Points: 33
Symbol: HIFIUSDT_PERP.A, Data Points: 81
Symbol: GLMRUSDT_PERP.A, Data Points: 71
Symbol: BICOUSDT_PERP.A, Data Points: 69
Symbol: TWTUSDT_PERP.A, Data Points: 33
Symbol: TOKENUSDT_PERP.A, Data Points: 33
Symbol: BSVUSDT_PERP.A, Data Points: 47
Symbol: POLYXUSDT_PERP.A, Data Points: 42
Symbol: GASUSDT_PERP.A, Data Points: 42
Symbol: SLPU

In [20]:
# View the dataframe (one <symbol>_longliq and one <symbol>_shortliq column per symbol)
liq_all

Unnamed: 0,PEOPLEUSDT_PERP.A_longliq,PEOPLEUSDT_PERP.A_shortliq,RENUSDT_PERP.A_longliq,RENUSDT_PERP.A_shortliq,CRVUSDT_PERP.A_longliq,CRVUSDT_PERP.A_shortliq,STORJUSDT_PERP.A_longliq,STORJUSDT_PERP.A_shortliq,RLCUSDT_PERP.A_longliq,RLCUSDT_PERP.A_shortliq,...,C98USDT_PERP.A_longliq,C98USDT_PERP.A_shortliq,MANAUSDT_PERP.A_longliq,MANAUSDT_PERP.A_shortliq,HBARUSDT_PERP.A_longliq,HBARUSDT_PERP.A_shortliq,DYDXUSDT_PERP.A_longliq,DYDXUSDT_PERP.A_shortliq,COTIUSDT_PERP.A_longliq,COTIUSDT_PERP.A_shortliq
2022-01-02,438865.0,59767.0,15226.0,39673.0,195064.4,143533.8,29917.0,21160.0,3693.7,2303.0,...,5853.0,1889.0,22516.0,17497.0,62358.0,73906.0,1632.1,10535.0,76885.0,26845.0
2022-01-03,2109983.0,974617.0,279242.0,134105.0,210800.7,259412.7,41807.0,1551.0,25650.7,356.9,...,40169.0,77.0,145557.0,11590.0,341073.0,36839.0,33328.0,5.5,143211.0,26779.0
2022-01-04,578912.0,102480.0,88626.0,26938.0,198043.0,50376.3,23616.0,3004.0,28555.9,12089.4,...,14214.0,20723.0,212523.0,9730.0,217191.0,195997.0,11749.7,463.0,273429.0,147684.0
2022-01-05,5147538.0,458971.0,1065753.0,38093.0,527518.7,71815.9,239899.0,4017.0,86365.0,8362.9,...,205544.0,44861.0,653472.0,88627.0,1822568.0,98327.0,154618.6,15609.7,818186.0,104982.0
2022-01-06,1382699.0,1372846.0,80025.0,22704.0,118396.4,73880.6,21597.0,7564.0,8362.6,487.3,...,32085.0,832.0,412973.0,513847.0,352089.0,95069.0,25594.7,2284.8,156865.0,81615.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-28,5335868.0,405770.0,275800.0,56843.0,1434.6,52.7,112033.0,45837.0,2761.1,73.8,...,46376.0,73360.0,431810.0,2961.0,276678.0,40845.0,103745.7,35993.5,118392.0,392701.0
2023-11-29,583670.0,955866.0,575287.0,50670.0,3749.2,9.9,16308.0,2305.0,787.3,1816.8,...,18347.0,20556.0,55680.0,8172.0,124723.0,5313.0,19551.0,9592.6,115362.0,688066.0
2023-11-30,679953.0,39825.0,209877.0,2492.0,8726.2,9.4,2931.0,2209.0,8186.8,1815.8,...,52723.0,58230.0,51375.0,86.0,37052.0,71281.0,65989.3,15724.2,405477.0,12166.0
2023-12-01,33936.0,161156.0,131033.0,28477.0,5068.7,8245.8,3839.0,18519.0,1306.8,29.1,...,11425.0,231398.0,2726.0,12170.0,34777.0,5409.0,33615.3,20198.6,2448.0,32477.0


In [21]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
liq_all.to_csv('liquidations.csv')

## Open Interest

Open interest definition: total number of outstanding derivative contracts for an asset, such as options or futures, that have not been settled. Can provide accurate picture of a contract's liquidity and interest. 

From: https://www.investopedia.com

In [22]:
def get_symbol_oi(api_key, symbol = "BTCUSDT_PERP.A", threshold=700):
    """Download the open-interest history (close value) of one symbol.

    The request window and granularity come from the notebook-level
    ``interval``, ``start`` and ``end`` variables. The number of records
    received is printed for every symbol as a progress log.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default "BTCUSDT_PERP.A"
        Market symbol to query.
    threshold : int, default 700
        Minimum number of history records required to keep the symbol.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by timestamp with a single ``<symbol>_oi`` column, or
        None when the response is empty or holds fewer than ``threshold``
        records.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url = 'https://api.coinalyze.net/v1/open-interest-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), o (open), h (high), l (low), c (close)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": start, "to": end,
              #"convert_to_usd":'true',
             }

    # Fail fast on HTTP errors and never hang forever on a stalled connection
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # An empty response counts as zero data points rather than failing on data[0]
    n_points = len(data[0]['history']) if data else 0
    print(f"Symbol: {symbol}, Data Points: {n_points}")

    if n_points < threshold:
        return None

    # Keep only the close value
    df = format_df(data, ['o', 'h', 'l'])

    return df.rename(columns = {'c':'{}_oi'.format(data[0]['symbol'])})

In [23]:
# Create the Bitcoin open interest dataframe (default symbol "BTCUSDT_PERP.A")
oi_btc = get_symbol_oi(api_key)

# View the dataframe
oi_btc

Symbol: BTCUSDT_PERP.A, Data Points: 703


Unnamed: 0_level_0,BTCUSDT_PERP.A_oi
t,Unnamed: 1_level_1
2022-01-02,73297.806
2022-01-03,79508.429
2022-01-04,81155.755
2022-01-05,77849.139
2022-01-06,79293.451
...,...
2023-12-01,89044.936
2023-12-02,81673.055
2023-12-03,79997.634
2023-12-04,83191.992


In [24]:
# Build the open interest dataframe for all symbols; get_symbol_oi prints the
# number of data points received for every symbol
oi_all = get_all(get_symbol_oi,api_key)

Symbol: NMRUSDT_PERP.A, Data Points: 167
Symbol: BIGTIMEUSDT_PERP.A, Data Points: 55
Symbol: POWRUSDT_PERP.A, Data Points: 40
Symbol: MAVUSDT_PERP.A, Data Points: 160
Symbol: AGLDUSDT_PERP.A, Data Points: 130
Symbol: CYBERUSDT_PERP.A, Data Points: 107
Symbol: ARKUSDT_PERP.A, Data Points: 78
Symbol: WLDUSDT_PERP.A, Data Points: 135
Symbol: PENDLEUSDT_PERP.A, Data Points: 131
Symbol: ARKMUSDT_PERP.A, Data Points: 131
Symbol: FRONTUSDT_PERP.A, Data Points: 75
Symbol: BONDUSDT_PERP.A, Data Points: 52
Symbol: MDTUSDT_PERP.A, Data Points: 159
Symbol: SEIUSDT_PERP.A, Data Points: 111
Symbol: XVGUSDT_PERP.A, Data Points: 154
Symbol: BNTUSDT_PERP.A, Data Points: 118
Symbol: YGGUSDT_PERP.A, Data Points: 123
Symbol: DODOXUSDT_PERP.A, Data Points: 120
Symbol: OXTUSDT_PERP.A, Data Points: 117
Symbol: MINAUSDT_PERP.A, Data Points: 303
Symbol: PEOPLEUSDT_PERP.A, Data Points: 703
Symbol: RIFUSDT_PERP.A, Data Points: 46
Symbol: ASTRUSDT_PERP.A, Data Points: 295
Symbol: XVSUSDT_PERP.A, Data Points: 237


Symbol: HOOKUSDT_PERP.A, Data Points: 317
Symbol: QNTUSDT_PERP.A, Data Points: 412
Symbol: ANTUSDT_PERP.A, Data Points: 703
Symbol: API3USDT_PERP.A, Data Points: 652
Symbol: STGUSDT_PERP.A, Data Points: 468
Symbol: INJUSDT_PERP.A, Data Points: 476
Symbol: MBLUSDT_PERP.A, Data Points: 22
Symbol: GALUSDT_PERP.A, Data Points: 580
Symbol: APTUSDT_PERP.A, Data Points: 413
Symbol: RADUSDT_PERP.A, Data Points: 210
Symbol: UMAUSDT_PERP.A, Data Points: 210
Symbol: BLUEBIRDUSDT_PERP.A, Data Points: 399
Symbol: DARUSDT_PERP.A, Data Points: 586
Symbol: ICPUSDT_PERP.A, Data Points: 595
Symbol: KASUSDT_PERP.A, Data Points: 19
Symbol: MEMEUSDT_PERP.A, Data Points: 33
Symbol: HIFIUSDT_PERP.A, Data Points: 81
Symbol: GLMRUSDT_PERP.A, Data Points: 71
Symbol: BICOUSDT_PERP.A, Data Points: 69
Symbol: TWTUSDT_PERP.A, Data Points: 33
Symbol: TOKENUSDT_PERP.A, Data Points: 33
Symbol: BSVUSDT_PERP.A, Data Points: 47
Symbol: POLYXUSDT_PERP.A, Data Points: 42
Symbol: GASUSDT_PERP.A, Data Points: 42
Symbol: SLPU

In [25]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
oi_all.to_csv('open interest.csv')

## Funding Rate
Funding rate definition: represents the difference between the mark price of the futures market and the index price, equivalent to the spot market. A positive funding rate means users with long positions pay a funding fee to those with short positions, and vice versa for negative funding rate. 

From: https://help.coinbase.com/en

In [26]:
def get_symbol_funding_rate(api_key, symbol='BTCUSDT_PERP.A', threshold=700):
    """Download the funding-rate history (close value) of one symbol.

    The request window and granularity come from the notebook-level
    ``interval``, ``start`` and ``end`` variables.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default 'BTCUSDT_PERP.A'
        Market symbol to query.
    threshold : int, default 700
        Minimum number of history records required to keep the symbol.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by timestamp with a single ``<symbol>_fr`` column, or
        None when the response is empty or holds fewer than ``threshold``
        records.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url = 'https://api.coinalyze.net/v1/funding-rate-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), o (open), h (high), l (low), c (close)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": start, "to": end}

    # Fail fast on HTTP errors and never hang forever on a stalled connection
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Skip symbols with an empty response or too few records, instead of
    # failing on data[0]
    if not data or len(data[0]['history']) < threshold:
        return None

    # Keep only the close value
    df = format_df(data, ['o', 'h', 'l'])

    return df.rename(columns = {'c':'{}_fr'.format(data[0]['symbol'])})

In [27]:
# Create the Bitcoin funding rate dataframe (default symbol 'BTCUSDT_PERP.A')
fr_btc = get_symbol_funding_rate(api_key)

# View the dataframe
fr_btc

Unnamed: 0_level_0,BTCUSDT_PERP.A_fr
t,Unnamed: 1_level_1
2022-01-02,0.010000
2022-01-03,0.010000
2022-01-04,0.010000
2022-01-05,0.010000
2022-01-06,0.010000
...,...
2023-12-01,0.010000
2023-12-02,0.008541
2023-12-03,0.010000
2023-12-04,0.010000


In [28]:
# Build the funding rate dataframe for all symbols (get_symbol_funding_rate is called once per symbol)
fr_all = get_all(get_symbol_funding_rate,api_key)

In [29]:
# View the dataframe (one <symbol>_fr column per symbol)
fr_all

Unnamed: 0_level_0,PEOPLEUSDT_PERP.A_fr,OMGUSDT_PERP.A_fr,BALUSDT_PERP.A_fr,RENUSDT_PERP.A_fr,CRVUSDT_PERP.A_fr,STORJUSDT_PERP.A_fr,RLCUSDT_PERP.A_fr,SXPUSDT_PERP.A_fr,KAVAUSDT_PERP.A_fr,RSRUSDT_PERP.A_fr,...,COTIUSDT_PERP.A_fr,DENTUSDT_PERP.A_fr,NKNUSDT_PERP.A_fr,IOTXUSDT_PERP.A_fr,CTSIUSDT_PERP.A_fr,1000XECUSDT_PERP.A_fr,ENSUSDT_PERP.A_fr,KLAYUSDT_PERP.A_fr,ARPAUSDT_PERP.A_fr,ANTUSDT_PERP.A_fr
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-02,0.010000,0.010000,0.010000,0.010000,0.01000,-0.019553,0.010000,0.010000,0.01,0.01,...,0.01000,0.010000,0.010000,0.010000,0.010000,-0.085688,0.010000,0.010000,0.010000,0.010000
2022-01-03,0.010000,0.010000,0.010000,0.010000,0.01000,0.010000,0.010000,0.010000,0.01,0.01,...,0.01000,0.010000,0.010000,0.010000,0.010000,0.008957,0.010000,0.010000,0.010000,0.010000
2022-01-04,0.010000,0.010000,0.010000,0.010000,0.01000,0.010000,0.010000,0.010000,0.01,0.01,...,0.01000,0.010000,0.010000,0.010000,0.010000,-0.049702,0.010000,0.010000,0.010000,0.010000
2022-01-05,0.010000,0.010000,0.010000,0.010000,0.01000,0.010000,0.010000,0.010000,0.01,0.01,...,0.01000,0.010000,0.010000,0.010000,0.010000,-0.108208,0.010000,0.010000,0.010000,0.010000
2022-01-06,0.010000,0.010000,-0.075603,0.010000,0.01000,0.007112,0.008475,0.010000,0.01,0.01,...,0.01000,0.010000,-0.040119,0.010000,0.010000,0.004942,0.010000,0.010000,0.002022,0.010000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-01,0.010000,0.010000,0.010000,0.010000,0.01000,0.010000,0.010000,0.010000,0.01,0.01,...,0.00716,0.010000,0.010000,0.010000,0.010000,0.010000,0.010000,0.010000,0.010000,0.010000
2023-12-02,0.011709,0.010000,0.010000,0.013913,0.01000,0.020603,0.010000,0.010000,0.01,0.01,...,0.01000,0.022164,0.022252,0.015485,0.010000,0.010000,0.023625,0.010000,0.010000,0.010000
2023-12-03,0.019545,0.010000,0.010000,0.010000,0.01000,0.021040,0.019361,0.017163,0.01,0.01,...,0.01000,0.015805,0.010000,0.010000,0.010000,0.010000,0.023881,0.010000,0.010000,0.010000
2023-12-04,0.020599,0.019837,0.010000,0.010000,0.02269,0.010000,0.010000,0.010000,0.01,0.01,...,0.01000,0.015978,0.028447,0.010000,0.022216,0.010107,0.017057,0.010000,0.010000,0.010000


In [30]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
fr_all.to_csv('funding rate.csv')

## Older Bitcoin Close Data
Extracting older Bitcoin close data makes it possible to calculate the Simple Moving Average with a 200-day window.

In [31]:
# Define an older time horizon.
# Built from an explicit UTC instant: time.mktime() interprets a struct_time as
# local time, which would make the value depend on the machine's timezone.
older = int(pd.Timestamp('2019-01-01 00:00:00', tz='UTC').timestamp())

In [32]:
def get_symbol_old_close(api_key, symbol='BTCUSDT_PERP.A'):
    """Download the close history of one symbol over the longer time horizon.

    The request runs from the notebook-level ``older`` timestamp to ``end``
    at the notebook-level ``interval``. Unlike the other download helpers, no
    minimum number of records is enforced.

    Parameters
    ----------
    api_key : str
        Coinalyze API key.
    symbol : str, default 'BTCUSDT_PERP.A'
        Market symbol to query.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by timestamp with a single ``<symbol>_close`` column.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    url = 'https://api.coinalyze.net/v1/ohlcv-history'

    # Query parameters for the request. The 'history' structure belongs to the
    # response, not the request; each of its records has the keys
    #   t (Unix timestamp in seconds), o (open), h (high), l (low), c (close),
    #   v (volume), bv (base volume), tx (total trades), btx (buy trades)
    params = {'api_key': api_key, "symbols": symbol, "interval": interval, "from": older, "to": end}

    # Perform the JSON request; fail fast on HTTP errors and never hang forever
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Keep only the close ('c') column
    drop = ['o', 'h', 'l', 'v', 'bv', 'tx', 'btx']
    df = format_df(data, drop)

    return df.rename(columns={'c': f"{data[0]['symbol']}_close"})

In [33]:
# Create dataframe containing only the older Bitcoin close data
# (the default symbol of get_symbol_old_close is 'BTCUSDT_PERP.A')
close_volume_older_btc = get_symbol_old_close(api_key)

# View the first rows of the dataframe
close_volume_older_btc.head()

Unnamed: 0_level_0,BTCUSDT_PERP.A_close
t,Unnamed: 1_level_1
2019-09-12,10415.13
2019-09-13,10341.34
2019-09-14,10332.25
2019-09-15,10302.22
2019-09-16,10249.27


In [34]:
# Save the data in a csv to be used in the model building notebook
# (relative path: the file is written to the current working directory)
close_volume_older_btc.to_csv('older btc closes.csv')