In [None]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt

%matplotlib inline

Get the hourly time series for the following cryptocurrencies from `cryptowat.ch`, starting from 2021-11-22:
    1. ETH
    2. SOL
    3. AVAX
    4. USDT
    5. FLOW

In [None]:
def get_data(token, exchanges=('coinbase-pro', 'kraken'), start='2021-11-22', period=3600):
    """Download OHLC candles for ``token`` quoted in USD from cryptowat.ch.

    The exchanges are tried in order and the first one answering with HTTP 200
    is used.

    Parameters
    ----------
    token : str
        Token symbol; it is inserted into the market name as ``{token}usd``
        and stored verbatim in the ``token`` column of the result.
    exchanges : sequence of str, optional
        Exchange slugs to try, in priority order.
    start : str or datetime-like, optional
        Only candles after this moment are requested (sent as the ``after``
        query parameter, in epoch seconds).
    period : int, optional
        Candle length in seconds (sent as the ``periods`` query parameter).

    Returns
    -------
    pandas.DataFrame
        Columns ``ts``, ``open``, ``high``, ``low``, ``close``, ``volume``,
        ``volumeUSD`` and ``token``, with a default integer index. ``ts`` is
        converted from epoch seconds to datetimes.

    Raises
    ------
    ValueError
        If ``exchanges`` is empty.
    requests.HTTPError
        If no exchange answers with HTTP 200.
    """
    if not exchanges:
        raise ValueError('exchanges must contain at least one exchange name')

    params = {
        'periods': str(period),
        'after': str(int(pd.Timestamp(start).timestamp())),
    }

    for exchange in exchanges:
        res = requests.get(
            f'https://api.cryptowat.ch/markets/{exchange}/{token}usd/ohlc',
            params=params,
            timeout=30,  # seconds; without a timeout a stalled request blocks forever
        )
        if res.status_code == 200:
            break
    else:
        # Every exchange failed: stop here instead of trying to parse an error
        # payload, which would only surface as an unrelated KeyError below.
        raise requests.HTTPError(
            f'no OHLC data for {token}: last response was HTTP {res.status_code} '
            f'from {res.url}',
            response=res,
        )

    df = pd.DataFrame(
        res.json()['result'][str(period)],
        columns=['ts', 'open', 'high', 'low', 'close', 'volume', 'volumeUSD']
    )
    df['ts'] = pd.to_datetime(df.ts, unit='s')
    df['token'] = token

    return df

In [None]:
# Symbols to download; the cells below iterate over this list in this order.
tokens = [
    'ETH',
    'SOL',
    'AVAX',
    'USDT',
    'FLOW',
]

In [None]:
# Long-format table: the per-token frames stacked on top of each other.
# Each frame keeps its own integer index, so index labels repeat across tokens.
df_base = pd.concat([get_data(symbol) for symbol in tokens])

# Same rows, addressed by timestamp instead of the integer index.
df = df_base.set_index(keys='ts')

Get the total USD volume traded for each token in a dataframe, sorted from highest volume to lowest volume

In [None]:
# Add up the USD volume of every hourly candle per token, then rank the tokens
# from the largest total to the smallest. The result has one row per token
# (index: `token`) and a single `volumeUSD` column.
df_sorted_by_volumeUSD = (
    df.groupby('token')[['volumeUSD']]
    .sum()
    .sort_values('volumeUSD', ascending=False)
)

Change the name of the `volume` and `volumeUSD` columns to `volumeBase` and `volumeTerm`

In [None]:
# New frame with the two volume columns relabelled; `df` itself is left as is.
df_renamed = df.rename(
    {'volume': 'volumeBase', 'volumeUSD': 'volumeTerm'},
    axis='columns',
)

Add a column that calculates the close price ratio between ETH and SOL for each hour (i.e. close price of ETH / close price of SOL for each period)

In [None]:
# Put the ETH and SOL closes side by side, keeping only the hours present for
# both tokens (inner join on the `ts` index level).
df_close_ratio_ETH_SOL = (
    df.loc[df['token'] == 'ETH', ['close']]
    .rename(columns={'close': 'close_ETH'})
    .merge(
        df.loc[df['token'] == 'SOL', ['close']].rename(columns={'close': 'close_SOL'}),
        how='inner',
        on='ts',
    )
)

# ETH close divided by SOL close, hour by hour.
df_close_ratio_ETH_SOL['close price ratio'] = (
    df_close_ratio_ETH_SOL['close_ETH'] / df_close_ratio_ETH_SOL['close_SOL']
)

Create a fat table indexed by the timestamp, where each column is the close price of one token (i.e. this should be a table of 200 rows and 5 columns)

In [None]:
# Wide ("fat") table: one row per timestamp, one `close_<token>` column per
# token, indexed by `ts`. Timestamps missing for a token show up as NaN.
# `pivot` raises if a (ts, token) pair occurs more than once, which would
# indicate duplicated candles in the download.
df_close_price = (
    df_base
    .pivot(index='ts', columns='token', values='close')
    .reindex(columns=tokens)  # keep the column order of `tokens`
    .add_prefix('close_')
)

Calculate the hour by hour log return of the close price of each token (return is calculated by np.log(price_t / price_{t-1}))

In [None]:
# Number of hourly rows to produce per token (taken from ETH).
no_entries = df.loc[df['token'] == 'ETH'].shape[0]

# One list of hourly log returns per token, in the order of `tokens`.
log_return_all = []

for token in tokens:
    # Close prices of this token in download order, re-labelled 0..n-1 so that
    # position t-1 is always the previous candle.
    close = df_base.loc[df_base['token'] == token, 'close'].reset_index(drop=True)

    # log(price_t / price_{t-1}); the first entry has no predecessor and is NaN.
    log_return_token = np.log(close / close.shift(1))

    # Force exactly `no_entries` values: a shorter series is padded with NaN
    # and a longer one is cut, so all per-token lists have the same length.
    log_return_all.append(log_return_token.reindex(range(no_entries)).tolist())

Convert the log return values from a list to a DataFrame

In [None]:
# `log_return_all` holds one list per token in the order of `tokens`, so the
# labels are taken from `tokens` rather than from a second hard-coded copy.
# Rows are labelled by token first, then transposed to one column per token.
df_log_return_all = pd.DataFrame(log_return_all, index=tokens).transpose()

# Attach the ETH timestamps. `assign` aligns on index labels: the ETH rows of
# `df_base` keep the default integer labels they received in `get_data`, which
# line up with the default integer index of `df_log_return_all`.
df_log_return_by_hour_unidexed = df_log_return_all.assign(
    ts=df_base.loc[df_base['token'] == 'ETH', 'ts']
)

# Final table: hourly log returns indexed by timestamp, one column per token.
df_log_return_by_hour = df_log_return_by_hour_unidexed.set_index('ts')

\[Stretch\] calculate the correlation of the tokens using the table above

In [None]:
# Pairwise Pearson correlation between the hourly log-return columns.
corr_matrix = df_log_return_by_hour.corr(method='pearson')

# Display the matrix as the cell output.
corr_matrix

\[Stretch\] visualize the correlation in a matplotlib plot

In [None]:
# Draw the correlation matrix as a heatmap. A line plot (the DataFrame.plot()
# default) would connect unrelated cells and hide the matrix structure.
fig, ax = plt.subplots(figsize=(6, 5))

# Fix the colour scale to [-1, 1], the value range of a correlation coefficient.
heatmap = ax.imshow(corr_matrix.values, vmin=-1, vmax=1, cmap='coolwarm')

tick_positions = list(range(len(corr_matrix.columns)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(corr_matrix.columns)
ax.set_yticks(tick_positions)
ax.set_yticklabels(corr_matrix.index)

# Print each coefficient inside its cell so the figure can be read on its own.
for row in tick_positions:
    for col in tick_positions:
        ax.text(col, row, f'{corr_matrix.values[row, col]:.2f}', ha='center', va='center')

fig.colorbar(heatmap, ax=ax, label='Correlation')
ax.set_title('Correlation of hourly log returns')
plt.show()