In [10]:
import pandas as pd
import numpy as np
import requests
from functools import reduce

In [13]:
def get_data(token, after='2021-11-22', period=3600, timeout=30):
    """Download OHLC candles for ``{token}usd`` from the cryptowat.ch REST API.

    FLOW is requested from the ``kraken`` market; every other token is
    requested from ``coinbase-pro``.

    Parameters
    ----------
    token : str
        Base asset symbol, interpolated verbatim into the market URL.
    after : str or datetime-like, default '2021-11-22'
        Earliest candle time. Parsed with ``pd.Timestamp`` and sent to the API
        as integer epoch seconds (pandas treats a naive timestamp as UTC when
        converting to epoch seconds).
    period : int, default 3600
        Candle length in seconds. It is both the ``periods`` query value and
        the key under ``result`` that is read from the response.
    timeout : float, default 30
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``ts, open, high, low, close, volume, volumeUSD, token`` where
        ``ts`` is converted from epoch seconds to datetimes and ``token``
        repeats the requested symbol on every row.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    KeyError
        If the JSON payload has no ``result[str(period)]`` entry.
    """
    exchange = 'kraken' if token == 'FLOW' else 'coinbase-pro'
    period_key = str(period)

    res = requests.get(
        f'https://api.cryptowat.ch/markets/{exchange}/{token}usd/ohlc',
        params={
            'periods': period_key,
            'after': str(int(pd.Timestamp(after).timestamp()))
        },
        # Without a timeout, requests can block indefinitely on a stalled connection.
        timeout=timeout
    )
    # Fail loudly on an error status instead of hitting a confusing KeyError below.
    res.raise_for_status()

    df = pd.DataFrame(
        res.json()['result'][period_key],
        columns=['ts', 'open', 'high', 'low', 'close', 'volume', 'volumeUSD']
    )
    df['ts'] = pd.to_datetime(df.ts, unit='s')
    df['token'] = token

    return df


In [14]:
# Symbols to download; the order here fixes the position of each frame in
# any list built by iterating over `tokens`.
tokens = [
    'ETH',
    'SOL',
    'AVAX',
    'USDT',
    'FLOW',
]

Get the hourly time series for the following cryptocurrencies from `cryptowat.ch`, starting from 2021-11-22:
    1. ETH
    2. SOL
    3. AVAX
    4. USDT
    5. FLOW

In [15]:
# One candle frame per symbol, in the same order as `tokens`.
dfs = list(map(get_data, tokens))

Get the total USD volume traded for each token in a dataframe, sorted from highest volume to lowest volume

In [16]:
# Sum the USD volume of every frame, label each total with its token,
# then show the largest total first.
total_volume_usd = pd.DataFrame(
    {'totalVolumeUSD': [df['volumeUSD'].sum() for df in dfs]},
    index=tokens,
)
total_volume_usd.sort_values('totalVolumeUSD', ascending=False)

Unnamed: 0,totalVolumeUSD
ETH,20893500000.0
SOL,6832326000.0
AVAX,3405661000.0
USDT,1958465000.0
FLOW,85691650.0


Add a column that calculates the close price ratio between ETH and SOL for each hour (i.e. close price of ETH / close price of SOL for each period)

In [17]:
# dfs[0] is the ETH frame and dfs[1] the SOL frame (order of `tokens`).
# Look up SOL's close by timestamp rather than dividing the two columns
# directly: a plain division aligns on the positional RangeIndex, so a single
# missing candle in either frame would silently pair prices from different
# hours. Hours with no SOL candle yield NaN.
# NOTE(review): Series.map needs unique `ts` values in the SOL frame — confirm.
sol_close_by_ts = dfs[1].set_index('ts')['close']
dfs[0]['ethSolRatio'] = dfs[0]['close'] / dfs[0]['ts'].map(sol_close_by_ts)

dfs[0]

Unnamed: 0,ts,open,high,low,close,volume,volumeUSD,token,ethSolRatio
0,2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,3.123077e+07,ETH,18.471694
1,2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,3.574586e+07,ETH,18.736150
2,2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.725370,3.895639e+07,ETH,18.577985
3,2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,3.882032e+07,ETH,18.794937
4,2021-11-22 04:00:00,4192.95,4213.59,4147.00,4168.35,7934.546906,3.315476e+07,ETH,18.891059
...,...,...,...,...,...,...,...,...,...
529,2021-12-14 01:00:00,3784.00,3833.33,3765.70,3817.54,9401.830890,3.576877e+07,ETH,24.326388
530,2021-12-14 02:00:00,3817.54,3821.81,3767.11,3783.98,7399.535909,2.804187e+07,ETH,24.231429
531,2021-12-14 03:00:00,3784.00,3791.31,3751.43,3781.76,6141.517589,2.316375e+07,ETH,24.437868
532,2021-12-14 04:00:00,3781.76,3800.46,3757.35,3775.99,5082.790148,1.921604e+07,ETH,24.552897


Change the name of the `volume` and `volumeUSD` columns to `volumeBase` and `volumeTerm`

In [18]:
# New labels for the two volume columns of the first frame in `dfs`.
volume_column_names = {'volume': 'volumeBase', 'volumeUSD': 'volumeTerm'}

# Store the renamed frame back in the list so dfs[0] carries the new names.
dfs[0] = dfs[0].rename(columns=volume_column_names)

dfs[0]

Unnamed: 0,ts,open,high,low,close,volumeBase,volumeTerm,token,ethSolRatio
0,2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,3.123077e+07,ETH,18.471694
1,2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,3.574586e+07,ETH,18.736150
2,2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.725370,3.895639e+07,ETH,18.577985
3,2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,3.882032e+07,ETH,18.794937
4,2021-11-22 04:00:00,4192.95,4213.59,4147.00,4168.35,7934.546906,3.315476e+07,ETH,18.891059
...,...,...,...,...,...,...,...,...,...
529,2021-12-14 01:00:00,3784.00,3833.33,3765.70,3817.54,9401.830890,3.576877e+07,ETH,24.326388
530,2021-12-14 02:00:00,3817.54,3821.81,3767.11,3783.98,7399.535909,2.804187e+07,ETH,24.231429
531,2021-12-14 03:00:00,3784.00,3791.31,3751.43,3781.76,6141.517589,2.316375e+07,ETH,24.437868
532,2021-12-14 04:00:00,3781.76,3800.46,3757.35,3775.99,5082.790148,1.921604e+07,ETH,24.552897


Create a fat (wide) table indexed by the timestamp, where each column is the close price of one token (i.e. a table with one row per hourly timestamp and 5 columns)

In [34]:
# One single-column frame per token, indexed by timestamp, with the column
# named close_<TOKEN>. Pairing tokens with frames via zip removes the
# hard-coded count of 5, so the cell keeps working if `tokens` changes.
close_frames = [
    df.set_index('ts')[['close']].rename(columns={'close': f'close_{token}'})
    for token, df in zip(tokens, dfs)
]

# Successive left joins on the timestamp index: the rows of the result are
# the timestamps of the first frame; hours missing from a later frame are NaN.
df_close = reduce(lambda left, right: left.join(right), close_frames)

df_close

Unnamed: 0_level_0,close_ETH,close_SOL,close_AVAX,close_USDT,close_FLOW
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-22 00:00:00,4262.99,230.785,128.67,1.0012,14.150
2021-11-22 01:00:00,4234.37,226.000,134.85,1.0013,13.621
2021-11-22 02:00:00,4217.89,227.037,132.54,1.0012,13.753
2021-11-22 03:00:00,4193.47,223.117,133.38,1.0011,13.652
2021-11-22 04:00:00,4168.35,220.652,136.29,1.0009,13.581
...,...,...,...,...,...
2021-12-14 01:00:00,3817.54,156.930,79.73,1.0003,8.668
2021-12-14 02:00:00,3783.98,156.160,79.13,1.0005,8.585
2021-12-14 03:00:00,3781.76,154.750,78.92,1.0005,8.558
2021-12-14 04:00:00,3775.99,153.790,79.02,1.0005,8.532


Calculate the hour-by-hour log return of the close price of each token (the return is calculated as `np.log(price_t / price_{t-1})`)

In [35]:
# Copy of the close table whose labels are moved one hour earlier, so the row
# labelled t-1 holds the close observed at t.
df_close_next = df_close.copy()
df_close_next.index = df_close_next.index - pd.Timedelta(1, 'h')

# Dividing aligns on the timestamp label, giving price_t / price_{t-1}; each
# row is therefore labelled with the earlier timestamp of the pair. The first
# shifted row and the last original row have no partner and are dropped.
log_return_names = {f'close_{token}' : f'logReturn_{token}' for token in tokens}
price_ratio = df_close_next.iloc[1:] / df_close.iloc[:-1]

np.log(price_ratio).rename(columns=log_return_names)

Unnamed: 0_level_0,logReturn_ETH,logReturn_SOL,logReturn_AVAX,logReturn_USDT,logReturn_FLOW
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-22 00:00:00,-0.006736,-0.020952,0.046912,0.0001,-0.038102
2021-11-22 01:00:00,-0.003900,0.004578,-0.017279,-0.0001,0.009644
2021-11-22 02:00:00,-0.005806,-0.017417,0.006318,-0.0001,-0.007371
2021-11-22 03:00:00,-0.006008,-0.011109,0.021583,-0.0002,-0.005214
2021-11-22 04:00:00,-0.002472,-0.015161,-0.031832,-0.0001,-0.014014
...,...,...,...,...,...
2021-12-14 00:00:00,0.008875,0.011150,0.007427,-0.0002,0.004741
2021-12-14 01:00:00,-0.008830,-0.004919,-0.007554,0.0002,-0.009622
2021-12-14 02:00:00,-0.000587,-0.009070,-0.002657,0.0000,-0.003150
2021-12-14 03:00:00,-0.001527,-0.006223,0.001266,0.0000,-0.003043
