# BTC Metrics

## Spec

## Load libraries

In [1]:
from os import environ
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import logging
from datetime import date, datetime, timedelta
from coinmetrics.api_client import CoinMetricsClient
import json
import logging
from pytz import timezone as timezone_conv
from datetime import timezone as timezone_info

import matplotlib.pyplot as plt
%matplotlib inline

## API + Config

In [2]:
# Configure the root logger for the notebook: show INFO and above, and render each
# record as "<timestamp> <level name, left-aligned to 8 chars> <message>".
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S'  # timestamps look like 2024-02-06 21:11:48
)

In [3]:
# Create the Coin Metrics API client shared by every query in this notebook.
# CoinMetricsClient is already imported in the top-level import cell.
#
# The client is built without arguments (i.e. without an API key), which is the
# configuration the notebook has always ended up running with. A previous extra
# construction that passed the community URL positionally was removed: it was
# overwritten immediately, and per the client documentation the first positional
# parameter is the API key, not the host.
client = CoinMetricsClient()

In [4]:
# Assets covered by this notebook, plus a position -> ticker lookup table.
assets = ['btc']
asset_mapping = dict(enumerate(assets))
print(asset_mapping)

{0: 'btc'}


## Load assets 

In [5]:
# Fetch catalog metadata for the selected assets from the Coin Metrics API.
# NOTE(review): presumably `catalog_assets` is limited to what this client/key can
# access while `catalog_full_assets` covers everything supported — confirm against
# the client documentation. `full_asset_catalog` is not referenced by any later
# cell visible in this notebook.
asset_catalog = client.catalog_assets(assets=assets)
full_asset_catalog = client.catalog_full_assets(assets=assets)

In [6]:
# List the metadata fields exposed by the first (here: only) catalog entry.
print(f"Asset Catalog metadata includes: {list(asset_catalog[0].keys())}")

Asset Catalog metadata includes: ['asset', 'full_name', 'metrics', 'exchanges', 'markets', 'atlas']


In [7]:
# Coin Metrics metric identifiers requested by the query cell below. The list is
# passed as `metrics=` to the API call and as `values=` to the pivot, so each entry
# ends up as one column of the resulting table.
metrics = [
    "AdrActCnt", "AdrBal1in100KCnt", "AdrBal1in100MCnt", "AdrBal1in10BCnt", 
    "AdrBal1in10KCnt", "AdrBal1in10MCnt", "AdrBal1in1BCnt", "AdrBal1in1KCnt", 
    "AdrBal1in1MCnt", "AdrBalCnt", "AdrBalNtv0.001Cnt", "AdrBalNtv0.01Cnt", 
    "AdrBalNtv0.1Cnt", "AdrBalNtv100Cnt", "AdrBalNtv100KCnt", "AdrBalNtv10Cnt", 
    "AdrBalNtv10KCnt", "AdrBalNtv1Cnt", "AdrBalNtv1KCnt", "AdrBalNtv1MCnt", 
    "AdrBalUSD100Cnt", "AdrBalUSD100KCnt", "AdrBalUSD10Cnt", "AdrBalUSD10KCnt", 
    "AdrBalUSD10MCnt", "AdrBalUSD1Cnt", "AdrBalUSD1KCnt", "AdrBalUSD1MCnt", 
    "AssetEODCompletionTime", "BlkCnt", "BlkSizeMeanByte", "BlkWghtMean", 
    "BlkWghtTot", "CapAct1yrUSD", "CapMVRVCur", "CapMVRVFF", "CapMrktCurUSD", "CapMrktFFUSD", "CapRealUSD", "DiffLast", "DiffMean", 
    "FeeByteMeanNtv", "FeeMeanNtv", "FeeMeanUSD", "FeeMedNtv", "FeeMedUSD", 
    "FeeTotNtv", "FeeTotUSD", "FlowInExNtv", "FlowInExUSD", "FlowOutExNtv", 
    "FlowOutExUSD", "FlowTfrFromExCnt", "HashRate", "HashRate30d", "IssContNtv", 
    "IssContPctAnn", "IssContPctDay", "IssContUSD", "IssTotNtv", "IssTotUSD", 
    "NDF", "NVTAdj", "NVTAdj90", "NVTAdjFF", "NVTAdjFF90", "PriceBTC", "PriceUSD", 
    "ROI1yr", "ROI30d", "ReferenceRate", "ReferenceRateETH", "ReferenceRateEUR", 
    "ReferenceRateUSD", "RevAllTimeUSD", "RevHashNtv", "RevHashRateNtv", 
    "RevHashRateUSD", "RevHashUSD", "RevNtv", "RevUSD", "SER", "SplyAct10yr", 
    "SplyAct180d", "SplyAct1d", "SplyAct1yr", "SplyAct2yr", "SplyAct30d", 
    "SplyAct3yr", "SplyAct4yr", "SplyAct5yr", "SplyAct7d", "SplyAct90d", 
    "SplyActEver", "SplyActPct1yr", "SplyAdrBal1in100K", "SplyAdrBal1in100M", 
    "SplyAdrBal1in10B", "SplyAdrBal1in10K", "SplyAdrBal1in10M", "SplyAdrBal1in1B", 
    "SplyAdrBal1in1K", "SplyAdrBal1in1M", "SplyAdrBalNtv0.001", "SplyAdrBalNtv0.01", 
    "SplyAdrBalNtv0.1", "SplyAdrBalNtv1", "SplyAdrBalNtv10", "SplyAdrBalNtv100", 
    "SplyAdrBalNtv100K", "SplyAdrBalNtv10K", "SplyAdrBalNtv1K", "SplyAdrBalNtv1M", 
    "SplyAdrBalUSD1", "SplyAdrBalUSD10", "SplyAdrBalUSD100", "SplyAdrBalUSD100K", 
    "SplyAdrBalUSD10K", "SplyAdrBalUSD10M", "SplyAdrBalUSD1K", "SplyAdrBalUSD1M", "SplyAdrTop100", "SplyAdrTop10Pct", "SplyAdrTop1Pct",
    "SplyCur", "SplyExpFut10yr", "SplyFF", "SplyMiner0HopAllUSD", "SplyMiner1HopAllNtv", "SplyMiner1HopAllUSD",
    "TxCnt", "TxCntSec", "TxTfrCnt", "TxTfrValAdjNtv", "TxTfrValAdjUSD", "TxTfrValMeanNtv",
    "TxTfrValMeanUSD", "TxTfrValMedNtv", "TxTfrValMedUSD", "VelCur1yr", "VtyDayRet180d",
    "VtyDayRet30d"
]

In [8]:
# Query the Coin Metrics API for the daily metrics listed in `metrics` and reshape
# the long-format response into a wide table (one row per timestamp).
#
# datetime, timedelta, logging and pd are already imported in the top-level import
# cell; only `pytz` (not imported there as a module) is imported here.
import pytz

frequency = "1d"
start_time = "2015-01-01"
# End the window one day past "today" in US/Pacific.
# NOTE(review): presumably so the most recent daily observation is always inside
# the requested range — confirm how the API treats `end_time`.
end_time = (datetime.now(pytz.timezone('US/Pacific')) + timedelta(days=1)).strftime('%Y-%m-%d')

logging.info("Getting prices...")
df = client.get_asset_metrics(
    assets=assets,  # use the notebook-wide asset list instead of a hard-coded 'btc'
    metrics=metrics,
    frequency=frequency,
    start_time=start_time,
    end_time=end_time
).to_dataframe()

# Assign datatypes: parse the timestamps, then coerce every metric column that came
# back with object dtype to numeric (values that cannot be parsed become NaN).
df["time"] = pd.to_datetime(df["time"])
for metric in metrics:
    if df[metric].dtype == 'object':
        df[metric] = pd.to_numeric(df[metric], errors='coerce')

# Reshape so that timestamps are the rows and (metric, asset) pairs are the columns.
df_pivot = df.pivot(
    index="time",
    columns="asset",
    values=metrics
)

2024-02-06 21:11:48 INFO     Getting prices...
2024-02-06 21:12:01 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
2024-02-06 21:12:22 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():


In [9]:
# Flatten the pivoted table into a plain frame: drop the 'asset' level from the
# column MultiIndex, then turn the 'time' index back into an ordinary column.
# The result is built as a new object instead of reassigning/aliasing `df_pivot`,
# so re-running this cell gives the same result and later changes to `df_final`
# do not leak back into `df_pivot`.
df_final = df_pivot.droplevel('asset', axis='columns').reset_index()

# Keep only the calendar date of each timestamp.
df_final['time'] = df_final['time'].dt.date

# The frame now has a single-level column index and a default integer row index.
df_final.head()

Unnamed: 0,time,AdrActCnt,AdrBal1in100KCnt,AdrBal1in100MCnt,AdrBal1in10BCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1BCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,...,TxTfrCnt,TxTfrValAdjNtv,TxTfrValAdjUSD,TxTfrValMeanNtv,TxTfrValMeanUSD,TxTfrValMedNtv,TxTfrValMedUSD,VelCur1yr,VtyDayRet180d,VtyDayRet30d
0,2015-01-01,145765,8798,618611,1843111,954,267747,1153066,52,98574,...,152648,102135.88207,32149972.617237,2.786937,877.262055,0.019577,6.162407,16.104075,0.029546,0.024775
1,2015-01-02,212932,8811,618583,1845386,958,267503,1154176,52,98619,...,241133,149397.417177,47201028.097042,2.867718,906.034662,0.01209,3.819748,16.099986,0.029541,0.024736
2,2015-01-03,241755,8822,618286,1848808,957,267470,1154986,53,98642,...,266646,277500.536361,79267281.668469,3.084672,881.128133,0.004608,1.316124,16.094621,0.030392,0.030157
3,2015-01-04,235011,8804,617486,1850874,956,267071,1154614,55,98436,...,244195,226786.02074,59720600.344437,3.787183,997.296192,0.010009,2.635763,16.11231,0.030924,0.032231
4,2015-01-05,242515,8814,619398,1852934,950,267229,1156281,54,98438,...,264832,254454.056878,69975845.683627,3.914129,1076.400546,0.021808,5.997284,16.119254,0.031131,0.033784


In [10]:
# Per-column flag: True when the column holds at least one missing value.
empty_values = df_final.isna().any(axis=0)

# Print a header followed by only the flagged columns.
print("Columns with empty values:", empty_values.loc[empty_values], sep="\n")


Columns with empty values:
AdrActCnt           True
AdrBal1in100KCnt    True
AdrBal1in100MCnt    True
AdrBal1in10BCnt     True
AdrBal1in10KCnt     True
                    ... 
TxTfrValMedNtv      True
TxTfrValMedUSD      True
VelCur1yr           True
VtyDayRet180d       True
VtyDayRet30d        True
Length: 139, dtype: bool


In [11]:
# Print the number of rows, then the number of columns, one per line.
print(*df_final.shape, sep="\n")


3325
143


In [12]:
import pandas as pd

# Assuming df_final is your DataFrame
# df_final = pd.read_csv('your_file.csv')  # or however you are getting your DataFrame

def show_duplicate_column_names(df):
    """Print the column labels of ``df`` that repeat an earlier label.

    When at least one label is repeated, a header line is printed followed by the
    repeated labels (as a pandas Index). Otherwise a single "no duplicates"
    message is printed. Nothing is returned.
    """
    repeated = df.columns[df.columns.duplicated()]
    if len(repeated) == 0:
        print("No duplicate column names.")
        return
    print("Duplicate column names:")
    print(repeated)

# Report any repeated column names in the flattened frame.
show_duplicate_column_names(df_final)


No duplicate column names.


In [13]:
import pandas as pd

# Assuming df_final is your DataFrame
# df_final = pd.read_csv('your_file.csv')  # or however you are getting your DataFrame

def check_duplicate_column_names(df):
    """Report whether ``df`` has any repeated column labels.

    Prints a one-line verdict and returns it as a plain bool.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame whose column labels are inspected. (The check previously read
        the global ``df_final`` and ignored this argument.)

    Returns
    -------
    bool
        True when at least one column label occurs more than once, else False.
    """
    has_duplicates = bool(df.columns.duplicated().any())
    if has_duplicates:
        print("Duplicate column names found!")
    else:
        print("No duplicate column names.")
    return has_duplicates

# Write the frame to Parquet only when the duplicate-name check comes back clean.
columns_are_unique = not check_duplicate_column_names(df_final)
if columns_are_unique:
    df_final.to_parquet('btc-causal.parquet')


No duplicate column names.


In [14]:
# Use the 'time' column as the row index. Written as a guarded reassignment
# (instead of inplace=True) so that re-running this cell is a no-op rather than a
# KeyError once 'time' has already been moved into the index.
if 'time' in df_final.columns:
    df_final = df_final.set_index('time')
df_final

Unnamed: 0_level_0,AdrActCnt,AdrBal1in100KCnt,AdrBal1in100MCnt,AdrBal1in10BCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1BCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,AdrBalCnt,...,TxTfrCnt,TxTfrValAdjNtv,TxTfrValAdjUSD,TxTfrValMeanNtv,TxTfrValMeanUSD,TxTfrValMedNtv,TxTfrValMedUSD,VelCur1yr,VtyDayRet180d,VtyDayRet30d
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-01,145765,8798,618611,1843111,954,267747,1153066,52,98574,3940851,...,152648,102135.88207,32149972.617237,2.786937,877.262055,0.019577,6.162407,16.104075,0.029546,0.024775
2015-01-02,212932,8811,618583,1845386,958,267503,1154176,52,98619,3948156,...,241133,149397.417177,47201028.097042,2.867718,906.034662,0.01209,3.819748,16.099986,0.029541,0.024736
2015-01-03,241755,8822,618286,1848808,957,267470,1154986,53,98642,3962389,...,266646,277500.536361,79267281.668469,3.084672,881.128133,0.004608,1.316124,16.094621,0.030392,0.030157
2015-01-04,235011,8804,617486,1850874,956,267071,1154614,55,98436,3970882,...,244195,226786.02074,59720600.344437,3.787183,997.296192,0.010009,2.635763,16.11231,0.030924,0.032231
2015-01-05,242515,8814,619398,1852934,950,267229,1156281,54,98438,3968971,...,264832,254454.056878,69975845.683627,3.914129,1076.400546,0.021808,5.997284,16.119254,0.031131,0.033784
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-03,820828,8873,3060176,20661508,985,551443,9571466,41,100317,52029863,...,1311660,62979.645684,2707014999.963335,0.114277,4911.885663,0.0001,4.298238,6.073204,0.02242,0.026302
2024-02-04,802004,8870,3060299,20640642,985,551512,9564224,41,100310,52022980,...,800359,65009.861615,2767878805.804325,0.184315,7847.46502,0.000623,26.504806,6.068425,0.022392,0.026347
2024-02-05,852467,8867,3060156,20615216,983,551307,9558694,42,100307,51974787,...,687664,153044.767194,6522656909.209644,0.45803,19520.918742,0.001879,80.09014,6.070497,0.022383,0.026339
2024-02-06,865722,8852,3059915,20574629,988,551131,9545647,42,100222,51905391,...,700325,165213.01945,7121128248.23035,0.492959,21247.885154,0.001866,80.42965,6.070471,0.022387,0.026433


In [15]:
# Write the frame (now indexed by 'time') to its own Parquet file.
df_final.to_parquet('coin-metrics.parquet')