# BTC Metrics

## Spec

## Load libraries

In [1]:
from os import environ
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import logging
from datetime import date, datetime, timedelta
from coinmetrics.api_client import CoinMetricsClient
import json
import logging
from pytz import timezone as timezone_conv
from datetime import timezone as timezone_info

import matplotlib.pyplot as plt
%matplotlib inline

## API + Config

In [2]:
# Root logger setup: INFO and above, each record prefixed with a
# second-resolution timestamp and a left-aligned, 8-character level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

In [3]:
# Keyless client for the Coin Metrics community API.
# CoinMetricsClient is imported in the "Load libraries" cell.
# Do not pass the endpoint URL here: the first positional argument of
# CoinMetricsClient is the API key, so a URL would be sent as a key.
client = CoinMetricsClient()

In [4]:
# Assets to analyse, plus a position -> ticker lookup for them.
assets = ['btc']
asset_mapping = dict(enumerate(assets))
print(asset_mapping)

{0: 'btc'}


## Load assets 

In [5]:
# Fetch catalog metadata for the requested assets through the API client.
asset_catalog = client.catalog_assets(assets=assets)
# NOTE(review): full_asset_catalog is not referenced by any cell visible here — confirm it is still needed.
full_asset_catalog = client.catalog_full_assets(assets=assets)

In [6]:
# Show which metadata fields the first catalog entry exposes.
print(f"Asset Catalog metadata includes: {list(asset_catalog[0].keys())}")

Asset Catalog metadata includes: ['asset', 'full_name', 'metrics', 'exchanges', 'markets', 'atlas']


In [7]:
# Metric identifiers requested from the API in the time-series query below;
# each one becomes a column of the resulting DataFrame.
metrics = [
    "AdrActCnt", "AdrBal1in100KCnt", "AdrBal1in100MCnt", "AdrBal1in10BCnt", 
    "AdrBal1in10KCnt", "AdrBal1in10MCnt", "AdrBal1in1BCnt", "AdrBal1in1KCnt", 
    "AdrBal1in1MCnt", "AdrBalCnt", "AdrBalNtv0.001Cnt", "AdrBalNtv0.01Cnt", 
    "AdrBalNtv0.1Cnt", "AdrBalNtv100Cnt", "AdrBalNtv100KCnt", "AdrBalNtv10Cnt", 
    "AdrBalNtv10KCnt", "AdrBalNtv1Cnt", "AdrBalNtv1KCnt", "AdrBalNtv1MCnt", 
    "AdrBalUSD100Cnt", "AdrBalUSD100KCnt", "AdrBalUSD10Cnt", "AdrBalUSD10KCnt", 
    "AdrBalUSD10MCnt", "AdrBalUSD1Cnt", "AdrBalUSD1KCnt", "AdrBalUSD1MCnt", 
    "AssetEODCompletionTime", "BlkCnt", "BlkSizeMeanByte", "BlkWghtMean", 
    "BlkWghtTot", "CapAct1yrUSD", "CapMVRVCur", "CapMVRVFF", "CapMrktCurUSD", "CapMrktFFUSD", "CapRealUSD", "DiffLast", "DiffMean", 
    "FeeByteMeanNtv", "FeeMeanNtv", "FeeMeanUSD", "FeeMedNtv", "FeeMedUSD", 
    "FeeTotNtv", "FeeTotUSD", "FlowInExNtv", "FlowInExUSD", "FlowOutExNtv", 
    "FlowOutExUSD", "FlowTfrFromExCnt", "HashRate", "HashRate30d", "IssContNtv", 
    "IssContPctAnn", "IssContPctDay", "IssContUSD", "IssTotNtv", "IssTotUSD", 
    "NDF", "NVTAdj", "NVTAdj90", "NVTAdjFF", "NVTAdjFF90", "PriceBTC", "PriceUSD", 
    "ROI1yr", "ROI30d", "ReferenceRate", "ReferenceRateETH", "ReferenceRateEUR", 
    "ReferenceRateUSD", "RevAllTimeUSD", "RevHashNtv", "RevHashRateNtv", 
    "RevHashRateUSD", "RevHashUSD", "RevNtv", "RevUSD", "SER", "SplyAct10yr", 
    "SplyAct180d", "SplyAct1d", "SplyAct1yr", "SplyAct2yr", "SplyAct30d", 
    "SplyAct3yr", "SplyAct4yr", "SplyAct5yr", "SplyAct7d", "SplyAct90d", 
    "SplyActEver", "SplyActPct1yr", "SplyAdrBal1in100K", "SplyAdrBal1in100M", 
    "SplyAdrBal1in10B", "SplyAdrBal1in10K", "SplyAdrBal1in10M", "SplyAdrBal1in1B", 
    "SplyAdrBal1in1K", "SplyAdrBal1in1M", "SplyAdrBalNtv0.001", "SplyAdrBalNtv0.01", 
    "SplyAdrBalNtv0.1", "SplyAdrBalNtv1", "SplyAdrBalNtv10", "SplyAdrBalNtv100", 
    "SplyAdrBalNtv100K", "SplyAdrBalNtv10K", "SplyAdrBalNtv1K", "SplyAdrBalNtv1M", 
    "SplyAdrBalUSD1", "SplyAdrBalUSD10", "SplyAdrBalUSD100", "SplyAdrBalUSD100K", 
    "SplyAdrBalUSD10K", "SplyAdrBalUSD10M", "SplyAdrBalUSD1K", "SplyAdrBalUSD1M", "SplyAdrTop100", "SplyAdrTop10Pct", "SplyAdrTop1Pct",
    "SplyCur", "SplyExpFut10yr", "SplyFF", "SplyMiner0HopAllUSD", "SplyMiner1HopAllNtv", "SplyMiner1HopAllUSD",
    "TxCnt", "TxCntSec", "TxTfrCnt", "TxTfrValAdjNtv", "TxTfrValAdjUSD", "TxTfrValMeanNtv",
    "TxTfrValMeanUSD", "TxTfrValMedNtv", "TxTfrValMedUSD", "VelCur1yr", "VtyDayRet180d",
    "VtyDayRet30d"
]

In [8]:
# Query the API for the daily metrics listed in `metrics` and load them into a
# DataFrame. datetime, timedelta, logging, pd and timezone_conv all come from
# the "Load libraries" cell, so nothing is re-imported here.
frequency = "1d"
start_time = "2018-01-01"
# End the window one day after the current US/Pacific date so the most recent
# daily row is not cut off.
end_time = (datetime.now(timezone_conv('US/Pacific')) + timedelta(days=1)).strftime('%Y-%m-%d')

logging.info("Getting prices...")
df = client.get_asset_metrics(
    assets=assets,  # use the configured asset list instead of a hard-coded 'btc'
    metrics=metrics,
    frequency=frequency,
    start_time=start_time,
    end_time=end_time
).to_dataframe()

# Assign datatypes
df["time"] = pd.to_datetime(df["time"])

# Only touch metrics the API actually returned; a requested metric with no
# column would otherwise raise KeyError in the loop and in the pivot below.
returned_metrics = [metric for metric in metrics if metric in df.columns]
missing_metrics = [metric for metric in metrics if metric not in df.columns]
if missing_metrics:
    logging.warning("Metrics not returned by the API: %s", missing_metrics)

for metric in returned_metrics:
    # Columns that arrive as strings are converted to numbers; values that
    # cannot be parsed become NaN.
    if df[metric].dtype == 'object':
        df[metric] = pd.to_numeric(df[metric], errors='coerce')

# Reshape so rows are dates and columns are (metric, asset) pairs.
df_pivot = df.pivot(
    index="time",
    columns="asset",
    values=returned_metrics
)

2024-02-11 11:16:18 INFO     Getting prices...
2024-02-11 11:16:26 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():


In [9]:
# Build df_final as a new frame derived from df_pivot, leaving df_pivot itself
# untouched. The previous version rebound df_pivot and aliased it as df_final,
# so re-running the cell failed in droplevel('asset').
#   1. drop the 'asset' level so the columns are plain metric names
#   2. move the 'time' index back into an ordinary first column
df_final = df_pivot.droplevel('asset', axis=1).reset_index()

# Keep only the calendar date of each timestamp.
df_final['time'] = df_final['time'].dt.date

# The frame now has single-level columns and a default integer row index.
df_final.head()

Unnamed: 0,time,AdrActCnt,AdrBal1in100KCnt,AdrBal1in100MCnt,AdrBal1in10BCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1BCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,...,TxTfrCnt,TxTfrValAdjNtv,TxTfrValAdjUSD,TxTfrValMeanNtv,TxTfrValMeanUSD,TxTfrValMedNtv,TxTfrValMedUSD,VelCur1yr,VtyDayRet180d,VtyDayRet30d
0,2018-01-01,972783,9151,1870827,10968209,867,449960,5134311,64,109174,...,677625,336097.721957,4525439405.811561,1.4372,19351.39431,0.01739,234.150326,37.984407,0.055074,0.076801
1,2018-01-02,1142721,9157,1879893,11021304,864,450926,5168246,64,108934,...,960051,403034.430288,5946499844.124645,1.62884,24032.424689,0.026026,383.99599,38.001934,0.055408,0.078283
2,2018-01-03,1130917,9141,1898856,11200226,863,452150,5268696,64,108938,...,1049679,375339.408088,5633951922.35587,1.297389,19474.186072,0.032746,491.533735,37.9832,0.055275,0.078195
3,2018-01-04,1253986,9128,1937852,11433416,863,455151,5409471,64,108986,...,1191394,489404.10353,7375467052.299311,1.241063,18703.185575,0.0333,501.841017,37.96554,0.055268,0.07819
4,2018-01-05,1069525,9145,1958755,11620327,856,457654,5515627,67,108995,...,1002989,519845.721221,8835935940.63457,1.669385,28374.91674,0.031467,534.853625,37.928875,0.055835,0.074672


In [10]:
# Per-column flag: True when the column holds at least one missing value.
empty_values = df_final.isnull().any(axis=0)

# Print only the columns whose flag is True.
print("Columns with empty values:")
print(empty_values.loc[empty_values])


Columns with empty values:
AdrActCnt           True
AdrBal1in100KCnt    True
AdrBal1in100MCnt    True
AdrBal1in10BCnt     True
AdrBal1in10KCnt     True
                    ... 
TxTfrValMedNtv      True
TxTfrValMedUSD      True
VelCur1yr           True
VtyDayRet180d       True
VtyDayRet30d        True
Length: 138, dtype: bool


In [11]:
# Row count, then column count, one per line.
print(*df_final.shape, sep="\n")


2233
143


In [12]:
import pandas as pd

# df_final is the metrics DataFrame built in the cells above.

# Function to find and display duplicate column names
def show_duplicate_column_names(df):
    """Print the column labels of ``df`` that repeat an earlier label.

    Prints the repeated labels, or a message saying there are none.
    Returns nothing.
    """
    repeated = df.columns[df.columns.duplicated()]
    if repeated.empty:
        print("No duplicate column names.")
        return
    print("Duplicate column names:")
    print(repeated)

# Report any repeated column names in df_final (the helper only prints; it returns nothing).
show_duplicate_column_names(df_final)


No duplicate column names.


In [13]:
import pandas as pd

# df_final is the metrics DataFrame built in the cells above.

# Function to check for duplicate column names in a DataFrame
def check_duplicate_column_names(df):
    """Report whether ``df`` has any repeated column labels.

    Args:
        df: DataFrame whose column labels are inspected.

    Returns:
        True if at least one column label occurs more than once, else False.
        A one-line summary is printed in either case.
    """
    # Inspect the frame that was passed in; the earlier version read the
    # global df_final and ignored its argument.
    if df.columns.duplicated().any():
        print("Duplicate column names found!")
        return True
    print("No duplicate column names.")
    return False

# Write df_final to 'btc-causal.parquet' only when the duplicate-column check
# returns False. At this point 'time' is still an ordinary column.
if not check_duplicate_column_names(df_final):
    df_final.to_parquet('btc-causal.parquet')


No duplicate column names.


In [14]:
# Use 'time' as the row index. The guard keeps the cell re-runnable: once
# 'time' has become the index it is no longer a column, and calling
# set_index('time') again would raise KeyError.
if 'time' in df_final.columns:
    df_final = df_final.set_index('time')
df_final

Unnamed: 0_level_0,AdrActCnt,AdrBal1in100KCnt,AdrBal1in100MCnt,AdrBal1in10BCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1BCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,AdrBalCnt,...,TxTfrCnt,TxTfrValAdjNtv,TxTfrValAdjUSD,TxTfrValMeanNtv,TxTfrValMeanUSD,TxTfrValMedNtv,TxTfrValMedUSD,VelCur1yr,VtyDayRet180d,VtyDayRet30d
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01,972783,9151,1870827,10968209,867,449960,5134311,64,109174,26829117,...,677625,336097.721957,4525439405.811561,1.4372,19351.39431,0.01739,234.150326,37.984407,0.055074,0.076801
2018-01-02,1142721,9157,1879893,11021304,864,450926,5168246,64,108934,26909375,...,960051,403034.430288,5946499844.124645,1.62884,24032.424689,0.026026,383.99599,38.001934,0.055408,0.078283
2018-01-03,1130917,9141,1898856,11200226,863,452150,5268696,64,108938,27133378,...,1049679,375339.408088,5633951922.35587,1.297389,19474.186072,0.032746,491.533735,37.9832,0.055275,0.078195
2018-01-04,1253986,9128,1937852,11433416,863,455151,5409471,64,108986,27415056,...,1191394,489404.10353,7375467052.299311,1.241063,18703.185575,0.0333,501.841017,37.96554,0.055268,0.07819
2018-01-05,1069525,9145,1958755,11620327,856,457654,5515627,67,108995,27645350,...,1002989,519845.721221,8835935940.634569,1.669385,28374.91674,0.031467,534.853625,37.928875,0.055835,0.074672
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-07,901234,8844,3059755,20554639,990,551267,9543529,42,100229,51881194,...,1074090,139387.902644,6168337242.687056,0.261107,11554.756656,0.000227,10.055174,6.067797,0.022458,0.023541
2024-02-08,818130,8843,3058487,20548282,990,550946,9537848,42,100191,51868849,...,834120,166778.844208,7560433837.955264,0.412595,18703.80032,0.001122,50.87666,6.071094,0.022517,0.023773
2024-02-09,950564,8835,3057719,20499575,994,550973,9528617,42,100151,51776443,...,911134,169857.749981,8013373382.966112,0.408913,19291.290897,0.001079,50.924233,6.070679,0.022683,0.024732
2024-02-10,963958,8834,3057293,20459487,994,550775,9524914,42,100158,51730176,...,989395,79932.409212,3820201417.662771,0.185591,8869.954815,0.000636,30.381945,6.029585,0.022696,0.024768


In [15]:
# Persist the frame, now indexed by 'time' in the preceding cell, to 'coin-metrics.parquet'.
df_final.to_parquet('coin-metrics.parquet')