# ETH Metrics

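## Spec

Fetch daily Ethereum (ETH) metrics from the Coin Metrics API starting 2019-01-01, reshape them into one row per date, and save the result to `coin-metrics.parquet`.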

## Load libraries

In [1]:
from os import environ
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import logging
from datetime import date, datetime, timedelta
from coinmetrics.api_client import CoinMetricsClient
import json
import logging
from pytz import timezone as timezone_conv
from datetime import timezone as timezone_info

import matplotlib.pyplot as plt
%matplotlib inline

## API + Config

In [2]:
# Root logger setup: INFO level, "<timestamp> <padded level> <message>" lines.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

In [3]:
from coinmetrics.api_client import CoinMetricsClient

# Community API client: constructed without an API key.
# NOTE: the first positional argument of CoinMetricsClient is an API key, not a
# base URL, so the endpoint URL must not be passed here.
client = CoinMetricsClient()

In [4]:
assets = ['eth']
# Lookup from list position to asset id.
asset_mapping = dict(enumerate(assets))
print(asset_mapping)

{0: 'eth'}


## Load assets 

In [5]:
# Fetch catalog metadata for the ids in `assets`; the entries are iterated in the next cell.
asset_catalog = client.catalog_assets(assets=assets)
# NOTE(review): full_asset_catalog is not referenced by any visible cell — confirm it is still needed.
full_asset_catalog = client.catalog_full_assets(assets=assets)

In [6]:
print("*** catalog endpoint ***")

# Find the first catalog entry whose asset id is 'eth' (any letter case) ...
eth_entry = next(
    (entry for entry in asset_catalog if entry['asset'].lower() == 'eth'),
    None,
)

# ... and gather the names of every metric it advertises (empty if no entry matched).
metrics = [] if eth_entry is None else [info['metric'] for info in eth_entry['metrics']]

*** catalog endpoint ***


In [7]:
# Explicit list of metric identifiers requested from the API in the query cell.
# This assignment replaces the `metrics` list collected from the catalog above.
metrics = [
    "AdrBalNtv0.01Cnt",
    "AdrBalNtv0.1Cnt",
    "AdrBalNtv1Cnt",
    "AdrBalNtv10Cnt",
    "BlkSizeMeanByte",
    "CapRealUSD",
    "FeeByteMeanNtv",
    "FlowInExNtv",
    "FlowOutExNtv",
    "FlowTfrFromExCnt",
    "GasUsedTxMean",
    "NDF",
    "SplyAct1d",
    "SplyActPct1yr",
    "TxCnt",
    "VelCur1yr",
]


In [8]:
# Query the API for daily asset metrics and load them into a DataFrame
from datetime import datetime, timedelta
import logging
import pandas as pd
import pytz


frequency = "1d"
start_time = "2019-01-01"
# End date is tomorrow's calendar date as seen in the US/Pacific timezone.
end_time = (datetime.now(pytz.timezone('US/Pacific')) + timedelta(days=1)).strftime('%Y-%m-%d')

logging.info("Getting asset metrics...")
df = client.get_asset_metrics(
    assets=assets,  # same asset list that was used for the catalog lookup
    metrics=metrics,
    frequency=frequency,
    start_time=start_time,
    end_time=end_time
).to_dataframe()

# Fail with a readable message if the response lacks any requested metric,
# instead of a bare KeyError from the dtype loop or the pivot below.
missing_metrics = [metric for metric in metrics if metric not in df.columns]
if missing_metrics:
    raise KeyError(f"Requested metrics missing from API response: {missing_metrics}")

# Assign datatypes: parse timestamps and coerce any string-typed metric column
# to numbers (values that cannot be parsed become NaN).
df["time"] = pd.to_datetime(df["time"])
for metric in metrics:
    if df[metric].dtype == 'object':
        df[metric] = pd.to_numeric(df[metric], errors='coerce')

# Reshape: one row per timestamp, columns keyed by (metric, asset), cells hold the metric values
df_pivot = df.pivot(
    index="time",
    columns="asset",
    values=metrics
)

2024-02-06 21:10:17 INFO     Getting prices...
2024-02-06 21:10:21 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():


In [9]:
# Derive the flat frame from df_pivot without modifying df_pivot itself, so this
# cell gives the same result every time it is re-run: drop the 'asset' level of
# the column index, then turn the 'time' index into an ordinary first column.
df_final = df_pivot.droplevel('asset', axis=1).reset_index()

# Keep only the calendar date of each timestamp
df_final['time'] = df_final['time'].dt.date

# Preview: single-level column index with 'time' as a regular column
df_final.head()

Unnamed: 0,time,AdrBalNtv0.01Cnt,AdrBalNtv0.1Cnt,AdrBalNtv1Cnt,AdrBalNtv10Cnt,BlkSizeMeanByte,CapRealUSD,FeeByteMeanNtv,FlowInExNtv,FlowOutExNtv,FlowTfrFromExCnt,GasUsedTxMean,NDF,SplyAct1d,SplyActPct1yr,TxCnt,VelCur1yr
0,2019-01-01,5725623,2553149,948660,240984,13665.922841,28877687727.31675,4e-06,392978.197625,419726.133113,15602,73640.350123,0.635803,11221087.882036,72.295246,448168,11.758691
1,2019-01-02,5729156,2554121,948649,241344,18126.132355,28990812437.411285,3e-06,790231.396805,729568.379931,21191,60064.995705,0.636376,10343209.345505,72.264375,589959,11.643999
2,2019-01-03,5733108,2555952,948798,240985,19342.790323,28981719897.825188,3e-06,654169.139428,650444.543236,20336,56967.882419,0.636168,9498193.91421,72.221087,596620,11.529535
3,2019-01-04,5732036,2553742,948728,240912,18872.124595,29094778907.793,3e-06,680241.364599,1184203.323531,21142,61527.769639,0.636041,14212229.620577,72.193628,549398,11.365511
4,2019-01-05,5730961,2549563,944626,240101,17351.082028,29046828453.278557,5e-06,631193.304059,1490287.90147,17419,63305.365839,0.637131,12886643.253894,72.080216,511189,11.22179


In [10]:
# Per-column flag: True where the column holds at least one missing value
empty_values = df_final.isnull().any(axis=0)

# List only the flagged columns (an empty Series means nothing is missing)
print("Columns with empty values:", empty_values.loc[empty_values], sep="\n")


Columns with empty values:
Series([], dtype: bool)


In [11]:
# Row count, then column count, one per line
print(*df_final.shape, sep="\n")


1863
17


In [12]:
import pandas as pd

# Assuming df_final is your DataFrame
# df_final = pd.read_csv('your_file.csv')  # or however you are getting your DataFrame

# Function to find and display duplicate column names
def show_duplicate_column_names(df):
    """Print the repeated column labels of ``df``, or a notice that none exist.

    Args:
        df: DataFrame whose column labels are inspected.

    Returns:
        None. The report is written to standard output.
    """
    labels = df.columns
    # duplicated() marks every occurrence of a label after its first one
    repeated = labels[labels.duplicated()]
    if len(repeated) == 0:
        print("No duplicate column names.")
        return
    print("Duplicate column names:")
    print(repeated)

# Print any repeated column labels found in df_final (the function returns nothing)
show_duplicate_column_names(df_final)


No duplicate column names.


In [13]:
import pandas as pd

# Assuming df_final is your DataFrame
# df_final = pd.read_csv('your_file.csv')  # or however you are getting your DataFrame

# Function to check for duplicate column names in a DataFrame
def check_duplicate_column_names(df):
    """Report whether ``df`` has repeated column labels.

    Prints a one-line status message and returns the same verdict.

    Args:
        df: DataFrame whose column labels are inspected.

    Returns:
        True if at least one column label occurs more than once, otherwise False.
    """
    # Inspect the argument that was passed in, not the notebook-level df_final
    if df.columns.duplicated().any():
        print("Duplicate column names found!")
        return True
    else:
        print("No duplicate column names.")
        return False

# Check for duplicate column names before saving to Parquet
#if not check_duplicate_column_names(df_final):
#    df_final.to_parquet('ETH-causal.parquet')


In [14]:
# Use the date column as the row index. Guarded and not in-place, so re-running
# this cell does not raise KeyError once 'time' has already become the index.
if 'time' in df_final.columns:
    df_final = df_final.set_index('time')
df_final

Unnamed: 0_level_0,AdrBalNtv0.01Cnt,AdrBalNtv0.1Cnt,AdrBalNtv1Cnt,AdrBalNtv10Cnt,BlkSizeMeanByte,CapRealUSD,FeeByteMeanNtv,FlowInExNtv,FlowOutExNtv,FlowTfrFromExCnt,GasUsedTxMean,NDF,SplyAct1d,SplyActPct1yr,TxCnt,VelCur1yr
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-01-01,5725623,2553149,948660,240984,13665.922841,28877687727.31675,0.000004,392978.197625,419726.133113,15602,73640.350123,0.635803,11221087.882036,72.295246,448168,11.758691
2019-01-02,5729156,2554121,948649,241344,18126.132355,28990812437.411285,0.000003,790231.396805,729568.379931,21191,60064.995705,0.636376,10343209.345505,72.264375,589959,11.643999
2019-01-03,5733108,2555952,948798,240985,19342.790323,28981719897.825188,0.000003,654169.139428,650444.543236,20336,56967.882419,0.636168,9498193.91421,72.221087,596620,11.529535
2019-01-04,5732036,2553742,948728,240912,18872.124595,29094778907.792999,0.000003,680241.364599,1184203.323531,21142,61527.769639,0.636041,14212229.620577,72.193628,549398,11.365511
2019-01-05,5730961,2549563,944626,240101,17351.082028,29046828453.278553,0.000005,631193.304059,1490287.90147,17419,63305.365839,0.637131,12886643.253894,72.080216,511189,11.22179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-02,24732407,5195599,1729374,344511,149793.691422,166740016751.461548,0.000003,224978.990905,335365.852424,103742,97339.905076,0.677616,12488463.874684,35.971409,1108172,6.045875
2024-02-03,24733683,5185916,1728662,344395,153626.986371,166579820212.011688,0.000002,88094.093613,110625.486725,93780,102296.542796,0.677607,11762353.905596,35.937981,1054604,6.036129
2024-02-04,24723204,5169224,1728661,344281,153334.124877,166467916361.649323,0.000002,155456.643029,152343.252309,89438,102710.415043,0.678024,9964788.28142,35.919804,1049649,6.033078
2024-02-05,24713162,5146142,1728854,344070,161689.073003,166403323892.444855,0.000002,230617.498979,369227.537384,95967,96329.680096,0.677587,11357696.26874,35.852078,1120237,6.044788


In [15]:
# Write the date-indexed metrics table to a Parquet file; the path is relative to the working directory.
df_final.to_parquet('coin-metrics.parquet')