# Project Inscriptions -- Exploratory Data Analysis

**[Krzysztof Gogol](https://www.linkedin.com/in/krzysztofgogol), February 2025**


In [1]:
import pandas as pd

In [2]:
import os
import sys
# Put the sibling ../src directory on the import path so that project-local
# helper modules can be imported by the cells below.
src_path = os.path.join(os.getcwd(), "..", "src")
code_dir = os.path.realpath(src_path)
sys.path.append(code_dir)

In [3]:
from plot_utils import get_plotly_layout
from plot_utils import colors
# Figure size passed to get_plotly_layout() by every plotting cell.
width = 800
height = 450

In [4]:
# Resolve the ../data and ../plots directories relative to the current
# working directory.
parent_dir = os.path.join(os.getcwd(), "..")
data_dir = os.path.realpath(os.path.join(parent_dir, "data"))
plots_dir = os.path.realpath(os.path.join(parent_dir, "plots"))

# Create both directories if they are missing; existing ones are left as-is.
for required_dir in (data_dir, plots_dir):
    os.makedirs(required_dir, exist_ok=True)

In [5]:
import plotly.graph_objects as go
import plotly.io as pio
# Turn off MathJax on Kaleido's static-image export scope (used by fig.write_image).
# NOTE(review): presumably the usual workaround for the "Loading [MathJax]..." box
# that can get stamped into exported PDFs -- confirm it is still needed with the
# installed plotly/kaleido versions.
pio.kaleido.scope.mathjax = None

## Exploratory Data Analysis


In [6]:
# Per-chain trace styling, keyed by the chain's `origin_key`.
# Each entry supplies the line colour, legend label, dash style, line width
# and marker symbol that the plotting cells read when adding a trace.
plot_settings = {
    'arbitrum': dict(
        color=colors['blue'], label='Arbitrum',
        style='solid', width=4, marker_symbol='circle',
    ),
    'base': dict(
        color=colors['red'], label='Base',
        style='dash', width=3.5, marker_symbol='diamond',
    ),
    'ethereum': dict(
        color=colors['green'], label='Ethereum',
        style='dot', width=3, marker_symbol='square',
    ),
    'optimism': dict(
        color=colors['grey'], label='Optimism',
        style='dashdot', width=2.5, marker_symbol='triangle-up',
    ),
    'zksync_era': dict(
        color=colors['pink'], label='zkSync',
        style='solid', width=2, marker_symbol='cross',
    ),
    'fantom': dict(
        color=colors['brown'], label='Fantom',
        style='dash', width=1.5, marker_symbol='star',
    ),
}

In [7]:
import requests as rq

# Download the growthepie "fundamentals_full" export and load it into a
# long-format DataFrame. The cells below rely on the columns `metric_key`,
# `origin_key` (chain), `date` and `value`.
url = 'https://api.growthepie.xyz/v1/fundamentals_full.json'

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as data.
response = rq.get(url, timeout=60)
response.raise_for_status()
df = pd.DataFrame(response.json())

# Quick summary of what was downloaded.
print("There are {} data points".format(df.shape[0]))
print("The data was collected from the time period {} to {}".format(
    df['date'].min(), df['date'].max()))
print("There are {} chains".format(df['origin_key'].nunique()))
print("There are {} metrics".format(df['metric_key'].nunique()))

There are 393053 data points
The data was collected from the time period 2021-06-01 to 2025-02-25
There are 30 chains
There are 26 metrics


In [8]:
# Number of rows (data points) per chain, most frequent first.
# `origin_key` is the chain identifier; a row is one (metric, chain, date) value,
# so chains with more metrics or a longer history have more rows.
df['origin_key'].value_counts()

origin_key
arbitrum         29794
optimism         29438
metis            26059
starknet         24548
loopring         22239
polygon_zkevm    20515
imx              18835
zksync_era       16395
ethereum         15212
zora             14335
base             14071
linea            13841
rhino            13743
manta            13377
mantle           13260
scroll           12796
blast            11388
mode             10856
derive            9996
orderly           9957
fraxtal           9522
taiko             7398
worldchain        6359
mint              6337
redstone          6000
gravity           5988
real              5904
ink               1733
soneium           1709
swell             1448
Name: count, dtype: int64

In [9]:
# Number of rows (data points) per metric across all chains, most frequent first.
# The counts differ between metrics, so not every metric has the same coverage.
df['metric_key'].value_counts()

metric_key
stables_mcap          24330
stables_mcap_eth      24330
tvl                   19100
tvl_eth               19100
daa                   18940
txcount               18898
costs_l1_eth          18268
costs_l1_usd          18268
costs_total_eth       18268
costs_total_usd       18268
fees_paid_eth         15254
fees_paid_usd         15254
txcosts_median_eth    15186
txcosts_median_usd    15186
rent_paid_eth         14876
rent_paid_usd         14876
gas_per_second        12704
aa_last7d             12453
market_cap_usd        12123
market_cap_eth        12123
profit_usd            11004
profit_eth            11004
fdv_usd               10941
fdv_eth               10941
costs_blobs_eth        5679
costs_blobs_usd        5679
Name: count, dtype: int64

In [10]:
# Chains compared in the plots; these are `origin_key` values and also the keys
# of `plot_settings`. This list is reused by the transaction-cost cell.
chains = ['arbitrum', 'base', 'ethereum', 'optimism', 'zksync_era']

# Transaction-count rows ('txcount' metric) for each chain over the whole
# available history (no date filter), sorted by date.
is_txcount = df['metric_key'] == 'txcount'

tx_count_data = {}  # chain -> DataFrame of that chain's txcount rows
for chain in chains:
    filtered_data = df.loc[is_txcount & (df['origin_key'] == chain)]
    tx_count_data[chain] = filtered_data.sort_values('date').copy()

# Spot-check the series of a single chain.
print(tx_count_data['zksync_era'])

       metric_key  origin_key        date     value
374158    txcount  zksync_era  2023-02-14       6.0
374159    txcount  zksync_era  2023-02-15      18.0
374160    txcount  zksync_era  2023-02-16     112.0
374162    txcount  zksync_era  2023-02-17      30.0
374319    txcount  zksync_era  2023-02-18       1.0
...           ...         ...         ...       ...
375479    txcount  zksync_era  2025-02-21  100200.0
375736    txcount  zksync_era  2025-02-22   63554.0
375868    txcount  zksync_era  2025-02-23   50878.0
375074    txcount  zksync_era  2025-02-24   79939.0
375320    txcount  zksync_era  2025-02-25   96333.0

[743 rows x 4 columns]


In [11]:
# Transaction count over time, one trace per chain, styled from `plot_settings`.
fig = go.Figure(layout=get_plotly_layout(height=height, width=width))

for protocol, df_data in tx_count_data.items():
    style = plot_settings[protocol]  # colour / dash / width / marker for this chain
    line_style = dict(
        color=style['color'],
        width=style['width'],
        dash=style['style'],
    )
    marker_style = dict(size=8, symbol=style['marker_symbol'])
    trace = go.Scatter(
        x=df_data['date'],
        y=df_data['value'],
        line=line_style,
        name=style['label'],
        marker=marker_style,
    )
    fig.add_trace(trace)

# Axis titles plus a horizontal legend centred just above the plot area.
fig.update_layout(
    yaxis_title="Number of transactions",
    xaxis_title="Date",
    legend=dict(xanchor='center', x=0.5, y=1.02, orientation='h'),
)

# Export a PDF copy into the plots directory, then render inline.
file_dir = os.path.realpath(
    os.path.join(plots_dir, "all-transactions-count.pdf")
)
fig.write_image(file_dir)
fig.show()

In [12]:
# Median transaction cost in USD ('txcosts_median_usd' metric) for each chain,
# restricted to a fixed date window (both end dates included) and sorted by date.
# NOTE(review): `date` appears to hold ISO 'YYYY-MM-DD' strings (see the printed
# min/max above), for which string comparison matches date order -- confirm.
window_start, window_end = '2023-11-01', '2024-03-24'
in_scope = (
    (df['metric_key'] == 'txcosts_median_usd')
    & df['date'].between(window_start, window_end)
)

tx_cost_data = {}  # chain -> DataFrame of that chain's median-cost rows
for chain in chains:
    filtered_data = df.loc[in_scope & (df['origin_key'] == chain)]
    tx_cost_data[chain] = filtered_data.sort_values('date').copy()

# Spot-check the series of a single chain.
print(tx_cost_data['arbitrum'])

                metric_key origin_key        date     value
368950  txcosts_median_usd   arbitrum  2023-11-01  0.152080
364088  txcosts_median_usd   arbitrum  2023-11-02  0.154832
368043  txcosts_median_usd   arbitrum  2023-11-03  0.131348
369239  txcosts_median_usd   arbitrum  2023-11-04  0.113501
369016  txcosts_median_usd   arbitrum  2023-11-05  0.123952
...                    ...        ...         ...       ...
369069  txcosts_median_usd   arbitrum  2024-03-20  0.009184
368976  txcosts_median_usd   arbitrum  2024-03-21  0.008960
369009  txcosts_median_usd   arbitrum  2024-03-22  0.008479
369034  txcosts_median_usd   arbitrum  2024-03-23  0.007420
368657  txcosts_median_usd   arbitrum  2024-03-24  0.007874

[145 rows x 4 columns]


In [13]:
# Median transaction fee (USD) over time, one trace per chain, styled from
# `plot_settings`.
fig = go.Figure(layout=get_plotly_layout(height=height, width=width))

for protocol, df_data in tx_cost_data.items():
    style = plot_settings[protocol]  # colour / dash / width / marker for this chain
    line_style = dict(
        color=style['color'],
        width=style['width'],
        dash=style['style'],
    )
    marker_style = dict(size=8, symbol=style['marker_symbol'])
    trace = go.Scatter(
        x=df_data['date'],
        y=df_data['value'],
        line=line_style,
        name=style['label'],
        marker=marker_style,
    )
    fig.add_trace(trace)

# Axis titles plus a horizontal legend centred just above the plot area.
fig.update_layout(
    yaxis_title="Median fees (USD)",
    xaxis_title="Date",
    legend=dict(xanchor='center', x=0.5, y=1.02, orientation='h'),
)

# Export a PDF copy into the plots directory, then render inline.
file_dir = os.path.realpath(
    os.path.join(plots_dir, "all-transactions-fees.pdf")
)
fig.write_image(file_dir)
fig.show()