# Project Inscriptions -- Exploratory Data Analysis

**[Johnnatan Messias](https://johnnatan-messias.github.io) and [Krzysztof Gogol](https://www.linkedin.com/in/krzysztofgogol), February 2025**


In [None]:
import polars as pl
import pandas as pd

In [None]:
import sys
import os
# Make the project's local modules under ../src importable from this notebook.
code_dir = os.path.realpath(os.path.join(os.getcwd(), "..", "src"))
# Guard the append so re-running this cell does not stack duplicate
# entries for the same directory on sys.path.
if code_dir not in sys.path:
    sys.path.append(code_dir)

In [3]:
# Dataset and plot directories, resolved one level above the working directory
data_dir = os.path.realpath(os.path.join(os.getcwd(), os.pardir, "data"))
plots_dir = os.path.realpath(os.path.join(os.getcwd(), os.pardir, "plots"))

# Create both directories if they are missing (no-op when they already exist)
os.makedirs(data_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)

## Exploratory Data Analysis


In [4]:
# Chains covered by the analysis.
# 'bsc' and 'fantom' are currently disabled and therefore not listed.
chains = ['arbitrum', 'base', 'ethereum', 'optimism', 'zksync']

# Single-column Polars DataFrame holding the selected chain names
chains_df = pl.DataFrame({"chain": chains})

# Show the selection
print(chains_df)

shape: (5, 1)
┌──────────┐
│ chain    │
│ ---      │
│ str      │
╞══════════╡
│ arbitrum │
│ base     │
│ ethereum │
│ optimism │
│ zksync   │
└──────────┘


In [None]:
def process_inscriptions(chain):
    """Summarise the inscription transactions stored for one chain.

    Reads ``<data_dir>/chains/<chain>_inscriptions.parquet`` and computes
    headline statistics from its ``tx_hash``, ``issuer``, ``block_number``
    and ``timestamp`` columns.

    Parameters
    ----------
    chain : str
        Chain name used as the prefix of the parquet file name.

    Returns
    -------
    dict
        - ``minTimestamp`` / ``maxTimestamp``: smallest / largest ``timestamp``
        - ``issuer``: number of distinct ``issuer`` values
        - ``block_numbers``: number of distinct ``block_number`` values
        - ``n_transactions``: number of distinct ``tx_hash`` values
        - ``avgInBlock``: ``n_transactions / block_numbers`` (NaN when the
          file contains no blocks)
        - ``minBlock`` / ``maxBlock``: smallest / largest ``block_number``
    """
    file_dir = os.path.realpath(os.path.join(
        data_dir, "chains", chain + "_inscriptions.parquet"))
    # Select the four columns used below on the lazy scan, so only those
    # columns have to be read from the parquet file when collecting.
    inscriptions_df = (pl.scan_parquet(file_dir)
                       .select(['tx_hash', 'issuer', 'block_number', 'timestamp'])
                       .collect(new_streaming=True))

    n_txs = inscriptions_df['tx_hash'].n_unique()
    issuer = inscriptions_df['issuer'].n_unique()
    block_numbers = inscriptions_df['block_number'].n_unique()
    # An empty file has zero distinct blocks; report NaN rather than raising
    # ZeroDivisionError and aborting the per-chain loop.
    avgInBlock = n_txs / block_numbers if block_numbers else float('nan')
    minBlock = inscriptions_df['block_number'].min()
    maxBlock = inscriptions_df['block_number'].max()
    minTimestamp = inscriptions_df['timestamp'].min()
    maxTimestamp = inscriptions_df['timestamp'].max()

    return {
        'minTimestamp': minTimestamp,
        'maxTimestamp': maxTimestamp,
        'issuer': issuer,
        'block_numbers': block_numbers,
        'n_transactions': n_txs,
        'avgInBlock': avgInBlock,
        'minBlock': minBlock,
        'maxBlock': maxBlock
    }

In [6]:
# Summary statistics for every chain, keyed by chain name
chain_data = {chain: process_inscriptions(chain) for chain in chains}

# Show the summary as a table with one row per chain
pd.DataFrame(chain_data).transpose()

Unnamed: 0,minTimestamp,maxTimestamp,issuer,block_numbers,n_transactions,avgInBlock,minBlock,maxBlock
arbitrum,2023-06-17 12:06:10,2024-04-30 22:05:02,118544,3575299,16309035,4.561586,102082055,206512846
base,2023-07-28 12:14:03,2024-04-30 23:29:09,79573,780770,2020661,2.588036,1878548,13865201
ethereum,2023-06-14 15:04:35,2024-04-30 23:59:47,245008,930824,6493580,6.976163,17478950,19771558
optimism,2023-06-18 09:46:59,2024-04-30 19:13:29,49112,588053,1475663,2.509405,105741421,119452816
zksync,2023-06-18 02:04:06,2024-04-30 23:59:59,481687,2809054,17161306,6.109283,6332862,32843524


In [None]:
def process_issuerss(chain):
    """Describe how many inscription rows each issuer has on one chain.

    Reads ``<data_dir>/chains/<chain>_inscriptions.parquet``, counts the rows
    per ``issuer`` and summarises the distribution of those counts.

    Parameters
    ----------
    chain : str
        Chain name used as the prefix of the parquet file name.

    Returns
    -------
    dict
        ``average_values``, ``std_values``, ``median``, ``min_values`` and
        ``max_values`` of the per-issuer row counts.
    """
    file_dir = os.path.realpath(os.path.join(
        data_dir, "chains", chain + "_inscriptions.parquet"))
    # Only the issuer column is needed to count rows per issuer.
    inscriptions_df = (pl.scan_parquet(file_dir)
                       .select('issuer')
                       .collect(new_streaming=True))

    # Take the count column by name, so the statistics neither depend on the
    # column order of the grouped frame nor aggregate the issuer strings.
    rows_per_issuer = inscriptions_df.group_by('issuer').len()['len']

    return {
        'average_values': rows_per_issuer.mean(),
        'std_values': rows_per_issuer.std(),
        'median': rows_per_issuer.median(),
        'min_values': rows_per_issuer.min(),
        'max_values': rows_per_issuer.max(),
    }

In [31]:
# Per-issuer activity statistics for every chain, keyed by chain name
issuer_data = {chain: process_issuerss(chain) for chain in chains}

# Show the statistics as a table with one row per chain
pd.DataFrame(issuer_data).transpose()

Unnamed: 0,average_values,std_values,median,min_values,max_values
arbitrum,137.577904,676.765659,6.0,1.0,38050.0
base,25.393802,154.910133,3.0,1.0,19674.0
ethereum,26.503543,245.605462,3.0,1.0,67713.0
optimism,30.046893,168.050358,3.0,1.0,19612.0
zksync,35.627505,245.345834,3.0,1.0,40770.0


In [67]:
def process_top_minters(chain, k=3):
    """Return the ``k`` largest (protocol, tick, issuer) mint counts of a chain.

    Reads ``<data_dir>/chains/<chain>_inscriptions.parquet``, keeps the rows
    whose ``decoded_input_data`` starts with ``data:,{``, extracts the ``p``,
    ``op`` and ``tick`` fields from the JSON text that follows the ``data:,``
    prefix, and counts the rows with ``op == "mint"`` per
    ``(p, tick, issuer)``.

    Parameters
    ----------
    chain : str
        Chain name used as the prefix of the parquet file name.
    k : int, default 3
        Number of rows to return.

    Returns
    -------
    polars.DataFrame
        Columns ``p``, ``tick``, ``issuer`` and ``len`` (the mint count),
        ordered by ``len`` descending.
    """
    file_dir = os.path.realpath(os.path.join(
        data_dir, "chains", chain + "_inscriptions.parquet"))
    # Only these two columns are used below, so only they are collected.
    inscriptions_df = (pl.scan_parquet(file_dir)
                       .select(['issuer', 'decoded_input_data'])
                       .collect(new_streaming=True))

    prefix = 'data:,'
    payload = pl.col('decoded_input_data')
    mints = (inscriptions_df
             .filter(payload.str.starts_with(prefix + '{'))
             # Drop the prefix so that the remaining text starts at the '{'.
             .with_columns(payload.str.slice(len(prefix)).alias('decoded_input_data'))
             # Only the fields used for grouping and filtering are extracted.
             .select([
                 'issuer',
                 payload.str.json_path_match(r"$.p").alias('p'),
                 payload.str.json_path_match(r"$.op").alias('op'),
                 payload.str.json_path_match(r"$.tick").alias('tick'),
             ])
             .filter(pl.col("op").eq("mint")))

    top_minters = (mints
                   .group_by(['p', 'tick', 'issuer'])
                   .agg(pl.len())
                   # top_k does not guarantee the order of its output, so the
                   # sort is applied afterwards (and only to k rows).
                   .top_k(k, by='len')
                   .sort(by='len', descending=True)
                   )

    return top_minters

In [68]:
# Print the top-minters table of every chain, in the order given by `chains`
for chain in chains:
    print(process_top_minters(chain))

shape: (3, 4)
┌─────────┬──────┬─────────────────────────────────┬───────┐
│ p       ┆ tick ┆ issuer                          ┆ len   │
│ ---     ┆ ---  ┆ ---                             ┆ ---   │
│ str     ┆ str  ┆ str                             ┆ u32   │
╞═════════╪══════╪═════════════════════════════════╪═══════╡
│ fair-20 ┆ fair ┆ 0xd27845b25c9bb7f394e410170bed… ┆ 34698 │
│ fair-20 ┆ fair ┆ 0x472491a68bce7221bf765d834935… ┆ 31552 │
│ fair-20 ┆ fair ┆ 0xb2c107418b5f969b8a8f5a79891b… ┆ 30608 │
└─────────┴──────┴─────────────────────────────────┴───────┘
shape: (3, 4)
┌───────────┬──────┬─────────────────────────────────┬───────┐
│ p         ┆ tick ┆ issuer                          ┆ len   │
│ ---       ┆ ---  ┆ ---                             ┆ ---   │
│ str       ┆ str  ┆ str                             ┆ u32   │
╞═══════════╪══════╪═════════════════════════════════╪═══════╡
│ layer2-20 ┆ $L2  ┆ 0x0a88bc5c32b684d467b43c06d9e0… ┆ 19674 │
│ basc-20   ┆ basc ┆ 0xfc3bdd4ec9f4760c06cb52