In [1]:
import polars as pl
import sys

# Load the raw export. The path is relative to the current working directory.
file_path = "src/data/ethereum-supply-macro-distribution.csv"
df = pl.read_csv(file_path)

# Every column whose name contains "timestamp" (case-insensitive) is treated
# as a timestamp column.
timestamp_cols = [col for col in df.columns if 'timestamp' in col.lower()]
if not timestamp_cols:
    # Without this guard, pl.coalesce() below would be called with no
    # arguments and fail with a TypeError that does not mention the real cause.
    raise ValueError(
        f"No timestamp column found in {file_path!r}; columns are: {df.columns}"
    )

# Parse all timestamp columns in one with_columns call instead of rebuilding
# the frame once per column. strict=False turns values that do not match the
# format into nulls rather than raising.
df = df.with_columns([
    pl.col(col).str.to_datetime(format='%Y-%m-%dT%H:%M:%S%.fZ', strict=False)
    for col in timestamp_cols
])

# Collapse the timestamp columns into a single one: for each row, take the
# first non-null value in column order.
df = df.with_columns(pl.coalesce(*[pl.col(col) for col in timestamp_cols]).alias('unified_timestamp'))

# Drop the per-source timestamp columns, order chronologically, and discard
# rows for which no timestamp could be parsed.
df = df.drop(timestamp_cols).sort('unified_timestamp').filter(pl.col('unified_timestamp').is_not_null())

# Value columns kept from the raw export; each one is cast to Float64 below.
numeric_cols = [
    'ETH: Price',
    'ETH: Circulating Supply',
    'Beacon Chain Staking',
    'Smart Contracts',
    'Exchange Balances',
]

# Keep only the timestamp plus the value columns, forward-fill nulls in every
# selected column, then cast the value columns to 64-bit floats.
# NOTE(review): the forward fill presumably bridges series sampled at
# different resolutions — confirm against the source data.
df = (
    df.select(['unified_timestamp', *numeric_cols])
    .fill_null(strategy='forward')
    .with_columns([pl.col(name).cast(pl.Float64) for name in numeric_cols])
)

# Named expressions for the input series used in the share calculations.
supply = pl.col('ETH: Circulating Supply')
exchange = pl.col('Exchange Balances')
contracts = pl.col('Smart Contracts')
staking = pl.col('Beacon Chain Staking')

# Build the final frame in one select: timestamp, price and supply pass
# through unchanged; each "%" column is (series difference) * 100 / supply.
#   Exchange Balances %    = Exchange Balances
#   Smart Contracts %      = Smart Contracts - Exchange Balances
#   Beacon Chain Staking % = Beacon Chain Staking - Smart Contracts
# The subtractions presumably undo a cumulative/stacked layout in the source
# series — TODO confirm.
# NOTE(review): the saved output of this cell shows the "%" columns at around
# 1e-7, far too small for percentages. The input series may already be
# expressed as fractions of supply — confirm the units before relying on
# these values.
df = df.select([
    'unified_timestamp',
    'ETH: Price',
    'ETH: Circulating Supply',
    ((staking - contracts) * 100 / supply).alias('Beacon Chain Staking %'),
    ((contracts - exchange) * 100 / supply).alias('Smart Contracts %'),
    (exchange * 100 / supply).alias('Exchange Balances %'),
])

# Full CSV export to stdout is currently disabled; re-enable the next line to
# stream the whole frame instead of a preview.
# df.write_csv(sys.stdout)
# Print the last five rows as a quick sanity check of the result.
print(df.tail(5))

shape: (5, 6)
┌─────────────────────┬─────────────┬─────────────┬──────────────┬─────────────┬────────────┐
│ unified_timestamp   ┆ ETH: Price  ┆ ETH:        ┆ Beacon Chain ┆ Smart       ┆ Exchange   │
│ ---                 ┆ ---         ┆ Circulating ┆ Staking %    ┆ Contracts % ┆ Balances % │
│ datetime[ns]        ┆ f64         ┆ Supply      ┆ ---          ┆ ---         ┆ ---        │
│                     ┆             ┆ ---         ┆ f64          ┆ f64         ┆ f64        │
│                     ┆             ┆ f64         ┆              ┆             ┆            │
╞═════════════════════╪═════════════╪═════════════╪══════════════╪═════════════╪════════════╡
│ 2024-08-01 15:00:00 ┆ 3232.466575 ┆ 1.1998e8    ┆ 3.3930e-7    ┆ 3.3081e-7   ┆ 8.5421e-8  │
│ 2024-08-01 16:00:00 ┆ 3232.466575 ┆ 1.1998e8    ┆ 3.3930e-7    ┆ 3.3081e-7   ┆ 8.5421e-8  │
│ 2024-08-01 17:00:00 ┆ 3232.466575 ┆ 1.1998e8    ┆ 3.3930e-7    ┆ 3.3081e-7   ┆ 8.5421e-8  │
│ 2024-08-01 18:00:00 ┆ 3232.466575 ┆ 1.1998e8