### Initial Setup
Make sure you have the following packages installed:
- pandas
- jupysql
- duckdb
- duckdb-engine

`pip install pandas jupysql duckdb duckdb-engine`

In [12]:
# pip install pandas jupysql duckdb duckdb-engine

# TODO: Build a combined database

In [13]:
import duckdb
import pandas as pd
# Use an in-memory database: every table and view in this notebook is created
# on this connection object, so later queries must go through `conn` as well.
conn = duckdb.connect(':memory:')

In [14]:
# Create one table per downloaded CSV file. Each table is named after its file:
# downloads/<table_name>.csv -> <table_name>.
# CREATE OR REPLACE (instead of a plain CREATE) keeps this cell re-runnable:
# executing it a second time rebuilds the tables instead of failing because
# they already exist, matching the CREATE OR REPLACE used for the views.
csv_tables = [
    # DefiLlama
    'defillama_chains',
    'defillama_tvl',
    # growthepie
    'growthepie_fundamentals',
    'growthepie_metadata',
    # L2Beat
    'l2beat_activity',
    'l2beat_assets_onchain',
    'l2beat_summary',
]

for table_name in csv_tables:
    # DuckDB reads the quoted path directly as a CSV source.
    conn.execute(
        f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM 'downloads/{table_name}.csv'"
    )

<duckdb.duckdb.DuckDBPyConnection at 0x143e63bf0>

### Intermediate Data x Metadata Views
For each data source, join its data tables with its metadata tables. The resulting views are the inputs to the later joins.

In [15]:
# Build Growthepie model: a view that inner-joins the fundamentals rows to the
# metadata rows on origin_key (rows without a match on the other side are
# dropped). origin_key is excluded from the metadata side so the view exposes
# that column only once.
conn.execute("""
CREATE OR REPLACE VIEW growthepie AS
SELECT
    f.*
    , m.* EXCLUDE (origin_key)
FROM growthepie_fundamentals f
JOIN growthepie_metadata m
ON f.origin_key = m.origin_key
""")

# Build L2Beat model: a view combining summary, activity and on-chain asset
# rows by slug.
#
# Both joins are RIGHT JOINs, so the final result keeps every
# l2beat_assets_onchain row, and the summary/activity columns are NULL when no
# summary row matches. The slug columns of `t` and `aoc` are excluded to avoid
# duplicates, which leaves `s.slug` as the only key, and that is exactly the
# column the RIGHT JOINs null-extend. REPLACE swaps it for a COALESCE that
# falls back to the assets slug, so unmatched rows keep their key while the
# column name and position stay unchanged.
#
# NOTE(review): the joins match on slug only. If the activity or assets tables
# hold several rows per slug, the result multiplies rows. Confirm this is
# intended.
conn.execute("""
CREATE OR REPLACE VIEW l2beat AS
SELECT
    s.* REPLACE (COALESCE(s.slug, aoc.slug) AS slug)
    , t.* EXCLUDE (slug)
    , aoc.* EXCLUDE (slug)
FROM l2beat_summary s
RIGHT JOIN l2beat_activity t
    ON s.slug = t.slug
RIGHT JOIN l2beat_assets_onchain aoc
    ON s.slug = aoc.slug
""")

# Build Defillama model: a view that inner-joins the chains rows to the TVL
# rows on the chain name. `name` and `chain_id` are excluded from the TVL side,
# presumably because defillama_chains already provides them (TODO confirm for
# chain_id; `name` is the join key).
conn.execute("""
CREATE OR REPLACE VIEW defillama AS
SELECT
    ch.*
    , tvl.* EXCLUDE (name, chain_id)
FROM defillama_chains ch
JOIN defillama_tvl tvl
ON ch.name = tvl.name
""")

<duckdb.duckdb.DuckDBPyConnection at 0x143e63bf0>

In [21]:
# Function to check out the views

# Lift pandas' display limits so sampled rows print in full: show every column,
# do not wrap wide frames onto multiple lines, and do not truncate cell contents.
for _display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

def print_view_sample(conn, view_name):
    """Print the column names and five randomly chosen rows of a view.

    Args:
        conn: Open DuckDB connection on which the view was created. The query
            is run on this connection; the module-level ``duckdb.query`` runs
            on DuckDB's separate default connection, which cannot see objects
            created in ``conn``'s in-memory database.
        view_name: Name of the view (or table) to sample. It is interpolated
            into the SQL text as-is, so pass trusted identifiers only.

    Returns:
        None. A header, the column list and the sampled rows are printed.
    """
    print(f"\n--- {view_name.upper()} ---")

    # ORDER BY RANDOM() shuffles the rows, so each call shows a different sample.
    query = f"SELECT * FROM {view_name} ORDER BY RANDOM() LIMIT 5"
    df = conn.query(query).to_df()

    # Print column names
    print("Columns:", ", ".join(df.columns))

    # Print the sampled rows
    print("\nRandom Sample Rows:")
    print(df)

# Names of the three per-source views created on `conn` earlier in the notebook
views = ['growthepie', 'l2beat', 'defillama']

# Print a random sample of each view, querying through the same connection
for view in views:
    print_view_sample(conn, view)

NameError: name 'pd' is not defined