# Explore NOAH Database Schema

This notebook helps you explore the NOAH PostgreSQL database schema and understand the data structure before converting to Neo4j.

In [None]:
import sys
from pathlib import Path

# Put the project's "src" directory at the front of the import path so the
# noah_converter imports below resolve without installing the package.
# NOTE(review): the path is derived from the parent of the current working
# directory, so this presumably expects the notebook to be launched from a
# direct subdirectory of the project root — confirm.
sys.path.insert(0, str(Path.cwd().parent / "src"))

import pandas as pd
from noah_converter.utils.config import load_config
from noah_converter.utils.db_connection import PostgreSQLConnection
from noah_converter.schema_analyzer import SchemaAnalyzer

## Load Configuration

In [None]:
# Read the project configuration; its source_db section describes the
# PostgreSQL instance to explore.
config = load_config()
source_db = config.source_db

# Build the connection wrapper from those settings and report its target.
pg_conn = PostgreSQLConnection(source_db)
print(f"Connected to: {source_db.host}:{source_db.port}/{source_db.database}")

## List All Tables

In [None]:
# Fetch the table names from the connection and show how many there are.
tables = pg_conn.get_table_names()
table_count = len(tables)
print(f"Total tables: {table_count}")

# List the names in sorted order, one bullet per table.
print("\nTables:")
ordered_tables = sorted(tables)
for table in ordered_tables:
    print(f"  - {table}")

## Analyze Schema

In [None]:
# Build the analyzer from the open connection and its config section, then
# run it against the "public" schema.
analyzer = SchemaAnalyzer(pg_conn, config.schema_analyzer)
analyzed_tables = analyzer.analyze(schema="public")

# Report how many tables the analysis returned.
analyzed_count = len(analyzed_tables)
print(f"Analyzed {analyzed_count} tables")

## View Table Details

In [None]:
# Build a one-row-per-table overview of the analyzed schema: table type,
# column count, row count, primary-key columns and number of foreign keys.
summary_columns = [
    "Table",
    "Type",
    "Columns",
    "Rows",
    "Primary Key",
    "Foreign Keys",
]
summary_data = [
    {
        "Table": table_name,
        "Type": table.table_type.value,
        "Columns": len(table.columns),
        # A missing row count is shown as 0 so the column stays sortable.
        "Rows": table.row_count or 0,
        "Primary Key": ", ".join(table.primary_key.columns) if table.primary_key else "None",
        "Foreign Keys": len(table.foreign_keys),
    }
    for table_name, table in analyzed_tables.items()
]

# Naming the columns explicitly keeps the frame well-formed even when no
# tables were analyzed; otherwise the empty frame has no "Rows" column and
# the sort below raises KeyError.
df_summary = pd.DataFrame(summary_data, columns=summary_columns)

# Largest tables first.
df_summary = df_summary.sort_values("Rows", ascending=False)
df_summary

## Explore Key Tables

In [None]:
# Show the structure of the zip_shapes table, if the analysis found it:
# a short header followed by one line per column with PK/FK markers.
if "zip_shapes" in analyzed_tables:
    zip_table = analyzed_tables["zip_shapes"]

    print(f"Table: {zip_table.name}")
    print(f"Type: {zip_table.table_type.value}")
    print(f"Rows: {zip_table.row_count}")
    print("\nColumns:")

    for col in zip_table.columns:
        # Markers stay empty unless the column is flagged as a key.
        pk = ""
        if col.is_primary_key:
            pk = "[PK]"
        fk = ""
        if col.is_foreign_key:
            fk = "[FK]"
        print(f"  {col.name:30} {col.data_type:20} {pk} {fk}")

## Sample Data from Key Tables

In [None]:
# Peek at the first five rows of the rent_burden table.
query = "SELECT * FROM rent_burden LIMIT 5"
rent_rows = pg_conn.execute_query(query)
df_rent = pd.DataFrame(rent_rows)
df_rent

In [None]:
# Peek at the first five rows of the zip_median_income table.
query = "SELECT * FROM zip_median_income LIMIT 5"
income_rows = pg_conn.execute_query(query)
df_income = pd.DataFrame(income_rows)
df_income

## Identify Relationships for Graph Conversion

In [None]:
# List every foreign key found by the analysis, grouped by owning table.
# Tables without foreign keys are skipped entirely.
print("Foreign Key Relationships:")
print("=" * 80)
for table_name, table in analyzed_tables.items():
    if not table.foreign_keys:
        continue
    print(f"\n{table_name}:")
    for fk in table.foreign_keys:
        print(f"  {fk.column} -> {fk.referenced_table}.{fk.referenced_column}")

## Export Schema Analysis

In [None]:
# Hand the analyzer a destination path for the analyzed schema, then
# report where it was asked to write.
output_path = "../data/schemas/noah_schema_analyzed.json"
analyzer.export_schema(
    output_path,
)
print(f"Schema exported to: {output_path}")

## Close Connection

In [None]:
# Close the database connection now that the exploration is finished, and
# confirm it in the cell output.
pg_conn.close()
print("Connection closed")