# Zillow Data - Modelo Dimensional (Star Schema)
Este notebook cria as tabelas de dimensão e fato para análise de indicadores do mercado imobiliário Zillow.

In [21]:
import duckdb
import pandas as pd
from datetime import datetime

# Notebook-wide pandas display settings: show every column, cap the number of
# printed rows at 100, and let pandas pick the output width on its own.
for _option, _setting in (
    ("display.max_columns", None),
    ("display.max_rows", 100),
    ("display.width", None),
):
    pd.set_option(_option, _setting)

## Conectar ao DuckDB e Carregar Dados de Origem

In [22]:
# Connect to DuckDB (the database can be persistent or in-memory).
# A file path is passed here, so the warehouse persists in zillow_dwh.duckdb.
conn = duckdb.connect('zillow_dwh.duckdb')

# Kept from the original cell. NOTE(review): every CREATE TABLE in this
# notebook uses an unqualified name, so the tables presumably live in the
# default schema and are not affected by dropping a schema called zillow_dwh.
conn.execute('DROP SCHEMA IF EXISTS zillow_dwh CASCADE;')

# Reset the warehouse so the notebook survives "Restart Kernel -> Run All" on
# the persistent database file. The tables are linked by foreign keys, and a
# table cannot be dropped while another table still references it, so they are
# removed child-first: the fact table, then each dimension before the
# dimension it points at. dim_indicator is the earlier name of the
# real-estate indicator dimension and is removed as well if it is still around.
WAREHOUSE_TABLES_DROP_ORDER = [
    "fact_value",
    "dim_cryptostock_value",
    "dim_stock_price",
    "dim_crypto_price",
    "dim_socioeconomical_indicator",
    "dim_asset",
    "dim_value",
    "dim_realestate_indicator",
    "dim_indicator",
    "dim_region",
    "dim_zip",
    "dim_neighborhood",
    "dim_city",
    "dim_county",
    "dim_metro",
    "dim_state",
    "dim_country",
    "dim_time",
    "dim_year",
]
for table_name in WAREHOUSE_TABLES_DROP_ORDER:
    conn.execute(f"DROP TABLE IF EXISTS {table_name}")

# Locations of the original CSV exports.
# NOTE(review): these are absolute paths on one specific machine; consider
# deriving them from a configurable data directory before sharing the notebook.
csv_paths = [
    r"C:\Users\JOHNV\Documents\CS\ED\ZILLOW_DATA_962c837a6ccefddddf190101e0bafdaf\ZILLOW_DATA_962c837a6ccefddddf190101e0bafdaf.csv",
    r"C:\Users\JOHNV\Documents\CS\ED\ZILLOW_INDICATORS_e93833a53d6c88463446a364cda611cc\ZILLOW_INDICATORS_e93833a53d6c88463446a364cda611cc.csv",
    r"C:\Users\JOHNV\Documents\CS\ED\ZILLOW_REGIONS_1a51d107db038a83ac171d604cb48d5b\ZILLOW_REGIONS_1a51d107db038a83ac171d604cb48d5b.csv"
]

# Staging table names, positionally matched with csv_paths.
names = ["ZILLOW_DATA", "ZILLOW_INDICATORS", "ZILLOW_REGIONS"]

# The staging load below is currently disabled.
# # Create staging (temporary) tables from the CSVs
# for i, csv_path in enumerate(csv_paths):
#     conn.execute(f"DROP TABLE IF EXISTS {names[i]}")
#     conn.execute(f"CREATE TABLE {names[i]} AS SELECT * FROM read_csv_auto('{csv_path.replace(chr(92), chr(92)*2)}')")
#     print(f"✓ Loaded {names[i]}")

# print("\n✓ Dados de origem carregados!")

## Criar Tabelas de Dimensão e Fato

### 1. Dimensão Tempo (dim_time)

In [23]:
# Build the time dimension: dim_year holds the year keys, and dim_time is keyed
# by calendar date and references dim_year through year_key.
# The statements run in order: dim_time is dropped before dim_year because it
# holds the foreign key, and dim_year is created before dim_time for the same
# reason.
# NOTE(review): fact_value also references dim_year, so on a database where
# fact_value already exists the dim_year drop may be rejected - confirm.
time_dimension_statements = (
    """
DROP TABLE IF EXISTS dim_time
""",
    """
DROP TABLE IF EXISTS dim_year
""",
    """
CREATE TABLE dim_year (
    year_key INTEGER PRIMARY KEY
)
""",
    """
CREATE TABLE dim_time (
    date_key DATE PRIMARY KEY,
    day_name VARCHAR,
    week_number INTEGER,
    month_number INTEGER,
    month_name VARCHAR,
    year_key INTEGER,
    FOREIGN KEY (year_key) REFERENCES dim_year(year_key)
)
""",
)
for statement in time_dimension_statements:
    conn.execute(statement)

print("✓ Tabela dim_year criada!")

✓ Tabela dim_year criada!


### 2. Dimensão Região (dim_region)

In [None]:
# ============================================================================
# CREATE THE REGION SUB-DIMENSIONS (HIERARCHICAL MODEL)
# ============================================================================
# Hierarchy, coarsest to finest:
#   country -> state -> metro -> county -> city -> {neighborhood, zip}
# dim_region is the consolidated table that points at whichever level a given
# region belongs to.

# Drop child-first: a table cannot be dropped while another table still holds a
# foreign key to it, so the consolidated table goes first and the top of the
# hierarchy goes last.
# NOTE(review): dim_realestate_indicator (created in a later cell) references
# dim_region; if it already exists it has to be dropped before this cell can
# be re-run - confirm how re-runs are expected to work.
for region_table in (
    "dim_region",
    "dim_zip",
    "dim_neighborhood",
    "dim_city",
    "dim_county",
    "dim_metro",
    "dim_state",
    "dim_country",
):
    conn.execute(f"DROP TABLE IF EXISTS {region_table}")

# 1. COUNTRY dimension (top of the hierarchy)
# NOTE(review): state_code on a country table looks like a copy-paste from
# dim_state; the column is kept so anything loading this table keeps working -
# confirm whether country_code was intended.
conn.execute("""
CREATE TABLE dim_country (
    country_key INTEGER PRIMARY KEY,
    country_name VARCHAR,
    state_code VARCHAR
)
""")
print("✓ dim_country criada")

# 2. STATE dimension (child of country)
conn.execute("""
CREATE TABLE dim_state (
    state_key INTEGER PRIMARY KEY,
    state_name VARCHAR,
    state_code VARCHAR,
    country_key INTEGER,
    FOREIGN KEY (country_key) REFERENCES dim_country(country_key)
)
""")
print("✓ dim_state criada")


# 3. METRO AREA dimension (child of state)
conn.execute("""
CREATE TABLE dim_metro (
    metro_key INTEGER PRIMARY KEY,
    metro_name VARCHAR,
    state_key INTEGER,
    FOREIGN KEY (state_key) REFERENCES dim_state(state_key)
)
""")
print("✓ dim_metro criada")

# 4. COUNTY dimension (child of metro)
conn.execute("""
CREATE TABLE dim_county (
    county_key INTEGER PRIMARY KEY,
    county_name VARCHAR,
    metro_key INTEGER,
    FOREIGN KEY (metro_key) REFERENCES dim_metro(metro_key)
)
""")
print("✓ dim_county criada")


# 5. CITY dimension (child of county)
conn.execute("""
CREATE TABLE dim_city (
    city_key INTEGER PRIMARY KEY,
    city_name VARCHAR,
    county_key INTEGER,
    FOREIGN KEY (county_key) REFERENCES dim_county(county_key)
)
""")
print("✓ dim_city criada")

# 6. NEIGHBORHOOD dimension (child of city, same level as ZIP)
conn.execute("""
CREATE TABLE dim_neighborhood (
    neighborhood_key INTEGER PRIMARY KEY,
    neighborhood_name VARCHAR,
    city_key INTEGER,
    FOREIGN KEY (city_key) REFERENCES dim_city(city_key)
)
""")
print("✓ dim_neighborhood criada")

# 7. ZIP dimension (child of city, most granular level)
conn.execute("""
CREATE TABLE dim_zip (
    zip_key INTEGER PRIMARY KEY,
    city_key INTEGER,
    FOREIGN KEY (city_key) REFERENCES dim_city(city_key)
)
""")
print("✓ dim_zip criada")

# 8. REGION dimension (consolidated - points at the specific sub-dimension).
# country_key is declared as a column here because the table carries a foreign
# key on it; a foreign key on an undeclared column is rejected.
conn.execute("""
CREATE TABLE dim_region (
    region_key INTEGER PRIMARY KEY,
    region_id VARCHAR UNIQUE,
    region_type VARCHAR,
    zip_key INTEGER,
    neighborhood_key INTEGER,
    city_key INTEGER,
    county_key INTEGER,
    metro_key INTEGER,
    state_key INTEGER,
    country_key INTEGER,
    FOREIGN KEY (zip_key) REFERENCES dim_zip(zip_key),
    FOREIGN KEY (neighborhood_key) REFERENCES dim_neighborhood(neighborhood_key),
    FOREIGN KEY (city_key) REFERENCES dim_city(city_key),
    FOREIGN KEY (county_key) REFERENCES dim_county(county_key),
    FOREIGN KEY (metro_key) REFERENCES dim_metro(metro_key),
    FOREIGN KEY (state_key) REFERENCES dim_state(state_key),
    FOREIGN KEY (country_key) REFERENCES dim_country(country_key)
)
""")
print("✓ dim_region criada (tabela principal consolidada)")

print("\n" + "="*80)
print("✓ Todas as subdimensões de região foram criadas com sucesso!")
print("="*80)

✓ dim_state criada
✓ dim_metro criada
✓ dim_county criada
✓ dim_city criada
✓ dim_neighborhood criada
✓ dim_zip criada
✓ dim_region criada (tabela principal consolidada)

✓ Todas as subdimensões de região foram criadas com sucesso!


### 3. Dimensão Indicador Imobiliário (dim_realestate_indicator)

In [25]:
# Create the real-estate indicator dimension (dim_realestate_indicator), which
# references dim_region through region_key.

# Drop the table this cell actually creates; otherwise a second run fails on
# CREATE TABLE because the table already exists.
# NOTE(review): fact_value references this table, so on a database where
# fact_value already exists this drop may be rejected until fact_value is
# dropped - confirm how re-runs are expected to work.
conn.execute("""
DROP TABLE IF EXISTS dim_realestate_indicator
""")

# dim_indicator is the name the original drop targeted; still removed in case
# an older version of the table is left in the database file.
conn.execute("""
DROP TABLE IF EXISTS dim_indicator
""")

conn.execute("""
CREATE TABLE dim_realestate_indicator (
    realestate_indicator_key VARCHAR PRIMARY KEY,
    indicator_name VARCHAR,
    indicator_description VARCHAR,
    region_key INTEGER,
    FOREIGN KEY (region_key) REFERENCES dim_region(region_key)
)
""")

print("✓ Tabela dim_realestate_indicator criada!")

✓ Tabela dim_indicator criada!


### 4. Dim Value

In [26]:
# Rebuild dim_value: an integer-keyed table holding a decimal value together
# with its unit, restricted to USD, PERCENTAGE or DAYS.
drop_dim_value_sql = """
DROP TABLE IF EXISTS dim_value
"""
create_dim_value_sql = """
CREATE TABLE dim_value (
    value_key INTEGER PRIMARY KEY,
    value DECIMAL,
    unit ENUM('USD', 'PERCENTAGE', 'DAYS')
)
"""

conn.execute(drop_dim_value_sql)
conn.execute(create_dim_value_sql)

print("✓ Tabela dim_value criada!")

✓ Tabela dim_value criada!


### 5. Dim Asset

In [27]:
# Rebuild dim_asset: one row per asset key, tagged with an asset type limited
# to REAL_ESTATE, CRYPTOCURRENCY, STOCK or ORE.
asset_dimension_statements = [
    """
DROP TABLE IF EXISTS dim_asset
""",
    """
CREATE TABLE dim_asset (
    asset_key VARCHAR PRIMARY KEY,
    type ENUM('REAL_ESTATE', 'CRYPTOCURRENCY', 'STOCK', 'ORE')
)
""",
]
for asset_statement in asset_dimension_statements:
    conn.execute(asset_statement)

print("✓ Tabela dim_asset criada!")

✓ Tabela dim_asset criada!


### 6. Dim Socioeconomical Indicator

In [28]:
# Rebuild dim_socioeconomical_indicator. Despite the wording of the original
# comment this is a dimension, not a fact table: a string key plus a name and
# a free-text description.
drop_socio_indicator_sql = """
DROP TABLE IF EXISTS dim_socioeconomical_indicator
"""
conn.execute(drop_socio_indicator_sql)

create_socio_indicator_sql = """
CREATE TABLE dim_socioeconomical_indicator (
    socioeconomical_indicator_key VARCHAR PRIMARY KEY,
    name VARCHAR,
    description TEXT,
)
"""
conn.execute(create_socio_indicator_sql)

print("✓ Tabela dim_socioeconomical_indicator criada!")

✓ Tabela dim_socioeconomical_indicator criada!


### 7. Dim CryptoStock Value

In [29]:
# Build the crypto/stock value dimensions: dim_crypto_price and
# dim_stock_price hold the price columns specific to each asset class, and
# dim_cryptostock_value references both and carries the shared high / low /
# volume columns.

# Drop child-first: dim_cryptostock_value holds foreign keys to both price
# tables, so it has to go before them or the price-table drops are rejected
# when the cell is re-run.
# NOTE(review): fact_value references dim_cryptostock_value; if it already
# exists it has to be dropped before this cell can be re-run - confirm.
for price_table in (
    "dim_cryptostock_value",
    "dim_crypto_price",
    "dim_stock_price",
):
    conn.execute(f"DROP TABLE IF EXISTS {price_table}")

# Crypto-specific price columns.
conn.execute("""
CREATE TABLE dim_crypto_price (
    crypto_price_key INTEGER PRIMARY KEY,
    mid DECIMAL,
    last DECIMAL,
    bid DECIMAL,
    ask DECIMAL
)
""")

print("✓ Tabela dim_crypto_price criada!")

# Stock-specific price columns.
conn.execute("""
CREATE TABLE dim_stock_price (
    stock_price_key INTEGER PRIMARY KEY,
    open DECIMAL
)
""")

print("✓ Tabela dim_stock_price criada!")

# Combined table: created last because it references both price tables.
conn.execute("""
CREATE TABLE dim_cryptostock_value (
    cryptostock_value_key INTEGER PRIMARY KEY,
    stock_price_key INTEGER,
    crypto_price_key INTEGER,
    high DECIMAL,
    low DECIMAL,
    volume DECIMAL,
    FOREIGN KEY (stock_price_key) REFERENCES dim_stock_price(stock_price_key),
    FOREIGN KEY (crypto_price_key) REFERENCES dim_crypto_price(crypto_price_key)
)
""")

print("✓ Tabela dim_cryptostock_value criada!")



✓ Tabela dim_crypto_price criada!
✓ Tabela dim_stock_price criada!
✓ Tabela dim_cryptostock_value criada!


### 8. Tabela Fato (fact_value)

In [30]:
# Rebuild the fact table. Each row is a combination of foreign keys into the
# value, asset, year, socioeconomic-indicator, real-estate-indicator and
# crypto/stock-value dimensions; the table declares no primary key of its own.
drop_fact_value_sql = """
DROP TABLE IF EXISTS fact_value
"""
create_fact_value_sql = """
CREATE TABLE fact_value (
    value_key INTEGER,
    asset_key VARCHAR,
    year_key INTEGER,
    socioeconomical_indicator_key VARCHAR,
    realestate_indicator_key VARCHAR,
    cryptostock_value_key INTEGER,
    FOREIGN KEY (value_key) REFERENCES dim_value(value_key),
    FOREIGN KEY (asset_key) REFERENCES dim_asset(asset_key),
    FOREIGN KEY (year_key) REFERENCES dim_year(year_key),
    FOREIGN KEY (socioeconomical_indicator_key) REFERENCES dim_socioeconomical_indicator(socioeconomical_indicator_key),
    FOREIGN KEY (realestate_indicator_key) REFERENCES dim_realestate_indicator(realestate_indicator_key),
    FOREIGN KEY (cryptostock_value_key) REFERENCES dim_cryptostock_value(cryptostock_value_key)
)
"""

for fact_statement in (drop_fact_value_sql, create_fact_value_sql):
    conn.execute(fact_statement)

print("✓ Tabela fact_value criada!")

✓ Tabela fact_value criada!


## Fechar Conexão

In [31]:
# Release the DuckDB connection; the database itself stays on disk in
# zillow_dwh.duckdb.
closing_message = "✓ Conexão fechada. Data Warehouse pronto para uso!"
conn.close()
print(closing_message)

✓ Conexão fechada. Data Warehouse pronto para uso!
