# Settings

In [1]:
import os

# When the kernel starts inside a directory named exactly "notebooks", move up
# one level (presumably to the project root) so relative paths resolve from
# there. The final path component is compared exactly: a plain suffix check
# would also match directories such as "old_notebooks".
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
print(os.getcwd())

/Users/cmcoutosilva/Projects/github/nl2sql-agent


In [2]:
import pandas as pd
from sqlalchemy import create_engine, text, inspect

# Database connection

In [3]:
# Set URI - read it from the OLIST_DATABASE_URI environment variable so that
# credentials do not have to be edited into the notebook; when the variable is
# unset, fall back to the local development database.
# TODO: also support loading the URI from config and/or .env files
uri = os.environ.get(
    "OLIST_DATABASE_URI",
    "postgresql+psycopg://postgres:postgres@localhost:5432/olist_ecommerce",
)

# Set up engine
engine = create_engine(uri)

# Test connection: run a trivial query and print the fetched rows
with engine.connect() as conn:
    result = conn.execute(text("SELECT 1"))
    print(result.fetchall())

[(1,)]


## Inspect Database

In [4]:
# Create an inspector for the engine created earlier
inspector = inspect(engine)

# Print the table names found in each schema of interest
schemas_to_list = ("ecommerce", "marketing")
for schema in schemas_to_list:
    print(f"Tables in {schema}:")
    # end="\n\n" terminates the list line and adds the blank separator line
    print(inspector.get_table_names(schema=schema), end="\n\n")

Tables in ecommerce:
['geolocation', 'product_category_name_translations', 'customers', 'orders', 'order_items', 'products', 'sellers', 'order_payments', 'order_reviews']

Tables in marketing:
['marketing_qualified_leads', 'closed_deals']



In [5]:
## -- Inspect ecommerce.geolocation table -- ##


def describe_table(inspector, table_name, schema):
    """Print the columns, primary key and foreign keys of one table.

    Args:
        inspector: Object providing ``get_columns``, ``get_pk_constraint`` and
            ``get_foreign_keys`` (here, the SQLAlchemy inspector).
        table_name: Name of the table to inspect.
        schema: Schema that contains the table.

    Returns:
        Tuple ``(columns, df_columns, primary_keys, foreign_keys)``: the column
        metadata as returned by the inspector, the same metadata as a
        DataFrame, the primary-key constraint and the foreign keys.
    """
    # Show table columns
    columns = inspector.get_columns(table_name=table_name, schema=schema)
    df_columns = pd.DataFrame(columns)
    print(f"Columns in table {table_name}:")
    print(df_columns.to_markdown())
    print()

    # Show table primary keys
    primary_keys = inspector.get_pk_constraint(table_name=table_name, schema=schema)
    print(f"Primary keys in table {table_name}:")
    print(primary_keys)
    print()

    # Show table foreign keys
    foreign_keys = inspector.get_foreign_keys(table_name=table_name, schema=schema)
    print(f"Foreign keys in table {table_name}:")
    print(foreign_keys)

    return columns, df_columns, primary_keys, foreign_keys


target_table = "geolocation"
target_schema = "ecommerce"

# Keep the reflected metadata in the same notebook-level names as before
columns, df_columns, primary_keys, foreign_keys = describe_table(
    inspector, target_table, target_schema
)

Columns in table geolocation:
|    | name                        | type             | nullable   | default                                                       | autoincrement   | comment                    |
|---:|:----------------------------|:-----------------|:-----------|:--------------------------------------------------------------|:----------------|:---------------------------|
|  0 | geolocation_id              | INTEGER          | False      | nextval('ecommerce.geolocation_geolocation_id_seq'::regclass) | True            |                            |
|  1 | geolocation_zip_code_prefix | TEXT             | True       |                                                               | False           | first 5 digits of zip code |
|  2 | geolocation_lat             | DOUBLE PRECISION | True       |                                                               | False           | latitude                   |
|  3 | geolocation_lng             | DOUBLE PRECISION | True       |  

In [6]:
## -- Inspect ecommerce.order_payments table -- ##

target_table = "order_payments"
target_schema = "ecommerce"

# Keyword arguments shared by every inspector lookup below
table_ref = {"table_name": target_table, "schema": target_schema}

# Columns: reflect them, then render the metadata as a markdown table
columns = inspector.get_columns(**table_ref)
df_columns = pd.DataFrame(columns)
print(f"Columns in table {target_table}:")
print(df_columns.to_markdown(), end="\n\n")

# Primary keys
primary_keys = inspector.get_pk_constraint(**table_ref)
print(f"Primary keys in table {target_table}:")
print(primary_keys, end="\n\n")

# Foreign keys
foreign_keys = inspector.get_foreign_keys(**table_ref)
print(f"Foreign keys in table {target_table}:")
print(foreign_keys)

Columns in table order_payments:
|    | name                 | type             | nullable   | default   | autoincrement   | comment                                                                                                                               |
|---:|:---------------------|:-----------------|:-----------|:----------|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------|
|  0 | order_id             | TEXT             | False      |           | False           | unique identifier of an order.                                                                                                        |
|  1 | payment_sequential   | INTEGER          | False      |           | False           | a customer may pay an order with more than one payment method. If he does so, a sequence will be created to accommodate all payments. |
|  2 | payment_type         | TEXT             | True  