# Imports

In [1]:
# manipulação de dados
import pandas as pd

# banco de dados
import sqlite3
from sqlalchemy import create_engine

# 1.0. Connect to Database

In [2]:
# Build the SQLAlchemy engine for the local SQLite file (SQL echo disabled)
# and open the connection that the data-loading cells write through.
db = create_engine("sqlite:///database/db_olist.sqlite", echo=False)
conn = db.connect()

# 2.0. Loading Dataset

## 2.1. Subindo Dados no SQL

In [3]:
# DDL describing the intended layout of the `customer` table.
schema_customer = """
CREATE TABLE customer(
    customer_id                 TEXT,
    customer_unique_id          TEXT,
    customer_zip_code_prefix    INTEGER,
    customer_city               TEXT,
    customer_state              TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_customer)

# Read the customers CSV into a DataFrame.
df_customer = pd.read_csv(
    filepath_or_buffer="data_olist/olist_customers_dataset.csv",
)

# Append the rows to the `customer` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_customer.to_sql(
    name="customer",
    con=conn,
    if_exists="append",
    index=False,
)

In [4]:
# DDL describing the intended layout of the `geolocation` table.
schema_geolocation = """
CREATE TABLE geolocation(
    geolocation_zip_code_prefix      INTEGER,
    geolocation_lat                     REAL,
    geolocation_lng                     REAL,
    geolocation_city                    TEXT,
    geolocation_state                   TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_geolocation)

# Read the geolocation CSV into a DataFrame.
df_geolocation = pd.read_csv(
    filepath_or_buffer="data_olist/olist_geolocation_dataset.csv",
)

# Append the rows to the `geolocation` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_geolocation.to_sql(
    name="geolocation",
    con=conn,
    if_exists="append",
    index=False,
)

In [5]:
# DDL describing the intended layout of the `order_items` table.
# NOTE(review): order_item_id is declared REAL; confirm whether INTEGER was
# intended before enabling the schema.
schema_order_items = """
CREATE TABLE order_items(
    order_id                TEXT,
    order_item_id           REAL,
    product_id              TEXT,
    seller_id               TEXT,
    shipping_limit_date     TEXT,
    price                   REAL,
    freight_value           REAL
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_order_items)

# Read the order-items CSV into a DataFrame.
df_order_items = pd.read_csv(
    filepath_or_buffer="data_olist/olist_order_items_dataset.csv",
)

# Append the rows to the `order_items` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_order_items.to_sql(
    name="order_items",
    con=conn,
    if_exists="append",
    index=False,
)

In [6]:
# DDL describing the intended layout of the payments table. The table name is
# the singular `order_payment`, both here and in the insert below.
schema_order_payments = """
CREATE TABLE order_payment(
    order_id                 TEXT,
    payment_sequential       INTEGER,
    payment_type             TEXT,
    payment_installments     INTEGER,
    payment_value            REAL
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_order_payments)

# Read the order-payments CSV into a DataFrame.
df_order_payments = pd.read_csv(
    filepath_or_buffer="data_olist/olist_order_payments_dataset.csv",
)

# Append the rows to the `order_payment` table. With if_exists="append",
# running this cell again inserts the same rows a second time.
df_order_payments.to_sql(
    name="order_payment",
    con=conn,
    if_exists="append",
    index=False,
)

In [7]:
# DDL describing the intended layout of the `order_reviews` table.
schema_order_reviews = """
CREATE TABLE order_reviews(
    review_id                  TEXT,
    order_id                   TEXT,
    review_score               INTEGER,
    review_comment_title       TEXT,
    review_comment_message     TEXT,
    review_creation_date       TEXT,
    review_answer_timestamp    TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_order_reviews)

# Read the order-reviews CSV into a DataFrame.
df_order_reviews = pd.read_csv(
    filepath_or_buffer="data_olist/olist_order_reviews_dataset.csv",
)

# Append the rows to the `order_reviews` table. With if_exists="append",
# running this cell again inserts the same rows a second time.
df_order_reviews.to_sql(
    name="order_reviews",
    con=conn,
    if_exists="append",
    index=False,
)

In [8]:
# DDL describing the intended layout of the `orders` table.
schema_orders = """
CREATE TABLE orders(
    order_id                         TEXT,
    customer_id                      TEXT,
    order_status                     TEXT,
    order_purchase_timestamp         TEXT,
    order_approved_at                TEXT,
    order_delivered_carrier_date     TEXT,
    order_delivered_customer_date    TEXT,
    order_estimated_delivery_date    TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_orders)

# Read the orders CSV into a DataFrame.
df_orders = pd.read_csv(
    filepath_or_buffer="data_olist/olist_orders_dataset.csv",
)

# Append the rows to the `orders` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_orders.to_sql(
    name="orders",
    con=conn,
    if_exists="append",
    index=False,
)

In [9]:
# DDL describing the intended layout of the `products` table.
# NOTE(review): the `lenght` spelling in two column names presumably mirrors
# the CSV header; confirm before renaming anything.
schema_products = """
CREATE TABLE products(
    product_id                     TEXT,
    product_category_name          TEXT,
    product_name_lenght            REAL,
    product_description_lenght     REAL,
    product_photos_qty             REAL,
    product_weight_g               REAL,
    product_length_cm              REAL,
    product_height_cm              REAL,
    product_width_cm               REAL
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_products)

# Read the products CSV into a DataFrame.
df_products = pd.read_csv(
    filepath_or_buffer="data_olist/olist_products_dataset.csv",
)

# Append the rows to the `products` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_products.to_sql(
    name="products",
    con=conn,
    if_exists="append",
    index=False,
)

In [10]:
# DDL describing the intended layout of the `sellers` table.
# seller_city is declared TEXT (it was INTEGER), matching the customer_city and
# geolocation_city columns declared in the other schemas of this notebook.
schema_sellers = """
CREATE TABLE sellers(
    seller_id                 TEXT,
    seller_zip_code_prefix    INTEGER,
    seller_city               TEXT,
    seller_state              TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_sellers)

# Read the sellers CSV into a DataFrame.
df_sellers = pd.read_csv(
    filepath_or_buffer="data_olist/olist_sellers_dataset.csv",
)

# Append the rows to the `sellers` table. With if_exists="append", running
# this cell again inserts the same rows a second time.
df_sellers.to_sql(
    name="sellers",
    con=conn,
    if_exists="append",
    index=False,
)

In [11]:
# DDL describing the intended layout of the `product_category` table, which
# pairs each category name with its English counterpart.
schema_product_category = """
CREATE TABLE product_category(
    product_category_name            TEXT,
    product_category_name_english    TEXT
)
"""

# Schema creation is currently disabled, so the DDL above is not executed:
# conn.execute(schema_product_category)

# Read the category-name translation CSV into a DataFrame.
df_product_category = pd.read_csv(
    filepath_or_buffer="data_olist/product_category_name_translation.csv",
)

# Append the rows to the `product_category` table. With if_exists="append",
# running this cell again inserts the same rows a second time.
df_product_category.to_sql(
    name="product_category",
    con=conn,
    if_exists="append",
    index=False,
)