# Creating database on SQLite 3
This script creates a SQLite database and loads the Olist CSV files into it, one table per file.

### 1. Connect to database

In [1]:
# Libraries
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Connect to database
# The URL has three slashes followed by a bare file name, i.e. a relative
# path: the database file 'db_olist_sqlite' is resolved against the current
# working directory.
db = create_engine( 'sqlite:///db_olist_sqlite' )
# Single connection shared by every cell below; closed in the last cell.
conn = db.connect()

### 2. Loading dataset

In [7]:
# Dataset customers
# Load the raw CSV into a DataFrame.
df_customers = pd.read_csv( 'data/olist_customers_dataset.csv' )
# Create the table explicitly so the column types are fixed by this DDL;
# to_sql below then appends into the existing table.
# Fix: the customer_state column previously had a stray '#' between its name
# and its type. SQLite has no '#' comment syntax, so the statement was
# rejected with an "unrecognized token" error.
schema = '''
    CREATE TABLE customers(
        customer_id                 TEXT,
        customer_unique_id          TEXT,
        customer_zip_code_prefix    INTEGER,
        customer_city               TEXT,
        customer_state              TEXT
)
'''
conn.execute( schema )
# Append every row; index=False keeps the DataFrame index out of the table.
df_customers.to_sql( 'customers', con=conn, if_exists='append', index=False )

In [18]:
# Dataset geolocation
# Load the raw CSV into a DataFrame.
df_geolocation = pd.read_csv('data/olist_geolocation_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL;
# to_sql below then appends into the existing table.
schema = '''
    CREATE TABLE geolocation(
        geolocation_zip_code_prefix    INTEGER,
        geolocation_lat                REAL,
        geolocation_lng                REAL,
        geolocation_city               TEXT,
        geolocation_state              TEXT
    
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_geolocation.to_sql(
    name='geolocation',
    con=conn,
    if_exists='append',
    index=False,
)

In [22]:
# Dataset order_items
# Load the raw CSV into a DataFrame.
df_order_items = pd.read_csv('data/olist_order_items_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL.
# shipping_limit_date is declared as TEXT.
schema = '''
    CREATE TABLE order_items(
        order_id               TEXT,
        order_item_id          INTEGER,
        product_id             TEXT,
        seller_id              TEXT,
        shipping_limit_date    TEXT,
        price                  REAL,
        freight_value          REAL
    
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_order_items.to_sql(
    name='order_items',
    con=conn,
    if_exists='append',
    index=False,
)

In [25]:
# Dataset order_payments
# Load the raw CSV into a DataFrame.
df_order_payments = pd.read_csv('data/olist_order_payments_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL;
# to_sql below then appends into the existing table.
schema = '''
    CREATE TABLE order_payments(
        order_id                TEXT,
        payment_sequential      INTEGER,
        payment_type            TEXT,
        payment_installments    INTEGER,
        payment_value           REAL
            
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_order_payments.to_sql(
    name='order_payments',
    con=conn,
    if_exists='append',
    index=False,
)

In [29]:
# Dataset order_reviews
# Load the raw CSV into a DataFrame.
df_order_reviews = pd.read_csv('data/olist_order_reviews_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL.
# Both review date/timestamp columns are declared as TEXT.
schema = '''
    CREATE TABLE order_reviews(
        review_id                  TEXT,
        order_id                   TEXT,
        review_score               INTEGER,
        review_comment_title       TEXT,
        review_comment_message     TEXT,
        review_creation_date       TEXT,
        review_answer_timestamp    TEXT
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_order_reviews.to_sql(
    name='order_reviews',
    con=conn,
    if_exists='append',
    index=False,
)

In [32]:
# Dataset orders
# Load the raw CSV into a DataFrame.
df_orders = pd.read_csv('data/olist_orders_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL.
# Every column, including the timestamp/date ones, is declared as TEXT.
schema = '''
    CREATE TABLE orders(
        order_id                         TEXT,
        customer_id                      TEXT,
        order_status                     TEXT,
        order_purchase_timestamp         TEXT,
        order_approved_at                TEXT,
        order_delivered_carrier_date     TEXT,
        order_delivered_customer_date    TEXT,
        order_estimated_delivery_date    TEXT
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_orders.to_sql(
    name='orders',
    con=conn,
    if_exists='append',
    index=False,
)

In [35]:
# Dataset products
# Load the raw CSV into a DataFrame.
df_products = pd.read_csv('data/olist_products_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL.
# NOTE(review): the "lenght" spelling in two column names presumably mirrors
# the CSV headers -- confirm against the file before renaming.
schema = '''
    CREATE TABLE products(
        product_id                    TEXT,
        product_category_name         TEXT,
        product_name_lenght           REAL,
        product_description_lenght    REAL,
        product_photos_qty            REAL,
        product_weight_g              REAL,
        product_length_cm             REAL,
        product_height_cm             REAL,
        product_width_cm              REAL
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_products.to_sql(
    name='products',
    con=conn,
    if_exists='append',
    index=False,
)

In [38]:
# Dataset sellers
# Load the raw CSV into a DataFrame.
df_sellers = pd.read_csv('data/olist_sellers_dataset.csv')
# Create the table explicitly so the column types are fixed by this DDL;
# to_sql below then appends into the existing table.
schema = '''
    CREATE TABLE sellers(
        seller_id                 TEXT,
        seller_zip_code_prefix    INTEGER,
        seller_city               TEXT,
        seller_state              TEXT
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_sellers.to_sql(
    name='sellers',
    con=conn,
    if_exists='append',
    index=False,
)

In [41]:
# Dataset product_category_name_translation
# Load the raw CSV into a DataFrame.
df_product_category_name_translation = pd.read_csv(
    'data/product_category_name_translation.csv'
)
# Create the two-column table explicitly so the column types are fixed by
# this DDL; to_sql below then appends into the existing table.
schema = '''
    CREATE TABLE product_category_name_translation(
        product_category_name            TEXT,
        product_category_name_english    TEXT
)
'''
conn.execute(schema)
# Append every row; index=False keeps the DataFrame index out of the table.
df_product_category_name_translation.to_sql(
    name='product_category_name_translation',
    con=conn,
    if_exists='append',
    index=False,
)

### 3. Check database

In [42]:
# Check database
# List every entry of type 'table' in sqlite_master to see which tables
# exist and the DDL they were created with.
query = '''
    SELECT *
    FROM sqlite_master
    WHERE type = 'table'
'''
table = pd.read_sql_query(sql=query, con=conn)
# Bare expression so the notebook renders the DataFrame as the cell output.
table

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,customers,customers,2,CREATE TABLE customers(\n customer_id ...
1,table,geolocation,geolocation,2237,CREATE TABLE geolocation(\n geolocation...
2,table,order_items,order_items,2236,CREATE TABLE order_items(\n order_id ...
3,table,order_payments,order_payments,16870,CREATE TABLE order_payments(\n order_id...
4,table,order_reviews,order_reviews,18462,CREATE TABLE order_reviews(\n review_id...
5,table,orders,orders,22128,CREATE TABLE orders(\n order_id ...
6,table,products,products,26660,CREATE TABLE products(\n product_id ...
7,table,sellers,sellers,27237,CREATE TABLE sellers(\n seller_id ...
8,table,product_category_name_translation,product_category_name_translation,27282,CREATE TABLE product_category_name_translation...


In [43]:
# Release the connection opened in the "Connect to database" cell.
conn.close()