## Load All Datasets

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from CustomLib.Config import postgres_config

In [2]:
def LoadAllDataset(dataset_folderpath="D:/00 Project/00 My Project/Dataset/Brazilian_Olist/"):
    """Load the five Olist CSV files into pandas DataFrames.

    Args:
        dataset_folderpath: Folder that contains the five CSV files. It
            defaults to the location that used to be hard-coded, so existing
            zero-argument calls behave as before. A trailing separator is
            optional.

    Returns:
        A 5-tuple of DataFrames in this order: order items, order payments,
        orders, products, sellers.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when one of the
            expected files is missing from the folder.
    """
    import os  # local import keeps this cell self-contained

    # File names in the same order as the returned tuple.
    filenames = (
        "1_olist_order_items_dataset.csv",
        "2_olist_order_payments_dataset.csv",
        "3_olist_orders_dataset.csv",
        "4_olist_products_dataset.csv",
        "5_olist_sellers_dataset.csv",
    )
    # os.path.join tolerates a folder path given with or without a trailing slash.
    return tuple(
        pd.read_csv(os.path.join(dataset_folderpath, filename))
        for filename in filenames
    )

# Unpack the loader's 5-tuple into one DataFrame per Olist table.
(
    df_order_items,
    df_order_payments,
    df_orders,
    df_products,
    df_sellers,
) = LoadAllDataset()

In [3]:
# Column dtypes, then the row count, of the order-items frame.
print(df_order_items.dtypes)
print("Length: ", df_order_items.shape[0])

order_id                object
order_item_id            int64
product_id              object
seller_id               object
shipping_limit_date     object
price                  float64
freight_value          float64
dtype: object
Length:  112650


In [4]:
# Column dtypes, then the row count, of the order-payments frame.
print(df_order_payments.dtypes)
print("Length: ", df_order_payments.shape[0])

order_id                 object
payment_sequential        int64
payment_type             object
payment_installments      int64
payment_value           float64
dtype: object
Length:  103886


In [5]:
# Column dtypes, then the row count, of the orders frame.
print(df_orders.dtypes)
print("Length: ", df_orders.shape[0])

order_id                         object
customer_id                      object
order_status                     object
order_purchase_timestamp         object
order_approved_at                object
order_delivered_carrier_date     object
order_delivered_customer_date    object
order_estimated_delivery_date    object
dtype: object
Length:  99441


In [6]:
# Column dtypes, then the row count, of the products frame.
print(df_products.dtypes)
print("Length: ", df_products.shape[0])

product_id                     object
product_category_name          object
product_name_lenght           float64
product_description_lenght    float64
product_photos_qty            float64
product_weight_g              float64
product_length_cm             float64
product_height_cm             float64
product_width_cm              float64
dtype: object
Length:  32951


In [7]:
# Column dtypes, then the row count, of the sellers frame.
print(df_sellers.dtypes)
print("Length: ", df_sellers.shape[0])

seller_id                 object
seller_zip_code_prefix     int64
seller_city               object
seller_state              object
dtype: object
Length:  3095


## Insert Data into PostgreSQL

In [36]:
def add_data_sqlalchemy(df, table_name, if_exists='replace'):
    """Write a DataFrame to a table in the configured PostgreSQL schema.

    Connection details and the target schema are read from ``postgres_config``.

    Args:
        df: DataFrame to store; its index is not written (``index=False``).
        table_name: Destination table name inside the configured schema.
        if_exists: Forwarded to ``DataFrame.to_sql``. The default,
            ``'replace'``, matches the previous hard-coded behaviour: an
            existing table is replaced, even though the progress messages
            say "append".

    Returns:
        None. An error raised during the write is printed, not re-raised.
    """
    conn_string = f'postgresql://{postgres_config["username"]}:{postgres_config["password"]}@{postgres_config["hostname"]}:{postgres_config["port"]}/{postgres_config["database"]}'
    engine = create_engine(conn_string)

    try:
        print(f'Start append table {postgres_config["schema"]}.{table_name}')
        df.to_sql(
            name=table_name,
            con=engine,
            schema=postgres_config["schema"],
            if_exists=if_exists,
            index=False,
        )
        print(f'End append table {postgres_config["schema"]}.{table_name}')
    except Exception as e:
        # Best-effort load: report the failure so the remaining tables still run.
        print("Error: ", str(e))
    finally:
        # A new engine is built on every call, so release its pooled
        # connections here instead of leaving them open.
        engine.dispose()

# Store every loaded DataFrame in its PostgreSQL table, one call per table.
for target_table, frame in (
    ("order_items", df_order_items),
    ("order_payments", df_order_payments),
    ("orders", df_orders),
    ("products", df_products),
    ("sellers", df_sellers),
):
    add_data_sqlalchemy(frame, target_table)

Start append table Olist_Data.order_items
End append table Olist_Data.order_items
Start append table Olist_Data.order_payments
End append table Olist_Data.order_payments
Start append table Olist_Data.orders
End append table Olist_Data.orders
Start append table Olist_Data.products
End append table Olist_Data.products
Start append table Olist_Data.sellers
End append table Olist_Data.sellers


## Read Data from PostgreSQL

In [8]:
# Read the five Olist tables back from PostgreSQL, replacing the CSV-loaded
# DataFrames of the same names.
conn_string = f'postgresql://{postgres_config["username"]}:{postgres_config["password"]}@{postgres_config["hostname"]}:{postgres_config["port"]}/{postgres_config["database"]}'

# One engine is shared by all five queries and disposed afterwards, instead of
# handing the raw URL string to every pd.read_sql call.
olist_engine = create_engine(conn_string)
# Same config key the insert step writes to; quoted in the SQL below because
# the schema name is mixed-case.
olist_schema = postgres_config["schema"]

try:
    df_orders = pd.read_sql(f'SELECT * FROM "{olist_schema}".orders;', olist_engine)
    df_order_payments = pd.read_sql(f'SELECT * FROM "{olist_schema}".order_payments;', olist_engine)
    df_order_items = pd.read_sql(f'SELECT * FROM "{olist_schema}".order_items;', olist_engine)
    df_products = pd.read_sql(f'SELECT * FROM "{olist_schema}".products;', olist_engine)
    df_sellers = pd.read_sql(f'SELECT * FROM "{olist_schema}".sellers;', olist_engine)
finally:
    olist_engine.dispose()

In [9]:
# Preview the first five rows of the orders table (head() defaults to 5).
df_orders.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18 00:00:00
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13 00:00:00
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04 00:00:00
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15 00:00:00
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26 00:00:00


In [10]:
# Preview the first five rows of the order-payments table (head() defaults to 5).
df_order_payments.head()

Unnamed: 0,order_id,payment_sequential,payment_type,payment_installments,payment_value
0,b81ef226f3fe1789b1e8b2acac839d17,1,credit_card,8,99.33
1,a9810da82917af2d9aefd1278f1dcfa0,1,credit_card,1,24.39
2,25e8ea4e93396b6fa0d3dd708e76c1bd,1,credit_card,1,65.71
3,ba78997921bbcdc1373bb41e913ab953,1,credit_card,8,107.78
4,42fdf880ba16b47b59251dd489d4441a,1,credit_card,2,128.45


In [11]:
# Preview the first five rows of the order-items table (head() defaults to 5).
df_order_items.head()

Unnamed: 0,order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29
1,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03 11:05:13,239.9,19.93
2,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18 14:48:30,199.0,17.87
3,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15 10:10:18,12.99,12.79
4,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13 13:57:51,199.9,18.14


In [12]:
# Preview the first five rows of the products table (head() defaults to 5).
df_products.head()

Unnamed: 0,product_id,product_category_name,product_name_lenght,product_description_lenght,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm
0,1e9e8ef04dbcff4541ed26657ea517e5,perfumaria,40.0,287.0,1.0,225.0,16.0,10.0,14.0
1,3aa071139cb16b67ca9e5dea641aaa2f,artes,44.0,276.0,1.0,1000.0,30.0,18.0,20.0
2,96bd76ec8810374ed1b65e291975717f,esporte_lazer,46.0,250.0,1.0,154.0,18.0,9.0,15.0
3,cef67bcfe19066a932b7673e239eb23d,bebes,27.0,261.0,1.0,371.0,26.0,4.0,26.0
4,9dc1a7de274444849c219cff195d0b71,utilidades_domesticas,37.0,402.0,4.0,625.0,20.0,17.0,13.0


In [13]:
# Preview the first five rows of the sellers table (head() defaults to 5).
df_sellers.head()

Unnamed: 0,seller_id,seller_zip_code_prefix,seller_city,seller_state
0,3442f8959a84dea7ee197c632cb2df15,13023,campinas,SP
1,d1b65fc7debc3361ea86b5f14c68d2e2,13844,mogi guacu,SP
2,ce3ad9de960102d0677a81f5d0bb7b2d,20031,rio de janeiro,RJ
3,c0f3eea2e14555b6faeea3dd58c1b1c3,4195,sao paulo,SP
4,51a04a8a6bdcb23deccc82b0b80742cf,12914,braganca paulista,SP


## Data Manipulation