#### Load the data into DataFrames

In [2]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd

In [8]:
# Read the aisle lookup table from aisles.csv and display it.
aisles_df = pd.read_csv('aisles.csv')
aisles_df

Unnamed: 0,aisle_id,aisle
0,1,prepared soups salads
1,2,specialty cheeses
2,3,energy granola bars
3,4,instant foods
4,5,marinades meat preparation
...,...,...
129,130,hot cereal pancake mixes
130,131,dry pasta
131,132,beauty
132,133,muscles joints pain relief


In [11]:
# Work with a 10,000-row random sample of orders.csv. The fixed random_state
# makes the sample identical on every run, so re-executing the notebook
# reproduces the same rows.
orders_df = pd.read_csv("orders.csv").sample(n=10000, random_state=42)
orders_df.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
823725,2963744,49540,prior,5,3,19,2.0
305366,1404019,18458,prior,7,4,15,30.0
3053501,1517505,184171,prior,11,3,17,12.0
21930,2690919,1372,prior,13,5,15,5.0
855282,1984669,51394,prior,13,3,18,2.0


In [14]:
# Check the dimensions (rows, columns) of the sampled orders frame.
orders_df.shape

(10000, 7)

In [12]:
# Work with a 10,000-row random sample of order_products.csv, using a fixed
# random_state so the sample is reproducible across runs.
# NOTE(review): this sample is drawn independently of the orders sample, so
# order_id values here may not be present in orders_df — confirm that is
# intended.
order_products_df = pd.read_csv("order_products.csv").sample(n=10000, random_state=42)
order_products_df.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered
147666,15560,46222,8,0
556936,58923,46979,8,1
29501507,3111431,14996,2,1
18768027,1979480,2050,20,0
16115346,1700267,43662,10,0


In [13]:
# Check the dimensions (rows, columns) of the sampled order_products frame.
order_products_df.shape

(10000, 4)

In [15]:
# Read the full product catalogue from products.csv and preview the first rows.
products_df = pd.read_csv("products.csv")
products_df.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id
0,1,Chocolate Sandwich Cookies,61,19
1,2,All-Seasons Salt,104,13
2,3,Robust Golden Unsweetened Oolong Tea,94,7
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1
4,5,Green Chile Anytime Sauce,5,13


In [16]:
# Check the dimensions (rows, columns) of the products frame.
products_df.shape

(49688, 4)

In [17]:
# Read the department lookup table from departments.csv and display it.
departments_df = pd.read_csv("departments.csv")
departments_df

Unnamed: 0,department_id,department
0,1,frozen
1,2,other
2,3,bakery
3,4,produce
4,5,alcohol
5,6,international
6,7,beverages
7,8,pets
8,9,dry goods pasta
9,10,bulk


In [18]:
# Check the dimensions (rows, columns) of the departments frame.
departments_df.shape

(21, 2)

#### Create a connection to the PostgreSQL database

In [19]:
import psycopg2
from sqlalchemy import create_engine

In [34]:
# Create the connection.
# The password is read from the PGPASSWORD environment variable so that no
# credential is stored in the notebook.
try:
    conn = psycopg2.connect(
        dbname="ecom_db",
        user="postgres",
        password=os.environ["PGPASSWORD"],
        port="5432",
    )
except psycopg2.Error:
    # Report the failure, then re-raise: swallowing it would leave `conn`
    # undefined and the following cells would fail with a confusing NameError.
    print("connection was not successful")
    raise

In [35]:
# Open a cursor on the psycopg2 connection; the CREATE TABLE statements are executed through it.
cur = conn.cursor()

In [36]:
# Build the SQLAlchemy engine used by DataFrame.to_sql. The password comes from
# the PGPASSWORD environment variable rather than being hardcoded, and is
# percent-encoded because characters such as "@" or "/" would otherwise break
# parsing of the connection URL.
db_password = quote(os.environ["PGPASSWORD"], safe="")
engine = create_engine(f"postgresql+psycopg2://postgres:{db_password}@localhost/ecom_db")

#### Create the tables using the connection

In [37]:
# Create the aisles lookup table. IF NOT EXISTS keeps this cell re-runnable:
# without it, a second run fails because the table already exists and leaves
# the open transaction in an aborted state.
cur.execute("""
    CREATE TABLE IF NOT EXISTS aisles(
        aisle_id INTEGER PRIMARY KEY,
        aisle VARCHAR(255)
        )
""")

In [38]:
# Create the departments lookup table. IF NOT EXISTS keeps this cell
# re-runnable: without it, a second run fails because the table already exists
# and leaves the open transaction in an aborted state.
cur.execute("""
    CREATE TABLE IF NOT EXISTS departments(
        department_id INTEGER PRIMARY KEY,
        department VARCHAR(255)
        )
""")

In [39]:
# Create the products table. Each product references one aisle and one
# department, so the aisles and departments tables must already exist.
# IF NOT EXISTS keeps this cell re-runnable: without it, a second run fails
# because the table already exists and leaves the open transaction aborted.
cur.execute("""
    CREATE TABLE IF NOT EXISTS products(
        product_id INTEGER PRIMARY KEY,
        product_name VARCHAR(255),
        aisle_id INTEGER,
        department_id INTEGER,
        FOREIGN KEY(aisle_id) REFERENCES aisles(aisle_id),
        FOREIGN KEY(department_id) REFERENCES departments(department_id)
        )
""")

In [40]:
# Create the orders table. It has no eval_set column; that column is dropped
# from orders_df before the data is loaded.
# IF NOT EXISTS keeps this cell re-runnable: without it, a second run fails
# because the table already exists and leaves the open transaction aborted.
# NOTE(review): days_since_prior_order is displayed as a float in orders_df
# (e.g. 2.0) — confirm INTEGER is the intended column type.
cur.execute("""
    CREATE TABLE IF NOT EXISTS orders(
        order_id INTEGER PRIMARY KEY,
        user_id INTEGER,
        order_number INTEGER,
        order_dow INTEGER,
        order_hour_of_day INTEGER,
        days_since_prior_order INTEGER
        )
""")

In [50]:
# Create the order_products table: one row per (order, product) pair, with the
# product referencing the products table.
# IF NOT EXISTS keeps this cell re-runnable: without it, a second run fails
# because the table already exists and leaves the open transaction aborted.
# NOTE(review): order_id has no FOREIGN KEY to orders(order_id) — presumably
# because order_products is sampled independently of orders; confirm.
cur.execute("""
    CREATE TABLE IF NOT EXISTS order_products(
        order_id INTEGER,
        product_id INTEGER,
        add_to_cart_order INTEGER,
        reordered INTEGER,
        PRIMARY KEY(order_id, product_id),
        FOREIGN KEY(product_id) REFERENCES products(product_id)
        )
""")

In [51]:
# Commit the transaction on the psycopg2 connection so the CREATE TABLE statements take effect.
conn.commit()

#### Drop the unneeded `eval_set` column from the orders DataFrame


In [43]:
# eval_set has no matching column in the orders table, so remove it before
# loading. Reassigning (instead of inplace=True) with errors="ignore" makes the
# cell safe to re-run: a second run is a no-op rather than a KeyError.
orders_df = orders_df.drop(columns="eval_set", errors="ignore")

#### Copy all data into the tables

In [44]:
# Append the aisles rows to the existing "aisles" table; index=False keeps the DataFrame index out of the table.
aisles_df.to_sql("aisles", con = engine, if_exists = "append", index = False)

134

In [45]:
# Append the departments rows to the existing "departments" table; index=False keeps the DataFrame index out of the table.
departments_df.to_sql("departments", con = engine, if_exists = "append", index = False)

21

In [46]:
# Append the products rows to the existing "products" table. The table's foreign keys
# reference aisles and departments, so those two tables are loaded first.
products_df.to_sql("products", con = engine, if_exists = "append", index = False)

688

In [48]:
# Append the sampled orders (with eval_set already dropped) to the existing "orders" table.
orders_df.to_sql("orders", con = engine, if_exists = "append", index = False)

1000

In [52]:
# Append the sampled order/product rows to the existing "order_products" table. The table's
# foreign key references products, so products is loaded first.
order_products_df.to_sql("order_products", con = engine, if_exists = "append", index = False)

1000