#### Creates Customer Data

In [64]:
from faker import Faker
import pandas as pd
import random

fake = Faker()

# Seed both random sources so Restart & Run All reproduces the same synthetic
# data (the later product/order cells draw from the same `random` module).
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

num_customers = 500


def _name_or_none(name):
    """Return `name`, or None when it starts with 'P' (simulated missing value)."""
    return None if name.startswith('P') else name


def _email_or_none(email, max_len=28):
    """Return `email`, or None when it is longer than `max_len` characters."""
    return email if len(email) <= max_len else None


# Each fake value is generated exactly once and that same value is both tested
# and stored. Calling fake.name()/fake.email() separately for the test and for
# the stored value would check one random value and keep a different one.
customers = [
    {
        'cust_id': cust_id,
        'name': _name_or_none(fake.name()),
        'email': _email_or_none(fake.email()),
    }
    for cust_id in range(1, num_customers + 1)
]

customers_df = pd.DataFrame(customers)
customers_df

Unnamed: 0,cust_id,name,email
0,1,Rebecca Lee,zwilliams@example.net
1,2,Antonio Thomas,monica54@example.net
2,3,Sarah James,angela64@example.com
3,4,Jessica George,bryantandrew@example.net
4,5,Jessica Patel,natalie24@example.net
...,...,...,...
495,496,Kaitlin Cole,franciscohall@example.org
496,497,Michele Baldwin,gstokes@example.net
497,498,Tiffany Stokes,phelpscrystal@example.com
498,499,Christine Conrad,kcarpenter@example.org


#### Null counts for 'email' and 'name'

In [65]:
# Number of customer rows whose 'email' is missing.
quick_view = customers_df['email'].isna().sum()
quick_view

np.int64(8)

In [66]:
# Number of customer rows whose 'name' is missing.
name_view = customers_df['name'].isna().sum()
name_view

np.int64(10)

#### Creates Product Data

In [67]:
# Catalogue inputs: the item names, and the categories an item may be given.
product = [
    'grass', 'hay', 'corn', 'soybean', 'wheat',
    'cabbage', 'alfalfa', 'peonies', 'hibiscus',
]
product_category = ['implements', 'seed', 'decorative', 'feed', 'produce']


def _product_row(row_id, item_name):
    """Build one product record with a random price and a random category."""
    # The price is drawn before the category, matching the key order of the record.
    price = round(random.uniform(1, 100), 2)
    category = random.choice(product_category)
    return {
        'product_id': row_id,
        'product_name': item_name,
        'price': price,
        'product_category': category,
    }


# One record per item name; ids are 1-based positions in `product`.
products = [
    _product_row(row_id, item_name)
    for row_id, item_name in enumerate(product, start=1)
]

products_df = pd.DataFrame(products)
products_df

Unnamed: 0,product_id,product_name,price,product_category
0,1,grass,60.57,decorative
1,2,hay,1.79,seed
2,3,corn,71.31,seed
3,4,soybean,96.63,decorative
4,5,wheat,81.56,seed
5,6,cabbage,67.27,implements
6,7,alfalfa,13.5,seed
7,8,peonies,91.5,seed
8,9,hibiscus,64.21,implements


In [68]:
num_orders = 1000

# Build the candidate id lists once. They do not change between orders, so
# converting the columns inside the comprehension would repeat the same
# conversion for every one of the `num_orders` rows.
cust_ids = customers_df['cust_id'].tolist()
product_ids = products_df['product_id'].tolist()

# Each order picks a random existing customer and product, so every foreign
# key refers to a row present in customers_df / products_df.
orders = [
    {
        'order_id': order_id,
        'cust_id': random.choice(cust_ids),
        'product_id': random.choice(product_ids),
        # Values render like 1984-08-31 in the displayed frame.
        'order_date': fake.date(),
    }
    for order_id in range(1, num_orders + 1)
]
print('rando ORDER list generated')

rando ORDER list generated


In [69]:
# Turn the list of order records into a DataFrame (one row per order dict).
orders_df = pd.DataFrame(data=orders)
orders_df

Unnamed: 0,order_id,cust_id,product_id,order_date
0,1,208,9,1984-08-31
1,2,20,2,1982-04-14
2,3,409,4,2007-01-27
3,4,78,5,1982-03-04
4,5,94,4,2024-06-29
...,...,...,...,...
995,996,52,4,1977-05-28
996,997,449,5,2017-08-07
997,998,478,9,1995-05-06
998,999,118,4,1985-02-17


### Sanity Check

In [70]:
# Referential-integrity check: every order must point at an existing customer
# and an existing product.
cust_fk_ok = orders_df['cust_id'].isin(customers_df['cust_id'])
assert cust_fk_ok.all()

product_fk_ok = orders_df['product_id'].isin(products_df['product_id'])
assert product_fk_ok.all()

### Creates the tables in the DB

In [71]:
import os

from sqlalchemy import create_engine, text

# Read the connection URL from the environment so credentials do not have to
# live in the notebook. The fallback is the local development database, so the
# notebook still runs when DATABASE_URL is not set.
DB_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://admin:admin@localhost:5433/postgres',
)
engine = create_engine(DB_URL)

# `if not exists` keeps this cell re-runnable: a plain `create table` raises
# once the table is already present (e.g. on Restart & Run All).
# NOTE(review): the orders load cell calls to_sql(..., if_exists='replace'),
# which drops and recreates the table from the DataFrame, so the identity /
# primary-key / date definitions below do not survive that load.
orders_table = """create table if not exists orders (
	order_id int generated always as identity primary key,
	cust_id int,
	order_date date,
	product_id int
);"""

# engine.begin() opens a transaction and commits it when the block exits
# without error, so no explicit commit() is needed.
with engine.begin() as conn:
    conn.execute(text(orders_table))

In [72]:
# `if not exists` keeps this cell re-runnable once the table is present.
# The category column is named `product_category` to match the column name in
# products_df (it was `product_cat`, which no DataFrame column uses).
# NOTE(review): the products load cell calls to_sql(..., if_exists='replace'),
# which drops and recreates the table from the DataFrame, so this schema does
# not survive that load.
products_table = """create table if not exists products (
	product_id int generated always as identity primary key,
	product_name varchar,
	price float,
	product_category varchar
);"""

# engine.begin() commits on successful exit of the block.
with engine.begin() as conn:
    conn.execute(text(products_table))

In [73]:
# `if not exists` keeps this cell re-runnable once the table is present.
# The key column is named `cust_id` to match customers_df and the `cust_id`
# column of the orders table (it was `customer_id`, which nothing else uses).
# NOTE(review): the customers load cell calls to_sql(..., if_exists='replace'),
# which drops and recreates the table from the DataFrame, so this schema does
# not survive that load.
customers_table = """create table if not exists customers (
	cust_id int generated always as identity primary key,
	name varchar,
	email varchar
);"""

# engine.begin() commits on successful exit of the block.
with engine.begin() as conn:
    conn.execute(text(customers_table))

### Loads data to DB

In [74]:
# Customers data -> `customers` table.
# if_exists='replace' drops any existing `customers` table before writing, so
# the table ends up with the DataFrame's columns; the index is not written.
customers_df.to_sql(name='customers', con=engine, index=False, if_exists='replace')

500

In [75]:
# Orders data -> `orders` table.
# if_exists='replace' drops any existing `orders` table before writing, so
# the table ends up with the DataFrame's columns; the index is not written.
orders_df.to_sql(name='orders', con=engine, index=False, if_exists='replace')

1000

In [76]:
# Products data -> `products` table.
# if_exists='replace' drops any existing `products` table before writing, so
# the table ends up with the DataFrame's columns; the index is not written.
products_df.to_sql(name='products', con=engine, index=False, if_exists='replace')

9