#### Creates Customer Data

In [None]:
from faker import Faker
import pandas as pd
import random

fake = Faker()
num_customers = 500


def _fake_customer(cust_id):
    """Build one synthetic customer row with deliberately injected nulls.

    Each Faker value is generated exactly once, so the value that is tested
    is the same value that is stored. Generating a second value for the test
    lets names starting with 'P' and emails longer than 28 characters reach
    the table anyway.
    """
    name = fake.name()
    email = fake.email()
    return {
        'cust_id': cust_id,
        # Simulated dirty data: names starting with 'P' become missing.
        'name': None if name.startswith('P') else name,
        # Simulated dirty data: emails longer than 28 characters become missing.
        'email': None if len(email) > 28 else email,
        'address': fake.address(),
        'phone': fake.phone_number(),
    }


customers = [_fake_customer(cust_id) for cust_id in range(1, num_customers + 1)]
customers_df = pd.DataFrame(customers)
customers_df

#### Sums of Nulls for 'email' and 'name'

In [None]:
# Number of customer rows whose 'email' value is missing.
customer_check = int(customers_df['email'].isna().sum())
print(customer_check)

In [None]:
# Number of customer rows whose 'name' value is missing.
name_check = int(customers_df['name'].isna().sum())
name_check

#### Creates Product Data

In [None]:
# Product names; each one becomes a single row of the products table.
product = [
    'grass', 'hay', 'corn', 'soybean', 'wheat',
    'cabbage', 'alfalfa', 'peonies', 'hibiscus',
]
# Categories are picked at random per row, independent of the product name.
product_category = ['implements', 'seed', 'decorative', 'feed', 'produce']

# One dict per product: ids count up from 1, price is a random value in
# [1, 100] rounded to two decimals.
products = [
    dict(
        product_id=row_id,
        product_name=item_name,
        price=round(random.uniform(1, 100), 2),
        product_category=random.choice(product_category),
    )
    for row_id, item_name in enumerate(product, start=1)
]

products_df = pd.DataFrame(products)
products_df

In [None]:
import random
num_orders = 1000
store_id = [1, 2, 3, 4]

# Materialise the candidate ids once; rebuilding these lists for every order
# repeats the same conversion num_orders times.
customer_ids = customers_df['cust_id'].tolist()
product_ids = products_df['product_id'].tolist()


def _fake_order_date():
    """Return a fake order date, or a deliberately invalid value.

    The date is generated once and that same value is both tested and
    returned. When it falls in 2002 it is replaced by one of two bad values
    (a non-date string or an integer) to simulate dirty data.
    """
    candidate = fake.date()
    if pd.to_datetime(candidate).year == 2002:
        return random.choice(['not_a_date', 20002])
    return candidate


orders = [
    {
        'order_id': order_id,
        'cust_id': random.choice(customer_ids),
        'product_id': random.choice(product_ids),
        'order_date': _fake_order_date(),
        'store_id': random.choice(store_id),
        'quantity': random.randint(1, 3),
    }
    for order_id in range(1, num_orders + 1)
]
print('rando ORDER list generated')

In [None]:
# Turn the list of order dicts into a DataFrame (one row per order) and show it.
orders_df = pd.DataFrame(data=orders)
orders_df

In [None]:
# How many orders carry the integer sentinel 20002 in 'order_date' (0 if none).
order_date_counts = orders_df['order_date'].value_counts()
count_bad_i = order_date_counts.get(20002, 0)
print(count_bad_i)

In [None]:
# How many orders carry the string sentinel 'not_a_date' in 'order_date' (0 if none).
date_value_counts = orders_df['order_date'].value_counts()
count_bad_s = date_value_counts.get('not_a_date', 0)
print(count_bad_s)

### Sanity Check

In [None]:
# Referential-integrity check: every order must reference a customer and a
# product that exist. On failure the offending ids are reported so the
# problem can be located without further digging.
orphan_cust_ids = orders_df.loc[
    ~orders_df['cust_id'].isin(customers_df['cust_id']), 'cust_id'
]
assert orphan_cust_ids.empty, (
    f"orders reference unknown cust_id values: {sorted(orphan_cust_ids.unique())}"
)

orphan_product_ids = orders_df.loc[
    ~orders_df['product_id'].isin(products_df['product_id']), 'product_id'
]
assert orphan_product_ids.empty, (
    f"orders reference unknown product_id values: {sorted(orphan_product_ids.unique())}"
)

In [None]:
# orders_df
# customers_df
# products_df

#### Check for bad data in orders

In [None]:
# Show the dtype pandas assigned to each orders column.
check_o_types = orders_df.dtypes
print(check_o_types)

In [None]:
# Keep the raw values: coercion overwrites every unparseable entry with NaT,
# after which the offending inputs can no longer be inspected.
raw_order_dates = orders_df['order_date'].copy()

# errors='coerce' turns anything that cannot be parsed as a date into NaT.
orders_df['order_date'] = pd.to_datetime(raw_order_dates, errors='coerce')
bd_mask = orders_df['order_date'].isna()

# Report the original values that failed to parse. value_counts must be
# *called* (printing the attribute only shows the bound method), and
# dropna=False keeps null entries visible, e.g. when this cell is re-run
# on an already-coerced column.
bad_dates = raw_order_dates[bd_mask]
print(bad_dates.value_counts(dropna=False))

#### Check for bad data in customers

In [None]:
# Show the dtype pandas assigned to each customers column.
check_c_types = customers_df.dtypes
print(check_c_types)

In [None]:
# Select the customer rows whose 'name' is missing.
customer_mask = customers_df['name'].isna()
bad_names = customers_df.loc[customer_mask, 'name']
# value_counts must be called to produce counts; dropna=False is required
# because every selected value is null and would otherwise be dropped.
print(bad_names.value_counts(dropna=False))

In [None]:
# Select the customer rows whose 'email' is missing.
email_mask = customers_df['email'].isna()
bad_emails = customers_df.loc[email_mask, 'email']
# value_counts must be called to produce counts; dropna=False is required
# because every selected value is null and would otherwise be dropped.
print(bad_emails.value_counts(dropna=False))

#### Check for bad data in products

In [None]:
# Show the dtype pandas assigned to each products column.
check_p_types = products_df.dtypes
print(check_p_types)

In [None]:
# Select the product rows whose 'product_name' is missing.
product_mask = products_df['product_name'].isna()
bad_p_name = products_df.loc[product_mask, 'product_name']
# value_counts must be called to produce counts; dropna=False is required
# because every selected value is null and would otherwise be dropped.
print(bad_p_name.value_counts(dropna=False))

### Maps tables to DB

In [63]:
from sqlalchemy import create_engine, text

# NOTE(review): the connection URL, including credentials, is hard-coded.
engine = create_engine('postgresql://admin:admin@localhost:5433/postgres')

# `if not exists` keeps this cell re-runnable: a plain `create table` raises
# once the table is already present in the database.
orders_table = """create table if not exists orders (
	order_id int generated always as identity primary key,
	cust_id int,
	order_date date,
	product_id int,
	store_id int,
	quantity int
);"""

# engine.begin() wraps the statement in a transaction that is committed on
# success and rolled back if the statement raises.
with engine.begin() as conn:
    conn.execute(text(orders_table))

In [64]:
# `if not exists` keeps this cell re-runnable: a plain `create table` raises
# once the table is already present in the database.
# NOTE(review): this column is named product_cat, while the products
# DataFrame built in this notebook uses 'product_category' - confirm which
# name is intended.
products_table = """create table if not exists products (
	product_id int generated always as identity primary key,
	product_name varchar,
	price float,
	product_cat varchar
);"""

# engine.begin() wraps the statement in a transaction that is committed on
# success and rolled back if the statement raises.
with engine.begin() as conn:
    conn.execute(text(products_table))

In [65]:
# `if not exists` keeps this cell re-runnable: a plain `create table` raises
# once the table is already present in the database.
# NOTE(review): the key column is named customer_id here, while the orders
# table and the customers DataFrame in this notebook use cust_id - confirm
# which name is intended.
customers_table = """create table if not exists customers (
	customer_id int generated always as identity primary key,
	name varchar,
	email varchar,
	address varchar,
	phone varchar
);"""

# engine.begin() wraps the statement in a transaction that is committed on
# success and rolled back if the statement raises.
with engine.begin() as conn:
    conn.execute(text(customers_table))

### Loads data to DB

In [66]:
# Customers Data
# Writes every customers_df row to the 'customers' table, without the index.
# NOTE(review): if_exists='replace' drops an existing 'customers' table before
# inserting, so a schema created by an earlier `create table` is presumably
# not kept - confirm this is intended.
customers_df.to_sql(name='customers', con=engine, index=False, if_exists='replace')

500

In [67]:
# Orders Data
# Writes every orders_df row to the 'orders' table, without the index.
# NOTE(review): if_exists='replace' drops an existing 'orders' table before
# inserting, so a schema created by an earlier `create table` is presumably
# not kept - confirm this is intended.
orders_df.to_sql(name='orders', con=engine, index=False, if_exists='replace')

1000

In [68]:
# Products Data
# Writes every products_df row to the 'products' table, without the index.
# NOTE(review): if_exists='replace' drops an existing 'products' table before
# inserting, so a schema created by an earlier `create table` is presumably
# not kept - confirm this is intended.
products_df.to_sql(name='products', con=engine, index=False, if_exists='replace')

9