- generate_employees_data
  - employee_id
  - name
  - hire_date
  - salary
  - department_id
- generate_departments_data
  - department_id
  - department_name
- generate_sales_data
  - order_id #!NEW
  - sale_id
  - product_id
  - employee_id
  - sale_date
  - sale_amount
- generate_products_data
  - product_id
  - product_name
  - price
  - category_id #!NEW
- generate_users
  - user_id
  - name
  - email
  - age
  - signup_date
- generate_orders
  - order_id
  - user_id
  - order_date
  - amount
- generate_reviews
  - review_id
  - order_id #!NEW
  - user_id
  - product_id
  - review_text
  - rating
  - review_date
- generate_inventory
  - product_id
  - warehouse_id
  - quantity

In [7]:
import psycopg2
import random
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()

In [4]:
# Generate employees table data
num_records=5000
data = []
for _ in range(num_records):
    data.append({
        "employee_id": fake.unique.random_int(min=1, max=10000),
        "name": fake.name(),
        "hire_date": fake.date_between(start_date='-10y', end_date='today'),
        "salary": round(random.uniform(30000, 120000), 2),
        "department_id": random.randint(1, 10),
    })
data

[{'employee_id': 1738,
  'name': 'Jerome Garcia',
  'hire_date': datetime.date(2020, 5, 16),
  'salary': 54652.77,
  'department_id': 2},
 {'employee_id': 3620,
  'name': 'Nancy Wright',
  'hire_date': datetime.date(2017, 2, 18),
  'salary': 82373.93,
  'department_id': 8},
 {'employee_id': 1697,
  'name': 'Kenneth Davis',
  'hire_date': datetime.date(2019, 5, 13),
  'salary': 48518.86,
  'department_id': 9},
 {'employee_id': 49,
  'name': 'Jean Young',
  'hire_date': datetime.date(2020, 7, 28),
  'salary': 101371.06,
  'department_id': 5},
 {'employee_id': 9389,
  'name': 'Christine Rivera',
  'hire_date': datetime.date(2023, 9, 23),
  'salary': 98691.36,
  'department_id': 5},
 {'employee_id': 375,
  'name': 'Jerry Marshall',
  'hire_date': datetime.date(2021, 7, 23),
  'salary': 45128.44,
  'department_id': 10},
 {'employee_id': 7801,
  'name': 'Christopher Allen',
  'hire_date': datetime.date(2016, 10, 22),
  'salary': 118273.56,
  'department_id': 8},
 {'employee_id': 4977,
  'nam

In [5]:
# Generate departments table data
num_departments=10
data = []
for dept_id in range(1, num_departments + 1):
    data.append({
        "department_id": dept_id,
        "department_name": fake.job(),
    })
data

[{'department_id': 1,
  'department_name': 'Outdoor activities/education manager'},
 {'department_id': 2, 'department_name': 'Physiotherapist'},
 {'department_id': 3, 'department_name': 'Early years teacher'},
 {'department_id': 4, 'department_name': 'Higher education careers adviser'},
 {'department_id': 5, 'department_name': 'Commercial/residential surveyor'},
 {'department_id': 6, 'department_name': 'Airline pilot'},
 {'department_id': 7, 'department_name': 'Corporate investment banker'},
 {'department_id': 8, 'department_name': 'Data scientist'},
 {'department_id': 9, 'department_name': 'Energy manager'},
 {'department_id': 10, 'department_name': 'Broadcast engineer'}]

In [15]:
# Generate sales data
num_records=1000000
data = []
for _ in range(num_records):
    data.append({
        "order_id": fake.unique.random_int(min=1, max=10000000),
        "sale_id": fake.unique.random_int(min=1, max=10000000),
        "product_id": random.randint(1, 1000),
        "employee_id": random.randint(1, 10000),
        "sale_date": fake.date_time_between(start_date='-2y', end_date='now'),
        "sale_amount": round(random.uniform(50, 5000), 2),
    })
data

[{'order_id': 7493340,
  'sale_id': 1797477,
  'product_id': 302,
  'employee_id': 9367,
  'sale_date': datetime.datetime(2024, 10, 10, 14, 45, 24, 130750),
  'sale_amount': 2047.55},
 {'order_id': 9939195,
  'sale_id': 3595160,
  'product_id': 670,
  'employee_id': 7572,
  'sale_date': datetime.datetime(2023, 3, 27, 4, 27, 57, 565193),
  'sale_amount': 1083.44},
 {'order_id': 5846707,
  'sale_id': 5720334,
  'product_id': 310,
  'employee_id': 9469,
  'sale_date': datetime.datetime(2024, 12, 31, 1, 14, 31, 602190),
  'sale_amount': 2865.19},
 {'order_id': 9231938,
  'sale_id': 819673,
  'product_id': 487,
  'employee_id': 4974,
  'sale_date': datetime.datetime(2024, 6, 30, 18, 9, 55, 816937),
  'sale_amount': 1481.52},
 {'order_id': 648060,
  'sale_id': 2209437,
  'product_id': 459,
  'employee_id': 2222,
  'sale_date': datetime.datetime(2023, 10, 26, 6, 35, 3, 472596),
  'sale_amount': 356.91},
 {'order_id': 1742690,
  'sale_id': 8329963,
  'product_id': 812,
  'employee_id': 5736,
 

In [16]:
# Generate products table data
num_records=1000
data = []
for product_id in range(1, num_records + 1):
    data.append({
        "product_id": product_id,
        "product_name": fake.word(),
        "price": round(random.uniform(10, 1000), 2),
        "category_id": random.randint(1, 10),
    })
data

[{'product_id': 1,
  'product_name': 'personal',
  'price': 839.14,
  'category_id': 6},
 {'product_id': 2, 'product_name': 'then', 'price': 417.01, 'category_id': 4},
 {'product_id': 3, 'product_name': 'time', 'price': 741.12, 'category_id': 5},
 {'product_id': 4,
  'product_name': 'commercial',
  'price': 753.66,
  'category_id': 10},
 {'product_id': 5, 'product_name': 'best', 'price': 582.92, 'category_id': 9},
 {'product_id': 6, 'product_name': 'policy', 'price': 20.53, 'category_id': 2},
 {'product_id': 7,
  'product_name': 'design',
  'price': 165.51,
  'category_id': 9},
 {'product_id': 8, 'product_name': 'goal', 'price': 111.69, 'category_id': 9},
 {'product_id': 9,
  'product_name': 'prepare',
  'price': 905.36,
  'category_id': 8},
 {'product_id': 10, 'product_name': 'nor', 'price': 892.13, 'category_id': 8},
 {'product_id': 11,
  'product_name': 'significant',
  'price': 877.14,
  'category_id': 6},
 {'product_id': 12, 'product_name': 'song', 'price': 560.93, 'category_id': 

In [19]:
# Generate synthetic data for the users table.
num_records=10000
data = []
for _ in range(num_records):
    data.append({
        "user_id": fake.unique.random_int(min=1, max=100000),
        "name": fake.name(),
        "email": fake.email(),
        "age": random.randint(18, 80),
        "signup_date": fake.date_between(start_date='-10y', end_date='today')
    })
data

[{'id': 20435,
  'name': 'Joshua Woods',
  'email': 'clarence15@example.org',
  'age': 64,
  'signup_date': datetime.date(2018, 12, 19)},
 {'id': 66156,
  'name': 'Crystal Phillips',
  'email': 'wendy27@example.net',
  'age': 54,
  'signup_date': datetime.date(2016, 7, 2)},
 {'id': 17028,
  'name': 'Karen Vazquez',
  'email': 'sscott@example.org',
  'age': 61,
  'signup_date': datetime.date(2018, 4, 22)},
 {'id': 6063,
  'name': 'Eric Alexander',
  'email': 'ahopkins@example.com',
  'age': 21,
  'signup_date': datetime.date(2016, 4, 9)},
 {'id': 41012,
  'name': 'Gregory Yang',
  'email': 'jamesleblanc@example.org',
  'age': 30,
  'signup_date': datetime.date(2018, 4, 15)},
 {'id': 50078,
  'name': 'Alexander Crawford',
  'email': 'kathleen71@example.net',
  'age': 64,
  'signup_date': datetime.date(2016, 7, 8)},
 {'id': 465,
  'name': 'Molly Lee',
  'email': 'davisdanielle@example.net',
  'age': 56,
  'signup_date': datetime.date(2015, 2, 22)},
 {'id': 33073,
  'name': 'Pamela Gonzale

In [25]:
#Generate synthetic data for the orders table.
num_records=50000
data = []
for _ in range(num_records):
    user_id = fake.random_int(min=1, max=100000)
    data.append({
        "order_id": fake.unique.random_int(min=1, max=10000000),
        "user_id": user_id,
        "order_date": fake.date_time_between(start_date='-2y', end_date='now'),
        "amount": round(random.uniform(10, 1000), 2)
    })
data

[{'order_id': 734876,
  'user_id': 64343,
  'order_date': datetime.datetime(2023, 3, 19, 21, 21, 9, 906260),
  'amount': 479.52},
 {'order_id': 9008931,
  'user_id': 28909,
  'order_date': datetime.datetime(2024, 3, 3, 23, 59, 59, 177836),
  'amount': 197.3},
 {'order_id': 5240387,
  'user_id': 88124,
  'order_date': datetime.datetime(2024, 6, 4, 7, 5, 47, 31050),
  'amount': 204.42},
 {'order_id': 208182,
  'user_id': 24488,
  'order_date': datetime.datetime(2024, 2, 25, 4, 37, 24, 839269),
  'amount': 714.84},
 {'order_id': 8429542,
  'user_id': 14265,
  'order_date': datetime.datetime(2023, 10, 6, 12, 16, 31, 759466),
  'amount': 415.22},
 {'order_id': 90097,
  'user_id': 91838,
  'order_date': datetime.datetime(2024, 10, 9, 17, 17, 19, 760334),
  'amount': 903.27},
 {'order_id': 7675859,
  'user_id': 76603,
  'order_date': datetime.datetime(2024, 3, 23, 7, 54, 35, 654950),
  'amount': 454.84},
 {'order_id': 6966994,
  'user_id': 1068,
  'order_date': datetime.datetime(2023, 8, 21, 

In [26]:
#Generate synthetic data for the reviews table.
num_records=2000
data = []
for _ in range(num_records):
    user_id = fake.random_int(min=1, max=1000)
    order_id = fake.random_int(min=1, max=100000)
    product_id = fake.random_int(min=1, max=500)
    data.append({
        "review_id": fake.unique.random_int(min=1, max=50000),
        "order_id": order_id,
        "user_id": user_id,
        "product_id": product_id,
        "review_text": fake.sentence(nb_words=10),
        "rating": random.randint(1, 5),
        "review_date": fake.date_time_between(start_date='-1y', end_date='now')
    })
data

[{'review_id': 21863,
  'order_id': 25534,
  'user_id': 168,
  'product_id': 51,
  'review_text': 'Point teach down against city just.',
  'rating': 5,
  'review_date': datetime.datetime(2024, 5, 26, 23, 21, 6, 787606)},
 {'review_id': 22069,
  'order_id': 38833,
  'user_id': 879,
  'product_id': 9,
  'review_text': 'Wrong central number watch meeting assume wear focus.',
  'rating': 5,
  'review_date': datetime.datetime(2024, 4, 16, 5, 56, 49, 473432)},
 {'review_id': 36902,
  'order_id': 73935,
  'user_id': 620,
  'product_id': 430,
  'review_text': 'Might out under involve his past agreement.',
  'rating': 5,
  'review_date': datetime.datetime(2024, 2, 11, 19, 18, 38, 29196)},
 {'review_id': 40396,
  'order_id': 21146,
  'user_id': 684,
  'product_id': 221,
  'review_text': 'Rule myself class participant condition with friend whose might break.',
  'rating': 5,
  'review_date': datetime.datetime(2024, 6, 7, 10, 16, 10, 253929)},
 {'review_id': 44265,
  'order_id': 5512,
  'user_id':

In [29]:
#Generate synthetic data for the inventory table.

num_records=2000
num_warehouses = 5
data = []
for _ in range(num_records):
    for warehouse_id in range(1, num_warehouses + 1):
        data.append({
            "product_id": fake.random_int(min=1, max=500),
            "warehouse_id": warehouse_id,
            "quantity": random.randint(0, 1000)
        })
data

[{'product_id': 409, 'warehouse_id': 1, 'quantity': 783},
 {'product_id': 93, 'warehouse_id': 2, 'quantity': 160},
 {'product_id': 57, 'warehouse_id': 3, 'quantity': 171},
 {'product_id': 324, 'warehouse_id': 4, 'quantity': 770},
 {'product_id': 103, 'warehouse_id': 5, 'quantity': 589},
 {'product_id': 339, 'warehouse_id': 1, 'quantity': 897},
 {'product_id': 135, 'warehouse_id': 2, 'quantity': 438},
 {'product_id': 319, 'warehouse_id': 3, 'quantity': 14},
 {'product_id': 342, 'warehouse_id': 4, 'quantity': 751},
 {'product_id': 398, 'warehouse_id': 5, 'quantity': 146},
 {'product_id': 176, 'warehouse_id': 1, 'quantity': 439},
 {'product_id': 216, 'warehouse_id': 2, 'quantity': 349},
 {'product_id': 252, 'warehouse_id': 3, 'quantity': 400},
 {'product_id': 302, 'warehouse_id': 4, 'quantity': 396},
 {'product_id': 449, 'warehouse_id': 5, 'quantity': 882},
 {'product_id': 49, 'warehouse_id': 1, 'quantity': 70},
 {'product_id': 469, 'warehouse_id': 2, 'quantity': 600},
 {'product_id': 226