In [1]:
import os 
import csv
import random
from faker import Faker

In [2]:
# Root folder for generated datasets, relative to the notebook's working directory.
data_dir = '../../../data'
# Name of the table being generated; it drives both the folder and the file name.
table_name = 'dim_product'
output_dir = os.path.join(data_dir, f'pybrew_{table_name}')
# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating the same folder.
os.makedirs(output_dir, exist_ok=True)

# Full path of the CSV file that a later cell writes.
csv_location = os.path.join(output_dir, f'{table_name}.csv')

In [3]:
# Product catalogue as a three-level mapping:
# category name -> subcategory name -> list of product names.
categories = {
    "Hot Beverages": {
        "Coffee": [
            "Latte",
            "Cappuccino",
            "Americano",
            "Espresso",
            "Mocha",
        ],
        "Tea": [
            "Green Tea",
            "Black Tea",
            "Chai Tea",
            "Earl Grey Tea",
            "Herbal Tea",
        ],
    },
    "Cold Beverages": {
        "Iced Coffee": [
            "Iced Latte",
            "Iced Cappuccino",
            "Iced Americano",
            "Iced Mocha",
            "Cold Brew",
        ],
        "Iced Tea": [
            "Iced Green Tea",
            "Iced Black Tea",
            "Iced Chai Tea",
            "Iced Earl Grey Tea",
            "Iced Herbal Tea",
        ],
    },
    "Snacks": {
        "Baked": [
            "Croissant",
            "Bagel",
            "Cookie",
            "Muffin",
            "Cinammon Roll",
        ],
        "Packaged": [
            "Gronola Bar",
            "Trail Mix",
            "Fruit Cup",
            "M&M",
            "Chewing Gum",
        ],
    },
    "Retail": {
        "Mug": [
            "PyBrew I Wish You Were Here Mug 12oz",
            "PyBrew Texas Edition Mug 36oz",
        ],
        "Kitchen Tool": [
            "Dripper",
            "French Press",
            "Perculator",
        ],
    },
}

# Price for every product name, keyed by the exact names used in `categories`.
product_prices = {
    # Hot Beverages / Coffee
    'Latte': 3.50,
    'Cappuccino': 4.00,
    'Americano': 2.50,
    'Espresso': 2.00,
    'Mocha': 3.00,
    # Hot Beverages / Tea
    'Green Tea': 2.00,
    'Black Tea': 1.50,
    'Chai Tea': 2.50,
    'Earl Grey Tea': 2.00,
    'Herbal Tea': 1.50,
    # Cold Beverages / Iced Coffee
    'Iced Latte': 4.50,
    'Iced Cappuccino': 5.00,
    'Iced Americano': 3.50,
    'Iced Mocha': 4.00,
    'Cold Brew': 4.50,
    # Cold Beverages / Iced Tea
    'Iced Green Tea': 3.00,
    'Iced Black Tea': 2.50,
    'Iced Chai Tea': 3.50,
    'Iced Earl Grey Tea': 3.00,
    'Iced Herbal Tea': 2.50,
    # Snacks / Baked
    'Croissant': 1.00,
    'Bagel': 1.50,
    'Cookie': 2.50,
    'Muffin': 2.00,
    'Cinammon Roll': 2.50,
    # Snacks / Packaged
    'Gronola Bar': 1.50,
    'Trail Mix': 2.00,
    'Fruit Cup': 1.00,
    'M&M': 1.75,
    'Chewing Gum': 1.50,
    # Retail / Mug
    'PyBrew I Wish You Were Here Mug 12oz': 10.00,
    'PyBrew Texas Edition Mug 36oz': 15.00,
    # Retail / Kitchen Tool
    'Dripper': 5.00,
    'French Press': 10.00,
    'Perculator': 15.00,
}

# Number of product rows to generate, and the fixed seed that makes the
# generated rows (and therefore the product ids) identical on every run.
NUM_PRODUCTS = 1_000
RANDOM_SEED = 42

# A dedicated generator gives reproducible draws without touching the
# state of the module-level `random` functions.
rng = random.Random(RANDOM_SEED)

# Hoisted out of the loop: the category names do not change while generating.
category_names = list(categories)

# Each entry is a (category, subcategory, product_name, product_id, product_price) tuple.
products_list = []

for i in range(NUM_PRODUCTS):
    # Pick a random category, then a random subcategory inside it.
    category = rng.choice(category_names)
    subcategory = rng.choice(list(categories[category]))

    # Pick a random product name from the chosen subcategory.
    product_name = rng.choice(categories[category][subcategory])

    # The id is the first three upper-cased characters of each level plus a
    # running number starting at 1000; the running number makes each id unique.
    product_id = f"{category.upper()[:3]}:{subcategory.upper()[:3]}:{product_name.upper()[:3]}:{1_000 + i}"

    # Look up the price; a name missing from product_prices raises KeyError.
    product_price = product_prices[product_name]

    products_list.append((category, subcategory, product_name, product_id, product_price))

# Show only a small sample of the generated rows.
print(products_list[:10])

[('Cold Beverages', 'Iced Tea', 'Iced Black Tea', 'COL:ICE:ICE:1000', 2.5), ('Snacks', 'Baked', 'Croissant', 'SNA:BAK:CRO:1001', 1.0), ('Retail', 'Kitchen Tool', 'French Press', 'RET:KIT:FRE:1002', 10.0), ('Snacks', 'Packaged', 'Gronola Bar', 'SNA:PAC:GRO:1003', 1.5), ('Retail', 'Mug', 'PyBrew I Wish You Were Here Mug 12oz', 'RET:MUG:PYB:1004', 10.0), ('Snacks', 'Packaged', 'Trail Mix', 'SNA:PAC:TRA:1005', 2.0), ('Snacks', 'Packaged', 'M&M', 'SNA:PAC:M&M:1006', 1.75), ('Retail', 'Mug', 'PyBrew Texas Edition Mug 36oz', 'RET:MUG:PYB:1007', 15.0), ('Hot Beverages', 'Tea', 'Chai Tea', 'HOT:TEA:CHA:1008', 2.5), ('Hot Beverages', 'Coffee', 'Americano', 'HOT:COF:AME:1009', 2.5)]


In [4]:
# Column names, in the order the columns are written to the CSV file.
field_names = ['product_category', 'product_id', 'product_name', 'product_subcategory', 'product_price']

# products_list rows are (category, subcategory, name, id, price) tuples, which
# is not the header order. Reorder every row to match field_names so that ids
# land under product_id and subcategories under product_subcategory.
csv_rows = [
    (category, product_id, product_name, subcategory, product_price)
    for category, subcategory, product_name, product_id, product_price in products_list
]

try:
    # newline='' lets the csv module control line endings itself.
    with open(csv_location, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(field_names)
        writer.writerows(csv_rows)
except Exception as e:
    # Best-effort export: report the problem instead of stopping the notebook.
    print(e)
else:
    print(f"Success! File ready @ {csv_location}")

Success! File ready @ ../../../data/pybrew_dim_product/dim_product.csv
