In [1]:
import os 
import csv
import random
from faker import Faker

In [2]:
data_dir = '../../../data'
table_name = 'dim_product'
output_dir = os.path.join(data_dir, f'pybrew_{table_name}')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    
csv_location = os.path.join(output_dir, f'{table_name}.csv')

In [3]:
categories = {
    'Hot Beverages': {
        'Coffee': ['Latte', 'Cappuccino', 'Americano', 'Espresso', 'Mocha'],
        'Tea': ['Green Tea', 'Black Tea', 'Chai Tea', 'Earl Grey Tea', 'Herbal Tea']
    },
    'Cold Beverages': {
        'Iced Coffee': ['Iced Latte', 'Iced Cappuccino', 'Iced Americano', 'Iced Mocha', 'Cold Brew'],
        'Iced Tea': ['Iced Green Tea', 'Iced Black Tea', 'Iced Chai Tea', 'Iced Earl Grey Tea', 'Iced Herbal Tea']
    },
    'Snacks': {
        'Baked': ['Croissant', 'Bagel', 'Cookie', 'Muffin', 'Cinammon Roll'],
        'Packaged': ['Gronola Bar', 'Trail Mix', 'Fruit Cup', 'M&M', 'Chewing Gum']
    },
    'Retail': {
        'Mug': ['PyBrew I Wish You Were Here Mug 12oz', 'PyBrew Texas Edition Mug 36oz'],
        'Kitchen Tool': ['Dripper', 'French Press', 'Perculator']
    }
}

# Create a list of tuples
products_list = list()


# Use random.choice to randomly select a category and subcategory
for i in range(1000):
    category = random.choice(list(categories.keys()))
    subcategory = random.choice(list(categories[category].keys()))

    # Use random.choice to randomly select a product name from the subcategory
    product_name = random.choice(categories[category][subcategory])

    # Create a unique product id
    product_id = f"{category.upper()[:3]}-{subcategory.upper()[:3]}-{product_name.upper()[:3]}-{1_000 + i}"

    # Add the product to the list
    products_list.append((category, subcategory, product_name, product_id))

# Print the list of products
print(products_list[:10])

[('Hot Beverages', 'Coffee', 'Americano', 'HOT-COF-AME-1000'), ('Retail', 'Kitchen Tool', 'Dripper', 'RET-KIT-DRI-1001'), ('Hot Beverages', 'Coffee', 'Espresso', 'HOT-COF-ESP-1002'), ('Hot Beverages', 'Tea', 'Black Tea', 'HOT-TEA-BLA-1003'), ('Retail', 'Kitchen Tool', 'Perculator', 'RET-KIT-PER-1004'), ('Cold Beverages', 'Iced Coffee', 'Iced Cappuccino', 'COL-ICE-ICE-1005'), ('Retail', 'Kitchen Tool', 'French Press', 'RET-KIT-FRE-1006'), ('Cold Beverages', 'Iced Tea', 'Iced Green Tea', 'COL-ICE-ICE-1007'), ('Retail', 'Kitchen Tool', 'Perculator', 'RET-KIT-PER-1008'), ('Retail', 'Mug', 'PyBrew Texas Edition Mug 36oz', 'RET-MUG-PYB-1009')]


In [4]:
# column names
field_names = ['product_category', 'product_id', 'product_name', 'product_subcategory']

try:
    with open(csv_location, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(field_names)
        writer.writerows(products_list)
except Exception as e:
    print(e)
else:
    print(f"Success! File ready @ {csv_location}")

Success! File ready @ ../../../data/pybrew_dim_product/dim_product.csv
