In [8]:
import random
import pandas as pd
import sqlite3
from faker import Faker
from datetime import datetime, timedelta

fake = Faker()
Faker.seed(0)

# Helper function to generate random dates after 2024-01-01
def random_date():
    start_date = datetime(2024, 1, 1)
    random_days = timedelta(days=random.randint(0, 365))
    return start_date + random_days

# Generate seller table
def generate_sellers(n):
    sellers = []
    categories = ['Electronics', 'Clothing', 'Home and Kitchen', 'Books', 'Sports', 'Toys']
    for _ in range(n):
        shop_id = fake.uuid4()
        sellers.append({
            "user_id": fake.uuid4(),
            "shop_id": shop_id,
            "shop_name": fake.company(),
            "shop_category": random.choice(categories),
            "shop_create_time": random_date(),
            "is_active": random.choice([True, False]),
            "last_modified_time": random_date(),
            "is_wh": random.choice([True, False]),
        })
    return pd.DataFrame(sellers)

# Generate listing table
def generate_listings(sellers, n):
    listings = []
    category_lvl_1 = ['Electronics', 'Fashion', 'Home', 'Books']
    category_lvl_2_map = {
        'Electronics': ['Mobile Phones', 'Laptops', 'Cameras'],
        'Fashion': ['Men', 'Women', 'Kids'],
        'Home': ['Furniture', 'Decor', 'Appliances'],
        'Books': ['Fiction', 'Non-Fiction', 'Educational']
    }
    for _ in range(n):
        shop_id = random.choice(sellers["shop_id"].tolist())
        model_id = fake.uuid4()
        category = random.choice(category_lvl_1)
        model_name = f"{random.choice(['Smartphone', 'Laptop', 'Camera', 'Shirt', 'Book', 'Chair'])} {random.choice(['Pro', 'Lite', 'Max', 'Basic'])}"
        listings.append({
            "listing_id": fake.uuid4(),
            "shop_id": shop_id,
            "model_id": model_id,
            "model_name": model_name,
            "model_description": f"High-quality {model_name.lower()} designed for performance.",
            "category_lvl_1": category,
            "category_lvl_2": random.choice(category_lvl_2_map[category]),
            "create_time": random_date(),
            "banned_time": random_date() if random.choice([True, False]) else None,
            "banned_by": fake.uuid4() if random.choice([True, False]) else None,
            "last_modified_time": random_date(),
        })
    return pd.DataFrame(listings)

# Generate item table
def generate_items(listings, n):
    items = []
    for _ in range(n):
        listing = listings.sample(1).iloc[0]
        shop_id = listing["shop_id"]
        model_id = listing["model_id"]
        color = random.choice(['Black', 'White', 'Red', 'Blue', 'Green'])
        material = random.choice(['Plastic', 'Metal', 'Wood', 'Fabric', 'Glass'])
        items.append({
            "sku_id": fake.uuid4(),
            "shop_id": shop_id,
            "listing_id": listing["listing_id"],
            "model_id": model_id,
            "item_id": fake.uuid4(),
            "item_description": f"{listing['model_name']} in {color} color, made from {material}.", 
            "stock_qty": random.randint(10, 500),
            "weight": round(random.uniform(0.5, 5), 2),
            "length": round(random.uniform(10, 100), 2),
            "width": round(random.uniform(5, 50), 2),
            "height": round(random.uniform(1, 30), 2),
            "item_price": round(random.uniform(100, 5000), 2),
            "is_active": random.choice([True, False]),
            "create_time": random_date(),
            "last_modified_time": random_date(),
        })
    return pd.DataFrame(items)

# Number of rows to generate
num_sellers = 50
num_listings = 200
num_items = 500

# Generate data
sellers_df = generate_sellers(num_sellers)
listings_df = generate_listings(sellers_df, num_listings)
items_df = generate_items(listings_df, num_items)

# Save to CSV files
# sellers_df.to_csv("sellers.csv", index=False)
# listings_df.to_csv("listings.csv", index=False)
# items_df.to_csv("items.csv", index=False)

# Save to SQLite databases
def save_to_sqlite(df, db_name, table_name):
    conn = sqlite3.connect(db_name)
    df.to_sql(table_name, conn, if_exists="replace", index=False)
    conn.close()

save_to_sqlite(sellers_df, "og_db_file/ums.db", "sellers")
save_to_sqlite(listings_df, "og_db_file/lpms.db", "listings")
save_to_sqlite(items_df, "og_db_file/lpms.db", "items")

print("Data generated and saved as CSV files: sellers.csv, listings.csv, items.csv.")
print("Data also saved to SQLite databases: ums.db (sellers), lpms.db (listings and items).")


Data generated and saved as CSV files: sellers.csv, listings.csv, items.csv.
Data also saved to SQLite databases: ums.db (sellers), lpms.db (listings and items).


In [9]:
import sqlite3

# Create an SQLite database for dim_item table
conn = sqlite3.connect("og_db_file/ewip_dw.db")

# Define the schema for the dim_item table
create_table_query = """
CREATE TABLE IF NOT EXISTS dim_item (
    sku_key TEXT PRIMARY KEY,
    sku_id TEXT,
    shop_id TEXT,
    listing_id TEXT,
    model_name TEXT,
    model_description TEXT,
    category_lvl_1 TEXT,
    category_lvl_2 TEXT,
    model_id TEXT,
    item_id TEXT,
    item_description TEXT,
    weight REAL,
    length REAL,
    width REAL,
    height REAL,
    item_price REAL,
    is_active BOOLEAN,
    create_time DATETIME,
    banned_time DATETIME,
    last_modified_time DATETIME,
    is_wh BOOLEAN
);
"""

# Execute the query to create the table
cursor = conn.cursor()
cursor.execute(create_table_query)
conn.commit()

# Close the connection
conn.close()

print("Empty dim_item table created in dim_item.db.")


Empty dim_item table created in dim_item.db.
