# In [2]:
from faker import Faker
import random
import pandas as pd
from datetime import datetime, timedelta
import sqlite3

# Shared Faker instance used by the dimension generators in this file
# (UUIDs, names, e-mails, phone numbers, timestamps). It is not seeded here,
# so generated values differ between runs.
fake = Faker()

# Utility function to generate date range
def generate_date_range(start_date, end_date):
    """Return a daily ``pandas.DatetimeIndex`` spanning the two dates.

    Both arguments are forwarded unchanged to ``pd.date_range`` with a
    calendar-day frequency, so the result contains one timestamp per day
    from ``start_date`` through ``end_date``.
    """
    daily_index = pd.date_range(start=start_date, end=end_date, freq='D')
    return daily_index

# Generate data for dim_date
def generate_dim_date(start_date, end_date):
    """Build one dim_date row (a dict) for every day in the given range.

    The days come from ``generate_date_range(start_date, end_date)``.
    Calendar attributes are derived from each day; ``is_holiday`` and
    ``is_campaign_day`` are random placeholder flags, not a real calendar.

    Returns:
        list[dict]: rows in the order the date range yields them.
    """
    def build_row(day):
        weekday_index = day.weekday()  # Monday == 0 ... Sunday == 6
        return {
            'date_key': int(day.strftime('%Y%m%d')),
            'date': day.date(),
            'quarter': (day.month - 1) // 3 + 1,
            'year': day.year,
            'month': day.month,
            'day': day.day,
            'day_of_week': weekday_index + 1,
            'day_type': 'Weekday' if weekday_index < 5 else 'Weekend',
            'is_holiday': random.choice([True, False]),
            'is_campaign_day': random.choice([True, False]),
            'is_salary_day': day.day in [15, 30]
        }

    return [build_row(day) for day in generate_date_range(start_date, end_date)]

# Date dimension rows for every day from 2024-01-01 through 2024-12-31.
dim_date_data = generate_dim_date('2024-01-01', '2024-12-31')

# Generate data for dim_seller
def generate_dim_seller(n):
    """Create ``n`` synthetic dim_seller rows with seller_key values 1..n.

    Every attribute other than ``seller_key`` is a Faker/random placeholder
    drawn from the module-level ``fake`` instance and ``random``.

    Returns:
        list[dict]: the generated rows, in seller_key order.
    """
    def make_seller(seller_key):
        return {
            'seller_key': seller_key,
            'user_id': fake.uuid4(),
            'shop_id': fake.uuid4(),
            'user_birthdate': fake.date_of_birth(minimum_age=18, maximum_age=65),
            'shop_name': fake.company(),
            'shop_category': random.choice(['Electronics', 'Clothing', 'Home', 'Toys']),
            'shop_create_time': fake.date_time_this_decade(),
            'user_email': fake.email(),
            'user_contact_no': fake.phone_number(),
            'is_active': random.choice([True, False]),
            'last_modified_time': fake.date_time_this_year()
        }

    return [make_seller(seller_key) for seller_key in range(1, n + 1)]

# 100 synthetic sellers; their shop_id values are reused by generate_dim_item
# and generate_fact_seller_performance_summary.
dim_seller_data = generate_dim_seller(100)

# Generate data for dim_item
def generate_dim_item(n):
    """Create ``n`` synthetic dim_item rows with sku_key values 1..n.

    Each item is attached to a shop chosen at random from the module-level
    ``dim_seller_data``; every other attribute is a Faker/random placeholder.

    Args:
        n: number of item rows to generate.

    Returns:
        list[dict]: the generated rows, in sku_key order.

    Raises:
        IndexError: if ``n`` > 0 while ``dim_seller_data`` is empty, because
            ``random.choice`` cannot pick from an empty list.
    """
    # Collect the candidate shop ids once: they do not change between items,
    # and rebuilding the list for every item made generation O(n * sellers).
    shop_ids = [s['shop_id'] for s in dim_seller_data]

    dim_item = []
    for i in range(n):
        dim_item.append({
            'sku_key': i + 1,
            'sku_id': fake.uuid4(),
            'shop_id': random.choice(shop_ids),
            'listing_id': fake.uuid4(),
            'model_name': fake.word(),
            'model_description': fake.text(max_nb_chars=50),
            'category_lvl_1': random.choice(['Electronics', 'Clothing', 'Home', 'Toys']),
            'category_lvl_2': random.choice(['Mobile', 'Laptop', 'Shoes', 'Furniture']),
            'model_id': fake.uuid4(),
            'item_id': fake.uuid4(),
            'item_description': fake.text(max_nb_chars=100),
            'weight': random.uniform(0.1, 50.0),
            'length': random.uniform(5.0, 100.0),
            'width': random.uniform(5.0, 100.0),
            'height': random.uniform(5.0, 100.0),
            'item_price': random.uniform(10.0, 1000.0),
            'is_active': random.choice([True, False]),
            'create_time': fake.date_time_this_decade(),
            # Roughly half of the items get no banned_time at all.
            'banned_time': None if random.choice([True, False]) else fake.date_time_this_decade(),
            'last_modified_time': fake.date_time_this_year(),
            'is_wh': random.choice([True, False])
        })
    return dim_item

# 500 synthetic items, each linked to one of the shops in dim_seller_data.
dim_item_data = generate_dim_item(500)

# Generate data for dim_staff
def generate_dim_staff(n):
    """Create ``n`` synthetic dim_staff rows with staff_key values 1..n.

    Names, e-mail, agency, birthdate, timestamps and phone number come from
    the module-level ``fake`` instance; ``is_current`` is a random flag.

    Returns:
        list[dict]: the generated rows, in staff_key order.
    """
    return [
        {
            'staff_key': staff_key,
            'staff_id': fake.uuid4(),
            'agency': fake.company(),
            'staff_email': fake.email(),
            'name_first': fake.first_name(),
            'name_middle': fake.first_name(),
            'name_family': fake.last_name(),
            'birthdate': fake.date_of_birth(minimum_age=18, maximum_age=65),
            'account_create_time': fake.date_time_this_decade(),
            'contact_number': fake.phone_number(),
            'is_current': random.choice([True, False])
        }
        for staff_key in range(1, n + 1)
    ]

# 50 synthetic staff members (staff_key 1 through 50).
dim_staff_data = generate_dim_staff(50)

# Generate data for fact_platform_performance_summary
def generate_fact_platform_performance_summary(dates):
    """Produce one platform-wide performance row per entry in ``dates``.

    Args:
        dates: iterable of objects exposing ``strftime`` (for example the
            index returned by ``generate_date_range``).

    Returns:
        list[dict]: one dict per date holding ``date_key`` (YYYYMMDD as an
        int) plus randomly drawn 1d/7d/30d metrics. Only ``1d_ado`` is an
        integer; every other metric is a float from ``random.uniform``.
    """
    def draw_row(day):
        return {
            'date_key': int(day.strftime('%Y%m%d')),
            '1d_ado': random.randint(1000, 5000),
            '7d_ado': random.uniform(1000, 5000),
            '30d_ado': random.uniform(1000, 5000),
            '1d_adgmv': random.uniform(10000, 50000),
            '7d_adgmv': random.uniform(10000, 50000),
            '30d_adgmv': random.uniform(10000, 50000),
            '1d_avg_active_buyers': random.uniform(100, 1000),
            '7d_avg_active_buyers': random.uniform(100, 1000),
            '30d_avg_active_buyers': random.uniform(100, 1000),
            '1d_avg_active_shops': random.uniform(50, 500),
            '7d_avg_active_shops': random.uniform(50, 500),
            '30d_avg_active_shops': random.uniform(50, 500),
            '1d_otd_time': random.uniform(1.0, 5.0),
            '7d_otd_time': random.uniform(1.0, 5.0),
            '30d_otd_time': random.uniform(1.0, 5.0)
        }

    return [draw_row(day) for day in dates]

# Platform-level facts: one row per day from 2024-01-01 through 2024-12-31.
fact_platform_data = generate_fact_platform_performance_summary(generate_date_range('2024-01-01', '2024-12-31'))

# Generate data for fact_sku_performance_summary
def generate_fact_sku_performance_summary(dates):
    """Produce one SKU performance row per (date, item) pair.

    Items are read from the module-level ``dim_item_data``; rows are emitted
    date by date, and within each date in ``dim_item_data`` order.

    Args:
        dates: iterable of objects exposing ``strftime``.

    Returns:
        list[dict]: rows with ``date_key`` (YYYYMMDD as an int), the item's
        ``sku_id`` and randomly drawn 1d/7d/30d/90d metrics. Only ``1d_ado``
        is an integer; the other metrics are floats.
    """
    fact_sku = []
    for date in dates:
        # The key depends only on the date, so format it once per day instead
        # of once for every (day, item) row.
        date_key = int(date.strftime('%Y%m%d'))
        fact_sku.extend(
            {
                'date_key': date_key,
                'sku_id': item['sku_id'],
                '1d_ado': random.randint(10, 500),
                '7d_ado': random.uniform(10, 500),
                '30d_ado': random.uniform(10, 500),
                '90d_ado': random.uniform(10, 500),
                '1d_adgmv': random.uniform(100, 10000),
                '7d_adgmv': random.uniform(100, 10000),
                '30d_adgmv': random.uniform(100, 10000),
                '90d_adgmv': random.uniform(100, 10000)
            }
            for item in dim_item_data
        )
    return fact_sku

# SKU-level facts: one row per (day, item) pair for 2024-01-01 through
# 2024-12-31, so this list is by far the largest one built here.
fact_sku_data = generate_fact_sku_performance_summary(generate_date_range('2024-01-01', '2024-12-31'))

# Generate data for fact_seller_performance_summary
def generate_fact_seller_performance_summary(dates):
    """Produce one seller performance row per (date, seller) pair.

    Sellers are read from the module-level ``dim_seller_data``; rows are
    emitted date by date, and within each date in ``dim_seller_data`` order.

    Args:
        dates: iterable of objects exposing ``strftime``.

    Returns:
        list[dict]: rows with ``date_key`` (YYYYMMDD as an int), the seller's
        ``shop_id`` and randomly drawn 1d/7d/30d/90d metrics. Only ``1d_ado``
        is an integer; the other metrics are floats.
    """
    fact_seller = []
    for date in dates:
        # The key depends only on the date, so format it once per day instead
        # of once for every (day, seller) row.
        date_key = int(date.strftime('%Y%m%d'))
        fact_seller.extend(
            {
                'date_key': date_key,
                'shop_id': seller['shop_id'],
                '1d_ado': random.randint(10, 500),
                '7d_ado': random.uniform(10, 500),
                '30d_ado': random.uniform(10, 500),
                '90d_ado': random.uniform(10, 500),
                '1d_adgmv': random.uniform(100, 10000),
                '7d_adgmv': random.uniform(100, 10000),
                '30d_adgmv': random.uniform(100, 10000),
                '90d_adgmv': random.uniform(100, 10000)
            }
            for seller in dim_seller_data
        )
    return fact_seller

# Seller-level facts: one row per (day, seller) pair for 2024-01-01 through
# 2024-12-31.
fact_seller_data = generate_fact_seller_performance_summary(generate_date_range('2024-01-01', '2024-12-31'))

# Generate data for fact_staff_prod_summary
def generate_fact_staff_prod_summary(dates, num_staff=50, num_warehouses=10):
    """Produce one staff productivity row per (date, staff member) pair.

    Args:
        dates: iterable of objects exposing ``strftime``.
        num_staff: number of staff members; rows are generated for
            staff_key 1..num_staff inclusive. The default matches the 50
            rows this file creates with ``generate_dim_staff(50)``.
        num_warehouses: highest warehouse key; each row is assigned a random
            ``wh_key`` in 1..num_warehouses inclusive.

    Returns:
        list[dict]: rows emitted date by date, and within each date in
        ascending staff_key order. All metrics are independent random draws.
    """
    fact_staff = []
    # range() excludes its stop value, so the stop must be num_staff + 1;
    # the previous range(1, 50) silently dropped the last staff member.
    staff_keys = range(1, num_staff + 1)
    for date in dates:
        # The key depends only on the date, so format it once per day.
        date_key = int(date.strftime('%Y%m%d'))
        for staff_key in staff_keys:
            fact_staff.append({
                'staff_key': staff_key,
                'date_key': date_key,
                # randint includes both endpoints.
                'wh_key': random.randint(1, num_warehouses),
                'total_items': random.randint(100, 1000),
                'total_manhours': random.uniform(10.0, 50.0),
                'total_active_manhours': random.uniform(5.0, 50.0),
                'prod_rate': random.uniform(0.5, 1.5),
                'idle_rate': random.uniform(0.1, 0.5)
            })
    return fact_staff

# Staff productivity facts for every day from 2024-01-01 through 2024-12-31.
fact_staff_data = generate_fact_staff_prod_summary(generate_date_range('2024-01-01', '2024-12-31'))

# Generate data for fact_warehouse_summary
def generate_fact_warehouse_summary(dates, num_warehouses=10):
    """Produce one warehouse summary row per (date, warehouse) pair.

    Args:
        dates: iterable of objects exposing ``strftime``.
        num_warehouses: number of warehouses; rows are generated for
            wh_key 1..num_warehouses inclusive. The default of 10 matches the
            ``random.randint(1, 10)`` range that
            ``generate_fact_staff_prod_summary`` uses for ``wh_key``.

    Returns:
        list[dict]: rows emitted date by date, and within each date in
        ascending wh_key order. All metrics are independent random draws.
    """
    fact_warehouse = []
    for date in dates:
        # The key depends only on the date, so format it once per day.
        date_key = int(date.strftime('%Y%m%d'))
        # range() excludes its stop value; the previous range(1, 10) produced
        # only warehouses 1..9 and left warehouse 10 without summary rows.
        for wh_key in range(1, num_warehouses + 1):
            fact_warehouse.append({
                'date_key': date_key,
                'wh_key': wh_key,
                'total_items': random.randint(1000, 10000),
                'total_manhours': random.uniform(100.0, 500.0),
                'total_active_manhours': random.uniform(50.0, 500.0),
                '1d_ado': random.randint(100, 1000),
                '7d_ado': random.uniform(100, 1000),
                '30d_ado': random.uniform(100, 1000),
                '1d_adi': random.randint(10, 500),
                '7d_adi': random.uniform(10, 500),
                '30d_adi': random.uniform(10, 500),
                '1d_prod_rate': random.uniform(0.5, 1.5),
                '7d_prod_rate': random.uniform(0.5, 1.5),
                '30d_prod_rate': random.uniform(0.5, 1.5),
                '1d_idle_rate': random.uniform(0.1, 0.5),
                # NOTE(review): every other metric uses a 7d window; confirm
                # against the target schema whether 14d is intended here.
                '14d_idle_rate': random.uniform(0.1, 0.5),
                '30d_idle_rate': random.uniform(0.1, 0.5)
            })
    return fact_warehouse

# Warehouse-level facts for every day from 2024-01-01 through 2024-12-31.
fact_warehouse_data = generate_fact_warehouse_summary(generate_date_range('2024-01-01', '2024-12-31'))

# Export data to SQLite database
def export_to_sqlite(db_name):
    """Write every generated dimension and fact table to a SQLite database.

    Each module-level ``*_data`` list is converted to a DataFrame and written
    with ``if_exists='replace'``, so a table of the same name that already
    exists in the database is replaced.

    Args:
        db_name: path of the SQLite database file, passed to
            ``sqlite3.connect``.
    """
    # Table name -> row dicts, listed in the order the tables are written.
    tables = {
        'dim_date': dim_date_data,
        'dim_seller': dim_seller_data,
        'dim_item': dim_item_data,
        'dim_staff': dim_staff_data,
        'fact_platform_performance_summary': fact_platform_data,
        'fact_sku_performance_summary': fact_sku_data,
        'fact_seller_performance_summary': fact_seller_data,
        'fact_staff_prod_summary': fact_staff_data,
        'fact_warehouse_summary': fact_warehouse_data,
    }

    conn = sqlite3.connect(db_name)
    try:
        for table_name, rows in tables.items():
            pd.DataFrame(rows).to_sql(table_name, conn, if_exists='replace', index=False)
        conn.commit()
    finally:
        # Close the connection even when a write fails, so it is not leaked.
        conn.close()

# Run the export: write all generated tables to the SQLite file
# "ewip_data_warehouse.db" (a relative path, so it is created in the
# current working directory).
export_to_sqlite("ewip_data_warehouse.db")

# The generated data is now available both in memory (the *_data lists above)
# and in the SQLite database file written by export_to_sqlite.
