In [21]:
import psycopg2
from psycopg2.extras import execute_batch
from faker import Faker
import random
from datetime import datetime, timedelta
import os
import configparser

fake = Faker()
# Seed Faker so the values it generates repeat from one run to the next.
Faker.seed(0)


# Determine the base directory
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))  # Script-based environments
except NameError:
    base_dir = os.getcwd()  # Interactive environments (e.g., Jupyter, IPython)

# Path to the ini file, normalized so the '..' segment is resolved.
config_path = os.path.normpath(
    os.path.join(base_dir, '..', 'postgresql_initial_setup', 'db_config.ini')
)

# Load the configuration file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file was not loaded.
# Fail here with the resolved path instead of a bare KeyError: 'database' below.
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(
        f"Database configuration file not found or unreadable: {config_path}"
    )

# Database connection details (credentials come from the [database] section)
DB_CONFIG = {
    'dbname': 'etl_proj_lpms',
    'user': config['database']['user'],
    'password': config['database']['password'],
    'host': config['database']['host'],
    'port': config['database']['port'],
}

# Earliest creation timestamp used for the generated seed data.
START_DATE = datetime(2024, 1, 1)

def connect_to_db(db_name):
    """Open a psycopg2 connection to the database named *db_name*.

    The connection settings are taken from ``DB_CONFIG`` with only the
    ``dbname`` entry replaced; ``DB_CONFIG`` itself is left unmodified.
    """
    connection_kwargs = {**DB_CONFIG, "dbname": db_name}
    return psycopg2.connect(**connection_kwargs)

# Generate data for etl_proj_lpms
def populate_lpms():
    """Create the ``listing`` and ``item`` tables if needed and seed them.

    Generates 100 fake listings, each with 1-5 items, and inserts them into
    the ``etl_proj_lpms`` database in a single transaction.

    The tables are created with ``CREATE TABLE IF NOT EXISTS``, so a table
    that already exists with a different column layout is left untouched and
    the inserts will fail against it (for example a UUID string sent to an
    integer column).

    The connection is always closed, including when a statement fails; a
    connection closed before ``commit()`` discards the pending changes.

    NOTE(review): ``fake`` is seeded at module level, so a fresh process
    appears to regenerate the same UUIDs and a second run would collide on
    the primary keys -- confirm whether re-runs need to be supported.

    Raises:
        psycopg2.Error: if connecting, creating the tables, or inserting fails.
    """
    listings = []
    items = []

    for _ in range(100):
        shop_id = fake.uuid4()
        listing_id = fake.uuid4()
        model_id = fake.uuid4()
        model_name = fake.word()
        model_description = fake.sentence()
        category_lvl_1 = fake.word()
        category_lvl_2 = fake.word()
        create_time = START_DATE + timedelta(days=random.randint(0, 365))
        # NOTE(review): this leaves banned_time unset for only ~20% of
        # listings (i.e. ~80% are banned) -- confirm the ratio is intended.
        banned_time = None if random.random() > 0.8 else create_time + timedelta(days=random.randint(1, 100))
        banned_by = None if banned_time is None else fake.name()
        last_modified_time = create_time + timedelta(days=random.randint(1, 100))

        listings.append((listing_id, shop_id, model_id, model_name, model_description,
                         category_lvl_1, category_lvl_2, create_time, banned_time, banned_by, last_modified_time))

        for _ in range(random.randint(1, 5)):
            sku_id = fake.uuid4()
            item_id = fake.uuid4()
            item_description = fake.sentence()
            stock_qty = random.randint(0, 500)
            weight, length, width, height = [random.uniform(1.0, 50.0) for _ in range(4)]
            item_price = random.uniform(10, 1000)
            is_active = random.choice([True, False])
            # Items get their own timestamps, drawn independently of the
            # parent listing; distinct names avoid rebinding the listing's.
            item_create_time = START_DATE + timedelta(days=random.randint(0, 365))
            item_last_modified_time = item_create_time + timedelta(days=random.randint(1, 100))

            items.append((sku_id, shop_id, listing_id, model_id, item_id, item_description,
                          stock_qty, weight, length, width, height, item_price, is_active,
                          item_create_time, item_last_modified_time))

    conn = connect_to_db("etl_proj_lpms")
    try:
        # The cursor context manager closes the cursor on exit, error or not.
        with conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS listing (
                    listing_id UUID PRIMARY KEY,
                    shop_id UUID,
                    model_id UUID,
                    model_name TEXT,
                    model_description TEXT,
                    category_lvl_1 TEXT,
                    category_lvl_2 TEXT,
                    create_time TIMESTAMP,
                    banned_time TIMESTAMP,
                    banned_by TEXT,
                    last_modified_time TIMESTAMP
                );
            """)

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS item (
                    sku_id UUID PRIMARY KEY,
                    shop_id UUID,
                    listing_id UUID,
                    model_id UUID,
                    item_id UUID,
                    item_description TEXT,
                    stock_qty INTEGER,
                    weight FLOAT,
                    length FLOAT,
                    width FLOAT,
                    height FLOAT,
                    item_price FLOAT,
                    is_active BOOLEAN,
                    create_time TIMESTAMP,
                    last_modified_time TIMESTAMP
                );
            """)

            execute_batch(cursor, """
                INSERT INTO listing (listing_id, shop_id, model_id, model_name, model_description,
                category_lvl_1, category_lvl_2, create_time, banned_time, banned_by, last_modified_time)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, listings)

            execute_batch(cursor, """
                INSERT INTO item (sku_id, shop_id, listing_id, model_id, item_id, item_description,
                stock_qty, weight, length, width, height, item_price, is_active, create_time, last_modified_time)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, items)

        conn.commit()
    finally:
        # Always release the connection; if commit() was not reached the
        # pending transaction is discarded when the connection closes.
        conn.close()

# Function to generate new data daily
def generate_daily_data():
    """Insert one batch of new fake listings and items into ``etl_proj_lpms``.

    Builds 10 listings, each with 1-3 items, all stamped with the current
    local time (``datetime.now()``) as both creation and last-modified time,
    and inserts them in a single transaction.

    This function only issues INSERT statements: the ``listing`` and ``item``
    tables must already exist.

    The connection is always closed, including when an insert fails; a
    connection closed before ``commit()`` discards the pending changes.

    Raises:
        psycopg2.Error: if connecting or inserting fails.
    """
    today = datetime.now()
    new_listings = []
    new_items = []

    for _ in range(10):  # Generate 10 new listings daily
        shop_id = fake.uuid4()
        listing_id = fake.uuid4()
        model_id = fake.uuid4()
        model_name = fake.word()
        model_description = fake.sentence()
        category_lvl_1 = fake.word()
        category_lvl_2 = fake.word()
        # NOTE(review): this leaves banned_time unset for only ~10% of
        # listings (i.e. ~90% are banned) -- confirm the ratio is intended.
        banned_time = None if random.random() > 0.9 else today + timedelta(days=random.randint(1, 100))
        banned_by = None if banned_time is None else fake.name()

        new_listings.append((listing_id, shop_id, model_id, model_name, model_description,
                             category_lvl_1, category_lvl_2, today, banned_time, banned_by, today))

        for _ in range(random.randint(1, 3)):  # Generate 1-3 items per listing
            sku_id = fake.uuid4()
            item_id = fake.uuid4()
            item_description = fake.sentence()
            stock_qty = random.randint(0, 500)
            weight, length, width, height = [random.uniform(1.0, 50.0) for _ in range(4)]
            item_price = random.uniform(10, 1000)
            is_active = random.choice([True, False])

            new_items.append((sku_id, shop_id, listing_id, model_id, item_id, item_description,
                              stock_qty, weight, length, width, height, item_price, is_active, today, today))

    conn = connect_to_db("etl_proj_lpms")
    try:
        # The cursor context manager closes the cursor on exit, error or not.
        with conn.cursor() as cursor:
            execute_batch(cursor, """
                INSERT INTO listing (listing_id, shop_id, model_id, model_name, model_description,
                category_lvl_1, category_lvl_2, create_time, banned_time, banned_by, last_modified_time)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, new_listings)

            execute_batch(cursor, """
                INSERT INTO item (sku_id, shop_id, listing_id, model_id, item_id, item_description,
                stock_qty, weight, length, width, height, item_price, is_active, create_time, last_modified_time)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, new_items)

        conn.commit()
    finally:
        # Always release the connection; if commit() was not reached the
        # pending transaction is discarded when the connection closes.
        conn.close()

# TODO: define populate_ums() and populate_oms() for the etl_proj_ums and
# etl_proj_oms databases, following the same pattern as populate_lpms().

if __name__ == "__main__":
    # Steps run in order: the initial seed first, then one daily batch.
    # populate_ums() and populate_oms() are to be called after
    # populate_lpms() once they are defined.
    pipeline_steps = (populate_lpms, generate_daily_data)
    for run_step in pipeline_steps:
        run_step()


InvalidTextRepresentation: invalid input syntax for type integer: "f728b4fa-4248-4e3a-8a5d-2f346baa9455"
LINE 4:         VALUES ('f728b4fa-4248-4e3a-8a5d-2f346baa9455', 'e3e...
                        ^
