In [None]:
import psycopg2
from psycopg2.extras import execute_batch
from faker import Faker
import random
from datetime import datetime, timedelta
import os
import configparser

# Shared Faker instance used by the generator functions below, seeded with a
# fixed value. Values drawn from the stdlib `random` module are NOT seeded.
fake = Faker()
Faker.seed(0)


# Determine the base directory
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))  # Script-based environments
except NameError:
    base_dir = os.getcwd()  # Interactive environments (e.g., Jupyter, IPython)

# Build the path to the ini file and normalize it (resolves the '..' segment)
config_path = os.path.normpath(
    os.path.join(base_dir, '..', 'postgresql_initial_setup', 'db_config.ini')
)

# Load the configuration file. The file is required, so open it explicitly:
# ConfigParser.read() silently skips files it cannot open, which would only
# surface later as an opaque KeyError on the 'database' section.
config = configparser.ConfigParser()
with open(config_path) as config_file:
    config.read_file(config_file)

# Database connection details. 'dbname' is only a default; connect_to_db()
# overrides it with the database name it is given.
DB_CONFIG = {
    'dbname': 'etl_proj_ums',
    'user': config['database']['user'],
    'password': config['database']['password'],
    'host': config['database']['host'],
    'port': config['database']['port']
}


# Earliest possible account creation time for the seeded users
START_DATE = datetime(2024, 1, 1)

def connect_to_db(db_name):
    """Open a new psycopg2 connection to the database named ``db_name``.

    The remaining connection parameters (user, password, host, port) are
    taken from the module-level ``DB_CONFIG``, which is left unmodified.

    Args:
        db_name: Name of the database to connect to.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    # Merge into a fresh dict so the shared DB_CONFIG is never mutated.
    return psycopg2.connect(**{**DB_CONFIG, "dbname": db_name})

# Generate data for etl_proj_ums
def populate_ums():
    """Create the ``user`` and ``seller`` tables and seed them with fake data.

    Builds 100 fake users in memory; each user additionally gets a seller/shop
    row when ``random.random() > 0.6``. Both tables are created if missing in
    the ``etl_proj_ums`` database, the rows are bulk-inserted with
    ``execute_batch``, and everything is committed as a single transaction.

    NOTE(review): IDs come from the module-level seeded ``fake`` instance, so
    running this again against an already-seeded database presumably collides
    on the primary keys -- confirm whether this is meant to run only once.

    Raises:
        psycopg2.Error: If connecting or any SQL statement fails. The
            transaction is rolled back and the connection is still closed.
    """
    create_user_table_sql = """
        CREATE TABLE IF NOT EXISTS "user" (
            user_id UUID PRIMARY KEY,
            username TEXT,
            password TEXT,
            name_first TEXT,
            name_middle TEXT,
            name_last TEXT,
            sex CHAR(1),
            birthdate DATE,
            present_address_region TEXT,
            present_address_city TEXT,
            present_address_brgy TEXT,
            email TEXT,
            contact_number TEXT,
            account_create_time TIMESTAMP,
            is_active BOOLEAN,
            last_modified_time TIMESTAMP
        );
    """

    create_seller_table_sql = """
        CREATE TABLE IF NOT EXISTS seller (
            user_id UUID,
            shop_id UUID PRIMARY KEY,
            shop_name TEXT,
            shop_category TEXT,
            shop_create_time TIMESTAMP,
            is_active BOOLEAN,
            last_modified_time TIMESTAMP,
            is_wh BOOLEAN,
            FOREIGN KEY (user_id) REFERENCES "user" (user_id)
        );
    """

    insert_user_sql = """
        INSERT INTO "user" (user_id, username, password, name_first, name_middle, name_last, sex, 
        birthdate, present_address_region, present_address_city, present_address_brgy, email, 
        contact_number, account_create_time, is_active, last_modified_time)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    insert_seller_sql = """
        INSERT INTO seller (user_id, shop_id, shop_name, shop_category, shop_create_time, is_active, last_modified_time, is_wh)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    users = []
    sellers = []

    for _ in range(100):  # Generate 100 users
        user_id = fake.uuid4()
        username = fake.user_name()
        password = fake.password()
        name_first = fake.first_name()
        name_middle = fake.first_name() if random.random() > 0.5 else None
        name_last = fake.last_name()
        sex = random.choice(["M", "F"])
        birthdate = fake.date_of_birth(minimum_age=18, maximum_age=70)
        present_address_region = fake.state()
        present_address_city = fake.city()
        present_address_brgy = fake.street_name()
        email = fake.email()
        contact_number = fake.phone_number()
        # Account created within a year of START_DATE, modified 1-100 days later
        account_create_time = START_DATE + timedelta(days=random.randint(0, 365))
        is_active = random.choice([True, False])
        last_modified_time = account_create_time + timedelta(days=random.randint(1, 100))

        users.append((user_id, username, password, name_first, name_middle, name_last, sex,
                      birthdate, present_address_region, present_address_city, present_address_brgy,
                      email, contact_number, account_create_time, is_active, last_modified_time))

        if random.random() > 0.6:  # ~40% chance to become a seller
            shop_id = fake.uuid4()
            shop_name = fake.company()
            shop_category = random.choice(["Electronics", "Fashion", "Home", "Sports", "Books"])
            # Shop opens 1-50 days after the account, modified 1-100 days after that
            shop_create_time = account_create_time + timedelta(days=random.randint(1, 50))
            shop_is_active = random.choice([True, False])
            shop_last_modified_time = shop_create_time + timedelta(days=random.randint(1, 100))
            is_wh = random.choice([True, False])

            sellers.append((user_id, shop_id, shop_name, shop_category, shop_create_time,
                            shop_is_active, shop_last_modified_time, is_wh))

    conn = connect_to_db("etl_proj_ums")
    try:
        # `with conn` commits on success and rolls back on error, and the
        # cursor context closes the cursor; neither closes the connection,
        # which is why the outer try/finally is still needed.
        with conn, conn.cursor() as cursor:
            cursor.execute(create_user_table_sql)
            cursor.execute(create_seller_table_sql)
            # Users first: seller.user_id has a foreign key to "user".
            execute_batch(cursor, insert_user_sql, users)
            execute_batch(cursor, insert_seller_sql, sellers)
    finally:
        conn.close()

# Function to generate new data daily
def generate_daily_data_ums():
    """Insert one day's batch of fake users (and some sellers) into etl_proj_ums.

    Generates 5 users stamped with the current local time; each one also gets
    a seller/shop row when ``random.random() > 0.6``. Rows are inserted into
    the ``user`` and ``seller`` tables, which this function does not create,
    and committed as a single transaction.

    Raises:
        psycopg2.Error: If connecting or any SQL statement fails. The
            transaction is rolled back and the connection is still closed.
    """
    import uuid  # local import keeps this function self-contained

    insert_user_sql = """
        INSERT INTO "user" (user_id, username, password, name_first, name_middle, name_last, sex, 
        birthdate, present_address_region, present_address_city, present_address_brgy, email, 
        contact_number, account_create_time, is_active, last_modified_time)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    insert_seller_sql = """
        INSERT INTO seller (user_id, shop_id, shop_name, shop_category, shop_create_time, is_active, last_modified_time, is_wh)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """

    new_users = []
    new_sellers = []
    today = datetime.now()

    for _ in range(5):  # Generate 5 new users daily
        # IDs must be unique across runs. The module seeds Faker with a fixed
        # value, so fake.uuid4() would repeat the same IDs in every fresh
        # process and violate the primary keys; use the stdlib generator.
        user_id = str(uuid.uuid4())
        username = fake.user_name()
        password = fake.password()
        name_first = fake.first_name()
        name_middle = fake.first_name() if random.random() > 0.5 else None
        name_last = fake.last_name()
        sex = random.choice(["M", "F"])
        birthdate = fake.date_of_birth(minimum_age=18, maximum_age=70)
        present_address_region = fake.state()
        present_address_city = fake.city()
        present_address_brgy = fake.street_name()
        email = fake.email()
        contact_number = fake.phone_number()
        is_active = random.choice([True, False])

        # Creation and last-modified timestamps are both "now" for new rows.
        new_users.append((user_id, username, password, name_first, name_middle, name_last, sex,
                          birthdate, present_address_region, present_address_city, present_address_brgy,
                          email, contact_number, today, is_active, today))

        if random.random() > 0.6:  # ~40% chance to become a seller
            shop_id = str(uuid.uuid4())
            shop_name = fake.company()
            shop_category = random.choice(["Electronics", "Fashion", "Home", "Sports", "Books"])
            shop_is_active = random.choice([True, False])
            is_wh = random.choice([True, False])

            new_sellers.append((user_id, shop_id, shop_name, shop_category, today,
                                shop_is_active, today, is_wh))

    conn = connect_to_db("etl_proj_ums")
    try:
        # `with conn` commits on success and rolls back on error, and the
        # cursor context closes the cursor; neither closes the connection,
        # which is why the outer try/finally is still needed.
        with conn, conn.cursor() as cursor:
            # Users first: seller.user_id has a foreign key to "user".
            execute_batch(cursor, insert_user_sql, new_users)
            execute_batch(cursor, insert_seller_sql, new_sellers)
    finally:
        conn.close()

# Script entry point: seed the tables, then append one daily batch.
if __name__ == "__main__":
    for step in (populate_ums, generate_daily_data_ums):
        step()
