In [25]:
from faker import Faker
from db_auth import pyodbc_url
import pyodbc
import random

In [26]:
# Shared Faker instance; the data-generation cells below call it to produce
# synthetic values (UUIDs, names, addresses, timestamps, ...).
fake = Faker()

In [27]:
def insert_data(data, tableName):
    """Bulk-insert a list of row dictionaries into a database table.

    The column list is taken from the keys of the first row, and every other
    row must carry exactly the same set of keys. Values are looked up by
    column name, so rows whose keys are in a different order still land in
    the right columns.

    Args:
        data: Sequence of dicts mapping column name -> value, one per row.
        tableName: Target table, formatted verbatim into the SQL statement
            (e.g. "[raw].[products]").

    Returns:
        None. An empty (or None) ``data`` is a no-op and opens no connection.

    Raises:
        ValueError: If a row's keys differ from the keys of the first row.
        Exception: Anything raised while connecting, executing or committing
            propagates to the caller; the cursor and connection are closed
            first.
    """
    # Nothing to insert: return before touching the database at all.
    if not data:
        return

    expected_keys = data[0].keys()
    column_names = list(expected_keys)

    # Validate and flatten every row up front so a malformed batch is rejected
    # before a connection is opened.
    values = []
    for index, row in enumerate(data):
        if row.keys() != expected_keys:
            raise ValueError(
                f"row {index} has columns {list(row)} but expected {column_names}"
            )
        values.append(tuple(row[name] for name in column_names))

    columns = ", ".join(column_names)
    placeholders = ", ".join("?" for _ in column_names)
    # Identifiers cannot be bound as "?" parameters, so the table and column
    # names are formatted into the statement. Only pass trusted identifiers.
    insert_query = f"INSERT INTO {tableName} ({columns}) VALUES ({placeholders})"

    conn = pyodbc.connect(pyodbc_url)
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(insert_query, values)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, including when the insert fails.
        conn.close()

In [28]:
def read_data(column, tableName):
    """Fetch one column of a table and return its values as a list.

    Args:
        column: Column expression placed in the SELECT list; formatted
            verbatim into the SQL statement, so pass trusted identifiers only.
        tableName: Table to read from; also formatted verbatim.

    Returns:
        list: The first field of every fetched row, in the order the rows
        were returned. Empty when the query returns no rows.

    Raises:
        Exception: Anything raised while connecting, executing or fetching
            propagates to the caller; the cursor and connection are closed
            first.
    """
    conn = pyodbc.connect(pyodbc_url)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(f"SELECT {column} FROM {tableName}")
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        # Always release the connection, including when the query fails.
        conn.close()

In [29]:
# Synthetic product rows: generated in memory, then loaded into [raw].[products].
PRODUCT_CATEGORIES = ["Electronics", "Clothing", "Home & Kitchen", "Books", "Toys"]
PRODUCT_ROW_COUNT = 100000


def _fake_product():
    """Build one randomly generated product record."""
    return {
        "product_id": fake.uuid4(),
        "name": fake.word().capitalize(),
        "category": random.choice(PRODUCT_CATEGORIES),
        "brand": fake.company(),
        "cost_price": round(random.uniform(10, 300), 2),
        "selling_price": round(random.uniform(15, 500), 2),
        "raw_data_source": "Product Catalog",
        "ingestion_timestamp": fake.date_time_this_year(),
    }


products_bronze = [_fake_product() for _ in range(PRODUCT_ROW_COUNT)]

# Pool of product IDs sampled later when building sales transactions: the new
# batch plus the IDs the table held before the insert below runs.
product_id = (
    [row["product_id"] for row in products_bronze]
    + read_data("product_id", "raw.products")
)

insert_data(products_bronze, "[raw].[products]")

In [30]:
# Synthetic customer rows: generated in memory, then loaded into [raw].[customer].
CUSTOMER_GENDERS = ["Male", "Female", "Other"]
CUSTOMER_ROW_COUNT = 100000


def _fake_customer():
    """Build one randomly generated customer record."""
    return {
        "customer_id": fake.uuid4(),
        "name": fake.name(),
        "email": fake.email(),
        "address": fake.address(),
        "age": random.randint(18, 70),
        "gender": random.choice(CUSTOMER_GENDERS),
        "signup_date": fake.date_time_this_decade(),
        "raw_data_source": "CRM System",
        "ingestion_timestamp": fake.date_time_this_year(),
    }


customers_bronze = [_fake_customer() for _ in range(CUSTOMER_ROW_COUNT)]

# Pool of customer IDs sampled later when building sales transactions: the new
# batch plus the IDs the table held before the insert below runs.
customer_id = (
    [row["customer_id"] for row in customers_bronze]
    + read_data("customer_id", "raw.customer")
)

insert_data(customers_bronze, "[raw].[customer]")

In [31]:
# Synthetic order rows: generated in memory, then loaded into [raw].[orders].
ORDER_ROW_COUNT = 100000


def _fake_order():
    """Build one randomly generated order record."""
    return {
        "order_id": fake.uuid4(),
        "quantity": random.randint(1, 10),
        "price": round(random.uniform(5, 500), 2),
        "order_date": fake.date_time_this_year(),
        "shipping_address": fake.address(),
        "raw_data_source": "E-Commerce API",
        "ingestion_timestamp": fake.date_time_this_year(),
    }


orders_bronze = [_fake_order() for _ in range(ORDER_ROW_COUNT)]

# Pool of order IDs sampled later when building sales transactions: the new
# batch plus the IDs the table held before the insert below runs.
order_id = (
    [row["order_id"] for row in orders_bronze]
    + read_data("order_id", "raw.orders")
)

insert_data(orders_bronze, "[raw].[orders]")

In [33]:
# Synthetic sales transactions: each row references a randomly chosen order,
# customer and product from the ID pools built in the earlier cells.
SALES_TRANSACTION_ROW_COUNT = 100000


def _fake_sales_transaction():
    """Build one randomly generated sales-transaction record."""
    return {
        "transaction_id": fake.uuid4(),
        "order_id": random.choice(order_id),
        "customer_id": random.choice(customer_id),
        "product_id": random.choice(product_id),
        "total_amount": round(random.uniform(10, 1000), 2),
        "discount": round(random.uniform(0, 100), 2),
        "tax": round(random.uniform(1, 50), 2),
        "refund_amount": round(random.uniform(0, 100), 2),
        "transaction_date": fake.date_time_this_year(),
        "raw_data_source": "POS System",
        "ingestion_timestamp": fake.date_time_this_year(),
    }


sales_transactions_bronze = [
    _fake_sales_transaction() for _ in range(SALES_TRANSACTION_ROW_COUNT)
]

insert_data(sales_transactions_bronze, "[raw].[sales_transaction]")