# In [None]:
import pandas as pd
import numpy as np
from faker import Faker
import random

# Faker setup
# Seed every random source this script draws from so the generated dataset is
# reproducible. Faker.seed() only seeds Faker's own generator and
# np.random.seed() only NumPy's; the random.choice() calls below use the
# standard-library `random` module, which has to be seeded separately.
SEED = 42
fake = Faker()
Faker.seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Generate 50 fake e-commerce orders, deliberately injecting data-quality
# problems (missing values, inconsistent casing/formats/types, invalid values).
data = []
for _ in range(50):
    # Either a 4-digit ID or None (missing ID).
    customer_id = random.choice([fake.random_int(min=1000, max=9999), None])  # Missing IDs
    name = fake.name()
    # One of three independently generated cities: lowercased, uppercased, or as-is.
    city = random.choice([fake.city().lower(), fake.city().upper(), fake.city()])  # Mixed casing
    # Mixed representations: Faker postcode string, 6-digit number as str, or as float.
    postal_code = random.choice([fake.postcode(), str(fake.random_int(100000, 999999)), float(fake.random_int(100000, 999999))])
    # Same kind of date rendered in one of three inconsistent string formats.
    order_date = random.choice([
        fake.date_this_year().strftime("%Y-%m-%d"),
        fake.date_this_year().strftime("%d/%m/%Y"),
        fake.date_this_year().strftime("%m-%d-%Y")
    ])
    # Either a valid quantity (1..5) or its negation (-5..-1).
    quantity = random.choice([fake.random_int(min=1, max=5), -1 * fake.random_int(min=1, max=5)])  # Negative errors
    # Price stored as a string with a "$" prefix. The "," format spec would add
    # thousands separators, but values in 10..200 never reach 1,000, so no
    # comma is actually emitted for this range.
    price = f"${fake.random_int(10, 200):,}"
    data.append([customer_id, name, city, postal_code, order_date, quantity, price])

df = pd.DataFrame(data, columns=["customer_id", "name", "city", "postal_code", "order_date", "quantity", "price"])

# Add duplicates: append copies of the first five rows to the end of the frame.
df = pd.concat([df, df.iloc[:5]], ignore_index=True)

# Save raw data (without the DataFrame index column).
df.to_csv("day_01_raw_data.csv", index=False)
print("Raw dataset generated: day_01_raw_data.csv")