In [1]:
import csv
import random
from faker import Faker
from datetime import datetime

In [2]:
# Initialize Faker and seed both random sources so that a fresh
# "Restart Kernel -> Run All" draws the same sequence of values.
# Note: generate_record() picks timestamps relative to 'now', so the
# datetime column can still differ between runs made at different times.
SEED = 42
random.seed(SEED)  # stdlib `random` draws (ids, qty, price, choices)
Faker.seed(SEED)   # values produced by Faker (names, words, uuids, dates)
fake = Faker()

In [3]:
# Column names of the generated dataset, in output order.
# This list is written as the CSV header row.
fields = [
    # order and customer
    'order_id',
    'customer_id',
    'customer_name',
    # product
    'product_id',
    'product_name',
    'product_category',
    # purchase details
    'payment_type',
    'qty',
    'price',
    'datetime',
    # location and storefront
    'country',
    'city',
    'ecommerce_website_name',
    # payment outcome
    'payment_txn_id',
    'payment_txn_success',
    'failure_reason',
]

In [4]:
# Lookup pools that the record generator samples from.

# Product categories a record can be tagged with.
product_categories = [
    'Stationery',
    'Electronics',
    'Books',
    'Clothing',
    'Toys',
]

# Supported payment methods.
payment_types = [
    'Card',
    'Internet Banking',
    'UPI',
    'Wallet',
]

# Country -> cities; a record's city is drawn from its country's list.
countries = {
    'India': [
        'Mumbai', 'Delhi', 'Bengaluru', 'Chennai', 'Kolkata',
    ],
    'USA': [
        'New York', 'Los Angeles', 'Chicago', 'Houston', 'Boston',
    ],
    'UK': [
        'London', 'Manchester', 'Birmingham', 'Leeds', 'Oxford',
    ],
    'Germany': [
        'Berlin', 'Munich', 'Frankfurt', 'Hamburg', 'Stuttgart',
    ],
}

# Storefront host names.
websites = [
    'www.amazon.com',
    'www.flipkart.com',
    'www.ebay.in',
    'www.tatacliq.com',
    'www.snapdeal.com',
]

# Reasons attached to unsuccessful payment transactions.
failures = [
    'Card Expired',
    'Blocked Card',
    'Transaction Timeout',
]


In [5]:
# Build one synthetic order row
def generate_record(order_id):
    """Return a single randomly generated order as a 16-element list.

    The values appear in this order: order_id, customer_id, customer_name,
    product_id, product_name, product_category, payment_type, qty, price,
    datetime, country, city, ecommerce_website_name, payment_txn_id,
    payment_txn_success, failure_reason.

    Every value except ``order_id`` is drawn independently on each call, so
    the same customer_id or product_id may appear with different names in
    different rows.
    """
    # Customer and product attributes.
    cust_id = random.randint(100, 999)
    cust_name = fake.name()
    prod_id = random.randint(200, 299)
    prod_name = fake.word().capitalize()
    category = random.choice(product_categories)

    # Payment method, quantity, and price rounded to two decimals.
    pay_type = random.choice(payment_types)
    quantity = random.randint(1, 10)
    amount = round(random.uniform(5.0, 1000.0), 2)

    # Timestamp between one year ago and now, formatted to the minute.
    placed_at = fake.date_time_between(start_date='-1y', end_date='now')
    placed_at_text = placed_at.strftime("%Y-%m-%d %H:%M")

    # Pick a country first, then a city belonging to that country.
    country = random.choice(list(countries))
    city = random.choice(countries[country])
    site = random.choice(websites)

    # Transaction id is the first dash-separated group of a UUID4 string.
    txn_id = fake.uuid4().partition('-')[0]

    # A failure reason is drawn only for unsuccessful transactions.
    txn_ok = random.choice(['Y', 'N'])
    if txn_ok == 'Y':
        reason = ""
    else:
        reason = random.choice(failures)

    return [
        order_id,
        cust_id,
        cust_name,
        prod_id,
        prod_name,
        category,
        pay_type,
        quantity,
        amount,
        placed_at_text,
        country,
        city,
        site,
        txn_id,
        txn_ok,
        reason,
    ]

In [6]:
# Generate the records and write them to CSV
NUM_RECORDS = 10000                # number of rows; order ids run 1..NUM_RECORDS
OUTPUT_CSV = 'ecommerce_data.csv'  # output path, relative to the working directory

# newline='' lets the csv module control line endings; the explicit encoding
# makes the output independent of the platform's default text encoding.
with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(fields)  # header row
    # Stream the rows straight into the writer, one record per order id.
    writer.writerows(
        generate_record(order_id) for order_id in range(1, NUM_RECORDS + 1)
    )

print("Data generation complete. The file '{}' has been created.".format(OUTPUT_CSV))

Data generation complete. The file 'ecommerce_data.csv' has been created.
