# In [3]:  (Jupyter notebook cell marker)
import pandas as pd
import random
from datetime import datetime, timedelta

# --- 1. CONFIGURATION ---
# Number of synthetic transactions to produce, and the brand stamped on each.
NUM_ROWS = 5_000
BRAND_NAME = "Vivo"

# Model name -> approximate market price in INR.
# Keeping both in one mapping guarantees a row's price always belongs to its
# model.
vivo_catalog = {
    "Vivo X100 Pro": 89_999,
    "Vivo X100": 63_999,
    "Vivo V30 Pro": 41_999,
    "Vivo V30": 33_999,
    "Vivo V29": 32_999,
    "Vivo V29e": 26_999,
    "Vivo T2 Pro": 23_999,
    "Vivo Y200": 21_999,
    "Vivo Y56": 19_999,
    "Vivo T2x": 12_999,
    "Vivo Y17s": 11_499,
    "Vivo T3 5G": 19_999,
}

# Cities a transaction can be attributed to.
cities = [
    "Mumbai",
    "Delhi",
    "Bangalore",
    "Hyderabad",
    "Ahmedabad",
    "Chennai",
    "Kolkata",
    "Pune",
    "Jaipur",
    "Lucknow",
    "Surat",
    "Kanpur",
    "Nagpur",
    "Indore",
    "Thane",
    "Bhopal",
    "Patna",
]

# Payment options a transaction can be settled with.
payment_methods = [
    "UPI",
    "Credit Card",
    "Debit Card",
    "Cash",
    "EMI",
    "Net Banking",
]

# Name pools: a customer name is one first name paired with one last name.
first_names = [
    "Aarav", "Vihaan", "Aditya", "Arjun", "Sai",
    "Reyansh", "Ayaan", "Debarpita", "Tani", "Krishna",
    "Ishaan", "Shaurya", "Diya", "Saanvi", "Ananya",
    "Aadhya", "Pari", "Neha", "Priya", "Riya",
    "Meera", "Kavita", "Devesh", "Abhishek", "Ravi",
    "Amit", "Shivani", "Surisha", "Vivek", "Anirudh",
    "Kunal", "Rohit", "Nikhil", "Siddharth", "Varun",
    "Manish", "Harsh", "Yash", "Mohit", "Akash",
    "Rahul", "Suresh", "Mahesh", "Pranav", "Rajat",
    "Ankit", "Sachin", "Pankaj", "Gaurav", "Deepak",
    "Ashish", "Sunil", "Vikas", "Ajay", "Sanjay",
    "Hemant", "Ramesh", "Naresh", "Kartik", "Uday",
    "Tejas", "Mayank", "Akhil", "Devansh", "Parth",
    "Atharva", "Raghav", "Naveen", "Tarun", "Arvind",
    "Bharat", "Chirag", "Darshan", "Eshan", "Faizan",
    "Govind", "Himanshu", "Jayesh", "Lakshay", "Mukul",
    "Naman", "Omkar", "Pulkit", "Ritvik", "Samarth",
    "Tanmay", "Utkarsh", "Vedant", "Yuvraj", "Aishwarya",
    "Pooja", "Nidhi", "Sneha", "Kiran", "Shreya",
    "Swati", "Aarti", "Bhavna", "Chandni", "Damini",
    "Ekta", "Gauri", "Heena", "Ira", "Jasmin",
    "Komal", "Lata", "Madhuri", "Namrata", "Ojasvi",
    "Pallavi", "Rachna", "Saloni", "Tanvi", "Urmila",
    "Vandana", "Yamini", "Zoya", "Ankita", "Bhavya",
    "Charu", "Diksha", "Esha", "Falguni", "Gunjan",
    "Harini", "Ishita", "Jaya", "Kanika", "Lavanya",
    "Monika", "Neelam", "Oindrila", "Poonam", "Ritu",
    "Sakshi", "Trisha", "Uma", "Vaishali", "Yashika",
    "Alka", "Bina", "Chhavi", "Divya", "Eshani",
    "Farah", "Geetika", "Hiral", "Indu", "Jayant",
    "Keshav", "Lokesh", "Mihir", "Neeraj", "Omesh",
    "Pradeep", "Rohil", "Shubham", "Tushar", "Viraj",
    "Yogesh", "Arpit", "Bhavesh", "Chetan", "Dinesh",
    "Kishan", "Manoj", "Narendra", "Prashant", "Rajesh",
    "Sameer", "Umesh", "Vishal", "Wasim", "Zubin",
    "Aman", "Bhanu", "Dev", "Irfan",
]
last_names = [
    "Sharma", "Verma", "Gupta", "Malhotra", "Singh",
    "Patel", "Reddy", "Nair", "Das", "Joshi",
    "Mehta", "Chopra", "Jain", "Saxena", "Iyer",
    "Khan", "Mishra", "Yadav", "Ghosh", "Bhadra",
    "Rao", "Agrawal", "Ahluwalia", "Ahuja", "Anand",
    "Arora", "Bajaj", "Bansal", "Batra", "Bedi",
    "Bhalla", "Bhardwaj", "Bhatt", "Bose", "Chakraborty",
    "Chatterjee", "Chaudhary", "Dutta", "Dwivedi", "Gandhi",
    "Goel", "Grover", "Haldar", "Jindal", "Kapoor",
    "Kashyap", "Khatri", "Kohli", "Kulkarni", "Mahajan",
    "Mathur", "Menon", "Mukherjee", "Nagpal", "Narayan",
    "Ojha", "Pandey", "Parikh", "Qureshi", "Rajput",
    "Ray", "Sengupta", "Shah", "Shetty", "Shukla",
    "Sinha", "Srivastava", "Talwar", "Tandon", "Thakur",
    "Trivedi", "Tyagi", "Upadhyay", "Vaid", "Venkatesh",
    "Walia", "Zaveri", "Acharya", "Balakrishnan", "Banerjee",
    "Bhandari", "Chandra", "Desai", "Gokhale", "Kannan",
    "Karmakar", "Lal", "Mandal", "Naik", "Pillai",
    "Prasad", "Rastogi", "Sarin", "Subramanian", "Tripathi",
    "Ullah", "Varadarajan", "Vyas", "Ibrahim", "Farooq",
    "Hussain", "Ismail", "Salim", "Siddiqui", "Azmi",
    "Ansari", "Baig", "Hashmi", "Nadvi", "Usmani",
    "Kureshi", "Madani", "Rizvi",
]

# --- 2. HELPER FUNCTIONS ---

def generate_random_date(start_year=2023, end_year=2024):
    """Return a random datetime within the calendar years start_year..end_year.

    The value is drawn uniformly, at one-second resolution, from Jan 1 of
    ``start_year`` 00:00:00 up to and including Dec 31 of ``end_year``
    23:59:59, so every calendar day in the range is equally likely.

    Args:
        start_year: First calendar year of the range (inclusive).
        end_year: Last calendar year of the range (inclusive).

    Returns:
        A naive ``datetime`` inside the requested range.

    Raises:
        ValueError: If ``end_year`` is earlier than ``start_year``.
    """
    if end_year < start_year:
        raise ValueError(
            f"end_year ({end_year}) must not be earlier than "
            f"start_year ({start_year})"
        )

    start = datetime(start_year, 1, 1)
    last_day = datetime(end_year, 12, 31)

    # datetime(end_year, 12, 31) is midnight at the *start* of Dec 31, so the
    # span must be widened by one day to cover that whole day; otherwise the
    # last day of the range is practically never produced.  Widening the
    # timedelta (rather than building Jan 1 of end_year + 1) keeps
    # end_year=9999 valid.
    span = (last_day - start) + timedelta(days=1)
    span_seconds = int(span.total_seconds())

    # randrange excludes its upper bound, so the latest possible result is
    # 23:59:59 on Dec 31 of end_year.
    return start + timedelta(seconds=random.randrange(span_seconds))

def generate_customer_name():
    """Return a random full name in the form "<first name> <last name>".

    One entry is drawn from the module-level ``first_names`` list and then
    one from ``last_names``; the two are joined with a single space.
    """
    given = random.choice(first_names)
    family = random.choice(last_names)
    return " ".join((given, family))

# --- 3. DATA GENERATION LOOP ---

# The catalog does not change while rows are generated, so list its model
# names once here instead of rebuilding the list on every iteration.
catalog_models = list(vivo_catalog)

# One inner list per transaction; field order matches the column headers used
# when the DataFrame is built.
data = []

for i in range(1, NUM_ROWS + 1):
    # 1. Transaction ID (sequential, starting at 10001)
    trans_id = 10000 + i

    # 2. Date & day name, both derived from the same random timestamp
    date_obj = generate_random_date()
    date_str = date_obj.strftime("%Y-%m-%d")
    day_name = date_obj.strftime("%A")

    # 3. Product details: the price is looked up from the chosen model, so
    #    the two can never disagree
    model_name = random.choice(catalog_models)
    price = vivo_catalog[model_name]

    # 4. Units sold (weighted: mostly 1, sometimes 2 or 3)
    units = random.choices([1, 2, 3], weights=[85, 10, 5])[0]

    # 5. Customer details
    cust_name = generate_customer_name()
    cust_age = random.randint(18, 60)
    city = random.choice(cities)
    # Ratings are skewed towards the high end (4 and 5 carry most weight)
    rating = random.choices([1, 2, 3, 4, 5], weights=[5, 5, 10, 35, 45])[0]
    pay_method = random.choice(payment_methods)

    # Append row
    data.append([
        trans_id,        # Transaction ID
        date_str,        # Date
        day_name,        # Day Name
        BRAND_NAME,      # Brand
        model_name,      # Mobile Model
        price,           # Price Per Unit
        units,           # Units Sold
        cust_name,       # Customer Name
        cust_age,        # Customer Age
        city,            # City
        pay_method,      # Payment Method
        rating,          # Customer Ratings
    ])

# --- 4. CREATE DATAFRAME & SAVE ---

# Column headers, in the same order as the fields of each generated row.
columns = [
    "Transaction ID",
    "Date",
    "Day Name",
    "Brand",
    "Mobile Model",
    "Price Per Unit",
    "Units Sold",
    "Customer Name",
    "Customer Age",
    "City",
    "Payment Method",
    "Customer Ratings",
]

df = pd.DataFrame(data=data, columns=columns)

# Preview the top of the frame as a quick sanity check.
print("First 5 rows of generated data:", df.head(), sep="\n")

# Write the whole dataset to disk without the DataFrame index column.
csv_filename = "5k_vivo_transactions.csv"
df.to_csv(csv_filename, index=False)
print(f"\nSuccess! File saved as: {csv_filename}")

# --- Captured notebook output (truncated) ---
# First 5 rows of generated data:
#    Transaction ID        Date   Day Name Brand  Mobile Model  Price Per Unit  \
# 0           10001  2023-01-07   Saturday  Vivo  Vivo V30 Pro           41999
# 1           10002  2023-05-10  Wednesday  Vivo      Vivo T2x           12999
# 2           10003  2024-03-30   Saturday  Vivo     Vivo X100           63999
# 3           10004  2024-05-07    Tuesday  Vivo     Vivo Y17s           11499
# 4           10005  2023-03-09   Thursday  Vivo   Vivo T2 Pro           23999
#
#    Units Sold      Customer Name  Customer Age     City Payment Method  \
# 0           1      Neeraj Shetty            27   Mumbai            EMI
# 1           1  Madhuri Chaudhary            40   Kanpur    Net Banking
# 2           2    Oindrila Farooq            33  Kolkata            EMI
# 3           1       Deepak Patel            32   Indore            EMI
# 4           1       Mahesh Nadvi            57   Nagpur     Debit Card
#
#    Customer Ratings
# 0                 4