In [3]:
import pandas as pd
import random

# List of 100 unique Indian names
indian_names = [
    "Aarav", "Vivaan", "Aditya", "Vihaan", "Krishna", "Ishaan", "Shaurya", "Atharv", "Kabir", "Arjun",
    "Ananya", "Saanvi", "Ishita", "Tanisha", "Riya", "Aditi", "Pooja", "Sneha", "Kavya", "Meera",
    "Rohan", "Neha", "Siddharth", "Harsh", "Nikita", "Sakshi", "Rahul", "Manisha", "Deepak", "Simran",
    "Divya", "Ankit", "Preeti", "Vikas", "Ritu", "Mohit", "Payal", "Kunal", "Swati", "Vivek",
    "Amit", "Bhavna", "Yash", "Ekta", "Gaurav", "Sonia", "Rakesh", "Tanya", "Varun", "Nidhi",
    "Piyush", "Alok", "Jatin", "Sonali", "Shweta", "Umesh", "Neetu", "Tarun", "Richa", "Kiran",
    "Ashish", "Pragya", "Suraj", "Harpreet", "Sourabh", "Manoj", "Chetan", "Bhawna", "Sumit", "Megha",
    "Deepika", "Siddhi", "Rajesh", "Ankur", "Anmol", "Ishwar", "Ravindra", "Anvesha", "Naveen", "Kritika",
    "Sanjay", "Bharti", "Lalit", "Vipul", "Abhishek", "Tanmay", "Saurav", "Anirudh", "Shivani", "Devansh"
]

# Shuffle names to ensure randomness and select 100 unique names
random.shuffle(indian_names)
indian_names = indian_names[:100]  # Pick 100 unique names

# Choices for other fields
domains = ["Web Development", "Data Science", "Cloud Computing"]
degrees = ["IT", "CSE", "IOT", "Other"]
time_spent_choices = ["1 hour", "2 hours", "3 hours", "4 hours", "5 hours"]
revisit_course_material = ["Daily", "Weekly"]

# Generate dataset with 100 records
data = []

for name in indian_names:
    cgpa = round(random.uniform(5.0, 10.0), 2)  # CGPA between 5 and 10
    domain = random.choice(domains)
    degree = random.choice(degrees)
    age = random.randint(18, 22)
    time_spent = random.choice(time_spent_choices)
    revisit = random.choice(revisit_course_material)

    data.append([name, cgpa, domain, degree, age, time_spent, revisit])

# Create a new DataFrame with the required structure
df_new = pd.DataFrame(data, columns=["Name", "CGPA", "Domain", "Degree", "Age", "Time Spent", "Revisit Frequency"])

# Save dataset to a file in the current directory
output_file = "Updated_SmartLearn_Students.csv"
df_new.to_csv(output_file, index=False)

# Display confirmation message
print(f"✅ Dataset saved successfully as '{output_file}' in the current directory.")
print("\n🔹 Preview of the dataset:\n")
print(df_new.head())  # Show first few rows of the dataset


✅ Dataset saved successfully as 'Updated_SmartLearn_Students.csv' in the current directory.

🔹 Preview of the dataset:

       Name  CGPA           Domain Degree  Age Time Spent Revisit Frequency
0     Rohan  8.29     Data Science    CSE   20    5 hours             Daily
1    Vihaan  6.83     Data Science     IT   18    4 hours             Daily
2     Swati  7.21  Web Development    IOT   19    5 hours             Daily
3     Umesh  9.66  Web Development  Other   19     1 hour            Weekly
4  Harpreet  6.68  Web Development     IT   18     1 hour            Weekly


In [10]:
import pandas as pd
import random

# Expanded list of Indian first names
first_names = [
    "Aarav", "Vivaan", "Aditya", "Vihaan", "Krishna", "Ishaan", "Shaurya", "Atharv", "Kabir", "Arjun",
    "Ananya", "Saanvi", "Ishita", "Tanisha", "Riya", "Aditi", "Pooja", "Sneha", "Kavya", "Meera",
    "Rohan", "Neha", "Siddharth", "Harsh", "Nikita", "Sakshi", "Rahul", "Manisha", "Deepak", "Simran",
    "Divya", "Ankit", "Preeti", "Vikas", "Ritu", "Mohit", "Payal", "Kunal", "Swati", "Vivek"
]

# Expanded list of Indian last names
last_names = [
    "Sharma", "Verma", "Mishra", "Reddy", "Nair", "Gupta", "Bose", "Iyer", "Das", "Chopra",
    "Patel", "Jain", "Agarwal", "Bhatia", "Choudhary", "Mehta", "Trivedi", "Joshi", "Kulkarni", "Desai"
]

# Generate unique full names (ensuring enough unique combinations)
all_names = [f"{fn} {ln}" for fn in first_names for ln in last_names]
random.shuffle(all_names)

# Ensure we don't go beyond the available names
num_records = min(2000, len(all_names))  # Limit dataset to available names
unique_names = all_names[:num_records]

# Choices for other fields
domains = ["Web Development", "Data Science", "Cloud Computing"]
degrees = ["IT", "CSE", "IOT", "Other"]
time_spent_choices = ["1 hour", "2 hours", "3 hours", "4 hours", "5 hours"]
revisit_course_material = ["Daily", "Weekly"]

# Generate dataset efficiently using list comprehension
data = [
    [
        unique_names[i], 
        round(random.uniform(5.0, 10.0), 2),  # CGPA between 5 and 10
        random.choice(domains),
        random.choice(degrees),
        random.randint(18, 22),
        random.choice(time_spent_choices),
        random.choice(revisit_course_material)
    ]
    for i in range(num_records)
]

# Create DataFrame
df_final = pd.DataFrame(data, columns=["Name", "CGPA", "Domain", "Degree", "Age", "Time Spent", "Revisit Frequency"])

# Save dataset in the local project folder as Excel
excel_output_file = "Final_SmartLearn_Students_2000.xlsx"
df_final.to_excel(excel_output_file, index=False)

# Print confirmation message
print(f"✅ Dataset saved as {excel_output_file} with {num_records} rows.")


✅ Dataset saved as Final_SmartLearn_Students_2000.xlsx with 800 rows.
