In [34]:
import pandas as pd
import random

# Configuration: everything a reader might want to tune lives here.
INPUT_CSV = 'names-by-nationality.csv'
OUTPUT_CSV = 'processed_names.csv'
TARGET_ENTRIES = 962   # total number of rows requested for the output file
AGE_RANGE = (22, 68)   # inclusive bounds for the randomly assigned ages
SEED = 42              # fixed so that Restart & Run All reproduces the same file


def build_balanced_sample(data, target=TARGET_ENTRIES, seed=SEED, age_range=AGE_RANGE):
    """Return a shuffled sample with equal numbers of 'Male' and 'Female' rows.

    Each row of the result carries two extra columns:

    * ``Age``         - random integer drawn from ``age_range`` (inclusive).
    * ``Description`` - "I am <name>, I am <Age> years old and I am <nationality>".

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``name``, ``nationality`` and ``sex``.
        Rows whose ``sex`` is neither 'Male' nor 'Female' are dropped.
        The input frame is not modified.
    target : int
        Desired total number of rows. Exactly ``target // 2`` rows are taken
        per sex, so an odd target yields ``target - 1`` rows. If either sex
        has fewer rows than that, both are capped at the smaller group's size
        and the result is shorter than ``target``.
    seed : int or None
        Seed for the age draws and both shuffles; ``None`` gives a
        non-reproducible result.
    age_range : tuple of (int, int)
        Inclusive lower and upper bounds for the generated ages.

    Returns
    -------
    pandas.DataFrame
        Balanced, shuffled rows with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``data``.
    """
    rng = random.Random(seed)
    enriched = data.copy()

    # Randomly select ages within the configured inclusive range.
    ages = [rng.randint(*age_range) for _ in range(len(enriched))]
    enriched['Age'] = ages

    # Create the description field based on the template. A plain
    # comprehension avoids the slow row-wise DataFrame.apply and also
    # works when the frame is empty.
    enriched['Description'] = [
        f"I am {name}, I am {age} years old and I am {nationality}"
        for name, age, nationality in zip(enriched['name'], ages, enriched['nationality'])
    ]

    # Shuffle first so that head() below picks random rows within each sex.
    shuffled = enriched.sample(frac=1, random_state=rng.randrange(2**32))
    male_rows = shuffled[shuffled['sex'] == 'Male']
    female_rows = shuffled[shuffled['sex'] == 'Female']

    # Cap each sex at half the target BEFORE combining. Truncating an already
    # shuffled 50/50 pool would not keep the result balanced.
    per_sex = min(len(male_rows), len(female_rows), max(target // 2, 0))

    balanced = pd.concat([male_rows.head(per_sex), female_rows.head(per_sex)])
    return balanced.sample(frac=1, random_state=rng.randrange(2**32)).reset_index(drop=True)


# Inside a notebook kernel __name__ is "__main__", so this section always runs
# there; the guard only keeps the file I/O from firing when the code is imported.
if __name__ == "__main__":
    # Load the CSV file
    data = pd.read_csv(INPUT_CSV)

    final_data = build_balanced_sample(data)
    print(f"Selected {len(final_data)} entries.")

    # Report explicitly when the source data cannot supply the requested count.
    if len(final_data) < TARGET_ENTRIES:
        print(f"Note: only {len(final_data)} of the requested {TARGET_ENTRIES} "
              "entries are available after balancing.")

    # Save the modified data to a new CSV file
    final_data.to_csv(OUTPUT_CSV, index=False)

    print(f"Processed data has been saved to '{OUTPUT_CSV}'.")


Selected 962 entries.
Processed data has been saved to 'processed_names.csv'.


CSV file has been created with 962 entries.
