In [14]:
from faker import Faker
from faker.providers import DynamicProvider
import pandas as pd

In [None]:
fake = Faker(['fr_FR'])
nbr_of_samples = 100_000

# Path to CSV files
cities_csv_path = 'csv/communes-france-2025.csv'
hobbys_csv_path = 'ChatGPT/liste_hobbies.csv'
traits_csv_path = 'ChatGPT/liste_traits_caractere.csv'
jobs_csv_path = 'ChatGPT/liste_metiers.csv'

In [16]:
# Load city names from CSV
df_cities = pd.read_csv(cities_csv_path, sep=',', usecols=['nom_standard'])
print(df_cities.shape)
cities_list = df_cities['nom_standard'].tolist()

df_hobbys = pd.read_csv(hobbys_csv_path, sep=',', usecols=['Hobby'])
print(df_hobbys.shape)
hobbys_list = df_hobbys['Hobby'].tolist()

# Load character traits from CSV
df_traits = pd.read_csv(traits_csv_path, sep=',', usecols=['Trait'])
print(df_traits.shape)
traits_list = df_traits['Trait'].tolist()

# Load job titles from CSV
df_jobs = pd.read_csv(jobs_csv_path, sep=',', usecols=['Metier'])
print(df_jobs.shape)
jobs_list = df_jobs['Metier'].tolist()

(34935, 1)
(210, 1)
(84, 1)
(191, 1)


In [18]:
# Providers

# City Provider
city_provider = DynamicProvider(
    provider_name="city",
    elements=cities_list
)
fake.add_provider(city_provider)

# Hobby Provider
hobby_provider = DynamicProvider(
    provider_name="hobby",
    elements=hobbys_list
)
fake.add_provider(hobby_provider)

# Trait Provider
trait_provider = DynamicProvider(
    provider_name="trait",
    elements=traits_list
)
fake.add_provider(trait_provider)

# Job Provider
job_provider = DynamicProvider(
    provider_name="job",
    elements=jobs_list
)
fake.add_provider(job_provider)

In [19]:
# Generate a sample fake profile
nom_prenom = fake.name().split(" ")
print("Id :", fake.uuid4())
print("Prénom :", fake.first_name())
print("Nom :", fake.last_name())
print("Sexe :", fake.random_element(elements=["M", "F"]))
print("Âge :", fake.random_int(min=18, max=80))
print("Ville :", fake.city())
print("Hobby :", fake.hobby())
print("Trait :", fake.trait())
print("Job :", fake.job())

Id : 5441900d-3ab2-4d2c-b73d-d12bd1fb556b
Prénom : Alfred
Nom : Paris
Sexe : M
Âge : 18
Ville : Montaigut-le-Blanc
Hobby : Course à pied
Trait : Indiscipliné
Job : Ingénieur électrique


In [20]:
df_fake_profile = pd.DataFrame({
    "Id": [fake.uuid4() for _ in range(nbr_of_samples)],
    "Prénom": [fake.first_name() for _ in range(nbr_of_samples)],
    "Nom": [fake.last_name() for _ in range(nbr_of_samples)],
    "Sexe": [fake.random_element(elements=["M", "F"]) for _ in range(nbr_of_samples)],
    "Âge": [fake.random_int(min=18, max=80) for _ in range(nbr_of_samples)],
    "Ville": [fake.city() for _ in range(nbr_of_samples)],
    "Hobby": [fake.hobby() for _ in range(nbr_of_samples)],
    "Trait": [fake.trait() for _ in range(nbr_of_samples)],
    "Job": [fake.job() for _ in range(nbr_of_samples)]
})
print(df_fake_profile.columns)
print(df_fake_profile.shape)
df_fake_profile.head()

Index(['Id', 'Prénom', 'Nom', 'Sexe', 'Âge', 'Ville', 'Hobby', 'Trait', 'Job'], dtype='object')
(100, 9)


Unnamed: 0,Id,Prénom,Nom,Sexe,Âge,Ville,Hobby,Trait,Job
0,cf9b36d1-1478-46b2-b640-6d5b34975329,Xavier,Étienne,M,23,Labastide-Beauvoir,Snowboard,Manipulateur,Community manager
1,e11c04d5-6763-47ea-a552-f09dd9363573,Anne,Menard,M,28,Andrésy,Aquariophilie,Colérique,Cariste
2,0e20a810-d628-4f4a-af94-e7db5b4aeb57,Thibaut,Masse,F,75,Urbanya,Intelligence artificielle,Optimiste,Second de cuisine
3,5aacfd20-7293-4bde-ba03-270e4af8d274,Colette,Dumas,F,22,Boissezon,Aïkido,Colérique,Contrôleur de gestion
4,7edc197b-0416-40d3-aaf9-a75b4cc0e57a,Pierre,Maillard,M,56,Saint-Mard,Basket-ball,Idéaliste,Sage-femme


In [21]:
# Save to CSV
df_fake_profile.to_csv("Fake_profiles/fake_profiles_" + str(nbr_of_samples) + ".csv", index=False)