In [31]:
from faker import Faker
from faker.providers import DynamicProvider
import pandas as pd

In [32]:
fake = Faker(['fr_FR'])
nbr_of_samples = 100

In [33]:
# Load city names from CSV
df_cities = pd.read_csv('communes-france-2025.csv', sep=',', usecols=['nom_standard'])
print(df_cities.shape)
cities_list = df_cities['nom_standard'].tolist()

# Load character traits from CSV
df_traits = pd.read_csv('traits_de_caractere.csv', header=None, names=['trait'])
print(df_traits.shape)
traits_list = df_traits['trait'].tolist()

# Load job titles from CSV
df_jobs = pd.read_csv('metiers.csv', header=None, names=['job'])
print(df_jobs.shape)
jobs_list = df_jobs['job'].tolist()

(34935, 1)
(433, 1)
(217, 1)


In [34]:
# Providers

# City Provider
city_provider = DynamicProvider(
    provider_name="city",
    elements=cities_list
)
fake.add_provider(city_provider)

# Hobby Provider
hobby_provider = DynamicProvider(
    provider_name="hobby",
    elements=[
        "Football",
        "Lecture",
        "Cuisine",
        "Jardinage",
        "Photographie",
        "Peinture",
        "Musique",
        "Randonnée",
        "Yoga",
        "Voyage"
    ]
)
fake.add_provider(hobby_provider)

# Trait Provider
trait_provider = DynamicProvider(
    provider_name="trait",
    elements=traits_list
)
fake.add_provider(trait_provider)

# Job Provider
job_provider = DynamicProvider(
    provider_name="job",
    elements=jobs_list
)
fake.add_provider(job_provider)

In [35]:
# Generate a sample fake profile
nom_prenom = fake.name().split(" ")
print("Id :", fake.uuid4())
print("Prénom :", fake.first_name())
print("Nom :", fake.last_name())
print("Sexe :", fake.random_element(elements=["M", "F"]))
print("Âge :", fake.random_int(min=18, max=80))
print("Ville :", fake.city())
print("Hobby :", fake.hobby())
print("Trait :", fake.trait())
print("Job :", fake.job())

Id : 61d4e3a2-8542-4ab0-92c7-4dc4b1ee8a89
Prénom : Éléonore
Nom : Boulay
Sexe : F
Âge : 51
Ville : Saint-Laurent
Hobby : Jardinage
Trait : Candide
Job : bûcherons


In [36]:
df_fake_profile = pd.DataFrame({
    "Id": [fake.uuid4() for _ in range(nbr_of_samples)],
    "Prénom": [fake.first_name() for _ in range(nbr_of_samples)],
    "Nom": [fake.last_name() for _ in range(nbr_of_samples)],
    "Sexe": [fake.random_element(elements=["M", "F"]) for _ in range(nbr_of_samples)],
    "Âge": [fake.random_int(min=18, max=80) for _ in range(nbr_of_samples)],
    "Ville": [fake.city() for _ in range(nbr_of_samples)],
    "Hobby": [fake.hobby() for _ in range(nbr_of_samples)],
    "Trait": [fake.trait() for _ in range(nbr_of_samples)],
    "Job": [fake.job() for _ in range(nbr_of_samples)]
})
print(df_fake_profile.columns)
print(df_fake_profile.shape)
df_fake_profile.head()

Index(['Id', 'Prénom', 'Nom', 'Sexe', 'Âge', 'Ville', 'Hobby', 'Trait', 'Job'], dtype='object')
(100, 9)


Unnamed: 0,Id,Prénom,Nom,Sexe,Âge,Ville,Hobby,Trait,Job
0,efe8622c-a430-4037-9d91-11aa858dc595,Pierre,Laporte,M,33,Le Claux,Photographie,Déraisonnable / Modéré,chefs de rang
1,f8e14faf-6f98-4905-8773-50303e5ed324,Jeanne,Meyer,F,75,Sainte-Sévère-sur-Indre,Lecture,Maître de soi,chefs d'équipe BTP
2,80bb7dc3-aaea-4309-b401-9afad86bb44f,Capucine,Marchal,M,67,Lucarré,Football,Disponible,chefs de projet en IA
3,b756d0e1-a5c4-4df0-817b-105b809021de,Gérard,Baudry,M,70,Mouron-sur-Yonne,Cuisine,Pervers,chefs de projet web
4,0e54fbed-114d-4f3a-9431-c6365199eb0c,Hugues,Bonnin,M,65,Bayon,Peinture,Moral / Amoral,cordonniers


In [37]:
# Save to CSV
df_fake_profile.to_csv("fake_profiles_" + str(nbr_of_samples) + ".csv", index=False)