In [1]:
import pandas as pd
from datetime import datetime, timedelta
import random

In [2]:
# Load the source table of names; the printed preview shows its two columns,
# 'Name' and 'Gender'.
df = pd.read_csv('assets/brazilian-names-and-gender.csv')
print(df)

          Name Gender
0         Abel      M
1     Abelardo      M
2        Abner      M
3       Abraão      M
4      Absalom      M
...        ...    ...
6095      Ésia      F
6096      Íria      F
6097      Íris      F
6098     Úlima      F
6099    Úrsula      F

[6100 rows x 2 columns]


In [3]:
def generate_random_name(row, names=None):
    """Build a random two-part name and return it with the first part's gender.

    Meant to be used with ``DataFrame.apply(..., axis=1)``: the row is ignored
    and only serves to produce one result per row.

    Parameters
    ----------
    row : any
        Ignored.
    names : pandas.DataFrame, optional
        Frame with 'Name' and 'Gender' columns to draw from. When omitted,
        the notebook-level ``df`` is used.

    Returns
    -------
    tuple
        ``(full_name, gender)`` where ``full_name`` is
        ``"<first> <second>"`` and ``gender`` is the 'Gender' value of the
        same row the first name was drawn from.

    Raises
    ------
    IndexError
        If the pool of names is empty.
    """
    pool = df if names is None else names
    pool_size = len(pool)
    if pool_size == 0:
        raise IndexError('Cannot choose from an empty sequence')

    # Draw row *positions* instead of name values: this works for any index
    # (not just the default 0..n-1 labels) and lets the gender be read from
    # the very row that was picked.
    first_pos = random.randrange(pool_size)
    second_pos = random.randrange(pool_size)

    random_first_name = pool['Name'].iloc[first_pos]
    random_second_name = pool['Name'].iloc[second_pos]

    # Reading the gender by position avoids a full scan of the 'Name' column
    # on every call, and stays correct when the same name appears more than
    # once with different genders.
    first_name_gender = pool['Gender'].iloc[first_pos]

    return random_first_name + ' ' + random_second_name, first_name_gender

# Number of synthetic people to keep in the dataset.
N_PEOPLE = 1000

# Generate names only for the rows that are kept, instead of generating one
# for every source row and discarding the surplus afterwards.
# generate_random_name still draws from the full source table here, because
# `df` is only rebound after apply() has finished.
generated = df.head(N_PEOPLE).apply(generate_random_name, axis=1, result_type='expand')
generated.columns = ['Name', 'Gender']

# Bind `df` to the freshly built frame rather than to a head() slice of the
# source, so columns added later are written to an independent DataFrame.
# NOTE: after this line `df` no longer holds the source names; re-run the
# loading cell before re-running this one.
df = generated
print(df)


                Name Gender
0    Adolar Quintana      M
1      Antonina René      F
2        Danila Yula      F
3     Fergus Aquilão      M
4       Xereu Aragão      M
..               ...    ...
995    Adiel Caitria      M
996   Berardo Fulton      M
997    Mansur Mizael      M
998   Cleonice Nowak      F
999   Dakarai Celino      F

[1000 rows x 2 columns]


In [4]:
# Load the table of states; the code below reads state names from its 'NOME' column.
df_states = pd.read_csv('assets/estados.csv')

def random_state_generator(row, df_states):
    """Return one value drawn uniformly at random from ``df_states['NOME']``.

    Meant to be used with ``DataFrame.apply(..., axis=1)``: the row is ignored
    and only serves to produce one result per row.

    Parameters
    ----------
    row : any
        Ignored.
    df_states : pandas.DataFrame
        Frame with a 'NOME' column to draw from.

    Returns
    -------
    object
        A single element of the 'NOME' column.

    Raises
    ------
    IndexError
        If ``df_states`` has no rows.
    """
    state_names = df_states['NOME']
    if len(state_names) == 0:
        raise IndexError('Cannot choose from an empty sequence')

    # Pick by position. Handing the Series itself to random.choice would
    # index it by label, which only works when the index is the default
    # 0..n-1 range (it fails on a filtered or re-indexed frame).
    random_position = random.randrange(len(state_names))
    return state_names.iloc[random_position]

# Assign every person an independently drawn state; the row passed by apply()
# is forwarded unchanged and the states table is supplied explicitly.
df['State'] = df.apply(lambda person: random_state_generator(person, df_states=df_states), axis=1)
print(df)

                Name Gender           State
0    Adolar Quintana      M       São Paulo
1      Antonina René      F         Roraima
2        Danila Yula      F         Roraima
3     Fergus Aquilão      M           Amapá
4       Xereu Aragão      M       Tocantins
..               ...    ...             ...
995    Adiel Caitria      M  Santa Catarina
996   Berardo Fulton      M        Rondônia
997    Mansur Mizael      M           Ceará
998   Cleonice Nowak      F          Paraná
999   Dakarai Celino      F     Mato Grosso

[1000 rows x 3 columns]


In [5]:
def generate_random_birthdate(row):
    """Return a random ``datetime`` in the past, ignoring ``row``.

    The result lies between ``365 * 100`` days ago and ``365 * 18`` days ago
    (both inclusive), counted in whole days from the current time. Years are
    approximated as 365 days; leap days are not accounted for.
    """
    # Whole days to go back from now: the oldest offset minus a random span.
    days_back = 365 * 100 - random.randint(0, 365 * 82)
    return datetime.now() - timedelta(days=days_back)

# Generate one timestamp per person and store only its calendar-date part.
df['Birthdate'] = df.apply(generate_random_birthdate, axis=1).dt.date
print(df)

                Name Gender           State   Birthdate
0    Adolar Quintana      M       São Paulo  1948-11-24
1      Antonina René      F         Roraima  1951-05-20
2        Danila Yula      F         Roraima  1992-09-01
3     Fergus Aquilão      M           Amapá  1959-07-03
4       Xereu Aragão      M       Tocantins  1960-02-23
..               ...    ...             ...         ...
995    Adiel Caitria      M  Santa Catarina  2005-05-09
996   Berardo Fulton      M        Rondônia  1939-08-26
997    Mansur Mizael      M           Ceará  1948-12-09
998   Cleonice Nowak      F          Paraná  1956-11-12
999   Dakarai Celino      F     Mato Grosso  1963-01-27

[1000 rows x 4 columns]


In [6]:
def calculate_age(birthdate, current_date=None):
    """Return the age in completed years on a reference date.

    Parameters
    ----------
    birthdate : date-like
        Any object exposing ``year``, ``month`` and ``day`` attributes.
    current_date : date-like, optional
        Date on which the age is evaluated. Defaults to ``datetime.now()``,
        so the result depends on when the function is called unless a
        reference date is supplied.

    Returns
    -------
    int
        Difference in calendar years, minus one if the birthday has not yet
        occurred in the reference year.
    """
    if current_date is None:
        current_date = datetime.now()

    # Tuple comparison is True (counted as 1 in the subtraction below) while
    # this year's birthday is still ahead of the reference date.
    birthday_pending = (current_date.month, current_date.day) < (birthdate.month, birthdate.day)
    return current_date.year - birthdate.year - birthday_pending

# Derive each person's age from the stored birthdate, element by element.
df['Age'] = df['Birthdate'].map(calculate_age)
print(df)

                Name Gender           State   Birthdate  Age
0    Adolar Quintana      M       São Paulo  1948-11-24   75
1      Antonina René      F         Roraima  1951-05-20   73
2        Danila Yula      F         Roraima  1992-09-01   31
3     Fergus Aquilão      M           Amapá  1959-07-03   65
4       Xereu Aragão      M       Tocantins  1960-02-23   64
..               ...    ...             ...         ...  ...
995    Adiel Caitria      M  Santa Catarina  2005-05-09   19
996   Berardo Fulton      M        Rondônia  1939-08-26   84
997    Mansur Mizael      M           Ceará  1948-12-09   75
998   Cleonice Nowak      F          Paraná  1956-11-12   67
999   Dakarai Celino      F     Mato Grosso  1963-01-27   61

[1000 rows x 5 columns]


In [7]:
# Write the generated table to people.csv, leaving the row index out of the file.
df.to_csv('people.csv', index=False)