## Required Libraries 
To do so, we use the following libraries:
- Faker: To generate fake names
- names-dataset: To get gender and country info


In [1]:
from faker import Faker
from names_dataset import NameDataset
import numpy as np
import pandas as pd

In [2]:
faker = Faker()
nd = NameDataset()

## Generate Fake Data

In [3]:
df = pd.DataFrame({
    'Name' : [faker.name() for i in range(10000)]
})

In [4]:
df

Unnamed: 0,Name
0,Christopher Hayes
1,Amanda Ramirez
2,Preston Villanueva
3,Ronald Kirby
4,Paul Duarte
...,...
9995,Jason Evans DDS
9996,Mrs. Jennifer Stewart DDS
9997,Jessica Pruitt
9998,David Edwards


## Extract First Name & Last Name
To predict gender and country using a name, we need the first name and last names separate. We can simply assume that the first part of a name is the first name and the last par is last name. For example:

In [5]:
def get_fname_lname(x):
    fname = x.split()[0]
    lname = x.split()[-1]
    
    return fname, lname
    

In [6]:
df['FirstName'] = df['Name'].apply(lambda x: get_fname_lname(x)[0])

In [7]:
df['LastName'] = df['Name'].apply(lambda x: get_fname_lname(x)[-1])

In [8]:
df

Unnamed: 0,Name,FirstName,LastName
0,Christopher Hayes,Christopher,Hayes
1,Amanda Ramirez,Amanda,Ramirez
2,Preston Villanueva,Preston,Villanueva
3,Ronald Kirby,Ronald,Kirby
4,Paul Duarte,Paul,Duarte
...,...,...,...
9995,Jason Evans DDS,Jason,DDS
9996,Mrs. Jennifer Stewart DDS,Mrs.,DDS
9997,Jessica Pruitt,Jessica,Pruitt
9998,David Edwards,David,Edwards


## Predict Gender

In [9]:
def predict_gender(x):
    gen = nd.search(x)['first_name']
    if gen is None:
        
        return None
    else:
        stats = gen['gender']
        return max(stats, key=stats.get)

        
        

In [10]:
df['Gender'] = df['FirstName'].apply( lambda x: predict_gender(x))

In [11]:
df

Unnamed: 0,Name,FirstName,LastName,Gender
0,Christopher Hayes,Christopher,Hayes,Male
1,Amanda Ramirez,Amanda,Ramirez,Female
2,Preston Villanueva,Preston,Villanueva,Male
3,Ronald Kirby,Ronald,Kirby,Male
4,Paul Duarte,Paul,Duarte,Male
...,...,...,...,...
9995,Jason Evans DDS,Jason,DDS,Male
9996,Mrs. Jennifer Stewart DDS,Mrs.,DDS,
9997,Jessica Pruitt,Jessica,Pruitt,Female
9998,David Edwards,David,Edwards,Male


## Gender Probability


In [12]:
def gender_probability(x):
    info = nd.search(x)['first_name']
    
    if info is None:
        return None
    
    
    
    return max(info['gender'].values())

In [13]:
df['GenderProbability'] = df['FirstName'].apply(lambda x: gender_probability(x))

In [14]:
df

Unnamed: 0,Name,FirstName,LastName,Gender,GenderProbability
0,Christopher Hayes,Christopher,Hayes,Male,0.991
1,Amanda Ramirez,Amanda,Ramirez,Female,0.987
2,Preston Villanueva,Preston,Villanueva,Male,0.988
3,Ronald Kirby,Ronald,Kirby,Male,0.992
4,Paul Duarte,Paul,Duarte,Male,0.990
...,...,...,...,...,...
9995,Jason Evans DDS,Jason,DDS,Male,0.992
9996,Mrs. Jennifer Stewart DDS,Mrs.,DDS,,
9997,Jessica Pruitt,Jessica,Pruitt,Female,0.992
9998,David Edwards,David,Edwards,Male,0.991


## Country

In [15]:
def get_country(x):
    info = nd.search(x)['first_name']
    
    if info is None:
        return None
    
    return max(info['country'], key=info['country'].get)


In [16]:
df['country'] = df['FirstName'].apply(lambda x :get_country(x))

In [17]:
df

Unnamed: 0,Name,FirstName,LastName,Gender,GenderProbability,country
0,Christopher Hayes,Christopher,Hayes,Male,0.991,United States
1,Amanda Ramirez,Amanda,Ramirez,Female,0.987,United States
2,Preston Villanueva,Preston,Villanueva,Male,0.988,United States
3,Ronald Kirby,Ronald,Kirby,Male,0.992,United States
4,Paul Duarte,Paul,Duarte,Male,0.990,United Kingdom
...,...,...,...,...,...,...
9995,Jason Evans DDS,Jason,DDS,Male,0.992,United States
9996,Mrs. Jennifer Stewart DDS,Mrs.,DDS,,,
9997,Jessica Pruitt,Jessica,Pruitt,Female,0.992,United States
9998,David Edwards,David,Edwards,Male,0.991,United States


## Country Probability

In [18]:
def country_probability(x):
    info = nd.search(x)['first_name']
    
    if info is None:
        return None
    
    
    
    return max(info['country'].values())

In [22]:
df['CountryProbability'] = df["FirstName"].apply(lambda x: country_probability(x))

In [23]:
df

Unnamed: 0,Name,FirstName,LastName,Gender,GenderProbability,country,CountryProbability
0,Christopher Hayes,Christopher,Hayes,Male,0.991,United States,0.444
1,Amanda Ramirez,Amanda,Ramirez,Female,0.987,United States,0.371
2,Preston Villanueva,Preston,Villanueva,Male,0.988,United States,0.781
3,Ronald Kirby,Ronald,Kirby,Male,0.992,United States,0.242
4,Paul Duarte,Paul,Duarte,Male,0.990,United Kingdom,0.368
...,...,...,...,...,...,...,...
9995,Jason Evans DDS,Jason,DDS,Male,0.992,United States,0.485
9996,Mrs. Jennifer Stewart DDS,Mrs.,DDS,,,,
9997,Jessica Pruitt,Jessica,Pruitt,Female,0.992,United States,0.348
9998,David Edwards,David,Edwards,Male,0.991,United States,0.265


In [28]:
df.to_csv(r'prediction.csv')