# Required Libraries

- [`Faker`](https://github.com/joke2k/faker): To generate fake names
- [`names-dataset`](https://github.com/philipperemy/name-dataset): To get gender and country info

In [1]:
import numpy as np
import pandas as pd
from faker import Faker
from names_dataset import NameDataset

In [2]:
en_fake =Faker()
fa_fake =Faker('Fa')
nd = NameDataset()

In [3]:
def make_name():
    if np.random.rand() > 0.5:
        return fa_fake.name()
    return en_fake.name()

In [4]:
df = pd.DataFrame({
    'Name':[make_name() for _ in range (10)]
})

In [5]:
df

Unnamed: 0,Name
0,Timothy Jenkins
1,Jordan Patton
2,Juan White
3,Catherine Jennings
4,مهدي تهرانی
5,Philip Huber
6,سوگند محمد پور
7,Michael Jones
8,یگانه معروف
9,ريحانه رفیعی


# Exteract First Name and Last Name

In [6]:
df['First Name'] = df['Name'].apply(
    lambda full_name: full_name.split()[0]
)

In [7]:
df['Last Name'] = df['Name'].apply(
    lambda full_name: full_name.split()[1]
)

# predict Gender

In [8]:
nd.search('Cory')['first_name']['gender']

{'Female': 0.17, 'Male': 0.83}

In [9]:
#max(output ,key = output.get)

In [10]:
def name_to_gender(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    return max(info['gender'] ,key = info['gender'].get)

In [11]:
df['Gender'] = df['First Name'].apply(name_to_gender)

In [12]:
df

Unnamed: 0,Name,First Name,Last Name,Gender
0,Timothy Jenkins,Timothy,Jenkins,Male
1,Jordan Patton,Jordan,Patton,Male
2,Juan White,Juan,White,Male
3,Catherine Jennings,Catherine,Jennings,Female
4,مهدي تهرانی,مهدي,تهرانی,Male
5,Philip Huber,Philip,Huber,Male
6,سوگند محمد پور,سوگند,محمد,Female
7,Michael Jones,Michael,Jones,Male
8,یگانه معروف,یگانه,معروف,Female
9,ريحانه رفیعی,ريحانه,رفیعی,Female


# Gender probability 

In [13]:
def gender_probability(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    Ge = max(info['gender'] ,key = info['gender'].get)
    return nd.search(first_name)['first_name']['gender'][Ge]

In [14]:
gender_probability('iris')

0.985

In [15]:
df['Gender Probability'] = df['First Name'].apply(gender_probability)

In [16]:
df

Unnamed: 0,Name,First Name,Last Name,Gender,Gender Probability
0,Timothy Jenkins,Timothy,Jenkins,Male,0.988
1,Jordan Patton,Jordan,Patton,Male,0.893
2,Juan White,Juan,White,Male,0.987
3,Catherine Jennings,Catherine,Jennings,Female,0.994
4,مهدي تهرانی,مهدي,تهرانی,Male,0.967
5,Philip Huber,Philip,Huber,Male,0.988
6,سوگند محمد پور,سوگند,محمد,Female,0.915
7,Michael Jones,Michael,Jones,Male,0.992
8,یگانه معروف,یگانه,معروف,Female,0.812
9,ريحانه رفیعی,ريحانه,رفیعی,Female,0.9


# predict Country 

In [17]:
nd.search('Ali')['first_name']['country']

{'United Arab Emirates': 0.037,
 'Algeria': 0.054,
 'Egypt': 0.149,
 'Iraq': 0.142,
 'Iran, Islamic Republic of': 0.052,
 'Morocco': 0.079,
 'Saudi Arabia': 0.146,
 'Syrian Arab Republic': 0.05,
 'Tunisia': 0.04,
 'Turkey': 0.25}

In [18]:
def name_to_country(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    return max(info['country'] ,key = info['country'].get)

In [19]:
df['Country'] = df['First Name'].apply(name_to_country)

In [20]:
df

Unnamed: 0,Name,First Name,Last Name,Gender,Gender Probability,Country
0,Timothy Jenkins,Timothy,Jenkins,Male,0.988,United States
1,Jordan Patton,Jordan,Patton,Male,0.893,United States
2,Juan White,Juan,White,Male,0.987,Colombia
3,Catherine Jennings,Catherine,Jennings,Female,0.994,France
4,مهدي تهرانی,مهدي,تهرانی,Male,0.967,Iraq
5,Philip Huber,Philip,Huber,Male,0.988,United Kingdom
6,سوگند محمد پور,سوگند,محمد,Female,0.915,"Iran, Islamic Republic of"
7,Michael Jones,Michael,Jones,Male,0.992,United States
8,یگانه معروف,یگانه,معروف,Female,0.812,"Iran, Islamic Republic of"
9,ريحانه رفیعی,ريحانه,رفیعی,Female,0.9,Iraq


# Country probability

In [21]:
nd.search('Ali')['first_name']['country']['Egypt']

0.149

In [22]:
def country_probability(first_name):
    info = nd.search(first_name)['first_name']
    if info is None:
        return
    Cu = max(info['country'] ,key = info['country'].get)
    return nd.search(first_name)['first_name']['country'][Cu]

In [23]:
df['Country probability'] = df['First Name'].apply(country_probability)

In [24]:
df

Unnamed: 0,Name,First Name,Last Name,Gender,Gender Probability,Country,Country probability
0,Timothy Jenkins,Timothy,Jenkins,Male,0.988,United States,0.53
1,Jordan Patton,Jordan,Patton,Male,0.893,United States,0.342
2,Juan White,Juan,White,Male,0.987,Colombia,0.235
3,Catherine Jennings,Catherine,Jennings,Female,0.994,France,0.485
4,مهدي تهرانی,مهدي,تهرانی,Male,0.967,Iraq,0.589
5,Philip Huber,Philip,Huber,Male,0.988,United Kingdom,0.306
6,سوگند محمد پور,سوگند,محمد,Female,0.915,"Iran, Islamic Republic of",0.804
7,Michael Jones,Michael,Jones,Male,0.992,United States,0.407
8,یگانه معروف,یگانه,معروف,Female,0.812,"Iran, Islamic Republic of",0.435
9,ريحانه رفیعی,ريحانه,رفیعی,Female,0.9,Iraq,0.456


In [25]:
def country_probability1(first_name, country):
    return nd.search(first_name)['first_name']['country'][country]

In [26]:
country_probability1('Kelly', 'United States')

0.337