In [None]:
from google.colab import drive 
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# EDA package
import pandas as pd
import numpy as np

In [None]:
# ML packages
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction import DictVectorizer
#from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Load the dataset
df = pd.read_csv('drive/My Drive/JuzzChatBot/100names.csv')

In [None]:
df.size

196

In [None]:
#Data Cleaning
# Check for column name consistency
df.columns

Index(['names', 'sex'], dtype='object')

In [None]:
#Data types
df.dtypes

names    object
sex      object
dtype: object

In [None]:
# Check for missing values
df.isnull().isnull().sum()

names    0
sex      0
dtype: int64

In [None]:
# Number of female names
df[df.sex == 'female'].size

70

In [None]:
# Number of Male names
df[df.sex == 'male'].size


126

In [None]:
df_name = df

In [None]:
#Replacing all female and male with 0 and 1 
df_name.sex.replace({'female':0,'male':1}, inplace=True)

In [None]:
df_name.sex.unique()

array([1, 0])

In [None]:
df_name.dtypes

names    object
sex       int64
dtype: object

In [None]:
Xfeatures = df_name['names']

In [None]:
# Feature Extraction
cv = CountVectorizer()
X = cv.fit_transform(Xfeatures)

In [None]:
cv.get_feature_names()

['aden',
 'adilah',
 'adrian',
 'ahammad',
 'alex',
 'alistar',
 'ananda',
 'ang',
 'ann',
 'ansley',
 'ariel',
 'bebeh',
 'beh',
 'boo',
 'brenda',
 'catherine',
 'cavien',
 'chai',
 'chan',
 'chee',
 'cheng',
 'chia',
 'chiew',
 'chong',
 'choo',
 'choong',
 'chua',
 'cova',
 'de',
 'dino',
 'fariz',
 'fern',
 'gek',
 'gerd',
 'gildon',
 'gina',
 'goh',
 'gunjikar',
 'halim',
 'hansen',
 'hauy',
 'heng',
 'herry',
 'hong',
 'hsin',
 'hui',
 'hwee',
 'inis',
 'jarrod',
 'jen',
 'jennie',
 'jenny',
 'jens',
 'jessie',
 'jiaming',
 'jiekai',
 'jimmy',
 'johnny',
 'joo',
 'joshua',
 'julie',
 'jun',
 'kaiyun',
 'karan',
 'karthick',
 'keam',
 'kean',
 'keat',
 'kee',
 'kheong',
 'khim',
 'kim',
 'kong',
 'kuah',
 'kwan',
 'kyaw',
 'lau',
 'lay',
 'lee',
 'leh',
 'leng',
 'li',
 'lim',
 'linda',
 'ling',
 'lional',
 'loong',
 'louis',
 'madelil',
 'md',
 'mei',
 'michael',
 'min',
 'ming',
 'mohammad',
 'mon',
 'monica',
 'nair',
 'najib',
 'namrata',
 'naren',
 'ng',
 'ngiap',
 'nicholas

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
#Features
X
#Labels
y = df_name.sex

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33,random_state = 42)

In [None]:
# Naive Baynes Classifier is used because it does not require large dataset and juzz only gives us 100 dataset. It is fast and can make real time classifier
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X_train,y_train)
clf.score(X_test,y_test)

0.6060606060606061

In [None]:
# Accuracy of our model
print("Accuracy of Model",clf.score(X_test,y_test)*100,"%")

Accuracy of Model 60.60606060606061 %


In [None]:
#Accuracy of our model
print("Accuracy of model", clf.score(X_train,y_train)*100,"%")

Accuracy of model 100.0 %


In [None]:
#Sample1 Prediction
sample_name = ["Mark"]
vect =cv.transform(sample_name).toarray()

In [None]:
#Female is 0, Male is 1
clf.predict(vect)

array([1])

In [None]:
# Sample2 Prediction
sample_name1 =["Peter"]
vect2 =cv.transform(sample_name1).toarray()

In [None]:
#Female is 0, Male is 1
clf.predict(vect2)

array([1])

In [None]:
# Sample3 Prediction
sample_name2 =["Jessie"]
vect3 =cv.transform(sample_name2).toarray()

In [None]:
#Female is 0, Male is 1
clf.predict(vect3)

array([0])

In [None]:
!pip install --upgrade gspread

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gspread
  Downloading gspread-5.6.2-py3-none-any.whl (38 kB)
Installing collected packages: gspread
  Attempting uninstall: gspread
    Found existing installation: gspread 3.4.2
    Uninstalling gspread-3.4.2:
      Successfully uninstalled gspread-3.4.2
Successfully installed gspread-5.6.2


In [None]:
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
creds, _ = default()

gc = gspread.authorize(creds)

wb = gc.open_by_key('1akpkCKVR1rcAYYQwQNVxMUdvkOBEj7u35Sa3rG5543E')


In [None]:
# Sample3 Prediction of Random Names
sample_name3 = ["Nefertiti","Nasha","Ama","Ayo","Xhavier","Ovetta","Tathiana","Xia","Joseph","Xianliang"]
vect3 = cv.transform(sample_name3).toarray()

In [None]:
clf.predict(vect3)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [None]:
# A function to do it
def genderpredictor(a):
    data_set = [a]
    vector = cv.transform(data_set).toarray()
    if clf.predict(vector) == 0:
        print("Female")
    else:
        print("Male")

In [None]:
namelist = ["Yaa","Yaw","Femi","Masha"]
for i in namelist:
    print(genderpredictor(i))

Male
None
Male
None
Male
None
Male
None


In [None]:
# Predicting the gender in new dataset in csv under "Contacts sheet"
namelist = ["Alex Lee","Loh Lee Chin","Shell Nee toh","Sharon neo","Jaslyn wee","New Wee Keong","Samuel Hourong","Angeline Long","Chew Speacor","Low Fong Moi","Jiancon Fong",
"Ling Ling Chai","Audrey Wu","Choon Seong Lim","Max Sum","Wai Yee Choo","Beiqing Zhao","Bernard Chong","Mun Bryan","Vicki Ding","Simon Chang","Yan Xion","Sze Ling Ng","Danru Wen",
"Karen Tham","Lim Yong Chang","Tan Hui Leng","Chiou Pey Leong","Beng Hon Wee","Adrian yu Hung Wei","Susan Zhao","Lydia Liu","Chin han chung","Ying Chai Lee","Thng Kok Meng","Tiffany Dong",
"Mun Hong Yuen","Karl Chandran","Bruno Sng","Chong Yang Sng","Yi Fan Tan","Kah Hui Chng","Cheng Sook Fen","Clyn Koh","Julian Elisa Alimin","Shu Chin Lau","Brynn Ting",
"Therasa Chang","Tee Bee Chin","Wei Long","Benny Tan","Sharmanie Tan","Ng Wan Ting","Choon Siang Chua","Ling Ing Choo","Jamie Loh","Shaun Tan","Hiang Chye Chang","Kelvin Wong",
"Fany Fong Yin Kiew","Terence Cheng","Jong Soon Kai","Chia Lim","Kai Jun Yeo","Xamioming Sun","Lee Fang Tan","Chrysun Zapata","Cynthia Goh","Yu lily Chen","Kim Peng Lee","Chan Felicia",
"Grace Yang","Jasmine Chong","Amanda Lim","Shermaine Tan","Jimmy Woon","Albert Ong","Jia Hann Tan","Chia Nancy","Vivian Fong","Eddie Lim","Shiqi Liang","Teh Tit Hean",
"Phua Huat","Chi Ling Kong","Tay Catherine","Lee Sharine","Lim Mei Ping","Stanly Yong","Jianhua Weng","Ng Sau Kheng","Eveline Yip","Pui Lee Tong","Samantha Wong","Guo Wanping",
"Aaron Poh","Joseph Eng","Rachael Lo","Joanne Goh","Hai Chew Ng",]
for i in namelist:
    print(genderpredictor(i))

Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Female
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Female
None
Male
None
Male
None
Male
None
Male
None
Male
None
Male
None
Female
None
Male
None
Male
None
Male
None
Male
None
Female
None
Male
None
Female
None
Male
None
Male
None
Male
None
Male
None
Male
None
Female
None
Male
None
Male
None
Male
None
Male
None
Male
Non

In [None]:
genderpredictor("Nicole")

Female


In [None]:
ws = wb.worksheet('100names')

# get_all_values gives a list of rows.
rows = ws.get_all_values()
print(rows)

# Convert to a DataFrame and render.
#import pandas as pd
df100names=pd.DataFrame.from_records(rows[1:],columns=rows[0])
print(df100names)

[['names', 'sex'], ['Kyaw', 'male'], ['Quek Kim Lee', 'female'], ['Herry Chai', 'male'], ['Julie', 'female'], ['Gerd', 'male'], ['Michael Ng Kean Teik', 'male'], ['Naren', 'male'], ['Jessie Ong', 'female'], ['Lay Khim', 'female'], ['Wu', 'male'], ['Monica', 'female'], ['Alex', 'male'], ['Louis', 'male'], ['Phua Xu Mei', 'male'], ['Nicole T', 'female'], ['Gina', 'female'], ['Robyn Catherine Nair', 'female'], ['K Lee', 'male'], ['Serene Lee', 'female'], ['Lau Siew Lee', 'female'], ['Adilah', 'female'], ['Kuah Boo Cheng', 'male'], ['Wen Ting', 'female'], ['Rachel Heng', 'female'], ['Kong De Ren', 'male'], ['Karthick', 'male'], ['Yen Yen', 'female'], ['Yuong', 'male'], ['Jennie', 'female'], ['Ong Gek Tin', 'male'], ['Tan Wai Loong', 'male'], ['Simeon Tang', 'male'], ['Shine Ang', 'female'], ['Lee Jiekai', 'male'], ['Zijun', 'male'], ['Johnny', 'male'], ['Linda D Tom', 'male'], ['Ariel', 'male'], ['Shah', 'male'], ['Rositah', 'female'], ['Chee Kheong', 'male'], ['Shu Ting', 'male'], ['Kaiyu

In [None]:
wsContacts = wb.worksheet('Contacts')

# get_all_values gives a list of rows.
contactListofLists = wsContacts.get_all_values()

# Convert to a DataFrame and render.
dfContacts = pd.DataFrame.from_records(contactListofLists[1:],columns=contactListofLists[0])
print(dfContacts)

            names sex
0        Alex Lee    
1    Loh Lee Chin    
2   Shell Nee toh    
3      Sharon neo    
4      Jaslyn wee    
..            ...  ..
95      Aaron Poh    
96     Joseph Eng    
97     Rachael Lo    
98     Joanne Goh    
99    Hai Chew Ng    

[100 rows x 2 columns]


In [None]:
wsResult = wb.worksheet('Results')
# wsResult.update(none,dataToWrite)


In [None]:
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_csv('drive/My Drive/JuzzChatBot/Contacts.csv')  # read predict file
namelist = []
results = []
 for i in range(df['name']):
  namelist.append(i)
  name= genderpredictor(i)
  results.append(name)

dataframe = pd.DataFrame({'name':namelist,'sex':results}) 
dataframe.to_csv("drive/My Drive/JuzzChatBot/Results.csv") # read output file


