In [166]:
import pandas as pd 
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt

In [167]:
female = pd.read_csv(r'C:\Users\User\Desktop\Gender\female.txt' , header= None , names= ['Name'])
male = pd.read_csv(r'C:\Users\User\Desktop\Gender\male.txt' , header = None , names = ['Name'])

In [168]:
female.head()

Unnamed: 0,Name
0,Abagael
1,Abagail
2,Abbe
3,Abbey
4,Abbi


In [169]:
male.head()

Unnamed: 0,Name
0,Aamir
1,Aaron
2,Abbey
3,Abbie
4,Abbot


In [170]:
female['Gender'] = 0

In [171]:
female

Unnamed: 0,Name,Gender
0,Abagael,0
1,Abagail,0
2,Abbe,0
3,Abbey,0
4,Abbi,0
...,...,...
4996,Zorine,0
4997,Zsa Zsa,0
4998,Zsazsa,0
4999,Zulema,0


In [172]:
male['Gender'] = 1

In [173]:
male

Unnamed: 0,Name,Gender
0,Aamir,1
1,Aaron,1
2,Abbey,1
3,Abbie,1
4,Abbot,1
...,...,...
2938,Zeus,1
2939,Zippy,1
2940,Zollie,1
2941,Zolly,1


In [174]:
new_data = pd.merge(female , male   , how = 'outer' )

In [175]:
new_data

Unnamed: 0,Name,Gender
0,Abagael,0
1,Abagail,0
2,Abbe,0
3,Abbey,0
4,Abbi,0
...,...,...
7939,Zeus,1
7940,Zippy,1
7941,Zollie,1
7942,Zolly,1


In [176]:
new_data = new_data.sample(n = 7944 )

In [177]:
X_names = new_data['Name']
Y = new_data.Gender

In [178]:
cv = CountVectorizer()
X = cv.fit_transform(X_names)

In [179]:
cv.get_feature_names()

['aamir',
 'aaron',
 'abagael',
 'abagail',
 'abbe',
 'abbey',
 'abbi',
 'abbie',
 'abbot',
 'abbott',
 'abby',
 'abdel',
 'abdul',
 'abdulkarim',
 'abdullah',
 'abe',
 'abel',
 'abelard',
 'abigael',
 'abigail',
 'abigale',
 'abner',
 'abra',
 'abraham',
 'abram',
 'acacia',
 'ace',
 'ada',
 'adah',
 'adair',
 'adaline',
 'adam',
 'adams',
 'adara',
 'addie',
 'addis',
 'adel',
 'adela',
 'adelaide',
 'adele',
 'adelice',
 'adelina',
 'adelind',
 'adeline',
 'adella',
 'adelle',
 'adena',
 'adey',
 'adger',
 'adi',
 'adiana',
 'adina',
 'aditya',
 'adlai',
 'adnan',
 'adolf',
 'adolfo',
 'adolph',
 'adolphe',
 'adolpho',
 'adolphus',
 'adora',
 'adore',
 'adoree',
 'adorne',
 'adrea',
 'adria',
 'adriaens',
 'adrian',
 'adriana',
 'adriane',
 'adrianna',
 'adrianne',
 'adrick',
 'adrien',
 'adriena',
 'adrienne',
 'aeriel',
 'aeriela',
 'aeriell',
 'ag',
 'agace',
 'agamemnon',
 'agata',
 'agatha',
 'agathe',
 'aggi',
 'aggie',
 'aggy',
 'agna',
 'agnella',
 'agnes',
 'agnese',
 'agne

In [180]:
X_train , X_test , Y_train , Y_test = train_test_split(X , Y , test_size= 0.3 , random_state= 42)

# MultinomialNB 

In [181]:
mnb = MultinomialNB()
mnb.fit(X_train , Y_train)

MultinomialNB()

In [182]:
mnb_pred = mnb.predict(X_test)

In [183]:
print(metrics.classification_report(Y_test , mnb_pred))

              precision    recall  f1-score   support

           0       0.62      0.95      0.75      1498
           1       0.07      0.01      0.01       886

    accuracy                           0.60      2384
   macro avg       0.35      0.48      0.38      2384
weighted avg       0.42      0.60      0.48      2384



In [184]:
print("Accuracy of Model",mnb.score(X_train,Y_train)*100,"%")

Accuracy of Model 96.61870503597122 %


# DecissionTree

In [185]:
tree = DecisionTreeClassifier()
tree.fit(X_train , Y_train)

DecisionTreeClassifier()

In [186]:
tree_pred = tree.predict(X_test)

In [187]:
print(metrics.classification_report(Y_test , tree_pred))

              precision    recall  f1-score   support

           0       0.62      0.95      0.75      1498
           1       0.05      0.00      0.01       886

    accuracy                           0.60      2384
   macro avg       0.33      0.48      0.38      2384
weighted avg       0.41      0.60      0.47      2384



In [188]:
print("Accuracy of Model",tree.score(X_train,Y_train)*100,"%")

Accuracy of Model 96.63669064748201 %


# RandomForest

In [189]:
rf = RandomForestClassifier()
rf.fit(X_train , Y_train)

RandomForestClassifier()

In [190]:
rf_pred = rf.predict(X_test)

In [191]:
print(metrics.classification_report(Y_test , rf_pred))

              precision    recall  f1-score   support

           0       0.62      0.95      0.75      1498
           1       0.05      0.00      0.01       886

    accuracy                           0.60      2384
   macro avg       0.33      0.48      0.38      2384
weighted avg       0.41      0.60      0.47      2384



In [192]:
print("Accuracy of Model",rf.score(X_train,Y_train)*100,"%")

Accuracy of Model 96.52877697841727 %


# DummyClassifier

In [193]:
dummy = DummyClassifier()
dummy.fit(X_train , Y_train)

DummyClassifier()

In [194]:
dummy_pred = dummy.predict(X_test)

In [195]:
print(metrics.classification_report(Y_test , dummy_pred))

              precision    recall  f1-score   support

           0       0.63      1.00      0.77      1498
           1       0.00      0.00      0.00       886

    accuracy                           0.63      2384
   macro avg       0.31      0.50      0.39      2384
weighted avg       0.39      0.63      0.48      2384



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [196]:
print("Accuracy of Model",dummy.score(X_train,Y_train)*100,"%")

Accuracy of Model 63.00359712230216 %


# Logistic

In [197]:
log = LogisticRegression()
log.fit(X_train , Y_train)

LogisticRegression()

In [198]:
log_pred = log.predict(X_test)

In [199]:
log.score(X_test , Y_test)*100

62.961409395973156

In [200]:
print("Accuracy of Model",log.score(X_train,Y_train)*100,"%")

Accuracy of Model 63.23741007194245 %


In [201]:
sample_name = ["abbe"]
vect = cv.transform(sample_name).toarray()

In [202]:
vect

array([[0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [203]:
mnb.predict(vect)

array([0], dtype=int64)

In [204]:
sample_name1 = ["zolly"]
vect1 = cv.transform(sample_name1).toarray()

In [205]:
vect1

array([[0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [206]:
mnb_end_predict = mnb.predict(vect1)

In [207]:
if(mnb_end_predict == 0):
    print('female')
else:
    print('male')

male


In [208]:

vect2 = cv.transform(X_names).toarray()

In [209]:
mnb_end_predict2 = mnb.predict(vect2)

In [210]:
for x in mnb_end_predict2:
    if(x == 0):
        print('female')
    else:
        print('male')

female
female
female
female
female
female
female
female
female
female
female
male
female
female
female
female
female
female
male
female
male
female
female
female
female
female
female
female
male
female
female
female
male
female
male
female
female
female
female
female
female
female
female
male
female
female
male
female
female
female
female
female
female
female
male
female
female
female
female
female
male
female
female
female
female
female
female
female
male
female
male
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
male
female
female
female
male
female
male
female
female
male
male
female
female
female
male
female
female
male
male
male
female
female
male
female
female
male
female
female
female
female
female
male
female
female
female
male
female
female
female
male
female
female
male
male
female
male
female
female
female
female
female
female
female
male
female
female
male
male


female
female
female
female
female
female
female
female
male
male
male
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
male
female
male
female
female
male
female
female
female
female
male
female
female
female
female
female
male
female
female
male
female
female
female
female
female
male
female
male
female
female
female
female
male
female
male
female
female
female
male
female
female
female
male
female
female
female
male
male
female
female
male
female
female
female
female
female
female
male
female
female
female
female
female
male
female
female
male
female
male
female
female
female
male
male
female
female
female
male
female
female
female
female
female
female
female
female
male
female
female
male
male
female
female
female
male
female
female
male
female
female
male
female
male
female
female
female
male
female
female
male
male
male
male
female
male
male
female
female
female
female
female
male
male
male
female
female
female
female
male
f

male
female
female
female
male
female
male
female
female
female
female
female
female
female
male
female
male
female
female
female
female
female
female
female
female
female
female
female
male
male
female
male
female
male
male
male
male
male
female
female
female
male
female
female
female
female
female
female
male
female
female
female
female
male
female
female
female
female
female
female
male
male
female
female
male
female
female
female
female
male
male
male
female
male
female
female
male
female
female
female
female
male
female
female
female
male
female
female
female
female
female
female
female
male
female
female
female
male
male
female
female
female
female
female
female
male
female
female
female
female
female
male
female
male
female
male
male
female
male
female
female
male
female
female
female
female
female
male
female
female
female
female
female
female
female
male
female
female
female
male
male
female
female
female
female
male
female
female
male
female
female
male
female
female
female
m

male
female
female
female
female
female
female
female
female
male
male
female
female
female
female
female
female
female
male
female
male
male
female
female
male
female
male
female
female
female
female
male
female
female
female
male
female
male
female
male
female
female
female
female
female
male
female
female
female
male
female
female
female
female
male
female
female
female
female
male
female
female
male
female
female
female
female
female
female
male
female
male
female
female
female
female
female
female
female
female
female
female
female
female
male
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
female
male
female
female
female
female
female
male
female
female
female
male
male
female
female
female
female
female
female
female
female
female
female
female
female
male
female
female
female
male
female
male
female
female
male
male
male
male
female
female
female
female
female
female
female
female
female
female
female
femal

female
female
female
male
male
female
female
female
female
female
female
male
female
female
female
male
female
female
female
female
female
male
female
female
male
female
female
female
female
female
male
female
female
male
male
female
male
female
female
female
female
female
female
male
female
female
female
female
female
male
female
female
female
female
female
female
female
male
female
female
female
male
female
male
female
female
female
male
male
female
female
female
female
female
male
female
female
male
female
female
male
female
male
female
male
female
female
female
female
female
female
female
male
male
female
male
female
female
male
female
female
female
male
male
female
female
male
female
female
female
female
male
female
male
female
male
female
female
female
female
male
female
male
female
female
female
female
female
male
female
male
female
male
male
male
male
female
male
female
female
female
male
female
female
female
female
female
female
female
female
female
female
male
female
male
fem