# Gender Classification Using Logistic Regression

The dataset can be found at [elakiricoder - GitHub](https://github.com/elakiricoder/gender_classification_Logistic_Regression).

## Imports

In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Read File

In [65]:
# Load the dataset from a CSV file; the path is relative to the current working directory.
df = pd.read_csv('male_female_v7.csv')

In [66]:
# Preview the first rows to sanity-check column names and value formats.
df.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,Male
0,1,11.8,6.1,1,0,1,1,1
1,0,14.0,5.4,0,0,1,0,0
2,0,11.8,6.3,1,1,1,1,1
3,0,14.4,6.1,0,1,1,1,1
4,1,13.5,5.9,0,0,0,0,0


In [67]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5001 entries, 0 to 5000
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   long_hair                  5001 non-null   int64  
 1   forehead_width_cm          5001 non-null   float64
 2   forehead_height_cm         5001 non-null   float64
 3   nose_wide                  5001 non-null   int64  
 4   nose_long                  5001 non-null   int64  
 5   lips_thin                  5001 non-null   int64  
 6   distance_nose_to_lip_long  5001 non-null   int64  
 7   Male                       5001 non-null   int64  
dtypes: float64(2), int64(6)
memory usage: 312.7 KB


## Split the Data

In [68]:
# Target: the 0/1 'Male' label. Features: every remaining column.
y = df['Male']
X = df.drop(columns='Male')

In [69]:
# Feature matrix dimensions: (rows, columns).
X.shape

(5001, 7)

In [70]:
# Target vector length; should match the number of rows in X.
y.shape

(5001,)

In [71]:
from sklearn.model_selection import train_test_split

In [72]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [73]:
from collections import Counter

# Class balance of each split (label -> row count): training first, then test.
for split_labels in (y_train, y_test):
    print(Counter(split_labels))

Counter({0: 1693, 1: 1657})
Counter({1: 843, 0: 808})


In [74]:
# Training features: (rows, columns).
X_train.shape

(3350, 7)

In [75]:
# Training labels; length should match the row count of X_train.
y_train.shape

(3350,)

In [76]:
# Held-out test features: (rows, columns).
X_test.shape

(1651, 7)

In [77]:
# Held-out test labels; length should match the row count of X_test.
y_test.shape

(1651,)

## Train the Model

In [82]:
from sklearn.linear_model import LogisticRegression

In [83]:
# Logistic regression classifier; no arguments are passed, so the library defaults apply.
lgmodel = LogisticRegression()

In [84]:
# Fit on the training split only; the test split stays unseen until evaluation.
lgmodel.fit(X_train,y_train)

LogisticRegression()

## Prediction

In [85]:
# Predicted class labels for the held-out test rows.
predictions = lgmodel.predict(X_test)

## Evaluate the Model

In [86]:
from sklearn.metrics import classification_report

In [87]:
# Per-class precision, recall, F1 and support on the test split (true labels first, predictions second).
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96       808
           1       0.96      0.96      0.96       843

    accuracy                           0.96      1651
   macro avg       0.96      0.96      0.96      1651
weighted avg       0.96      0.96      0.96      1651



## Test the Model

In [90]:
# Spot-check a single test sample, chosen by its position in the test split.
gender_index = 200

# Plain array so the lookup below is positional rather than by index label.
y_test_np = np.array(y_test)

# (actual, predicted) -> message. Any pair not listed here falls through to the
# Female/Female message via the .get() default.
outcome_messages = {
    (1, 1): 'Actual Gender - Male ----- Predicted Gender - Male',
    (1, 0): 'Actual Gender - Male ----- Predicted Gender - Female',
    (0, 1): 'Actual Gender - Female ----- Predicted Gender - Male',
}
outcome = (y_test_np[gender_index], predictions[gender_index])
print(outcome_messages.get(outcome, 'Actual Gender - Female ----- Predicted Gender - Female'))

Actual Gender - Female ----- Predicted Gender - Male
