# Gender Classification

This notebook attempts to classify face photographs by the gender (male or female) of the person shown.

## Data input & cleansing


In [187]:
import numpy as np
import cv2
import glob
from sklearn.base import BaseEstimator, ClassifierMixin

train_images_path = "Face Database/TrainImages"
test_images_path = "Face Database/TestImages"

# Side length (in pixels) every image is cropped to before being flattened
IMAGE_SIDE = 101


def load_gray_images(images_path, side=IMAGE_SIDE):
    """Load every ``*.jpg`` in a directory as a flattened grayscale row.

    Each image is converted from BGR to grayscale, cropped to its top-left
    ``side`` x ``side`` corner and flattened to ``side * side`` values.
    Cropping discards pixels outside that corner.
    TODO: consider resizing instead of cropping to keep the whole face.

    Args:
        images_path: Directory that contains the ``.jpg`` files.
        side: Height and width, in pixels, that each image is cropped to.

    Returns:
        2D ndarray with one row per image, ordered by sorted file path.

    Raises:
        ValueError: If the directory has no ``.jpg`` files, a file cannot be
            decoded, or an image is smaller than ``side`` x ``side``.
    """
    # glob returns paths in arbitrary order; sorting makes the row order
    # deterministic, which matters because labels are paired with rows by
    # position later in the notebook.
    files = sorted(glob.glob("{0}/*.jpg".format(images_path)))
    if not files:
        raise ValueError("No .jpg images found in '{0}'".format(images_path))

    rows = []
    for file in files:
        bgr = cv2.imread(file)
        # cv2.imread signals an unreadable file by returning None
        if bgr is None:
            raise ValueError("Could not read image '{0}'".format(file))

        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        if gray.shape[0] < side or gray.shape[1] < side:
            raise ValueError(
                "Image '{0}' has shape {1}, smaller than {2}x{2}".format(file, gray.shape, side)
            )

        # Slicing is a no-op for images that already have the target size
        rows.append(gray[:side, :side].reshape(side * side))

    # Convert list to 2d ndarray
    return np.stack(rows)


train_images = load_gray_images(train_images_path)
test_images = load_gray_images(test_images_path)

In [188]:
# Read ground-truth label
def extract_gender_classifier(file_name):
    """Read the gender label stored in an attribute file.

    The label is the first character of the file's first line
    (male: 1; female: 0).

    Args:
        file_name: Path of the attribute file to read.

    Returns:
        int: The label digit found at the start of the file.

    Raises:
        ValueError: If the file is empty or its first character is not a digit.
    """
    # The context manager closes the handle promptly, even if parsing fails.
    # Only the first line is needed, so the rest of the file is not read.
    with open(file_name, "r") as file:
        first_line = file.readline()

    if not first_line:
        raise ValueError("Attribute file '{0}' is empty".format(file_name))

    return int(first_line[0])
    
# glob returns paths in arbitrary order, so sort the label files by name to get
# a deterministic order. Labels are matched to image rows purely by position.
# NOTE(review): this assumes the images are loaded in the same sorted file-name
# order and that every image has an identically named .att file — confirm.
train_images_label = [
    extract_gender_classifier(file)
    for file in sorted(glob.glob("{0}/*.att".format(train_images_path)))
]
test_images_label = [
    extract_gender_classifier(file)
    for file in sorted(glob.glob("{0}/*.att".format(test_images_path)))
]

## Rosenblatt's perceptron implementation

In [None]:
# Non-linear activation function
def heaviside(x):
    """Heaviside step function returning integer outputs.

    Args:
        x: Scalar or ndarray of pre-activation values.

    Returns:
        Integer scalar or ndarray: 0 where ``x < 0`` and 1 where ``x >= 0``
        (the second argument of ``np.heaviside`` sets the value at exactly 0).
    """
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24; the alias pointed at the builtin, so the dtype is unchanged.
    return np.heaviside(x, 1).astype(int)


# Minimum absolute prediction error for Rosenblatt.fit to treat a sample as
# misclassified and update the weights and bias
threshold = 0.001

class Rosenblatt(BaseEstimator, ClassifierMixin):
    """Rosenblatt's perceptron for binary (0/1) classification.

    Predictions are ``heaviside(X . weights + bias)``. Training adds
    ``error * x`` to the weights and ``error`` to the bias for every sample
    whose absolute prediction error exceeds the module-level ``threshold``.
    """

    def __init__(self):
        # No hyper-parameters; the learned state (weights, bias) is set by fit
        pass

    def predict(self, X):
        """Predict the binary class of one sample or a batch of samples.

        Args:
            X: Array-like. A 1D feature vector or a 2D array with one sample
                per row.

        Returns:
            Integer prediction(s) produced by ``heaviside``.
        """
        # Accept lists and integer image arrays alike
        X = np.asarray(X, dtype=float)
        return heaviside(X.dot(self.weights) + self.bias)

    def fit(self, X, y, epochs=100):
        """Train the perceptron.

        Args:
            X: 2D array-like. Each row represents one training example.
            y: 1D array-like. Binary classification of each example.
            epochs: Maximum number of passes over the training data.

        Returns:
            self: Trained perceptron model.

        Raises:
            ValueError: If ``X`` and ``y`` contain a different number of
                samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # zip() would silently drop the surplus samples or labels otherwise
        if len(X) != len(y):
            raise ValueError(
                "X has {0} samples but y has {1} labels".format(len(X), len(y))
            )

        num_features = X.shape[1]

        self.weights = np.zeros((num_features, ))
        self.bias = 0.0

        for num_iter in range(epochs):
            # Current number of errors
            errors = 0

            for xi, y_true in zip(X, y):
                error = y_true - self.predict(xi)

                if abs(error) > threshold:
                    # Update weights accordingly
                    self.weights += error * xi
                    self.bias += error

                    # Number of errors
                    errors += 1

            # A full pass without any update means training has converged
            if errors == 0:
                break

        return self

### Training and testing Rosenblatt's perceptron

In [None]:
# Fit the perceptron on the training set (fit returns the trained model),
# then predict labels for the test images
model = Rosenblatt().fit(train_images, train_images_label, epochs=1000)
test_images_output = model.predict(test_images)

### Checking accuracy of Rosenblatt's perceptron model

In [None]:
test_images_label = np.array(test_images_label)

# Rosenblatt's perceptron accuracy rate:
# count the positions where the prediction equals the ground-truth label
matches = test_images_output == test_images_label
num_correct = len(np.nonzero(matches)[0])

accuracy = num_correct / len(test_images_label)

# Reported as a percentage
print(accuracy * 100)