In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
%matplotlib inline

We'll implement logistic regression to solve the gender voice classification problem.  
Here, the logistic unit can be viewed as the simplest possible neural network: one with just a single neuron.  
So we'll implement logistic regression using the forward-prop + backprop philosophy of neural networks.  

Let's import the data into a dataframe first and have a look.

In [11]:
# Load the voice dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/voice.csv')
# Preview the first rows; as the cell's last expression, the result is rendered as output.
df.head()

Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,12.863462,274.402906,0.893369,0.491918,...,0.059781,0.084279,0.015702,0.275862,0.007812,0.007812,0.007812,0.0,0.0,male
1,0.066009,0.06731,0.040229,0.019414,0.092666,0.073252,22.423285,634.613855,0.892193,0.513724,...,0.066009,0.107937,0.015826,0.25,0.009014,0.007812,0.054688,0.046875,0.052632,male
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,30.757155,1024.927705,0.846389,0.478905,...,0.077316,0.098706,0.015656,0.271186,0.00799,0.007812,0.015625,0.007812,0.046512,male
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,1.232831,4.177296,0.963322,0.727232,...,0.151228,0.088965,0.017798,0.25,0.201497,0.007812,0.5625,0.554688,0.247119,male
4,0.13512,0.079146,0.124656,0.07872,0.206045,0.127325,1.101174,4.333713,0.971955,0.783568,...,0.13512,0.106398,0.016931,0.266667,0.712812,0.007812,5.484375,5.476562,0.208274,male


Next, let's check for any null records.

In [12]:
# Yields the (row indices, column indices) of every null cell; two empty arrays mean no nulls.
np.where(pd.isnull(df))

(array([], dtype=int64), array([], dtype=int64))

Great, there are no null records!  
Now for a bit of preprocessing.  
Let's first separate the features and labels.  

In [13]:
# Features: every column except the target column "label".
X = df.drop("label", axis=1)
# Target: the "label" column (string values such as "male" in the preview above).
Y = df["label"]

Normalize the features using scikit-learn's `StandardScaler`.

In [14]:
# Fit the scaler on the features and replace X with its standardized version in one call.
scaler = StandardScaler()
X = scaler.fit_transform(X)

Convert the labels into ones and zeros, since they are stored as strings.

In [21]:
# LabelEncoder maps each distinct label value to an integer code.
gender_encoder = LabelEncoder()
# Fit on the labels and replace Y with the encoded integer array in one step.
Y = gender_encoder.fit_transform(Y)

All the steps above can be encapsulated in functions.

In [35]:
def normalize_features(features):
    """Standardize the given features with scikit-learn's ``StandardScaler``.

    A fresh scaler is fitted on ``features`` and then applied to that same
    data, so the scaling statistics come from the input itself.

    Parameters
    ----------
    features : array-like accepted by ``StandardScaler``
        Feature matrix to scale.

    Returns
    -------
    The scaled features produced by the fitted scaler.
    """
    return StandardScaler().fit_transform(features)

In [36]:
def encode_gender(Y):
    """Convert the label values in ``Y`` to integer codes.

    A new ``LabelEncoder`` is fitted on ``Y`` and used to transform it in a
    single step.

    Parameters
    ----------
    Y : array-like of labels accepted by ``LabelEncoder``

    Returns
    -------
    The encoded labels returned by ``LabelEncoder.fit_transform``.
    """
    return LabelEncoder().fit_transform(Y)

In [37]:
def read_data(path):
    """Load the CSV at ``path`` and return model-ready features and labels.

    The "label" column is split off as the target, the remaining columns are
    scaled with ``normalize_features``, and the labels are integer-encoded
    with ``encode_gender`` and reshaped into a single column.

    Parameters
    ----------
    path : path or buffer accepted by ``pandas.read_csv``

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` holds the scaled features and ``Y`` holds the
        encoded labels with shape ``(n_rows, 1)``.
    """
    frame = pd.read_csv(path)
    scaled_features = normalize_features(frame.drop(columns="label"))
    encoded_labels = encode_gender(frame["label"])
    # -1 lets NumPy infer the row count, giving one label per row in a column vector.
    return scaled_features, encoded_labels.reshape(-1, 1)

Let's test this all together now.

# Smoke-test the loading pipeline: expected shapes and an integer label dtype.
X, Y = read_data('./data/voice.csv')
assert X.shape == (3168, 20)
assert Y.shape == (3168, 1)
assert Y.dtype == np.dtype('int64')

We now need to split the data into training and test sets.  
We'll use the excellent `train_test_split` function provided by scikit-learn.

In [39]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split reproducible.
# NOTE(review): read_data standardizes the features on the full dataset before this split,
# so test-set statistics influence the scaling — consider fitting the scaler on X_train only.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

Since we need to implement logistic regression from scratch, let's first define the sigmoid function.

In [40]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The direct formula overflows inside ``exp(-z)`` for large negative ``z``
    and emits a RuntimeWarning. Here the exponential is only ever taken of a
    non-positive number, so it cannot overflow.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an ndarray with the shape of
    ``z``; every value lies in ``[0, 1]``.
    """
    z = np.asarray(z)
    # exp of a non-positive number stays in [0, 1]; it may underflow to 0 but never overflows.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: e^z / (1 + e^z), which is algebraically the same value.
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps a 0-d result to a scalar and leaves arrays untouched.
    return out[()]

We need to initialize the weights and the bias to zeros.
So let's write a function for that.

In [42]:
def initialize_weight_and_bias(dimension):
    """Create zero-valued starting parameters.

    Parameters
    ----------
    dimension : int
        Number of rows in the weight column vector.

    Returns
    -------
    tuple
        ``(w, b)``: ``w`` is a ``(dimension, 1)`` array of zeros and ``b`` is
        the integer ``0``.
    """
    return np.zeros((dimension, 1)), 0

Let's now implement forward prop, which is essentially defined as:  
$A = \sigma(w^T X + b)$

In [43]:
def forward_prop(X, Y, w, b):
    """Compute the activations ``sigmoid(np.dot(w.T, X) + b)``.

    Parameters
    ----------
    X : input array multiplied by ``w.T``
    Y : accepted for signature compatibility; not used in the computation
    w : weight array
    b : bias added to the linear term

    Returns
    -------
    The sigmoid of the linear term.
    """
    linear_term = np.dot(w.T, X) + b
    return sigmoid(linear_term)

The cost function is defined as:  

$J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$   

where $m$ is the total number of samples.

In [44]:
def compute_cost(A, Y, m, eps=1e-15):
    """Average binary cross-entropy between activations ``A`` and labels ``Y``.

    Activations are clipped to ``[eps, 1 - eps]`` before taking logarithms.
    Without the clip, an activation of exactly 0 or 1 produces ``log(0)``,
    and the resulting ``0 * -inf`` term turns the whole cost into NaN.

    Parameters
    ----------
    A : ndarray
        Predicted probabilities.
    Y : ndarray
        Ground-truth labels (0 or 1), broadcast-compatible with ``A``.
    m : int
        Number of samples the summed loss is divided by.
    eps : float, optional
        Clipping margin that keeps both logarithms finite. The default is
        sized for float64 inputs.

    Returns
    -------
    The scalar cost ``-(1/m) * sum(Y*log(A) + (1-Y)*log(1-A))``.
    """
    A = np.clip(A, eps, 1 - eps)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    return cost

In the backpropagation step, we'll calculate the gradients, defined as:   

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T\tag{7}$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})\tag{8}$$

In [45]:
def back_prop(X, A, Y, m):
    """Gradients of the cost with respect to the weights and the bias.

    Parameters
    ----------
    X : ndarray
        Inputs; multiplied by the transposed residual to form ``dw``.
    A : ndarray
        Activations from the forward pass.
    Y : ndarray
        Ground-truth labels, same shape as ``A``.
    m : int
        Number of samples used to average the gradients.

    Returns
    -------
    dict
        ``{"dw": ..., "db": ...}`` with ``dw = (1/m) * X (A - Y)^T`` and
        ``db = (1/m) * sum(A - Y)``.
    """
    scale = 1 / m
    residual = A - Y  # shared by both gradients
    return {
        "dw": scale * np.dot(X, residual.T),
        "db": scale * np.sum(residual),
    }

Now let's integrate all these steps together and implement gradient descent.

In [47]:
def optimize(w, b, X, Y, num_iterations, learning_rate):
    """Fit the weights and bias with batch gradient descent.

    Each iteration runs ``forward_prop`` and ``back_prop`` on the full data
    and moves ``w`` and ``b`` against their gradients, scaled by
    ``learning_rate``. The cost is recorded on every 100th iteration
    (0, 100, 200, ...), using the activations from before that iteration's
    update.

    Parameters
    ----------
    w : ndarray
        Initial weights.
    b : number
        Initial bias.
    X : ndarray
        Inputs; ``X.shape[1]`` is taken as the number of samples.
    Y : ndarray
        Ground-truth labels.
    num_iterations : int
        Number of gradient-descent steps. ``0`` returns the inputs unchanged.
    learning_rate : float
        Step size applied to each gradient.

    Returns
    -------
    tuple
        ``(params, grads, costs)``: ``params`` is ``{"w", "b"}`` with the
        final parameters, ``grads`` is ``{"dw", "db"}`` from the last
        iteration (both ``None`` if no iteration ran), and ``costs`` is the
        list of recorded costs.
    """
    costs = []
    m = X.shape[1]
    # Bound up front so the return below does not fail when the loop never runs.
    dw = db = None

    for i in range(num_iterations):
        A = forward_prop(X, Y, w, b)
        grads = back_prop(X, A, Y, m)
        dw, db = grads["dw"], grads["db"]

        # The cost is only needed on recorded iterations, so skip computing it otherwise.
        if i % 100 == 0:
            costs.append(compute_cost(A, Y, m))

        w = w - learning_rate * dw
        b = b - learning_rate * db

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

We can now use the learned weights and bias to make predictions on our training and test sets.

In [48]:
def predict(w, b, X):
    """Predict a binary class for every column of ``X``.

    Parameters
    ----------
    w : ndarray
        Weights; reshaped to ``(X.shape[0], 1)`` before use.
    b : number
        Bias added to the linear term.
    X : ndarray
        Inputs with one sample per column.

    Returns
    -------
    ndarray
        Float array of shape ``(1, X.shape[1])`` holding ``0.0`` where the
        activation is ``<= 0.5`` and ``1.0`` otherwise.
    """
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    # Vectorized threshold instead of a per-sample Python loop. The `<=` comparison
    # is kept so an activation of exactly 0.5 still maps to class 0.
    return np.where(A <= 0.5, 0.0, 1.0)

Finally, we can wrap this whole implementation up as a model and run it for a specified number of iterations.

In [54]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=100, learning_rate=0.005):
    """Train the logistic-regression model, print accuracies and return the results.

    Parameters are initialized to zeros (sized by ``X_train.shape[0]``),
    fitted with ``optimize`` on the training data, and then used to predict
    on both the test and training inputs. Accuracy is printed as
    ``100 - mean(|prediction - label|) * 100``.

    Parameters
    ----------
    X_train, X_test : ndarray
        Inputs; ``X_train.shape[0]`` sets the number of weights.
    Y_train, Y_test : ndarray
        Labels compared element-wise against the predictions.
    num_iterations : int, optional
        Gradient-descent steps passed to ``optimize``.
    learning_rate : float, optional
        Step size passed to ``optimize``.

    Returns
    -------
    dict
        Recorded costs, test/train predictions, learned ``w`` and ``b``, and
        the hyperparameters that were used.
    """
    n_weights = X_train.shape[0]
    w_start, b_start = initialize_weight_and_bias(n_weights)

    # The final gradients returned by optimize are not needed here.
    learned, _, costs = optimize(w_start, b_start, X_train, Y_train, num_iterations, learning_rate)
    w, b = learned["w"], learned["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Mean absolute difference between 0/1 predictions and labels is the error rate.
    train_error = np.mean(np.abs(Y_prediction_train - Y_train))
    test_error = np.mean(np.abs(Y_prediction_test - Y_test))
    print("train accuracy: {} %".format(100 - train_error * 100))
    print("test accuracy: {} %".format(100 - test_error * 100))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

Let's run this model for 2000 iterations with a learning rate of 0.01.

In [53]:
# The model code treats axis 1 as the sample axis (e.g. m = X.shape[1]), so the
# row-per-sample splits are transposed before being passed in.
result = model(X_train.T, Y_train.T, X_test.T, Y_test.T, num_iterations = 2000, learning_rate = 0.01)

train accuracy: 96.36937647987372 %
test accuracy: 97.94952681388013 %
