## B. Neural Network: MultiClass Classification


Modify the previous architecture to model multi-class classification task. Test your architecture on the **Statlog (Vehicle Silhouettes)** Data Set ('Vehicles.csv'). Save your solution as a seperate notebook file with appropriate filename.

**Note:**

1. Perform the train/validate/test split as 70/15/15.
2. Use Random seed as '777' wherever needed.
3. Report appropriate measures in addition to accuracy and also plot the confusion matrix.

More details on the dataset can be found at: https://archive.ics.uci.edu/ml/datasets/Statlog+%28Vehicle+Silhouettes%29


In [950]:
# Package imports
import numpy as np
import sklearn
import sklearn.linear_model
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
import matplotlib.pyplot as plt

%matplotlib inline

np.random.seed(777)


In [951]:
# splits the data into train, val, test

def split_stratified_into_train_val_test(df_input, stratify_colname='y',
                                         frac_train=0.6, frac_val=0.15, frac_test=0.25,
                                         random_state=None):

    if frac_train + frac_val + frac_test != 1.0:
        raise ValueError('fractions %f, %f, %f do not add up to 1.0' %
                         (frac_train, frac_val, frac_test))

    if stratify_colname not in df_input.columns:
        raise ValueError('%s is not a column in the dataframe' %
                         (stratify_colname))

    X = df_input.drop(columns=[stratify_colname])  # Contains all columns.
    # Dataframe of just the column on which to stratify.
    y = df_input[[stratify_colname]]

    # Split original dataframe into train and temp dataframes.
    df_train, df_temp, y_train, y_temp = train_test_split(X,
                                                          y,
                                                          stratify=y,
                                                          test_size=(
                                                              1.0 - frac_train),
                                                          random_state=random_state)

    # Split the temp dataframe into val and test dataframes.
    relative_frac_test = frac_test / (frac_val + frac_test)
    df_val, df_test, y_val, y_test = train_test_split(df_temp,
                                                      y_temp,
                                                      stratify=y_temp,
                                                      test_size=relative_frac_test,
                                                      random_state=random_state)

    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)

    return df_train, df_val, df_test, y_train, y_val, y_test


### Encode each class of column bus with a unique int


In [952]:
import pandas as pd
np.random.seed(777)
df = pd.read_csv('..//Data//Vehicles.csv')
df


Unnamed: 0,100,36,73,199,73.1,6,162,40,20,127,189,401,125,72,6.1,19,200,204,bus
0,91,36,72,162,60,8,150,44,19,133,166,334,121,63,2,22,196,205,saab
1,91,41,64,148,61,8,129,51,18,142,161,249,153,68,6,12,194,201,van
2,86,39,58,125,55,5,117,57,17,134,140,204,148,69,7,6,190,194,van
3,95,53,95,202,65,10,193,34,22,160,220,559,237,71,3,2,188,196,saab
4,91,43,72,142,56,7,149,45,19,140,168,327,165,72,13,23,186,191,saab
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
840,80,37,57,116,55,6,125,54,18,125,142,229,132,81,8,5,178,184,opel
841,104,55,107,222,68,11,218,31,24,173,232,703,229,71,3,10,188,199,saab
842,94,38,84,158,55,9,169,39,20,130,196,430,155,69,9,15,190,195,opel
843,104,52,100,191,59,9,197,33,23,158,218,583,234,70,10,10,191,198,saab


Encode cat variables


In [953]:
print(df['bus'].unique())
df['bus'] = pd.factorize(df['bus'])[0]
df['bus']


['saab' 'van' 'bus' 'opel']


0      0
1      1
2      1
3      0
4      0
      ..
840    3
841    0
842    3
843    0
844    1
Name: bus, Length: 845, dtype: int64

In [954]:
X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(df, stratify_colname='bus',
                                                                                      frac_train=0.7, frac_val=0.15, frac_test=0.15,
                                                                                      random_state=None)
X_train, X_val, X_test, y_train, y_val, y_test = X_train.T.to_numpy(), X_val.T.to_numpy(
), X_test.T.to_numpy(), y_train.T.to_numpy(), y_val.T.to_numpy(), y_test.T.to_numpy()
X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape


((18, 591), (18, 127), (18, 127), (1, 591), (1, 127), (1, 127))

In [955]:
trainx, valx, testx, trainy,valy,testy = X_train.T, X_val.T, X_test.T, y_train.T, y_val.T, y_test.T


In [956]:
def model_architecture(X, Y):
    n_x = X.shape[1]  # size of input layer
    n_h = len(np.unique(Y))
    n_y = 1
    return (n_x, n_h, n_y)


In [957]:
def initialize_parameters(n_x, n_h, n_y):

    ### START CODE HERE ###
    W1 = np.random.randn(n_x, n_h) * 0.01
    b1 = np.zeros((1, n_h))
    ### END CODE HERE ###
    
    parameters = {"W1": W1,
                  "b1": b1
                  }

    return parameters


In [958]:
def softmax(x):
    print(x.shape)
    exp_scores = np.exp(x)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return probs

In [959]:
def cross_entropy_loss(probs, y):
    correct_logprobs = -np.log(probs[range(y.shape[0]),y])
    return np.sum(correct_logprobs)/y.shape[0]

In [960]:
a  = np.array([[.10,.20,.70], [.10,.60,.30]])
y = np.array([2,2])

correct_logprobs = -np.log(a[range(y.shape[0]),y])
np.sum(correct_logprobs)/y.shape[0]


0.7803238741323343

In [961]:
def forward_propagation(X, parameters, a1=softmax):
    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ###
    W1 = parameters['W1']
    b1 = parameters['b1']
    ### END CODE HERE ###

    # Implement Forward Propagation to calculate A2 (probabilities)
    ### START CODE HERE ##
    Z1 = np.dot(X, W1) + b1
    A1 = softmax(Z1)
    
    ### END CODE HERE ###
    cache = {"Z1": Z1,
             "A1": A1}

    return A1, cache


In [962]:
def compute_loss(A1, Y, loss_function=cross_entropy_loss):
    # Compute the cross-entropy loss
    ### START CODE HERE ###
    loss = loss_function(A1, Y)
    ### END CODE HERE ###
    loss = float(np.squeeze(loss))
    assert(isinstance(loss, float))

    return loss


In [963]:
def backprop(parameters, cache, X, Y):

    # First, retrieve W1 and W2 from the dictionary "parameters".
    ### START CODE HERE ###
    W1 = parameters['W1']
    ### END CODE HERE ###

    # Retrieve also A1 and A2 from dictionary "cache".
    ### START CODE HERE ###
    A1 = cache['A1']
    ### END CODE HERE ###

    # Backward propagation: calculate dW1, db1, dW2, db2.
    ### START CODE HERE ###
    dscores = A1
    dscores[range(Y.shape[0]),Y] -= 1
    dscores /= Y.shape[0]
    dW1 = np.dot(X.T, dscores)
    db1 = np.sum(dscores, axis=0, keepdims=True)
    
    ### END CODE HERE ###

    grads = {"dW1": dW1,
             "db1": db1,
            }
    return grads


In [964]:
def update(parameters, grads, learning_rate=0.01):

    # Retrieve each parameter from the dictionary "parameters"
    ### START CODE HERE ###
    W1 = parameters['W1']
    b1 = parameters['b1']
    ### END CODE HERE ###

    # Retrieve each gradient from the dictionary "grads"
    ### START CODE HERE ###
    dW1 = grads['dW1']
    db1 = grads['db1']
    ## END CODE HERE ###

    # Update rule for each parameter
    ### START CODE HERE ###
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    ### END CODE HERE ###
    
    parameters = {"W1": W1,
                  "b1": b1
    }
    return parameters


In [965]:
def NeuralNetwork(X, Y, n_h, num_iterations=10000, learning_rate=0.01, print_loss=False, a1=softmax, loss_function=cross_entropy_loss):

    np.random.seed(3)
    n_x = model_architecture(X, Y)[0]
    n_y = model_architecture(X, Y)[2]

    # Initialize parameters
    ### START CODE HERE ###
    parameters = initialize_parameters(n_x, n_h, n_y)
    ### END CODE HERE ###

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        ### START CODE HERE ###
        # Forward propagation. Inputs: "X, parameters". Outputs: "A1, cache".
        A1, cache = forward_propagation(X, parameters, a1=a1)
        
        # loss function. Inputs: "A2, Y, parameters". Outputs: "loss".
        loss = compute_loss(A1, Y, loss_function=loss_function)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backprop(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update(parameters, grads)

        ### END CODE HERE ###

        # Print the loss every 100 iterations
        if print_loss and i % 100 == 0:
            print("loss after iteration %i: %f" % (i, loss))

    return parameters


In [966]:
def predict(parameters, X):

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    ### START CODE HERE ###
    A1, cache = forward_propagation(X, parameters)
    predictions = np.argmax(A1.T, axis=1)
    ### END CODE HERE ###

    return predictions


In [967]:
parameters = NeuralNetwork(trainx, trainy, 4, num_iterations = 3, learning_rate = 0.01, print_loss=False)

(591, 4)
(591, 4)
(591, 4)


  exp_scores = np.exp(x)
  probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
  correct_logprobs = -np.log(probs[range(y.shape[0]),y])


In [968]:
parameters

{'W1': array([[nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan]]),
 'b1': array([[nan, nan, nan, nan]])}

In [969]:
predictions = predict(parameters, trainx)
predictions


(591, 4)


array([0, 0, 0, 0], dtype=int64)