In [141]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

###  Perceptron with Iris dataset

In [291]:
iris = datasets.load_iris()
X = iris.data
y = iris.target
y_names = iris.target_names
y_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

We will be classifying the iris dataset as "setosa" (1) and "not setosa" (0)

In [292]:
new_classes = []
for i in y:
    if i == 0:
        new_classes.append(1)
    else:
        new_classes.append(0)
y = np.array(new_classes)

In [293]:
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [339]:
def predict_perceptron(input_vector, weights):
    """
    Perceptron prediction function that will return a positive or negative classification
    
    input: 
        input_vector: numpy array; vector of independent variables
        weights: iterable array object; repspective weights per input variable
    output:
        activation: binary; positive or negative classification
    """
    # insert 1 to account for bias (1*bias in weight vector will have no effect)
    input_vector = np.insert(input_vector,0,1)
    # summation of input vector * weights
    dot_prod = np.dot(input_vector, weights)
    # if activation > 0 than postive classification, otherwise negative classification
    activation = 1 if dot_prod > 0 else 0
    return activation
    
def train_perceptron(input_vector, weights, labels, epochs=100, learning_rate=0.1):
    """
    Perceptron training function that adjusts the weights and bias terms
    
    input:
        input_vector: numpy array; matrix or numpy array of independent variables
        weights: list; list of weights, one per independent variable plus 1 bias term
        labels: list; the correct labels for the data
        epochs: int; the number of iterations you'd like to train
        learning_rate: float; the amount to "learn" each iteration
    output:
        weights: list; list of trained weights for the perceptron
    """
    for _ in range(epochs):
        for inp, lab in zip(input_vector, labels):
            pred = predict_perceptron(inp, weights)
            weights[1:] += learning_rate * (lab - pred) * inp
            weights[0] += learning_rate * (lab - pred)
    return weights

In [340]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [341]:
# train weights on train set

# set weights to 0 to start (including bias term)
init_weights = [0 for i in range(len(X[0])+1)]
init = [X_train, init_weights, y_train, 1000, 0.2]
weights = train_perceptron(*init)
print(weights)

[0.2, 0.3800000000000001, 1.2400000000000002, -1.6400000000000001, -0.7]


In [342]:
# test accuracy on training set

predictions = []
for row in X_train:
    pred = predict_perceptron(row, weights)
    predictions.append(pred)
    
class_report = classification_report(y_train, predictions)
conf_report = confusion_matrix(y_train, predictions)
print(class_report)
print(
    pd.DataFrame(
        conf_report
        , columns=['pred not-versicolor','pred versicolor']
        , index=['actual not-versicolor','actual versicolor']
    )
)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        69
           1       1.00      1.00      1.00        31

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100

                       pred not-versicolor  pred versicolor
actual not-versicolor                   69                0
actual versicolor                        0               31


In [343]:
# test accuracy on test set

predictions = []
for row in X_test:
    pred = predict_perceptron(row, weights)
    predictions.append(pred)
    
class_report = classification_report(y_test, predictions)
conf_report = confusion_matrix(y_test, predictions)
print(class_report)
print(
    pd.DataFrame(
        conf_report
        , columns=['pred not-versicolor','pred versicolor']
        , index=['actual not-versicolor','actual versicolor']
    )
)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       1.00      1.00      1.00        19

    accuracy                           1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50

                       pred not-versicolor  pred versicolor
actual not-versicolor                   31                0
actual versicolor                        0               19


###### Conclusion: The Perceptron did a perfect job at classifying setosa iris'

### Single Layer NN with Iris

###### NOTE: Professor, I tried hard to code this from scratch, and I followed several blog posts closely, but I'm afraid its just not quite clicked for me. I have to admit defeat.

In [445]:
from sklearn.metrics import log_loss

In [446]:
# activation function
def sigmoid(x, deriv=False):
    if deriv == True:
        return x * (1 - x)
    return 1/(1+np.exp(-x))

# forward propogation
def forward(inputs, weights):
    return sigmoid(np.dot(inputs, weights))

# back propogation
def backprop(inputs, hidden, labels, weights):
    error = labels - hidden
    delta = error * sigmoid(hidden, deriv=True)
    weights += np.dot(inputs.T, delta)
    return weights

# train nn
def train_nn(inputs, init_weights, labels, epochs=10000):
    hidden = forward(inputs, init_weights)
    weights = init_weights
    for _ in range(epochs):
        weights = backprop(inputs, hidden, labels, weights)
        hidden = forward(inputs, weights)
    return weights
    
# predict new
def predict_nn(inputs, trained_weights):
    pred = sigmoid(np.dot(inputs, weights))
    return pred

In [447]:
init_weights = np.array([0.0,0.0,0.0,0.0,0.0])
inputs = np.hstack((np.array(np.ones(100)).reshape(100,1),X_train))

##### Round 2....

resource: https://towardsdatascience.com/neural-network-on-iris-data-4e99601a42c8

In [607]:
def define_input(X, y, hidden=6):
    """
    input:
        X: numpy array
        y: numpy array
    output:
        W1, W2: initial random weights for input layer and hidden layer
        b1, b2: initial zeroed bias terms for input and hidden layer
    """
    hidden_layer_size = hidden
    # W1 = np.zeros(( hidden_layer_size, X_train.shape[1] ))
    W1 = np.random.randn( hidden_layer_size, X.shape[1] ) * 0.01
    b1 = np.zeros(( hidden_layer_size, 1 ))
    # W2 = np.zeros(( y_train[1], hidden_layer_size ))
    W2 = np.random.randn( 1, hidden_layer_size ) * 0.01
    b2 = np.zeros(( 1, 1))
    return W1, W2, b1, b2

In [608]:
# activation and tanh functions for hidden and output layers

def sigmoid(x):
    return 1/(1+np.exp(-x))

def tanh(x):
    return np.tanh(x)

In [609]:
def forward_prop(X, W1, W2, b1, b2):
    # dot prod of first layer
    dp1 = np.dot(W1, X.T) + b1
    # activation of first layer
    act1 = tanh(dp1)
    # dot prod of second layer
    dp2 = np.dot(W2, act1) + b2
    # activation of second layer
    act2 = sigmoid(-dp2)
    return dp1, dp2, act1, act2

In [610]:
def cost(y, W1, W2, act2):
    W1=W1
    W2=W2
    m = y.shape[0]

    logprobs = np.multiply(np.log(act2), y) + np.multiply((1 - y), np.log(1 - act2))
    cost = - np.sum(logprobs) / m
    return cost

In [611]:
def backprop(X, y, W1, W2, act1, act2):
    m = X.shape[0]
    
    dW2 = (1 / m) * np.dot((act2-y), act1.T)
    db2 = (1 / m) * np.sum((act2-y), axis=1, keepdims=True)
    dZ1 = (np.multiply(np.dot(W2.T, (act2-y)), 1 - np.power(act1, 2)))
    dW1 = (1 / m) * np.dot(dZ1, X)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, dW2, db1, db2

In [612]:
def update_weights(W1, W2, dW1, dW2, b1, b2, db1, db2, learning_rate=0.1):
    W1 = W1 - learning_rate * dW1
    W2 = W2 - learning_rate * dW2
    b1 = b1 - learning_rate * db1
    b2 = b2 - learning_rate * db2
    return W1, W2, b1, b2

In [618]:
def nn_model(X, y, epochs=1000):
    W1, W2, b1, b2 = define_input(X, y)
    for _ in range(epochs):
        dp1, dp2, act1, act2 = forward_prop(X, W1, W2, b1, b2)
#         cost = cost(y, W1, W2, act2)
        dW1, dW2, db1, db2 = backprop(X, y, W1, W2, act1, act2)
        W1, W2, b1, b2 = update_weights(W1, W2, dW1, dW2, b1, b2, db1, db2)
    return W1, W2, b1, b2

In [620]:
W1, W2, b1, b2 = nn_model(X_train, y_train)

In [624]:
# dp1, dp2, act1, act2
forward_prop(X_train[0], W1, W2, b1, b2)[3]

array([[1.00000000e+00, 4.52892280e-30, 1.00000000e+00, 4.52892962e-30,
        1.00000000e+00, 1.00000000e+00]])