In [277]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import KFold

%matplotlib inline

In [278]:
class Loss(object):
    
    def __call__(self, predicted, actual):
        """Calculates the loss as a function of the prediction and the actual.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (float) 
          The value of the loss for this batch of observations.
        """
        raise NotImplementedError
        
    def derivative(self, predicted, actual):
        """The derivative of the loss with respect to the prediction.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (np.ndarray, float) 
          The derivatives of the loss.
        """
        raise NotImplementedError
        
        
class SquaredErrorLoss(Loss):
    
    def __call__(self, predicted, actual):
        return 0.5*np.sum(
            np.multiply((predicted - actual), (predicted - actual))
        )
    
    def delta(self, z_prime, predicted, actual):
        return (
            np.multiply((predicted - actual), z_prime.T)
        )

class crossEntropy(Loss):
    
    def __call__(self, predicted, actual):
        return np.sum(
            np.nan_to_num(np.multiply(-actual,np.log(predicted))-np.multiply((1-actual),np.log(1-predicted)))
        )
    
    def delta(self, z_prime, predicted, actual):
        return (
            (predicted-actual)
        )

In [279]:
class ActivationFunction(object):
        
    def __call__(self, a):
        """Applies activation function to the values in a layer.
        
        Args:
          a (np.ndarray, float): the values from the previous layer (after 
            multiplying by the weights.
          
        Returns: (np.ndarray, float) 
          The values h = g(a).
        """
        return a
    
    def derivative(self, h):
        """The derivatives as a function of the outputs at the nodes.
        
        Args:
          h (np.ndarray, float): the outputs h = g(a) at the nodes.
          
        Returns: (np.ndarray, float) 
          The derivatives dh/da.
        """
        return np.ones(h.shape)
    
class ReLU(ActivationFunction):
    
    def __call__(self, a):
        return np.where(a > 0, a, 0)
    
    def derivative(self, a):
        return np.where(a > 0, 1, 0)

class Sigmoid(ActivationFunction):
    
    def __call__(self, a):
        return 1/(1 + np.exp(-a))
    
    def derivative(self, a):
        e = self.__call__(a)
        return  np.multiply(e, (1 - e))

In [280]:
class Layer(object):
    """A data structure for a layer in a neural network.
    
    Attributes:
      num_nodes (int): number of nodes in the layer
      activation_function (ActivationFunction)
      values_pre_activation (np.ndarray, float): most recent values
        in layer, before applying activation function
      values_post_activation (np.ndarray, float): most recent values
        in layer, after applying activation function
    """
    
    def __init__(self, num_nodes, activation_function=ActivationFunction()):
        self.num_nodes = num_nodes
        self.activation_function = activation_function
        
    def get_layer_values(self, values_pre_activation):
        """Applies activation function to values from previous layer.
        
        Stores the values (both before and after applying activation 
        function)
        
        Args:
          values_pre_activation (np.ndarray, float): 
            A (batch size) x self.num_nodes array of the values
            in layer before applying the activation function
        
        Returns: (np.ndarray, float)
            A (batch size) x self.num_nodes array of the values
            in layer after applying the activation function
        """
        self.values_pre_activation = values_pre_activation
        self.values_post_activation = self.activation_function(
            values_pre_activation
        )
        return self.values_post_activation
    
    def get_layer_derivatives(self, values_pre_activation):
        return self.activation_function.derivative(
            values_pre_activation
        )

In [281]:
class FullyConnectedNeuralNetwork(object):
    """A data structure for a fully-connected neural network.
    
    Attributes:
      layers (Layer): A list of Layer objects.
      loss (Loss): The loss function to use in training.
      learning_rate (float): The learning rate to use in backpropagation.
      weights (list, np.ndarray): A list of weight matrices,
        length should be len(self.layers) - 1
      biases (list, float): A list of bias terms,
        length should be equal to len(self.layers)
    """
    
    def __init__(self, layers, loss, learning_rate):
        self.layers = layers
        self.loss = loss
        self.learning_rate = learning_rate
        
        # initialize weight matrices and biases to zeros
        self.weights = []
        self.updatedWeights = []
        self.biases = []
        self.updatedBiases = []
        mu, sigma = 0, 1
        for i in range(1, len(self.layers)):
            w = np.matrix(np.random.normal(mu, sigma, (self.layers[i - 1].num_nodes, self.layers[i].num_nodes))/np.sqrt(self.layers[i - 1].num_nodes))
            self.weights.append(w)
            self.updatedWeights.append(w)
            self.biases.append(np.zeros(self.layers[i].num_nodes))
            self.updatedBiases.append(np.zeros(self.layers[i].num_nodes))
        
    def feedforward(self, inputs):
        """Predicts the output(s) for a given set of input(s).
        
        Args:
          inputs (np.ndarray, float): A (batch size) x self.layers[0].num_nodes array
          
        Returns: (np.ndarray, float) 
          An array of the predicted output labels, length is the batch size
        """
        self.storedValuesZ = [inputs]
        self.storedValuesA = [inputs]
        a = inputs
        
        ## Iterate layers
        for i, layer in enumerate(self.layers):
            ## g(hw + b),  h = previous layer values
            if i != len(self.layers) - 1:
                z = np.matrix(np.add(a * self.weights[i], np.matrix(self.biases[i])))
                self.storedValuesZ.append(z)
                a = np.matrix(self.layers[i + 1].get_layer_values(z))
                self.storedValuesA.append(a)
        return a
    
    def predict(self, inputs):
        h = inputs
        ## Iterate layers
        for i, layer in enumerate(self.layers):
            ## g(hw + b),  h = previous layer values
            if i != len(self.layers) - 1:
                a = np.matrix(np.add(h * self.updatedWeights[i], np.matrix(self.updatedBiases[i])))
                h = self.layers[i+1].get_layer_values(a)
        return h
    
    def backProp(self, predicted, actual):
        gradient_b = [np.zeros(b.shape) for b in self.biases]
        gradient_w = [np.zeros(w.shape) for w in self.weights]
        # Update First weights
        z_prime = 0
        if (type(self.loss) == type(SquaredErrorLoss())):
            z_prime = self.layers[-1].get_layer_derivatives(self.storedValuesZ[-1]).T
        delta = self.loss.delta(z_prime, predicted, actual).T
        dLdw = np.multiply(delta,self.storedValuesA[-2]).T
        
        gradient_b[-1] = delta.T
        gradient_w[-1] = dLdw
        #self.updatedWeights[-1]= self.weights[-1] - self.learning_rate * dLdw
        #self.updatedBiases[-1] = self.biases[-1] - np.multiply(self.learning_rate, delta).T
        
        # Update rest of the weights
        for l in range(2, len(self.layers)):
            z = self.storedValuesZ[-l]
            dadz = self.layers[-l].get_layer_derivatives(z)
            delta = np.multiply(self.weights[-l + 1] * delta, dadz.T)
            dLdw = np.dot(delta, self.storedValuesA[-l - 1])
            
            gradient_b[-l] = delta.T
            gradient_w[-l] = dLdw
            #self.updatedBiases[-l] = self.biases[-l] - np.multiply(self.learning_rate, delta).T
            #self.updatedWeights[-l] = self.weights[-l] - np.multiply(self.learning_rate, np.dot(delta, self.storedValuesA[-l - 1])).T
        return (gradient_b, gradient_w)
        
    def train(self, inputs, labels):
        """Trains neural network based on a batch of training data.
        
        Args:
          inputs (np.ndarray): A (batch size) x self.layers[0].num_nodes array
          labels (np.ndarray): An array of ground-truth output labels, 
            length is the batch size.
        """
        predicted = self.feedforward(inputs)
        gradient_b, gradient_w = self.backProp(predicted, labels)
        return (gradient_b, gradient_w, self.loss(predicted,labels))
    
    def train_epochs_minibatch(self, inputs, labels, epochs = 10, mini_batch=1):
        '''
        Args:
          inputs (np.ndarray): A x self.layers[0].num_nodes array
          labels (np.ndarray): An array of ground-truth output labels, 
            length is the inputs size.
          epochs (int): Number of times the data is iterated through
          mini_batch (int): Number of observations to train at a time
        '''
        if (inputs.shape[0] < mini_batch):
            mini_batch = inputs.shape[0]
        epochsLosses = []
        for i in range(epochs):
            sum_gradient_b = [np.zeros(b.shape) for b in self.biases]
            sum_gradient_w = [np.zeros(w.shape) for w in self.weights]
            sumLoss = 0
            randomIndices = np.random.choice([i for i in range(len(inputs))], size=len(inputs), replace=False)
            for index, row in enumerate(randomIndices):
                gradient_b, gradient_w, loss = self.train(np.matrix([inputs[row]]),np.matrix([labels[row]]))
                sumLoss += loss
                sum_gradient_b = [sb+gb for sb, gb in zip(sum_gradient_b,gradient_b)]
                sum_gradient_w = [sw+gw for sw, gw in zip(sum_gradient_w, gradient_w)]
                if ((index+1)%mini_batch == 0):
                    mini_batch_avg_loss = sumLoss/mini_batch
                    print('Avg loss for mini_batch', mini_batch_avg_loss)
                    epochsLosses.append(mini_batch_avg_loss)
                    self.biases = [b-(self.learning_rate/mini_batch)*sb
                                      for b, sb in zip(self.biases, sum_gradient_b)]
                    self.weights = [w-(self.learning_rate/mini_batch)*sw
                                     for w, sw in zip(self.weights, sum_gradient_w)]
                    sum_gradient_b = [np.zeros(b.shape) for b in self.biases]
                    sum_gradient_w = [np.zeros(w.shape) for w in self.weights]
        return (epochsLosses)

In [352]:
# Sigmoid must be applied to the last layer inorder for it to work
network = FullyConnectedNeuralNetwork(
    layers=[Layer(1), Layer(1), Layer(3, Sigmoid())],
    loss = crossEntropy(),
    learning_rate=0.005
)

In [355]:
network.feedforward(np.matrix([[1]]))

matrix([[ 0.9501245 ,  0.90999591,  0.58444958]])

In [354]:
epochsLosses = network.train_epochs_minibatch(np.array([[1]]), np.array([[1, 1, .5]]),epochs = 1000, mini_batch=2)

Avg loss for mini_batch 1.99276446027
Avg loss for mini_batch 1.99003223388
Avg loss for mini_batch 1.98730579639
Avg loss for mini_batch 1.9845851343
Avg loss for mini_batch 1.98187023419
Avg loss for mini_batch 1.97916108265
Avg loss for mini_batch 1.97645766637
Avg loss for mini_batch 1.97375997205
Avg loss for mini_batch 1.97106798646
Avg loss for mini_batch 1.9683816964
Avg loss for mini_batch 1.96570108873
Avg loss for mini_batch 1.96302615036
Avg loss for mini_batch 1.96035686824
Avg loss for mini_batch 1.95769322937
Avg loss for mini_batch 1.95503522079
Avg loss for mini_batch 1.9523828296
Avg loss for mini_batch 1.94973604293
Avg loss for mini_batch 1.94709484798
Avg loss for mini_batch 1.94445923195
Avg loss for mini_batch 1.94182918214
Avg loss for mini_batch 1.93920468586
Avg loss for mini_batch 1.93658573046
Avg loss for mini_batch 1.93397230336
Avg loss for mini_batch 1.93136439201
Avg loss for mini_batch 1.92876198389
Avg loss for mini_batch 1.92616506654
Avg loss for mi

In [294]:
import pandas as pd

In [274]:
test = pd.read_csv("train.csv")

In [275]:
test.head()

Unnamed: 0,ABV,appearance,aroma,overall,palate,taste,userBias,word_count,char_count,avg_word_len,...,Slovakia,Spain,Sri Lanka,Sweden,Switzerland,"Taiwan, Province of China",Thailand,Togo,United Kingdom,United States
0,0.092014,0.7,0.75,1.0,0.75,0.875,0.745,0.135593,0.135916,0.31262,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.192708,0.8,1.0,1.0,1.0,1.0,0.76625,0.238418,0.232877,0.270866,...,,,,,,,,,,
2,0.111111,0.8,0.875,1.0,0.875,0.875,0.775,0.114124,0.119221,0.425918,...,,,,,,,,,,
3,0.123264,0.8,0.875,1.0,0.75,0.875,0.711,0.213559,0.221747,0.344344,...,,,,,,,,,,
4,0.175347,0.8,0.875,1.0,0.875,0.875,0.755556,0.211299,0.209332,0.288806,...,,,,,,,,,,


In [276]:
test.columns

Index(['ABV', 'appearance', 'aroma', 'overall', 'palate', 'taste', 'userBias',
       'word_count', 'char_count', 'avg_word_len',
       ...
       'Slovakia', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland',
       'Taiwan, Province of China', 'Thailand', 'Togo', 'United Kingdom',
       'United States'],
      dtype='object', length=185)

In [40]:
def makeXandY(data, dependentNames, independentName, intercept):
    X = np.array(data[dependentNames])
    if (intercept):
        X = np.insert(X, 0, 1, axis=1)
    y = np.array(data[independentName])
    return X, y

In [41]:
x, y = makeXandY(test, ['ABV'], ['appearance'], False)

In [42]:
tempX = x[:100]
tempY = y[:100]
network.train_epochs(tempX, tempY)

a [[ 0.11979167]]
a [[ 0.1986624]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.19359448, -1.671152  , -0.08013476]])]
Weight gradient [array([[ 0.]]), array([[ 0.,  0.,  0.]])]
a [[ 0.20659722]]
a [[ 0.34262066]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.2017613 , -1.66166204, -0.07284088]])]
Weight gradient [array([[ 0.]]), array([[ 0.,  0.,  0.]])]
a [[ 0.16145833]]
a [[ 0.26776236]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.21290338, -1.64376113, -0.06244882]])]
Weight gradient [array([[ 0.]]), array([[ 0.,  0.,  0.]])]
a [[ 0.18229167]]
a [[ 0.30231234]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.22267676, -1.63086152, -0.05350482]])]
Weight gradient [array([[ 0.]]), array([[ 0.,  0.,  0.]])]
a [[ 0.14583333]]
a [[ 0.24184987]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.23300364, -1.61622616, -0.04391991]])]
Weight gradient [array([[ 0.]]), array([[ 0.,  0.,  0.]])]
a [[ 0.17534722]]
a [[ 0.29079568]]
Weights [matrix([[ 1.65839914]]), matrix([[ 0.24098468, -1.606338

In [60]:
type(tempX)

numpy.ndarray

In [73]:
mB = 2
for index, i in enumerate(range(10)):
    if ((index+1) % mB == 0):
        print('update batch')
    print(index+1)
    print((index+1) % mB)
    

1
1
update batch
2
0
3
1
update batch
4
0
5
1
update batch
6
0
7
1
update batch
8
0
9
1
update batch
10
0


In [68]:
1 % 1

0

In [300]:
network.feedforward(np.matrix(x[0]))

matrix([[ 0.87188217]])

In [140]:
np.random.randn(2,3)

array([[ 0.53518532,  0.2082547 ,  1.55785103],
       [-0.12484475, -0.74005127,  0.33178394]])