In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import KFold

%matplotlib inline

In [98]:
class Loss(object):
    
    def __call__(self, predicted, actual):
        """Calculates the loss as a function of the prediction and the actual.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (float) 
          The value of the loss for this batch of observations.
        """
        raise NotImplementedError
        
    def derivative(self, predicted, actual):
        """The derivative of the loss with respect to the prediction.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (np.ndarray, float) 
          The derivatives of the loss.
        """
        raise NotImplementedError
        
        
class SquaredErrorLoss(Loss):
    
    def __call__(self, predicted, actual):
        return 0.5*np.sum(
            (predicted - actual) ** 2
        )
    
    def delta(self, z_prime, predicted, actual):
        return (
            np.multiply((predicted - actual), z_prime.T)
        )

class crossEntropy(Loss):
    
    def __call__(self, predicted, actual):
        return np.sum(
            np.nan_to_num(-actual*np.log(predicted)-(1-actual)*np.log(1-predicted))
        )
    
    def delta(self, z_prime, predicted, actual):
        return (
            (predicted-actual)
        )

In [99]:
class ActivationFunction(object):
        
    def __call__(self, a):
        """Applies activation function to the values in a layer.
        
        Args:
          a (np.ndarray, float): the values from the previous layer (after 
            multiplying by the weights.
          
        Returns: (np.ndarray, float) 
          The values h = g(a).
        """
        return a
    
    def derivative(self, h):
        """The derivatives as a function of the outputs at the nodes.
        
        Args:
          h (np.ndarray, float): the outputs h = g(a) at the nodes.
          
        Returns: (np.ndarray, float) 
          The derivatives dh/da.
        """
        return np.ones(h.shape)
    
class ReLU(ActivationFunction):
    
    def __call__(self, a):
        return np.where(a > 0, a, 0)
    
    def derivative(self, a):
        return np.where(a > 0, 1, 0)

class Sigmoid(ActivationFunction):
    
    def __call__(self, a):
        return 1/(1 + np.exp(-a))
    
    def derivative(self, a):
        e = self.__call__(a)
        return  np.multiply(e, (1 - e))

In [100]:
class Layer(object):
    """A data structure for a layer in a neural network.
    
    Attributes:
      num_nodes (int): number of nodes in the layer
      activation_function (ActivationFunction)
      values_pre_activation (np.ndarray, float): most recent values
        in layer, before applying activation function
      values_post_activation (np.ndarray, float): most recent values
        in layer, after applying activation function
    """
    
    def __init__(self, num_nodes, activation_function=ActivationFunction()):
        self.num_nodes = num_nodes
        self.activation_function = activation_function
        
    def get_layer_values(self, values_pre_activation):
        """Applies activation function to values from previous layer.
        
        Stores the values (both before and after applying activation 
        function)
        
        Args:
          values_pre_activation (np.ndarray, float): 
            A (batch size) x self.num_nodes array of the values
            in layer before applying the activation function
        
        Returns: (np.ndarray, float)
            A (batch size) x self.num_nodes array of the values
            in layer after applying the activation function
        """
        self.values_pre_activation = values_pre_activation
        self.values_post_activation = self.activation_function(
            values_pre_activation
        )
        return self.values_post_activation
    
    def get_layer_derivatives(self, values_pre_activation):
        return self.activation_function.derivative(
            values_pre_activation
        )

In [170]:
class FullyConnectedNeuralNetwork(object):
    """A data structure for a fully-connected neural network.
    
    Attributes:
      layers (Layer): A list of Layer objects.
      loss (Loss): The loss function to use in training.
      learning_rate (float): The learning rate to use in backpropagation.
      weights (list, np.ndarray): A list of weight matrices,
        length should be len(self.layers) - 1
      biases (list, float): A list of bias terms,
        length should be equal to len(self.layers)
    """
    
    def __init__(self, layers, loss, learning_rate):
        self.layers = layers
        self.loss = loss
        self.learning_rate = learning_rate
        
        # initialize weight matrices and biases to zeros
        self.weights = []
        self.updatedWeights = []
        self.biases = []
        self.updatedBiases = []
        mu, sigma = 0, 1
        for i in range(1, len(self.layers)):
            w = np.matrix(np.random.normal(mu, sigma, (self.layers[i - 1].num_nodes, self.layers[i].num_nodes)))
            self.weights.append(w)
            self.updatedWeights.append(w)
            self.biases.append(np.zeros(self.layers[i].num_nodes))
            self.updatedBiases.append(np.zeros(self.layers[i].num_nodes))
        
    def feedforward(self, inputs):
        """Predicts the output(s) for a given set of input(s).
        
        Args:
          inputs (np.ndarray, float): A (batch size) x self.layers[0].num_nodes array
          
        Returns: (np.ndarray, float) 
          An array of the predicted output labels, length is the batch size
        """
        self.storedValuesZ = [inputs]
        self.storedValuesA = [inputs]
        a = inputs
        
        ## Iterate layers
        for i, layer in enumerate(self.layers):
            ## g(hw + b),  h = previous layer values
            if i != len(self.layers) - 1:
                print('a',a)
                z = np.matrix(np.add(a * self.weights[i], np.matrix(self.biases[i])))
                self.storedValuesZ.append(z)
                a = np.matrix(self.layers[i + 1].get_layer_values(z))
                self.storedValuesA.append(a)
        return a
    
    def predict(self, inputs):
        h = inputs
        ## Iterate layers
        for i, layer in enumerate(self.layers):
            ## g(hw + b),  h = previous layer values
            if i != len(self.layers) - 1:
                a = np.matrix(np.add(h * self.updatedWeights[i], np.matrix(self.updatedBiases[i])))
                h = self.layers[i+1].get_layer_values(a)
        return h
    
    def backProp(self, predicted, actual):
        # Update First weights
        z_prime = 0
        if (type(self.loss) == type(SquaredErrorLoss())):
            z_prime = self.layers[-1].get_layer_derivatives(self.storedValuesZ[-1]).T
        delta = self.loss.delta(z_prime, predicted, actual).T
        dLdw = np.multiply(delta,self.storedValuesA[-2]).T
        
        self.updatedWeights[-1]= self.weights[-1] - self.learning_rate * dLdw
        self.updatedBiases[-1] = self.biases[-1] - np.multiply(self.learning_rate, delta).T
        
        # Update rest of the weights
        for l in range(2, len(self.layers)):
            z = self.storedValuesZ[-l]
            dadz = self.layers[-l].get_layer_derivatives(z)
            delta = np.multiply(self.weights[-l + 1] * delta, dadz.T)
            self.updatedBiases[-l] = self.biases[-l] - np.multiply(self.learning_rate, delta).T
            self.updatedWeights[-l] = self.weights[-l] - np.multiply(self.learning_rate, np.dot(delta, self.storedValuesA[-l - 1])).T
        self.weights = self.updatedWeights
        self.biases = self.updatedBiases
        
    def train(self, inputs, labels):
        """Trains neural network based on a batch of training data.
        
        Args:
          inputs (np.ndarray): A (batch size) x self.layers[0].num_nodes array
          labels (np.ndarray): An array of ground-truth output labels, 
            length is the batch size.
        """
        predicted = self.feedforward(inputs)
        self.backProp(predicted, labels)
        return predicted
    
    def train_epochs(self, inputs, labels, epochs = 50):
        randomIndices = np.random.choice([i for i in range(len(inputs))], size=len(inputs), replace=False)
        
        for i in range(epochs):
            for index in randomIndices:
                self.train(np.matrix([inputs[index]]),np.matrix([labels[index]]))

In [183]:
network = FullyConnectedNeuralNetwork(
    layers=[Layer(1), Layer(1),Layer(3)],
    loss = crossEntropy(),
    learning_rate=0.05
)

In [197]:
network.feedforward(np.matrix([[1]]))

a [[1]]
a [[ 0.47245588]]


matrix([[ 1.00115113,  1.99889903,  2.99682001]])

In [196]:
for i in range(100):
  network.train(np.matrix([[1]]), np.matrix([[1, 2, 3]]))

a [[1]]
a [[ 0.41008928]]
a [[1]]
a [[ 0.41686078]]
a [[1]]
a [[ 0.4229586]]
a [[1]]
a [[ 0.42843672]]
a [[1]]
a [[ 0.43334762]]
a [[1]]
a [[ 0.43774167]]
a [[1]]
a [[ 0.44166657]]
a [[1]]
a [[ 0.44516712]]
a [[1]]
a [[ 0.44828497]]
a [[1]]
a [[ 0.45105864]]
a [[1]]
a [[ 0.4535235]]
a [[1]]
a [[ 0.45571184]]
a [[1]]
a [[ 0.45765308]]
a [[1]]
a [[ 0.45937381]]
a [[1]]
a [[ 0.4608981]]
a [[1]]
a [[ 0.46224758]]
a [[1]]
a [[ 0.46344167]]
a [[1]]
a [[ 0.4644978]]
a [[1]]
a [[ 0.46543152]]
a [[1]]
a [[ 0.46625672]]
a [[1]]
a [[ 0.46698579]]
a [[1]]
a [[ 0.46762975]]
a [[1]]
a [[ 0.4681984]]
a [[1]]
a [[ 0.46870043]]
a [[1]]
a [[ 0.46914356]]
a [[1]]
a [[ 0.46953463]]
a [[1]]
a [[ 0.46987972]]
a [[1]]
a [[ 0.47018419]]
a [[1]]
a [[ 0.47045278]]
a [[1]]
a [[ 0.4706897]]
a [[1]]
a [[ 0.47089867]]
a [[1]]
a [[ 0.47108297]]
a [[1]]
a [[ 0.4712455]]
a [[1]]
a [[ 0.47138883]]
a [[1]]
a [[ 0.47151521]]
a [[1]]
a [[ 0.47162664]]
a [[1]]
a [[ 0.47172489]]
a [[1]]
a [[ 0.47181152]]
a [[1]]
a [[ 0.4718

In [156]:
import pandas as pd

In [8]:
test = pd.read_csv("train.csv")

In [9]:
test.head()

Unnamed: 0,ABV,appearance,aroma,overall,palate,taste,userBias,word_count,char_count,avg_word_len,...,Slovakia,Spain,Sri Lanka,Sweden,Switzerland,"Taiwan, Province of China",Thailand,Togo,United Kingdom,United States
0,0.092014,0.7,0.75,1.0,0.75,0.875,0.745,0.135593,0.135916,0.31262,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.192708,0.8,1.0,1.0,1.0,1.0,0.76625,0.238418,0.232877,0.270866,...,,,,,,,,,,
2,0.111111,0.8,0.875,1.0,0.875,0.875,0.775,0.114124,0.119221,0.425918,...,,,,,,,,,,
3,0.123264,0.8,0.875,1.0,0.75,0.875,0.711,0.213559,0.221747,0.344344,...,,,,,,,,,,
4,0.175347,0.8,0.875,1.0,0.875,0.875,0.755556,0.211299,0.209332,0.288806,...,,,,,,,,,,


In [251]:
test.columns

Index(['ABV', 'appearance', 'aroma', 'overall', 'palate', 'taste', 'userBias',
       'word_count', 'char_count', 'avg_word_len',
       ...
       'Slovakia', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland',
       'Taiwan, Province of China', 'Thailand', 'Togo', 'United Kingdom',
       'United States'],
      dtype='object', length=185)

In [169]:
def makeXandY(data, dependentNames, independentName, intercept):
    X = np.array(data[dependentNames])
    if (intercept):
        X = np.insert(X, 0, 1, axis=1)
    y = np.array(data[independentName])
    return X, y

In [170]:
x, y = makeXandY(test, ['ABV'], ['appearance'], False)

In [299]:
tempX = x[:100]
tempY = y[:100]
network.train_epochs(tempX, tempY)

In [236]:
y[0]

array([ 0.7])

In [237]:
y[1]

array([ 0.8])

In [298]:
y[3]

array([ 0.8])

In [300]:
network.feedforward(np.matrix(x[0]))

matrix([[ 0.87188217]])

In [301]:
network.feedforward(np.matrix(x[1]))

matrix([[ 0.88438475]])