In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from nltk.stem.snowball import EnglishStemmer
from scipy.sparse.linalg import svds, eigs
from sklearn import datasets
import tensorflow as tf
from tensorflow.keras import layers
%matplotlib inline

In [2]:
class Loss(object):
    
    def __call__(self, predicted, actual):
        """Calculates the loss as a function of the prediction and the actual.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (float) 
          The value of the loss for this batch of observations.
        """
        raise NotImplementedError
        
    def derivative(self, predicted, actual):
        """The derivative of the loss with respect to the prediction.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (np.ndarray, float) 
          The derivatives of the loss.
        """
        raise NotImplementedError
              
class SquaredErrorLoss(Loss):
    #CHANGE THIS TO ** rather than np.square?
    def __call__(self, predicted, actual):
        return predicted-actual**2
    def derivative(self, predicted, actual):
        return 2*np.subtract(predicted, actual)
    
class ActivationFunction(object):
        
    def __call__(self, a):
        """Applies activation function to the values in a layer.
        
        Args:
          a (np.ndarray, float): the values from the previous layer (after 
            multiplying by the weights.
          
        Returns: (np.ndarray, float) 
          The values h = g(a).
        """
        return a
    
    def derivative(self, h):
        """The derivatives as a function of the outputs at the nodes.
        
        Args:
          h (np.ndarray, float): the outputs h = g(a) at the nodes.
          
        Returns: (np.ndarray, float) 
          The derivatives dh/da.
        """
        return 1
       
class ReLU(ActivationFunction):
    def __call__(self, a):
        return np.clip(a, 0, None)
    def derivative(self, h):
        return np.clip(h, 0, 1)

class Sigmoid(ActivationFunction):
    def __call__(self, a):
        return 1.0/(1.0+np.exp(-a))
    def derivative(self, h):
        return self.__call__(h)*(1-self.__call__(h))
    
class Layer(object):
    """A data structure for a layer in a neural network.
    
    Attributes:
      num_nodes (int): number of nodes in the layer
      activation_function (ActivationFunction)
      values_pre_activation (np.ndarray, float): most recent values
        in layer, before applying activation function
      values_post_activation (np.ndarray, float): most recent values
        in layer, after applying activation function
    """
    
    def __init__(self, num_nodes, activation_function=ActivationFunction()):
        self.num_nodes = num_nodes
        self.activation_function = activation_function
        
    def get_layer_values(self, values_pre_activation):
        """Applies activation function to values from previous layer.
        
        Stores the values (both before and after applying activation 
        function)
        
        Args:
          values_pre_activation (np.ndarray, float): 
            A (batch size) x self.num_nodes array of the values
            in layer before applying the activation function
        
        Returns: (np.ndarray, float)
            A (batch size) x self.num_nodes array of the values
            in layer after applying the activation function
        """
        self.values_pre_activation = values_pre_activation
        self.values_post_activation = self.activation_function(
            values_pre_activation
        )
        return self.values_post_activation

        
class FullyConnectedNeuralNetwork(object):
    """A data structure for a fully-connected neural network.
    
    Attributes:
      layers (Layer): A list of Layer objects.
      loss (Loss): The loss function to use in training.
      learning_rate (float): The learning rate to use in backpropagation.
      weights (list, np.ndarray): A list of weight matrices,
        length should be len(self.layers) - 1
      biases (list, float): A list of bias terms,
        length should be equal to len(self.layers)
    """
    
    def __init__(self, layers, loss, learning_rate):
        self.layers = layers
        self.loss = loss
        self.learning_rate = learning_rate
        
        # initialize weight matrices and biases to zeros
        self.weights = []
        self.biases = []
        for i in range(1, len(self.layers)):
            self.weights.append(
                np.random.normal(0, 1, (self.layers[i - 1].num_nodes, self.layers[i].num_nodes))
            )
            self.biases.append(
                np.zeros(self.layers[i].num_nodes)
            )
    
    def feedforward(self, inputs):
        """Predicts the output(s) for a given set of input(s).
        
        Args:
          inputs (np.ndarray, float): A (batch size) x self.layers[0].num_nodes array
          
        Returns: (np.ndarray, float) 
          An array of the predicted output labels, length is the batch size
        """
        # TODO: Implement feedforward prediction.
        # Make sure you use Layer.get_layer_values() at each layer to store the values
        # for later use in backpropagation.

        h = self.layers[0].get_layer_values(inputs)
        for i in range(1, len(self.layers)):
            b = self.biases[i-1]
            w = self.weights[i-1]
            z = np.matmul(h, w) + b
            h = self.layers[i].get_layer_values(z)
        return h
        
    def backprop(self, predicted, actual):
        """Updates self.weights and self.biases based on predicted and actual values.
        
        This will require using the values at each layer that were stored at the
        feedforward step.
        
        Args:
          predicted (np.ndarray, float): An array of the predicted output labels
          actual (np.ndarray, float): An array of the actual output labels
        """
        
        w_new = [np.zeros(w.shape) for w in self.weights]
        b_new = [np.zeros(b.shape) for b in self.biases]
        n = len(predicted)
        if(n == 1):
            delta = self.loss.derivative(predicted, actual)
            b_new[-1] = b_new[-1] + self.learning_rate * delta.T
            w_new[-1] = w_new[-1] + self.learning_rate * np.dot(delta, self.layers[-2].values_post_activation).T
            for i in range(2, len(self.layers)):
                a = (self.layers[-i].values_pre_activation)
                h = (self.layers[-i-1].values_post_activation)
                g_prime = self.layers[-i].activation_function.derivative(a)
                delta = np.multiply(np.dot(self.weights[-i+1], delta), g_prime.T)
                b_new[-i] = b_new[-i] + self.learning_rate * delta.T
                w_new[-i] = w_new[-i] + self.learning_rate * np.dot(delta, h).T

            self.weights = [np.subtract(x, y) for x, y in zip(self.weights, w_new)]
            self.biases = [np.subtract(x, y) for x, y in zip(self.biases, b_new)]
        else:
            for j in range(n):
                delta = np.array([self.loss.derivative(predicted[j], actual[j])])
                b_new[-1] = b_new[-1] + self.learning_rate * delta.T
                w_new[-1] = w_new[-1] + self.learning_rate * np.dot(delta, [self.layers[-2].values_post_activation[j]]).T
                for i in range(2, len(self.layers)):
                    a = [(self.layers[-i].values_pre_activation)[j]]
                    h = [(self.layers[-i-1].values_post_activation)[j]]
                    g_prime = self.layers[-i].activation_function.derivative(a)
                    delta = np.multiply(np.dot(self.weights[-i+1], delta), g_prime.T)
                    b_new[-i] = b_new[-i] + self.learning_rate * delta.T
                    w_new[-i] = w_new[-i] + self.learning_rate * np.dot(delta, h).T

            self.weights = [np.subtract(x, y/n) for x, y in zip(self.weights, w_new)]
            self.biases = [np.subtract(x, y/n) for x, y in zip(self.biases, b_new)]

        
    def train(self, inputs, labels):
        """Trains neural network based on a batch of training data.
        
        Args:
          inputs (np.ndarray): A (batch size) x self.layers[0].num_nodes array
          labels (np.ndarray): An array of ground-truth output labels, 
            length is the batch size.
        """
        predicted = self.feedforward(inputs)
#         print(predicted)
        self.backprop(predicted, labels)

In [3]:
df = pd.read_csv('final_data.csv', index_col=0)

In [4]:
df.dropna(inplace=True)

In [14]:
X = df.drop(['review/appearance', 'review/aroma', 'review/overall', 'review/palate', 'review/taste'], axis=1).values

In [15]:
y = df['review/overall'].values

In [5]:
# X = full_data[['avg_palate', 'avg_aroma', 'avg_overall', 'avg_taste', 'avg_appear']].value
X = df.drop(['review/appearance', 'review/aroma', 'review/overall', 'review/palate', 'review/taste', 'avg_palate', 'avg_aroma', 'avg_overall', 'avg_appear', 'avg_taste'], axis=1).values
y = df['review/overall'].values

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [23]:
# specify the model architecture
# l = [layers.Dense(896, activation="relu")] + ([layers.Dense(20, activation="relu")] * 20) + [layers.Dense(1)]
model = tf.keras.Sequential([
    layers.Dense(891),
    layers.Dense(448, activation="relu"),
#     layers.Dense(224, activation="relu"),
#     layers.Dense(112, activation="relu"),
#     layers.Dense(56, activation="relu"),
#     layers.Dense(28, activation="relu"),
#     layers.Dense(14, activation="relu"),
#     layers.Dense(7, activation="relu"),
#     layers.Dense(3, activation="relu"),
    layers.Dense(1)
])

# specify the loss function and optimization function
model.compile(optimizer=tf.train.GradientDescentOptimizer(0.000001),
              loss='mse')

# fit the model to data
model.fit(X_train, y_train, epochs=30, batch_size=128)

Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

In [18]:
predicted = model.predict(X_test)

In [19]:
np.mean((y_test-predicted)**2)

1.0083672676117426

In [22]:
X_train.shape

(28113, 896)

In [24]:
network_1 = FullyConnectedNeuralNetwork(
    layers=[Layer(896), Layer(448, ReLU()), Layer(1)],
    loss = SquaredErrorLoss(),
    learning_rate= 0.00000000001
)

In [None]:
for j in range(30):
    print("Epoch:", j)
    for i in range(0, len(y_train), 128):
        network_1.train(X_train[i:i+128], y_train[i:i+128])
    print("Completed")

Epoch: 0
Completed
Epoch: 1
Completed
Epoch: 2
