In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from nltk.stem.snowball import EnglishStemmer
from scipy.sparse.linalg import svds, eigs
from sklearn import datasets
import tensorflow as tf
from tensorflow.keras import layers
%matplotlib inline

In [2]:
class Loss(object):
    
    def __call__(self, predicted, actual):
        """Calculates the loss as a function of the prediction and the actual.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (float) 
          The value of the loss for this batch of observations.
        """
        raise NotImplementedError
        
    def derivative(self, predicted, actual):
        """The derivative of the loss with respect to the prediction.
        
        Args:
          predicted (np.ndarray, float): the predicted output labels
          actual (np.ndarray, float): the actual output labels
          
        Returns: (np.ndarray, float) 
          The derivatives of the loss.
        """
        raise NotImplementedError
              
class SquaredErrorLoss(Loss):
    #CHANGE THIS TO ** rather than np.square?
    def __call__(self, predicted, actual):
        return predicted-actual**2
    def derivative(self, predicted, actual):
        return 2*np.subtract(predicted, actual)
    
class ActivationFunction(object):
        
    def __call__(self, a):
        """Applies activation function to the values in a layer.
        
        Args:
          a (np.ndarray, float): the values from the previous layer (after 
            multiplying by the weights.
          
        Returns: (np.ndarray, float) 
          The values h = g(a).
        """
        return a
    
    def derivative(self, h):
        """The derivatives as a function of the outputs at the nodes.
        
        Args:
          h (np.ndarray, float): the outputs h = g(a) at the nodes.
          
        Returns: (np.ndarray, float) 
          The derivatives dh/da.
        """
        return 1
       
class ReLU(ActivationFunction):
    def __call__(self, a):
        return np.clip(a, 0, None)
    def derivative(self, h):
        return np.clip(h, 0, 1)

class Sigmoid(ActivationFunction):
    def __call__(self, a):
        return 1.0/(1.0+np.exp(-a))
    def derivative(self, h):
        return self.__call__(h)*(1-self.__call__(h))
    
class Layer(object):
    """A data structure for a layer in a neural network.
    
    Attributes:
      num_nodes (int): number of nodes in the layer
      activation_function (ActivationFunction)
      values_pre_activation (np.ndarray, float): most recent values
        in layer, before applying activation function
      values_post_activation (np.ndarray, float): most recent values
        in layer, after applying activation function
    """
    
    def __init__(self, num_nodes, activation_function=ActivationFunction()):
        self.num_nodes = num_nodes
        self.activation_function = activation_function
        
    def get_layer_values(self, values_pre_activation):
        """Applies activation function to values from previous layer.
        
        Stores the values (both before and after applying activation 
        function)
        
        Args:
          values_pre_activation (np.ndarray, float): 
            A (batch size) x self.num_nodes array of the values
            in layer before applying the activation function
        
        Returns: (np.ndarray, float)
            A (batch size) x self.num_nodes array of the values
            in layer after applying the activation function
        """
        self.values_pre_activation = values_pre_activation
        self.values_post_activation = self.activation_function(
            values_pre_activation
        )
        return self.values_post_activation

        
class FullyConnectedNeuralNetwork(object):
    """A data structure for a fully-connected neural network.
    
    Attributes:
      layers (Layer): A list of Layer objects.
      loss (Loss): The loss function to use in training.
      learning_rate (float): The learning rate to use in backpropagation.
      weights (list, np.ndarray): A list of weight matrices,
        length should be len(self.layers) - 1
      biases (list, float): A list of bias terms,
        length should be equal to len(self.layers)
    """
    
    def __init__(self, layers, loss, learning_rate):
        self.layers = layers
        self.loss = loss
        self.learning_rate = learning_rate
        
        # initialize weight matrices and biases to zeros
        self.weights = []
        self.biases = []
        for i in range(1, len(self.layers)):
            self.weights.append(
                np.random.normal(0, 1, (self.layers[i - 1].num_nodes, self.layers[i].num_nodes))
            )
            self.biases.append(
                np.zeros(self.layers[i].num_nodes)
            )
    
    def feedforward(self, inputs):
        """Predicts the output(s) for a given set of input(s).
        
        Args:
          inputs (np.ndarray, float): A (batch size) x self.layers[0].num_nodes array
          
        Returns: (np.ndarray, float) 
          An array of the predicted output labels, length is the batch size
        """
        # TODO: Implement feedforward prediction.
        # Make sure you use Layer.get_layer_values() at each layer to store the values
        # for later use in backpropagation.

        h = self.layers[0].get_layer_values(inputs)
        for i in range(1, len(self.layers)):
            b = self.biases[i-1]
            w = self.weights[i-1]
            z = np.matmul(h, w) + b
            h = self.layers[i].get_layer_values(z)
        return h
        
    def backprop(self, predicted, actual):
        """Updates self.weights and self.biases based on predicted and actual values.
        
        This will require using the values at each layer that were stored at the
        feedforward step.
        
        Args:
          predicted (np.ndarray, float): An array of the predicted output labels
          actual (np.ndarray, float): An array of the actual output labels
        """
        
        w_new = [np.zeros(w.shape) for w in self.weights]
        b_new = [np.zeros(b.shape) for b in self.biases]
        n = len(predicted)
        if(n == 1):
            delta = self.loss.derivative(predicted, actual)
            b_new[-1] = b_new[-1] + self.learning_rate * delta.T
            w_new[-1] = w_new[-1] + self.learning_rate * np.dot(delta, self.layers[-2].values_post_activation).T
            for i in range(2, len(self.layers)):
                a = (self.layers[-i].values_pre_activation)
                h = (self.layers[-i-1].values_post_activation)
                g_prime = self.layers[-i].activation_function.derivative(a)
                delta = np.multiply(np.dot(self.weights[-i+1], delta), g_prime.T)
                b_new[-i] = b_new[-i] + self.learning_rate * delta.T
                w_new[-i] = w_new[-i] + self.learning_rate * np.dot(delta, h).T

            self.weights = [np.subtract(x, y) for x, y in zip(self.weights, w_new)]
            self.biases = [np.subtract(x, y) for x, y in zip(self.biases, b_new)]
        else:
            for j in range(n):
                delta = np.array([self.loss.derivative(predicted[j], actual[j])])
                b_new[-1] = b_new[-1] + self.learning_rate * delta.T
                w_new[-1] = w_new[-1] + self.learning_rate * np.dot(delta, [self.layers[-2].values_post_activation[j]]).T
                for i in range(2, len(self.layers)):
                    a = [(self.layers[-i].values_pre_activation)[j]]
                    h = [(self.layers[-i-1].values_post_activation)[j]]
                    g_prime = self.layers[-i].activation_function.derivative(a)
                    delta = np.multiply(np.dot(self.weights[-i+1], delta), g_prime.T)
                    b_new[-i] = b_new[-i] + self.learning_rate * delta.T
                    w_new[-i] = w_new[-i] + self.learning_rate * np.dot(delta, h).T

            self.weights = [np.subtract(x, y/n) for x, y in zip(self.weights, w_new)]
            self.biases = [np.subtract(x, y/n) for x, y in zip(self.biases, b_new)]

        
    def train(self, inputs, labels):
        """Trains neural network based on a batch of training data.
        
        Args:
          inputs (np.ndarray): A (batch size) x self.layers[0].num_nodes array
          labels (np.ndarray): An array of ground-truth output labels, 
            length is the batch size.
        """
        predicted = self.feedforward(inputs)
#         print(predicted)
        self.backprop(predicted, labels)

In [3]:
beer = pd.read_csv("full_data.csv", index_col=0)
beer.dropna(inplace=True)

In [4]:
beer.head()

Unnamed: 0,beer/ABV,review/appearance,review/aroma,review/overall,review/palate,review/taste,review/text,isdst,"""The Wind Cried Mari..."" Scottish Heather Ale",1906 Reserva Especial,...,2.0.3,3.0.3,4.0.3,5.0.3,6.0.3,avg_palate,avg_aroma,avg_overall,avg_taste,avg_appear
0,5.0,4.0,4.0,4.0,4.0,4.0,Pours a clouded gold with a thin white head. N...,0.0,0,0,...,0,0,0,0,0,3.166667,3.833333,3.166667,3.0,3.666667
1,11.0,4.0,3.5,3.5,3.5,3.0,12oz bottle into 8oz snifter.\t\tDeep ruby red...,0.0,0,0,...,0,0,1,0,0,3.916667,3.972222,3.611111,3.833333,3.888889
2,4.7,3.5,4.0,3.5,3.5,3.5,First enjoyed at the brewpub about 2 years ago...,0.0,0,0,...,0,0,1,0,0,3.653846,3.461538,3.903846,3.769231,3.711538
3,4.4,3.0,3.0,2.5,3.0,3.0,First thing I noticed after pouring from green...,0.0,0,0,...,0,0,0,0,0,3.602564,3.43109,3.820513,3.661859,3.692308
4,4.4,4.0,3.0,3.0,3.5,2.5,A: pours an amber with a one finger head but o...,0.0,0,0,...,0,0,0,1,0,3.792135,3.646067,3.848315,3.758427,3.904494


In [5]:
stemmer = EnglishStemmer()

In [6]:
analyzer = CountVectorizer().build_analyzer()

def stemmed_words(doc):
    return (stemmer.stem(w) for w in analyzer(doc))

stem_vectorizer = CountVectorizer(
    stop_words = 'english',
    analyzer=stemmed_words,
    ngram_range = (1,2)
)
tfidf_transformer = TfidfTransformer()

In [7]:
## Vectorize the reviews
X_train_counts = stem_vectorizer.fit_transform(beer['review/text'])
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

In [8]:
u, s, v = svds(X_train_tfidf, k=100)

In [9]:
full_data = pd.concat([beer, pd.DataFrame(u)], axis=1, sort=False)

In [10]:
full_data.drop('review/text', inplace=True, axis=1)

In [11]:
X = full_data.drop(['review/appearance', 'review/aroma', 'review/overall', 'review/palate', 'review/taste'], axis=1).values

In [12]:
X.shape

(37494, 896)

In [13]:
y = full_data['review/overall'].values

In [14]:
full_data.to_csv('final_data.csv')

In [18]:
full_data.head()

Unnamed: 0,beer/ABV,review/appearance,review/aroma,review/overall,review/palate,review/taste,isdst,"""The Wind Cried Mari..."" Scottish Heather Ale",1906 Reserva Especial,2X Chocolate Porter,...,90,91,92,93,94,95,96,97,98,99
0,5.0,4.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,...,-0.003817,-0.001904,-0.002556,0.003672,-0.000728,-0.002721,-0.003397,0.003552,0.0004,0.003669
1,11.0,4.0,3.5,3.5,3.5,3.0,0.0,0.0,0.0,0.0,...,0.008872,-0.008459,-0.00539,0.001952,0.003362,-0.000566,-0.001022,-0.000789,-0.004414,0.005061
2,4.7,3.5,4.0,3.5,3.5,3.5,0.0,0.0,0.0,0.0,...,-0.003155,-0.001823,0.002242,0.002276,-0.000769,0.002075,0.004495,-0.000685,-0.007607,0.003911
3,4.4,3.0,3.0,2.5,3.0,3.0,0.0,0.0,0.0,0.0,...,-0.001222,0.002758,-0.001472,0.00055,0.008042,0.012155,0.009179,0.00469,0.001785,0.00316
4,4.4,4.0,3.0,3.0,3.5,2.5,0.0,0.0,0.0,0.0,...,0.003644,-0.009948,-0.001942,0.00549,0.003194,0.000178,0.004289,0.001215,-0.002551,0.005537


In [19]:
full_data.dropna(inplace=True)

In [2]:
full_data[full_data.isnull()]

NameError: name 'full_data' is not defined

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [837]:
l = [Layer(250)] + ([Layer(20, ReLU())] * 10) + [Layer(1)]

In [849]:
network_1 = FullyConnectedNeuralNetwork(
    layers=l,
    loss = SquaredErrorLoss(),
    learning_rate= 0.00000000000000000001
)

In [850]:
n=10
for i in range(0, len(y_train), n):
    network_1.train(X_train[i:i+n], y_train[i:i+n])

In [852]:
network_1.feedforward([X_test[5]])

array([[-684.74625613]])

In [853]:
predicted = network_1.feedforward(X_test)

In [854]:
predicted

array([[-1774.98918098],
       [-2845.03400576],
       [-2480.15262322],
       ..., 
       [-1133.30312844],
       [-5720.87303684],
       [-7483.86534858]])

In [855]:
predicted.min(), predicted.max()

(-21600.749241491692, 121.08819412854741)

In [856]:
np.mean((y_test - predicted.flatten())**2)

13301956.922905698

In [23]:
X_train

array([[  4.40000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -1.57359374e-03,  -4.17524945e-03,   6.08118718e-03],
       [  1.12000000e+01,   0.00000000e+00,   0.00000000e+00, ...,
         -5.61188148e-03,   9.76825454e-03,   6.26696735e-03],
       [  5.90000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -1.32121412e-04,  -6.73356530e-03,   2.80870574e-03],
       ..., 
       [  4.40000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -3.83761954e-03,  -1.08731993e-02,   4.39019528e-03],
       [  1.05000000e+01,   0.00000000e+00,   0.00000000e+00, ...,
         -4.31864509e-03,  -5.39410540e-03,   4.79386121e-03],
       [  1.20000000e+01,   0.00000000e+00,   0.00000000e+00, ...,
          2.14395446e-03,  -2.11244398e-03,   4.37316158e-03]])

In [31]:
y_train = (y_train - np.mean(y_train))/(np.std(y_train))

In [33]:
X_train = (X_train - np.mean(X_train))/(np.std(X_train))

nan

In [16]:
X_train

array([[  6.60000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -5.43027851e-03,   4.53009133e-03,   3.44912530e-03],
       [  8.50000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -4.24265087e-04,  -1.75124991e-03,   3.06964660e-03],
       [  8.30000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -8.63554913e-03,   5.45475078e-03,   5.04183697e-03],
       ..., 
       [  6.30000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -6.23733596e-04,  -1.26662604e-03,   4.72117017e-03],
       [  1.20000000e+01,   0.00000000e+00,   0.00000000e+00, ...,
          3.20949235e-03,   3.91589245e-03,   5.28097794e-03],
       [  7.20000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
         -9.33489452e-04,  -1.09086857e-02,   5.34232776e-03]])

In [34]:
# specify the model architecture
# l = [layers.Dense(896, activation="relu")] + ([layers.Dense(20, activation="relu")] * 20) + [layers.Dense(1)]
model = tf.keras.Sequential([
    layers.Dense(896, activation="relu"),
    layers.Dense(1)
])

# specify the loss function and optimization function
model.compile(optimizer=tf.train.GradientDescentOptimizer(0.000000000000001),
              loss='mse')

# fit the model to data
model.fit(X_train, y_train, epochs=5, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f96dd323ef0>

In [18]:
model.predict(X_test)

array([[ nan],
       [ nan],
       [ nan],
       ..., 
       [ nan],
       [ nan],
       [ nan]], dtype=float32)

In [56]:
network = FullyConnectedNeuralNetwork(
    layers=[Layer(10), Layer(20, ReLU()), Layer(1)],
    loss = SquaredErrorLoss(),
    learning_rate=0.001
)

In [57]:
diabetes = datasets.load_diabetes()

In [58]:
X = diabetes.data

In [59]:
y = diabetes.target

In [62]:
for i in range(len(y)):
    network.train([X[i]], y[i])

In [63]:
np.mean((y - network.feedforward(X))**2)

8344.7284204534008