### Amazon Sentiment Data

In [None]:
import lxmls.readers.sentiment_reader as srs
from lxmls.deep_learning.bench import Data
# Sentiment corpus built from the "books" argument
# (presumably the Amazon book-review domain -- confirm against the reader)
corpus = srs.SentimentCorpus("books")
# Wrap the corpus in the bench Data helper; train() below asks this object
# for its 'train' and 'test' batches
amazon_sentiment_data = Data(corpus=corpus)

### A Shallow Model: Log-Linear in Numpy

In [None]:
import numpy as np
try:
    # Current SciPy releases expose logsumexp in scipy.special
    from scipy.special import logsumexp
except ImportError:
    # Fallback for old SciPy releases that only provide it in scipy.misc
    from scipy.misc import logsumexp

from lxmls.deep_learning.bench import Model
from lxmls.deep_learning.mlp import glorot_weight_init, index2onehot

class NumpyLogLinear(Model):
    """Log-linear (softmax) classifier trained with SGD, written in Numpy.

    Configuration is passed as keyword arguments; the keys read are
    ``input_size``, ``num_classes`` and ``learning_rate``.
    """

    def __init__(self, **config):
        """Create the weight matrix, the bias row and store the learning rate.

        Raises:
            KeyError: if ``input_size``, ``num_classes`` or ``learning_rate``
                is missing from ``config``.
        """

        # Initialize parameters
        weight_shape = (config['input_size'], config['num_classes'])
        # after Xavier Glorot et al
        # NOTE(review): the weight is used transposed in log_forward, so the
        # helper presumably returns a (num_classes, input_size) array -- confirm
        self.weight = glorot_weight_init(weight_shape, 'softmax')
        self.bias = np.zeros((1, config['num_classes']))
        self.learning_rate = config['learning_rate']

    def log_forward(self, input=None):
        """Forward pass of the computation graph.

        Args:
            input: 2-D array with one example per row.

        Returns:
            Array of log-probabilities, one row per example and one column
            per class.
        """

        # Linear transformation
        z = np.dot(input, self.weight.T) + self.bias

        # Softmax implemented in log domain: subtract the per-row
        # log-normalizer from the scores
        log_tilde_z = z - logsumexp(z, axis=1)[:, None]

        return log_tilde_z

    def predict(self, input=None):
        """Most probable class index for each row of ``input``."""
        # exp() is monotonic, so the argmax of the log-probabilities is the
        # argmax of the probabilities; no need to exponentiate first
        return np.argmax(self.log_forward(input), axis=1)

    def update(self, input=None, output=None):
        """Stochastic Gradient Descent update on one batch.

        Args:
            input: 2-D array with one example per row.
            output: class indices for the rows of ``input``; converted to
                one-hot rows with ``index2onehot``.
        """

        # Probabilities of each class
        class_probabilities = np.exp(self.log_forward(input))
        batch_size, num_classes = class_probabilities.shape

        # Error derivative at softmax layer, averaged over the batch
        one_hot_targets = index2onehot(output, num_classes)
        error = (class_probabilities - one_hot_targets) / batch_size

        # Weight gradient: the sum over examples of outer(error_l, input_l)
        # is exactly the matrix product error^T . input, computed here in a
        # single call instead of a Python loop over the batch
        gradient_weight = np.dot(error.T, input)

        # Bias gradient
        gradient_bias = np.sum(error, axis=0, keepdims=True)

        # SGD update
        self.weight = self.weight - self.learning_rate * gradient_weight
        self.bias = self.bias - self.learning_rate * gradient_bias

### Training Bench

In [None]:
def train(data=None, model=None, batch_size=None, num_epochs=None):
    """Run ``num_epochs`` training passes and print test accuracy after each.

    Args:
        data: object whose ``batches(set_name, batch_size=...)`` method
            returns the batches of the 'train' and 'test' sets; each batch
            is indexed with the keys 'input' and 'output'.
        model: object exposing ``update(input=, output=)`` and
            ``predict(input=)``.
        batch_size: size requested for the training batches.
        num_epochs: number of passes over the training batches.
    """

    # Training batches, then the first batch of the test set requested
    # with batch_size=None
    train_batches = data.batches('train', batch_size=batch_size)
    test_set = data.batches('test', batch_size=None)[0]

    # Epochs are counted from 1 so the counter can be printed directly
    for epoch_number in range(1, num_epochs + 1):

        # One pass over all training batches
        for minibatch in train_batches:
            model.update(input=minibatch['input'], output=minibatch['output'])

        # Evaluate on the held-out batch
        predictions = model.predict(input=test_set['input'])
        hits = predictions == test_set['output']
        accuracy = 100*np.mean(hits)

        # Inform user
        print("Epoch %d: accuracy %2.2f %%" % (epoch_number, accuracy))

### Exercise 2.1 Train Log Linear in Numpy
Try to understand the model above, run it and test some hyperparameters.

In [None]:
# Numpy log-linear model: two output classes, one input per corpus feature
np_loglin = NumpyLogLinear(
    learning_rate=0.05,
    num_classes=2,
    input_size=corpus.nr_features,
)

In [None]:
# Ten epochs with batches of five; keywords follow train()'s signature order
train(data=amazon_sentiment_data, model=np_loglin, batch_size=5, num_epochs=10)

### Exercise 2.2 Implement Backpropagation for an MLP in Numpy and train it

In [None]:
from lxmls.deep_learning.numpy_mlp import NumpyMLP
# Numpy MLP: geometry lists the layer sizes (corpus features, 20, 2) and
# activation_functions names one activation per layer transition
np_mlp = NumpyMLP(
    learning_rate=0.05,
    activation_functions=['sigmoid', 'softmax'],
    geometry=[corpus.nr_features, 20, 2],
)

In [None]:
# Ten epochs with batches of five; keywords follow train()'s signature order
train(data=amazon_sentiment_data, model=np_mlp, batch_size=5, num_epochs=10)

### Exercise 2.3  Train Log Linear in Pytorch
Try to understand the model below and compare it with Exercise 2.1. Run it and test some hyperparameters.

In [None]:
import torch
from torch.autograd import Variable

class PytorchLogLinear(Model):
    """Log-linear (softmax) classifier trained with SGD, written in Pytorch.

    Configuration is passed as keyword arguments; the keys read are
    ``input_size``, ``num_classes`` and ``learning_rate``. Gradients are
    obtained with autograd instead of being derived by hand.
    """

    def __init__(self, **config):
        """Create weight and bias as float tensors that track gradients.

        Raises:
            KeyError: if ``input_size``, ``num_classes`` or ``learning_rate``
                is missing from ``config``.
        """

        # Initialize parameters
        weight_shape = (config['input_size'], config['num_classes'])
        # after Xavier Glorot et al
        # NOTE(review): the weight is used transposed in _log_forward, so the
        # helper presumably returns a (num_classes, input_size) array -- confirm
        self.weight = glorot_weight_init(weight_shape, 'softmax')
        self.bias = np.zeros((1, config['num_classes']))
        self.learning_rate = config['learning_rate']

        # IMPORTANT: Cast to pytorch format
        self.weight = Variable(torch.from_numpy(self.weight).float(), requires_grad=True)
        self.bias = Variable(torch.from_numpy(self.bias).float(), requires_grad=True)

    def _log_forward(self, input=None):
        """Forward pass of the computation graph in logarithm domain (pytorch)

        Args:
            input: Numpy array of examples; converted to a float tensor here.

        Returns:
            Pytorch tensor of log-probabilities with one row per example.
        """

        # IMPORTANT: Cast to pytorch format
        input = Variable(torch.from_numpy(input).float(), requires_grad=False)

        # Linear transformation; adding the (1, num_classes) bias makes the
        # scores 2-D, with classes along dimension 1
        z = torch.matmul(input, torch.t(self.weight)) + self.bias

        # Softmax implemented in log domain. The class dimension is given
        # explicitly: leaving `dim` out relies on the deprecated implicit
        # choice and triggers a warning
        log_tilde_z = torch.nn.LogSoftmax(dim=1)(z)

        # NOTE that this is a pytorch class!
        return log_tilde_z

    def predict(self, input=None):
        """Most probable class index for each row of ``input`` (Numpy array)."""
        log_forward = self._log_forward(input).data.numpy()
        # exp() is monotonic, so the argmax can be taken on the
        # log-probabilities directly
        return np.argmax(log_forward, axis=1)

    def update(self, input=None, output=None):
        """Stochastic Gradient Descent update on one batch.

        Args:
            input: Numpy array of examples.
            output: Numpy array of class indices for those examples.

        Returns:
            The batch loss (``NLLLoss`` with its default settings) as a
            Numpy value.
        """

        # IMPORTANT: Class indices need to be casted to LONG
        true_class = Variable(torch.from_numpy(output).long(), requires_grad=False)

        # Compute negative log-likelihood loss
        loss = torch.nn.NLLLoss()(self._log_forward(input), true_class)
        # Use autograd to compute the backward pass.
        loss.backward()

        # SGD update, applied on .data so the step itself is not tracked
        self.weight.data -= self.learning_rate * self.weight.grad.data
        self.bias.data -= self.learning_rate * self.bias.grad.data

        # Zero gradients: backward() accumulates into .grad, so they must be
        # cleared before the next batch
        self.weight.grad.data.zero_()
        self.bias.grad.data.zero_()

        return loss.data.numpy()

In [None]:
# Pytorch log-linear model with the same hyperparameters as the Numpy one
pt_loglin = PytorchLogLinear(
    learning_rate=0.05,
    num_classes=2,
    input_size=corpus.nr_features,
)

In [None]:
# Ten epochs with batches of five; keywords follow train()'s signature order
train(data=amazon_sentiment_data, model=pt_loglin, batch_size=5, num_epochs=10)

### Exercise 2.4 Implement Pytorch Forward pass

In [None]:
from lxmls.deep_learning.pytorch_mlp import PytorchMLP
# Pytorch MLP configured like the Numpy MLP: same geometry, activations
# and learning rate
pt_mlp = PytorchMLP(
    learning_rate=0.05,
    activation_functions=['sigmoid', 'softmax'],
    geometry=[corpus.nr_features, 20, 2],
)

In [None]:
#pt_mlp._log_forward(input=amazon_sentiment_data.batches('test', batch_size=None)[0]['input']).data.numpy()

In [None]:
# Ten epochs with batches of five; keywords follow train()'s signature order
train(data=amazon_sentiment_data, model=pt_mlp, batch_size=5, num_epochs=10)