# Part A: Autoencoder Neural Networks



Imports

In [None]:
from torch.autograd import Variable

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

import numpy as np
import torch

import matplotlib.pyplot as plt

from utils import *

from scipy.sparse import load_npz
import csv
import os

Initialize Autoencoder

In [None]:
class AutoEncoder(nn.Module):
    """ Autoencoder made of two linear layers, each followed by a sigmoid.

    The encoder maps a vector of length num_question to a code of length k;
    the decoder maps that code back to length num_question.
    """

    # Runs once, when the class body is executed, so the global RNG is seeded
    # before any instance creates its layers.
    torch.manual_seed(42)

    def __init__(self, num_question, k=100):
        """ Build the encoder and decoder layers.

        :param num_question: int, size of the input and output vectors
        :param k: int, size of the hidden code
        """
        super().__init__()

        # Encoder first, decoder second: the creation order determines which
        # random draws initialise which layer.
        self.encoder = nn.Linear(in_features=num_question, out_features=k)
        self.decoder = nn.Linear(in_features=k, out_features=num_question)

    def get_weight_norm(self):
        """ Return the sum of the squared 2-norms of both weight matrices,
        i.e. ||W^1||^2 + ||W^2||^2.

        :return: 0-dim tensor (still attached to the autograd graph)
        """
        squared_norms = [
            torch.norm(layer.weight, 2) ** 2
            for layer in (self.encoder, self.decoder)
        ]
        return squared_norms[0] + squared_norms[1]

    def forward(self, inputs):
        """ Encode the inputs, then decode them back.

        :param inputs: user vector.
        :return: reconstructed user vector with entries in (0, 1).
        """
        hidden = torch.sigmoid(self.encoder(inputs))
        reconstruction = torch.sigmoid(self.decoder(hidden))
        return reconstruction

Accuracy Evaluation Function

In [None]:
def evaluate(model, train_data, valid_data):
    """ Evaluate the valid_data on the current model.

    For every (user, question) pair in valid_data the model reconstructs the
    user's row of train_data; the reconstructed value for that question is
    thresholded at 0.5 and compared with the recorded answer.

    Note: this leaves the model in evaluation mode (model.eval()).

    :param model: Module
    :param train_data: 2D FloatTensor, indexed by user id
    :param valid_data: A dictionary {user_id: list,
    question_id: list, is_correct: list}
    :return: float, fraction of pairs predicted correctly; 0.0 when
    valid_data contains no pairs
    """
    # Tell PyTorch you are evaluating the model.
    model.eval()

    user_ids = valid_data["user_id"]
    question_ids = valid_data["question_id"]
    labels = valid_data["is_correct"]

    total = len(user_ids)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = 0
    # No gradients are needed for scoring, so skip building autograd graphs.
    with torch.no_grad():
        for i, u in enumerate(user_ids):
            inputs = train_data[u].unsqueeze(0)
            output = model(inputs)

            guess = output[0][question_ids[i]].item() >= 0.5
            if guess == labels[i]:
                correct += 1
    return correct / float(total)

Training Function

In [None]:
def train(model, lr, lamb, train_matrix, zero_train_data, train_data, valid_data, num_epoch):
    """ Train the neural network with per-student SGD on the squared
    reconstruction error, then plot the training loss and the
    training/validation accuracy per epoch.

    No regularizer is applied here: lamb is accepted but not used by this
    function.

    :param model: Module
    :param lr: float, SGD learning rate
    :param lamb: float, unused
    :param train_matrix: 2D FloatTensor, one row per student; NaN entries
    are excluded from the loss
    :param zero_train_data: 2D FloatTensor fed to the model as input
    (presumably train_matrix with NaN replaced by 0 -- confirm in load_data)
    :param train_data: Dict, scored each epoch for the training accuracy
    :param valid_data: Dict, scored each epoch for the validation accuracy
    :param num_epoch: int
    :return: None
    """

    # Define optimizers and loss function.
    optimizer = optim.SGD(model.parameters(), lr=lr)
    num_student = train_matrix.shape[0]
    train_losses = []
    train_accs = []
    val_accs = []
    eps = []

    for epoch in range(0, num_epoch):
        # evaluate() switches the model to eval mode at the end of every
        # epoch, so training mode has to be re-enabled here.
        model.train()

        train_loss = 0.
        eps.append(epoch)

        for user_id in range(num_student):
            # Answers to all questions by one student, as a (1, Q) batch.
            inputs = zero_train_data[user_id].unsqueeze(0)
            target = inputs.clone()

            optimizer.zero_grad()
            output = model(inputs)

            # Mask the target to only compute the gradient of valid entries:
            # where the answer is unknown, the target equals the prediction,
            # so that entry contributes zero error and zero gradient.
            nan_mask = torch.isnan(train_matrix[user_id])
            target[0][nan_mask] = output[0][nan_mask].detach()

            loss = torch.sum((output - target) ** 2.)
            loss.backward()

            train_loss += loss.item()
            optimizer.step()

        train_acc = evaluate(model, zero_train_data, train_data)
        valid_acc = evaluate(model, zero_train_data, valid_data)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_accs.append(valid_acc)
        print("Epoch: {} \tTraining Cost: {:.6f}\t "
              "Valid Acc: {}".format(epoch, train_loss, valid_acc))

    # Plot the summed training loss per epoch.
    plt.title("Training Loss vs. Epochs")
    plt.plot(eps, train_losses, label="Training Curve")
    plt.xlabel("Epochs")
    plt.ylabel("Training Loss")
    plt.show()

    # Plot training and validation accuracy per epoch.
    plt.title("Accuracy vs. Epochs")
    plt.plot(eps, train_accs, label="Training Curve")
    plt.plot(eps, val_accs, label="Validation Curve")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend(loc='best')
    plt.show()

Main Function

In [None]:
if __name__ == "__main__":
    # Unpack everything load_data() returns.
    (zero_train_matrix, train_matrix,
     train_data, valid_data, test_data) = load_data()

    # Optimization hyperparameters.
    lr = 0.01
    num_epoch = 40
    lamb = 0

    # Model hyperparameters: hidden size k; input size is the number of
    # columns of the training matrix.
    k = 50
    num_questions = zero_train_matrix.shape[1]
    model = AutoEncoder(num_question=num_questions, k=k)

    train(
        model=model,
        lr=lr,
        lamb=lamb,
        train_matrix=train_matrix,
        zero_train_data=zero_train_matrix,
        train_data=train_data,
        valid_data=valid_data,
        num_epoch=num_epoch,
    )

# Part B: Regularized Autoencoder Neural Networks

Training Function

In [None]:
def train_reg(model, lr, lamb, train_matrix, zero_train_data, train_data, valid_data, num_epoch):
    """ Train the neural network, where the objective also includes
    a regularizer: for each student the loss is the squared reconstruction
    error plus (lamb / 2) * model.get_weight_norm().

    The penalty is recomputed from the current weights at every step so that
    it contributes to the gradient. The initial weight norm is printed once
    before training. Training loss and training/validation accuracy are
    plotted per epoch at the end.

    :param model: Module providing get_weight_norm()
    :param lr: float, SGD learning rate
    :param lamb: float, regularization strength
    :param train_matrix: 2D FloatTensor, one row per student; NaN entries
    are excluded from the reconstruction error
    :param zero_train_data: 2D FloatTensor fed to the model as input
    (presumably train_matrix with NaN replaced by 0 -- confirm in load_data)
    :param train_data: Dict, scored each epoch for the training accuracy
    :param valid_data: Dict, scored each epoch for the validation accuracy
    :param num_epoch: int
    :return: None
    """

    # Report the weight norm before any update (detached: display only).
    norm = model.get_weight_norm()
    norm = norm.detach().numpy()
    print("norm = ", norm)

    # Define optimizers and loss function.
    optimizer = optim.SGD(model.parameters(), lr=lr)
    num_student = train_matrix.shape[0]

    eps = []
    train_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(0, num_epoch):
        # evaluate() switches the model to eval mode at the end of every
        # epoch, so training mode has to be re-enabled here.
        model.train()

        train_loss = 0.

        for user_id in range(num_student):
            # Answers to all questions by one student, as a (1, Q) batch.
            inputs = zero_train_data[user_id].unsqueeze(0)
            target = inputs.clone()

            optimizer.zero_grad()
            output = model(inputs)

            # Mask the target to only compute the gradient of valid entries:
            # where the answer is unknown, the target equals the prediction,
            # so that entry contributes zero error and zero gradient.
            nan_mask = torch.isnan(train_matrix[user_id])
            target[0][nan_mask] = output[0][nan_mask].detach()

            # The penalty must stay attached to the graph and track the
            # current weights; a value detached before training would be a
            # constant and would not regularize anything.
            reconstruction_error = torch.sum((output - target) ** 2.)
            penalty = lamb * model.get_weight_norm() / 2
            loss = reconstruction_error + penalty
            loss.backward()

            train_loss += loss.item()
            optimizer.step()

        train_acc = evaluate(model, zero_train_data, train_data)
        valid_acc = evaluate(model, zero_train_data, valid_data)

        eps.append(epoch)
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_accs.append(valid_acc)
        print("Epoch: {} \tTraining Cost: {:.6f}\t "
              "Valid Acc: {}".format(epoch, train_loss, valid_acc))

    # Plot the summed training loss per epoch.
    plt.title("Training Loss vs. Epochs")
    plt.plot(eps, train_losses, label="Training Curve")
    plt.xlabel("Epochs")
    plt.ylabel("Training Loss")
    plt.show()

    # Plot training and validation accuracy per epoch.
    plt.title("Accuracy vs. Epochs")
    plt.plot(eps, train_accs, label="Training Curve")
    plt.plot(eps, val_accs, label="Validation Curve")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend(loc='best')
    plt.show()

In [None]:
if __name__ == "__main__":
    # Unpack everything load_data() returns.
    (zero_train_matrix, train_matrix,
     train_data, valid_data, test_data) = load_data()

    # Optimization hyperparameters.
    lr = 0.01
    num_epoch = 40
    lamb = 0.001

    # Model hyperparameters: hidden size k; input size is the number of
    # columns of the training matrix.
    k = 50
    num_questions = zero_train_matrix.shape[1]
    model_reg = AutoEncoder(num_question=num_questions, k=k)

    train_reg(
        model=model_reg,
        lr=lr,
        lamb=lamb,
        train_matrix=train_matrix,
        zero_train_data=zero_train_matrix,
        train_data=train_data,
        valid_data=valid_data,
        num_epoch=num_epoch,
    )

norm =  596.55023
Epoch: 0 	Training Cost: 13674.634931	 Valid Acc: 0.622495060683037
Epoch: 1 	Training Cost: 12463.829630	 Valid Acc: 0.6363251481795089
Epoch: 2 	Training Cost: 11755.400912	 Valid Acc: 0.6515664690939882
Epoch: 3 	Training Cost: 11162.529329	 Valid Acc: 0.6642675698560542
Epoch: 4 	Training Cost: 10671.385089	 Valid Acc: 0.6699125035280835
Epoch: 5 	Training Cost: 10273.309865	 Valid Acc: 0.6758396838837144
Epoch: 6 	Training Cost: 9935.320303	 Valid Acc: 0.6804967541631386
Epoch: 7 	Training Cost: 9642.127272	 Valid Acc: 0.6819079875811459
Epoch: 8 	Training Cost: 9389.196759	 Valid Acc: 0.6809201241885408
Epoch: 9 	Training Cost: 9156.072784	 Valid Acc: 0.6812023708721423
Epoch: 10 	Training Cost: 8945.150385	 Valid Acc: 0.6799322607959356
Epoch: 11 	Training Cost: 8771.104641	 Valid Acc: 0.6797911374541349
Epoch: 12 	Training Cost: 8592.795979	 Valid Acc: 0.678662150719729
Epoch: 13 	Training Cost: 8432.629589	 Valid Acc: 0.6775331639853232
Epoch: 14 	Training Co