In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np
import pylab as plt
from linearclassifier import linear_predict, perceptron_update, plot_predictions, log_reg_train
from scipy.io import loadmat

In [None]:
# Read the pre-processed cardiotocography dataset from its MATLAB data file.
# loadmat returns a dict keyed by the variable names stored in the file.
variables = loadmat('processedCardio.mat')

# Feature matrices are used exactly as stored; the unpacking below treats
# them as (num_dim, num_examples), i.e. one example per column.
train_data, test_data = variables['trainData'], variables['testData']

# Flatten the label arrays to 1-D and shift them down by one, because the
# original MATLAB data was 1-indexed.
train_labels = variables['trainLabels'].ravel() - 1
test_labels = variables['testLabels'].ravel() - 1

# Dimensions and sizes of the two splits.
num_dim, num_train = train_data.shape
num_test = test_data.shape[1]

# Distinct label values that occur in the training split.
classes = np.unique(train_labels)
num_classes = classes.size

In [None]:
# Show how many training examples fall into each class (default binning).
fig, ax = plt.subplots()
ax.hist(train_labels)
ax.set_xlabel('Class')
ax.set_ylabel('Number of Examples')
plt.show()

In [None]:
# Perceptron experiment: run a fixed number of passes over the training set
# and record train/test accuracy at the start of every pass.

epochs = 40

# All-zero starting weights, shaped (num_dim, num_classes).
model = { 'weights': np.zeros((num_dim, num_classes)) }
train_accuracy, test_accuracy = np.zeros(epochs), np.zeros(epochs)

for epoch in range(epochs):
    # Accuracy is measured BEFORE this epoch's updates, so entry 0 describes
    # the untrained (all-zero) model and the last epoch's updates are never
    # scored.
    train_hits = (linear_predict(train_data, model) == train_labels).sum()
    train_accuracy[epoch] = train_hits / num_train

    test_hits = (linear_predict(test_data, model) == test_labels).sum()
    test_accuracy[epoch] = test_hits / num_test

    # One perceptron pass, one training column at a time, in index order.
    # `model` is never reassigned here, so any learning has to happen inside
    # perceptron_update (presumably by mutating model in place -- confirm in
    # linearclassifier). A falsy return value is counted as a mistake.
    mistakes = sum(
        not perceptron_update(train_data[:, i], model, train_labels[i])
        for i in range(num_train)
    )

    print("Finished epoch %d with %d mistakes." % (epoch, mistakes))

In [None]:
# Plot perceptron accuracy per epoch for both data splits on one set of axes.

fig, ax = plt.subplots()
epoch_axis = range(epochs)

train_line = ax.plot(epoch_axis, train_accuracy, label="Training")
test_line = ax.plot(epoch_axis, test_accuracy, label="Testing")

ax.set_title('Cardiotocography Data')
ax.set_xlabel('Iteration')
ax.set_ylabel('Perceptron Accuracy')
ax.legend()

plt.show()

In [None]:
# Logistic regression gradient check

# First check that the gradient and objective function are consistent with
# each other, evaluated at a random weight vector.
#
# The random point comes from a locally seeded generator instead of the
# global np.random state, so the check evaluates the same point on every
# Restart & Run All and does not disturb the global random stream.
# Note the weights are passed as a flat vector of length num_dim * num_classes
# here, unlike the 2-D (num_dim, num_classes) arrays used for training.
gradient_check_rng = np.random.RandomState(0)
_ = log_reg_train(train_data, train_labels, {'lambda': 0.1},
              {'weights': gradient_check_rng.randn(num_dim * num_classes)}, check_gradient=True)

In [None]:
# Train logistic regression over a sweep of regularization strengths.

# 15 log-spaced values of lambda: 1e-12, 1e-11, ..., 1e2.
lambda_vals = np.logspace(-12, 2, 15)

# Starting model: all-zero weights, shaped (num_dim, num_classes).
model = {'weights': np.zeros((num_dim, num_classes))}

# One accuracy entry per lambda value.
train_accuracy, test_accuracy = np.zeros(lambda_vals.size), np.zeros(lambda_vals.size)

for i, lambda_val in enumerate(lambda_vals):
    params = {'lambda': lambda_val}

    # The model returned for the previous lambda is passed back in, so the
    # fits are chained rather than each starting from zeros (presumably a
    # warm start -- confirm how log_reg_train uses its model argument).
    model = log_reg_train(train_data, train_labels, params, model)

    print("Finished learning with lambda = %e" % params['lambda'])

    # Fraction of examples whose predicted class equals the true label.
    train_predictions = linear_predict(train_data, model)
    train_accuracy[i] = (train_predictions == train_labels).sum() / num_train

    test_predictions = linear_predict(test_data, model)
    test_accuracy[i] = (test_predictions == test_labels).sum() / num_test

In [None]:
# Plot accuracy against lambda for the logistic regression sweep.
# The x-axis is logarithmic because lambda spans many orders of magnitude.

fig, ax = plt.subplots()

train_line = ax.semilogx(lambda_vals, train_accuracy, label="Training")
test_line = ax.semilogx(lambda_vals, test_accuracy, label="Testing")

ax.set_title('Cardiotocography Data')
ax.set_xlabel('lambda')
ax.set_ylabel('Logistic Regression Accuracy')
ax.legend()
plt.show()