In [1]:
from pdb import set_trace as debugger
from tqdm import tqdm
from math import sqrt
import numpy as np
import pandas
    
def upload_cifar(filename):
    """Load one pickled batch file and reduce its labels to two classes.

    Parameters
    ----------
    filename : str
        Path to a pickle file holding a dict with ``'data'`` and
        ``'labels'`` entries.

    Returns
    -------
    features
        Whatever is stored under ``data['data']``, returned unchanged.
    labels : numpy.ndarray
        Column vector of shape (n, 1): original classes 0-4 become 0 and
        classes 5-9 become 1.
    """
    # cPickle only exists on Python 2; fall back to pickle on Python 3.
    try:
        import cPickle as pickle
        load_kwargs = {}
    except ImportError:
        import pickle
        # Files pickled by Python 2 contain 8-bit strings; latin1 decodes
        # them byte-for-byte so the dict keys stay plain 'data' / 'labels'.
        load_kwargs = {'encoding': 'latin1'}

    # NOTE(security): unpickling can execute arbitrary code -- only load
    # batch files obtained from a trusted source.
    with open(filename, 'rb') as fo:  # closed even if load() raises
        data = pickle.load(fo, **load_kwargs)

    features = data['data']
    labels = np.atleast_2d(data['labels']).T

    # squash classes 0-4 into class 0, and squash classes 5-9 into class 1
    # (order matters: after the first line only values 0 and 5-9 remain,
    # so the second line cannot touch the freshly written zeros)
    labels[labels < 5] = 0
    labels[labels >= 5] = 1
    return features, labels

def upload_iris(filename):
    """Read a CSV file and split it into a feature matrix and binary labels.

    Parameters
    ----------
    filename : str
        Path to a CSV file whose first column is the class label and whose
        next four columns (indices 1-4) are the features.

    Returns
    -------
    features : numpy.ndarray
        Columns 1 through 4 of the file, shape (n, 4).
    labels : numpy.ndarray
        Column vector of shape (n, 1) taken from column 0, with class 2
        relabelled as class 1.
    """
    data = pandas.read_csv(filename)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on old and new versions alike. Copy so the in-place relabel below
    # never writes into the DataFrame's own (possibly read-only) buffer.
    m = data.values.copy()
    labels = m[:, 0]
    labels[labels == 2] = 1  # squash class 2 into class 1
    labels = np.atleast_2d(labels).T
    features = m[:, 1:5]
    return features, labels

### Perceptron class; it mainly trains the weights for us to use later
class Perceptron:
    """Perceptron with a step activation, trained one instance at a time.

    Attributes set by ``__init__``:
        c         -- learning rate that scales every weight update
        theta     -- threshold; an instance is predicted as 1 when
                     ``w . x > theta`` and as 0 otherwise
        features  -- training inputs with a constant 1 appended as the last
                     column (the bias input)
        labels    -- training targets, iterated in step with ``features``
        _w        -- (1, n+1) ``np.matrix`` of weights, randomly initialised
        l2_change -- list holding the L2 norm of ``_w``: the initial value,
                     then one more entry per completed epoch
    """

    def __init__(self, features, labels, c = 1, theta = 0):
        """Store the training data and draw random initial weights.

        features -- 2-D array-like, one row per instance
        labels   -- one label per row of ``features``; each element must
                    support ``.item()`` (e.g. rows of an (n, 1) array)
        c        -- learning rate
        theta    -- prediction threshold
        """
        # Accept any 2-D array-like, not only ndarrays.
        features = np.asarray(features)
        self.c = c
        self.theta = theta
        # Add a bias input into every instance
        self.features = np.insert(features, features.shape[1], 1, axis=1)
        self.labels = labels
        # Create a (1,n+1) matrix of weights, +1 for bias input
        self._w = np.matrix(np.random.rand(features.shape[1] + 1))
        # L2 norm change list
        self.l2_change = [self._l2_norm()]

    def _l2_norm(self):
        """Return the L2 norm of the current weight vector as a float."""
        return sqrt(self._w.dot(self._w.T).item())

    def run(self, epochs=100, progress=True):
        """Train for ``epochs`` passes over the stored data.

        epochs   -- number of full passes (defaults to the previously
                    hard-coded 100)
        progress -- wrap the epoch loop in a tqdm progress bar when True
                    (the default); pass False for a silent run

        After every epoch the current L2 norm of the weights is appended to
        ``self.l2_change``.
        """
        # range() instead of xrange() so this runs on Python 2 and 3.
        epoch_iter = range(epochs)
        if progress:
            epoch_iter = tqdm(epoch_iter)

        for _ in epoch_iter:
            for instance, label in zip(self.features, self.labels):
                # Get our prediction
                z_pred = int(self._w.dot(instance).item() > self.theta)
                # Compare to label and adjust weights if necessary
                lbl = label.item()
                if lbl != z_pred:
                    self.adjust_weights(lbl - z_pred, instance)
            # Compute and save the L2 norm for an epoch
            self.l2_change.append(self._l2_norm())

    def adjust_weights(self, learning_direction, instance):
        """Move the weights by ``c * learning_direction * instance``.

        learning_direction -- ``label - prediction`` (+1 or -1 when called
                              from ``run``)
        instance           -- the bias-augmented feature row that was
                              misclassified
        """
        # Force a float step: with an integer ``c`` and unsigned-integer
        # features, a negative integer step cannot be represented in the
        # features' dtype and the multiplication would overflow or raise.
        step = float(self.c * learning_direction)
        weight_changes = np.multiply(instance, step)
        self._w = np.add(self._w, weight_changes)


In [None]:
### Plotting functions ###
import matplotlib.pyplot as plt

def plot_accuracy(dataset_name, models, test_features, test_labels):
    """Placeholder for an accuracy plot; not implemented yet.

    All four arguments are accepted and ignored. Returns None.
    """
    return None

def plot_l2_norm(dataset_name, models):
    """Placeholder for an L2-norm plot; not implemented yet.

    Both arguments are accepted and ignored. Returns None.
    """
    return None



In [2]:
#### Run perceptron on IRIS dataset #####
from collections import namedtuple

# Record type for one trained model: its learning rate, final weights and
# per-epoch L2-norm history
ntuple = namedtuple("Perceptron", ('learning_rate', 'weights', 'l2_change'))

iris_models = []
features, labels = upload_iris("Fisher.csv")
# Train one perceptron per learning rate on the same data
for l_rate in (1, 0.1, 0.01):
    prcp = Perceptron(features, labels, c=l_rate)
    prcp.run()
    # Keep the results so they can be plotted later
    iris_models.append(
        ntuple(learning_rate=l_rate, weights=prcp._w, l2_change=prcp.l2_change)
    )



100%|██████████| 100/100 [00:00<00:00, 278.71it/s]
100%|██████████| 100/100 [00:00<00:00, 276.75it/s]
100%|██████████| 100/100 [00:00<00:00, 271.35it/s]


In [None]:
%matplotlib inline

### Plot IRIS model data ###

# Reuses whatever `features` / `labels` currently hold as the test data;
# when the notebook is run top to bottom these are the IRIS arrays the
# models above were trained on.
test_features = features
test_labels = labels
plot_accuracy('IRIS', iris_models, test_features, test_labels)
plot_l2_norm('IRIS', iris_models)

In [3]:
#### Run perceptron on CIFAR-10 dataset #####

import os
from collections import defaultdict

# Maps each batch file path to the list of models trained on that batch
cifar_models = defaultdict(list)
BASE_DIR = 'cifar-10-batches-py'

# Only directory entries whose name contains 'data' are used
for f_name in (entry for entry in os.listdir(BASE_DIR) if 'data' in entry):
    # Path of the batch file relative to the working directory
    f_path = os.path.join(BASE_DIR, f_name)
    features, labels = upload_cifar(f_path)
    # Train one perceptron per learning rate on this batch
    for l_rate in (0.001, 0.00001):
        prcp = Perceptron(features, labels, c=l_rate)
        prcp.run()
        # Keep the results so they can be plotted later
        cifar_models[f_path].append(
            ntuple(learning_rate=l_rate, weights=prcp._w, l2_change=prcp.l2_change)
        )


100%|██████████| 100/100 [00:50<00:00,  2.02it/s]
100%|██████████| 100/100 [00:47<00:00,  2.09it/s]
100%|██████████| 100/100 [00:52<00:00,  1.78it/s]
100%|██████████| 100/100 [00:50<00:00,  2.15it/s]
100%|██████████| 100/100 [00:52<00:00,  2.13it/s]
100%|██████████| 100/100 [00:53<00:00,  2.06it/s]
100%|██████████| 100/100 [00:52<00:00,  2.03it/s]
100%|██████████| 100/100 [00:50<00:00,  2.08it/s]
100%|██████████| 100/100 [00:57<00:00,  1.91it/s]
100%|██████████| 100/100 [01:00<00:00,  1.14it/s]


In [None]:
%matplotlib inline
### Plot CIFAR-10 model data ###

# dict.items() exists on both Python 2 and Python 3, whereas iteritems()
# is Python-2 only and raises AttributeError on Python 3.
for f_path, models in cifar_models.items():
    # No held-out data is loaded yet, so None is passed for both test inputs
    test_features, test_labels = None, None # There's a test batch there...
    plot_accuracy(f_path, models, test_features, test_labels)
    plot_l2_norm(f_path, models)

