In [30]:
# Import modules for helper functions.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
sns.set_context('notebook')
%matplotlib inline


In [31]:
# Helper function to create random points for the training set.

def PointGenerator(M):
    """Return a list of M random points in the square [-1, 1] x [-1, 1].

    Each point is a two-element list [x, y]; both coordinates are drawn
    independently with random.uniform(-1, 1), x first and then y.
    """
    def draw_point():
        # Draw the horizontal coordinate before the vertical one.
        horizontal = random.uniform(-1, 1)
        vertical = random.uniform(-1, 1)
        return [horizontal, vertical]

    return [draw_point() for _ in range(M)]

In [32]:
# Helper function to create the target function. Returns the slope and y-intercept of a random line, to be used later.

def LineGenerator():
    """Create a random target line and return it as (slope, intercept).

    Two points are drawn uniformly from [-1, 1] x [-1, 1] and the line through
    them is expressed as y = slope * x + intercept.

    If both points share the same x coordinate the line is vertical and has no
    finite slope, so the pair is redrawn instead of dividing by zero.

    Returns:
        tuple: (slope, intercept) of the generated line.
    """
    while True:
        # Coordinates are drawn in the order x1, x2, y1, y2.
        first_x_point = random.uniform(-1, 1)
        second_x_point = random.uniform(-1, 1)
        first_y_point = random.uniform(-1, 1)
        second_y_point = random.uniform(-1, 1)

        # A vertical line cannot be written in slope-intercept form; try again.
        if first_x_point != second_x_point:
            break

    slope = (first_y_point - second_y_point) / (first_x_point - second_x_point)
    intercept = first_y_point - slope * first_x_point

    return (slope, intercept)

In [33]:
# Perceptron Algorithm on N training data points.

def _target_label(x_value, y_value, slope, intercept):
    """Return 1 if the point is on or above the line y = slope * x + intercept, else -1."""
    if y_value >= x_value * slope + intercept:
        return 1
    return -1


def _hypothesis_sign(weight, x_value, y_value):
    """Return the sign of the weighted sum for a point, with weight[0] applied to x0 = 1."""
    return np.sign(weight[0] + weight[1] * x_value + weight[2] * y_value)


def Perceptron(N, test_points=10**4):
    """Run the perceptron learning algorithm once on N random training points.

    A random target line is generated with LineGenerator and N random points
    from PointGenerator are labelled +1 (on or above the line) or -1 (below).
    Starting from a zero weight vector, a randomly chosen misclassified point
    is used to update the weights until no training point is misclassified.
    The learned weights are then compared against the target line on a fresh
    set of random points.

    Args:
        N: Number of training points.
        test_points: Number of fresh random points used to estimate how often
            the learned hypothesis disagrees with the target. Defaults to 10**4.

    Returns:
        tuple: (iterations, disagreement), where iterations is the number of
        weight updates performed and disagreement is the fraction of the fresh
        points on which the hypothesis and the target differ.

    Raises:
        ValueError: If test_points is less than 1, since the disagreement
            fraction would be undefined.
    """
    if test_points < 1:
        raise ValueError('test_points must be at least 1')

    # Generate the training inputs first, then the target function.
    points = PointGenerator(N)
    slope, intercept = LineGenerator()

    # Each training example is a 4-tuple: artificial coordinate x0 = 1,
    # the point's x and y, and its target label.
    training_set = [
        (1, x_value, y_value, _target_label(x_value, y_value, slope, intercept))
        for x_value, y_value in points
    ]

    # Initialize weight vector to 0.
    weight = [0, 0, 0]
    iterations = 0

    # With zero weights the weighted sum has sign 0, which matches neither
    # label, so every training point starts out misclassified.
    misclassified = list(training_set)

    # While there are still misclassified points, choose a random one, update
    # the weight vector with it, and recompute the misclassified list.
    while misclassified:
        selection = random.choice(misclassified)
        label = selection[3]
        weight[0] += selection[0] * label
        weight[1] += selection[1] * label
        weight[2] += selection[2] * label

        misclassified = [
            sample for sample in training_set
            if _hypothesis_sign(weight, sample[1], sample[2]) != sample[3]
        ]
        iterations += 1

    # Count the out-of-sample points where the hypothesis and target disagree.
    counter = 0
    for x_value, y_value in PointGenerator(test_points):
        expected = _target_label(x_value, y_value, slope, intercept)
        if _hypothesis_sign(weight, x_value, y_value) != expected:
            counter += 1

    # Fraction of out-of-sample points that were classified incorrectly.
    disagreement = counter / test_points

    return (iterations, disagreement)
        

In [34]:
# Average the iteration count and disagreement over 1000 runs with 10 training points.
total_iterations = 0
total_disagreement = 0
for _ in range(1000):
    run_iterations, run_disagreement = Perceptron(10)
    total_iterations += run_iterations
    total_disagreement += run_disagreement
average_iterations = total_iterations / 1000
average_disagreement = total_disagreement / 1000
print('iterations = %s' % average_iterations)
print('disagreement = %s' % average_disagreement)


iterations = 13.305
disagreement = 0.10931250000000017


In [35]:
# Average the iteration count and disagreement over 1000 runs with 100 training points.
total_iterations = 0
total_disagreement = 0
for _ in range(1000):
    run_iterations, run_disagreement = Perceptron(100)
    total_iterations += run_iterations
    total_disagreement += run_disagreement
average_iterations = total_iterations / 1000
average_disagreement = total_disagreement / 1000
print('iterations = %s' % average_iterations)
print('disagreement = %s' % average_disagreement)

iterations = 106.721
disagreement = 0.01364059999999999
