# In [10]:
# Problems 7-10 on https://work.caltech.edu/homework/hw1.pdf

import random
import matplotlib.pyplot as plt
import numpy as np

def runs(n, run_count=1000):
    """Average the outcome of ``run(n)`` over ``run_count`` repetitions.

    Args:
        n: number of training points handed to each ``run`` call.
        run_count: how many independent experiments to average over.

    Returns:
        A tuple ``(avg_iterations, avg_p_disagree)``: the mean number of PLA
        iterations and the mean disagreement fraction reported by ``run``.

    Raises:
        ZeroDivisionError: if ``run_count`` is 0 (the averages divide by it).
    """
    iteration_sum = 0
    disagree_sum = 0
    for _ in range(run_count):
        step_count, disagree_fraction = run(n)
        iteration_sum += step_count
        disagree_sum += disagree_fraction

    return iteration_sum / run_count, disagree_sum / run_count

def run(n, test_count=30):
    """Run one perceptron learning algorithm (PLA) experiment.

    A random target line is generated, ``n`` random training points are
    labelled +1/-1 by ``evaluate_x`` against that line, and PLA is run from
    all-zero weights until no training point is misclassified. The learned
    weights are then compared with the target on freshly generated points.

    Args:
        n: number of training points.
        test_count: number of fresh random points used to estimate how often
            the learned hypothesis disagrees with the target (default 30).

    Returns:
        A tuple ``(iterations, p_disagree)``: the number of PLA weight updates
        performed and the fraction of test points on which the learned
        hypothesis and the target disagree.

    Raises:
        ValueError: if ``test_count`` is smaller than 1.
    """
    if test_count < 1:
        raise ValueError("test_count must be at least 1")

    line = generate_line()
    xs = [generate_point() for _ in range(n)]
    ys = [evaluate_x(line, x) for x in xs]
    # display_data(line, xs, ys)

    # Prepend the constant coordinate x_0 = 1 once, instead of rebuilding the
    # augmented vectors on every PLA iteration. The reshape keeps the matrix
    # two-dimensional even when there are no training points.
    inputs = np.array([[1.0] + x for x in xs]).reshape(-1, 3)
    targets = np.array(ys)

    # initialize weights at 0 (d + 1 weights)
    weights = np.zeros(3)

    iterations = 0  # number of PLA weight updates performed
    while True:
        # np.sign gives 0 for a zero activation, which never equals a +1/-1
        # label, so every point counts as misclassified for all-zero weights.
        misclassified = np.flatnonzero(np.sign(inputs @ weights) != targets)

        # Converged: every training point is classified correctly.
        if misclassified.size == 0:
            break

        iterations += 1

        # Pick a random misclassified point and nudge the weights towards it.
        picked = random.choice(misclassified.tolist())
        weights = weights + targets[picked] * inputs[picked]

    # Estimate the disagreement probability on fresh random points.
    misclassified_count = 0
    for _ in range(test_count):
        x = generate_point()
        y = evaluate_x(line, x)
        predicted_y = np.sign(np.array([1.0] + x) @ weights)
        if predicted_y != y:
            misclassified_count += 1

    p_disagree = misclassified_count / test_count
    return iterations, p_disagree

def generate_line():
    """Return ``(slope, intercept)`` of the line through two random points.

    Both points come from ``generate_point``; the pair is converted by
    ``slope_and_intercept``, so the slope comes first in the result.
    """
    return slope_and_intercept(generate_point(), generate_point())

def generate_point():
    """Return a random 2-D point as a list ``[x, y]``.

    Each coordinate is drawn independently with ``random.uniform(-1, 1)``.
    """
    return [random.uniform(-1, 1) for _ in range(2)]

def slope_and_intercept(point_1, point_2):
    """Return ``(slope, intercept)`` of the line through two points.

    The points are ordered first (smaller one first, compared as sequences),
    and the intercept is computed from the smaller point.

    Raises:
        ZeroDivisionError: if both points have the same first coordinate,
            i.e. the line is vertical.
    """
    # Put the two points in ascending order before doing the arithmetic.
    if point_2 < point_1:
        point_1, point_2 = point_2, point_1

    rise = point_2[1] - point_1[1]
    run_length = point_2[0] - point_1[0]
    slope = rise / run_length
    return slope, point_1[1] - (slope * point_1[0])

def evaluate_x(line, x):
    """Classify point ``x`` against ``line`` given as ``(slope, intercept)``.

    Returns 1 when the point lies strictly above the line and -1 otherwise,
    so a point exactly on the line is labelled -1.
    """
    slope, intercept = line
    return 1 if x[1] > (slope * x[0]) + intercept else -1

def display_data(line, xs, ys):
    """Plot the target line together with the labelled points.

    Args:
        line: ``(slope, intercept)`` of the line to draw.
        xs: sequence of 2-D points.
        ys: labels aligned with ``xs``; a label of 1 is drawn with a ``+``
            marker, anything else with a ``_`` marker.
    """
    slope, intercept = line[0], line[1]
    # The line is anchored at its intercept, i.e. the point (0, intercept).
    plt.axline(xy1=(0, intercept), slope=slope)
    for idx, point in enumerate(xs):
        marker = '+' if ys[idx] == 1 else '_'
        plt.scatter(point[0], point[1], color='#1f77b4', marker=marker)

# Average the PLA experiment over the default 1000 runs with n = 10 training
# points and print the (average iterations, average disagreement) tuple.
print(runs(10))

# Output: (10.006, 0.11023333333333342)
