In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#Use the pandas reader that matches your file type (read_csv assumes a comma-separated text file).
#DATA_PATH is the only value to edit here: set it to the location of the input file before running.
DATA_PATH = ''
df = pd.read_csv(DATA_PATH)
#Preview the first rows to confirm the file was parsed as expected
df.head()

In [None]:
#Split the frame by column position: columns 1-2 become the (x, y) points X,
#column 3 becomes the classification vector y used for the analysis
X = df.iloc[:, 1:3].to_numpy()
y = df.iloc[:, 3].to_numpy()

In [None]:
def sign(a):
    """Return the class label for the score ``a``: 1 when ``a >= 0``, otherwise -1."""
    return 1 if a >= 0 else -1

def confusion_matrix(y_predicted, y_actual, prints = False):
    """Split sample positions into the four confusion-matrix groups.

    Labels are compared against 1 (positive) and -1 (negative). A sample whose
    actual or predicted label is neither value is left out of all four groups.

    Parameters
    ----------
    y_predicted : array-like
        Predicted label of each sample.
    y_actual : array-like
        True label of each sample; must hold as many labels as y_predicted.
    prints : bool, optional
        When True, print the size of each group and the overall accuracy.

    Returns
    -------
    tuple of numpy.ndarray
        (true_positive, false_positive, false_negative, true_negative). Each is
        an integer array of sample positions, so it can be used directly as an
        index even when the group is empty.

    Raises
    ------
    ValueError
        If y_predicted and y_actual do not contain the same number of labels.
    """
    predicted = np.asarray(y_predicted).ravel()
    actual = np.asarray(y_actual).ravel()
    if predicted.size != actual.size:
        raise ValueError(
            "y_predicted and y_actual must have the same length "
            "(got %d and %d)" % (predicted.size, actual.size)
        )

    actual_pos = actual == 1
    actual_neg = actual == -1
    predicted_pos = predicted == 1
    predicted_neg = predicted == -1

    # flatnonzero always yields an integer array, including for empty groups
    true_positive = np.flatnonzero(actual_pos & predicted_pos)
    true_negative = np.flatnonzero(actual_neg & predicted_neg)
    false_positive = np.flatnonzero(actual_neg & predicted_pos)
    false_negative = np.flatnonzero(actual_pos & predicted_neg)

    #print each subset of a confusion matrix and the algorithm's overall accuracy
    if prints:
        total = actual.size
        correct = true_positive.size + true_negative.size
        # accuracy is undefined without samples; report nan instead of dividing by zero
        accuracy = 100*correct/total if total else float("nan")
        print("\nTrue Positive Count:", true_positive.size)
        print("False Positive Count:", false_positive.size)
        print("False Negative Count:", false_negative.size)
        print("True Negative Count:", true_negative.size)
        print("\nAccuracy:", accuracy, "%")
    return true_positive, false_positive, false_negative, true_negative

#Starting weight vector: 3 coefficients drawn uniformly at random from [-1, 1)
w = 2.0 * np.random.random_sample(size=3) - 1.0


"""The following function takes in our X or points and an initial weight vector and outputs the predicted class 
vector based on the initial formula specified on each point in our dataframe"""

def perceptron_predict(X, w):
    """Predict a class for every point in X with the current weights.

    Each row (x0, x1) of X is scored as w[0]*x0 + w[1]*x1 + w[2]; the predicted
    class is 1 when the score is >= 0 and -1 otherwise.

    Parameters
    ----------
    X : array-like, two-dimensional
        One point per row; only the first two columns are read.
    w : sequence of 3 numbers
        Two feature weights followed by the bias term.

    Returns
    -------
    numpy.ndarray
        Float array with one entry (1.0 or -1.0) per row of X.
    """
    X = np.asarray(X)
    # Score all rows at once; the arithmetic order matches the per-row formula
    # so the scores are the same as evaluating each point individually.
    scores = (w[0]*X[:, 0]) + (w[1]*X[:, 1]) + w[2]
    return np.where(scores >= 0, 1.0, -1.0)

""" Once we have used the initial weight vector we will now update the weights at the next iteration based on 
the formulas specified for each element of the weight vector. Each update starts from one incorrectly 
classified index, chosen at random. """

def perceptron_update(X, y, false_indices, w_old):
    """Return new weights after one update step on a randomly chosen misclassified point.

    One entry of ``false_indices`` is drawn at random and cast to int. The label
    of that point times each of its two coordinates is added to the matching
    feature weight, and the label itself is added to the bias ``w_old[2]``.
    ``w_old`` is not modified; a new 3-element array is returned.
    """
    picked = int(np.random.choice(false_indices))
    label = y[picked]
    point = X[picked]
    return np.array([
        w_old[0] + (label*point[0]),  #updated weight function
        w_old[1] + (label*point[1]),
        w_old[2] + label,
    ])

"""Now that we have our initial classification function, the perceptron prediction, and the update rule in 
place, apply all functions in our perceptron and institute a maximum of 10,000 iterations. This will output
the final weights or coefficients of the final classification formula for all our points, as well as the 
confusion matrix, and accuracy of the model"""

def perceptron_train(X, y, w_initial, max_iterations = 10000):
    """Train the perceptron until every point is classified correctly or the update budget runs out.

    Starting from ``w_initial``, the current weights are evaluated with
    ``perceptron_predict`` and ``confusion_matrix``. While some point is
    misclassified and fewer than ``max_iterations`` updates have been made,
    ``perceptron_update`` is applied to the misclassified indices.

    The weights are evaluated *after* every update, including the last one
    allowed, so a run that converges on its final update is reported as
    converged. With ``max_iterations <= 0`` the initial weights are evaluated
    once and returned unchanged.

    Parameters
    ----------
    X : numpy.ndarray
        Points to classify, one per row.
    y : array-like
        True class (1 or -1) of each point.
    w_initial : array-like of 3 numbers
        Starting weights: two feature weights followed by the bias.
    max_iterations : int, optional
        Maximum number of weight updates.

    Returns
    -------
    numpy.ndarray
        The weights in use when training stopped. A summary (weights and
        iterations on success, an error message otherwise) is printed.

    Raises
    ------
    ValueError
        If some point is not classified correctly but none appears among the
        false positives / false negatives, which happens when a label is
        neither 1 nor -1.
    """
    w = w_initial
    iterations = 0

    while True:
        y_predicted = perceptron_predict(X, w)
        true_pos, false_pos, false_neg, true_neg = confusion_matrix(y_predicted, y)
        # Integer comparison: equivalent to accuracy == 1, but also defined for an empty dataset
        converged = (len(true_pos) + len(true_neg)) == len(y)
        if converged or iterations >= max_iterations:
            break

        #What are the false indices?
        false_indices = np.concatenate((false_pos, false_neg))
        if len(false_indices) == 0:
            raise ValueError(
                "Some points are not classified correctly, but none is a false "
                "positive or false negative; labels must be exactly 1 or -1."
            )

        #Apply the weight update function defined above to reduce falsely classified points
        w = perceptron_update(X, y, false_indices, w)
        iterations += 1

    if converged:
        print("Final Weights with Bias:","\n\n", w[0],"\n",w[1],"\n",w[2])
        print("\nIterations Used:", iterations)
        print("\nFinal Weight Vector:", w)

    else:
        print("ERROR: Unable to converge within iteration limit. Data may not be linearly separable.")
        print("\nIterations Used:", iterations)

    return w

In [None]:
#Draw random starting weights in [-1, 1), train on (X, y), then score the same points with the trained weights
weights = 2.0 * np.random.random_sample(size=3) - 1.0
perc_train = perceptron_train(X, y, w_initial=weights)
y_predicted = perceptron_predict(X, perc_train)
tp, fp, fn, tn = confusion_matrix(y_predicted, y, prints=True)