In [7]:
import numpy as np
import pandas as pd

In [8]:
class Preprocess:
    """Load a header-less CSV file and prepare it for a linear classifier.

    The file is read with ``header=None``, so pandas labels the data columns
    with integers. A constant column named ``"ones"`` is inserted at
    position 0 so a model can learn a bias term as an ordinary weight.
    ``train_test_split`` treats the last column as the target.
    """

    def __init__(self, filename):
        """Read ``filename`` into ``self.df`` and prepend the bias column.

        Parameters
        ----------
        filename : str or path-like
            Location of the comma-separated data file (no header row).
        """
        self.df = pd.read_csv(filename, header=None)
        # A 1-D vector is the natural shape for a single new column.
        self.df.insert(0, "ones", np.ones(len(self.df)), allow_duplicates=True)

    def train_test_split(self, split=0.7, seed=None):
        """Shuffle the rows and split them into training and test partitions.

        Parameters
        ----------
        split : float, optional
            Fraction of rows placed in the training partition; must lie in
            ``[0, 1]``. Defaults to ``0.7``.
        seed : int or None, optional
            When given, the shuffle uses a dedicated generator seeded with
            this value, so the same split is produced on every call. When
            ``None`` (the default) the shuffle draws from numpy's global
            random state.

        Returns
        -------
        tuple
            ``(x_train, y_train, x_test, y_test)``. The ``x_*`` items are
            DataFrames holding every column except the last; the ``y_*``
            items are Series holding the last column. Rows keep their
            original index labels.

        Raises
        ------
        ValueError
            If ``split`` is outside ``[0, 1]``.
        """
        if not 0 <= split <= 1:
            raise ValueError("split must be between 0 and 1, got {!r}".format(split))

        rows = len(self.df.index)
        cols = len(self.df.columns)
        split_ind = int(split * rows)

        if seed is None:
            shuffled_ind = np.random.permutation(rows)
        else:
            # Local generator: reproducible without touching global RNG state.
            shuffled_ind = np.random.default_rng(seed).permutation(rows)
        shuffled_df = self.df.take(shuffled_ind)

        # The last column is the target; everything before it is a feature.
        x_train = shuffled_df.iloc[:split_ind, :cols - 1]
        y_train = shuffled_df.iloc[:split_ind, cols - 1]
        x_test = shuffled_df.iloc[split_ind:, :cols - 1]
        y_test = shuffled_df.iloc[split_ind:, cols - 1]

        return (x_train, y_train, x_test, y_test)

In [9]:
class LinearPerceptron:
    """Binary (0/1) linear classifier trained with the perceptron update rule.

    A sample ``x`` is assigned class 1 when ``dot(coef, x) > 0`` and class 0
    otherwise. No bias term is added here; the features are used as given.
    """

    def __init__(self, x_train, y_train):
        """Store the training data and start from an all-zero weight vector.

        Parameters
        ----------
        x_train : pandas.DataFrame
            Feature rows; converted with ``to_numpy()``.
        y_train : pandas.Series
            Targets compared against the 0/1 predictions; converted with
            ``to_numpy()``.
        """
        self.x_train = x_train
        self.y_train = y_train
        self.X = x_train.to_numpy()
        self.Y = y_train.to_numpy()
        # Read the width from the shape so an empty training set still works.
        self.coef = np.zeros(self.X.shape[1])
        self.train_err = []

    def predict(self, x):
        """Return an integer array of 0/1 labels for the rows of ``x``."""
        return (np.dot(x, self.coef) > 0).astype(int)

    def fit(self, epochs):
        """Train the weights in place and return them.

        Samples are visited in stored order, pass after pass. Training stops
        when a complete pass makes no mistakes or when the budget runs out.

        Parameters
        ----------
        epochs : int
            Maximum number of individual sample visits (not full passes over
            the data). ``fit(0)`` leaves the weights untouched.

        Returns
        -------
        numpy.ndarray
            The weight vector ``self.coef`` after training.

        Notes
        -----
        The count printed at the end is the number of updates made during
        the last pass, which may be a partial pass if the budget ran out.
        """
        print("Running Linear Perceptron")
        n_samples = self.X.shape[0]
        visited = 0
        mistakes = 0
        while visited < epochs:
            mistakes = 0

            for j in range(n_samples):
                # '>=' so that exactly `epochs` samples are visited at most.
                if visited >= epochs:
                    break
                activation = np.dot(self.coef, self.X[j])
                curr_y = 1 if activation > 0 else 0
                if self.Y[j] != curr_y:
                    # Move the boundary towards (+1) or away from (-1) the sample.
                    self.coef = self.coef + (self.Y[j] - curr_y) * self.X[j]
                    mistakes += 1
                visited += 1

            if mistakes == 0:
                # A clean pass (or no samples at all): nothing left to learn.
                break

        print("Finished Linear Perceptron - misclassifications : {}".format(mistakes))
        return self.coef

In [10]:
def find_err(lp_curr, x_curr, y_curr):
    """Return the percentage of samples that ``lp_curr`` misclassifies.

    Parameters
    ----------
    lp_curr : object
        Any model exposing ``predict(x)`` that returns one label per row.
    x_curr : array-like
        Samples, passed unchanged to ``lp_curr.predict``.
    y_curr : array-like
        True labels. Compared by position, so a pandas Series with a
        shuffled index is handled the same way as a plain array.

    Returns
    -------
    float
        Error rate in the range ``[0, 100]``.

    Raises
    ------
    ZeroDivisionError
        If ``y_curr`` is empty.
    """
    # Convert to plain arrays so the comparison is positional, never label-based.
    actual = np.ravel(np.asarray(y_curr))
    N = actual.shape[0]
    # Only the first N predictions are compared.
    predicted = np.ravel(np.asarray(lp_curr.predict(x_curr)))[:N]
    error = int(np.count_nonzero(predicted != actual))
    return (error / N) * 100

In [11]:
# Dataset 1: load the file, split it, train a perceptron, report accuracy.
prep_data1 = Preprocess("dataset_LP_1.txt")
x_train1, y_train1, x_test1, y_test1 = prep_data1.train_test_split()

lp1 = LinearPerceptron(x_train1, y_train1)
lp1.fit(1000000)

# find_err yields an error percentage; accuracy is its complement.
train_err1 = find_err(lp1, x_train1.to_numpy(), y_train1.to_numpy())
print("Training Accuracy is : {}%".format(100 - train_err1))
test_err1 = find_err(lp1, x_test1.to_numpy(), y_test1.to_numpy())
print("Testing Accuracy is : {}%".format(100 - test_err1))

Running Linear Perceptron
Finished Linear Perceptron - misclassifications : 12
Training Accuracy is : 98.02083333333333%
Testing Accuracy is : 97.0873786407767%


In [12]:
# Dataset 2: load the file, split it, train a perceptron, report accuracy.
prep_data2 = Preprocess("dataset_LP_2.csv")
x_train2, y_train2, x_test2, y_test2 = prep_data2.train_test_split()

lp2 = LinearPerceptron(x_train2, y_train2)
lp2.fit(1000000)

# find_err yields an error percentage; accuracy is its complement.
train_err2 = find_err(lp2, x_train2.to_numpy(), y_train2.to_numpy())
print("Training Accuracy is : {}%".format(100 - train_err2))
test_err2 = find_err(lp2, x_test2.to_numpy(), y_test2.to_numpy())
print("Testing Accuracy is : {}%".format(100 - test_err2))

Running Linear Perceptron
Finished Linear Perceptron - misclassifications : 0
Training Accuracy is : 100.0%
Testing Accuracy is : 100.0%
