### Homework #2

In [41]:
import numpy as np
import random
import matplotlib.pyplot as plt
import warnings
%matplotlib inline

class Regression:
    
    def __init__(self, N, get_line = True, weights_line=np.array([0,0,0])):
        """If get_line is set to False then weights_line vector must be supplied"""
        if get_line:
            self.v1 = np.random.uniform(-1, 1, 2 )
            self.v2 = np.random.uniform(-1, 1, 2 )
            self.weights_line = self.make_line(self.v1, self.v2)
        else:
            self.weights_line = weights_line
            if np.array_equal(weights_line, np.array([0,0,0])):
                warnings.warn('Did not set weights_line vector')
                
        self.N = N
        return

    def make_line(self, v1, v2):
        """This function returns a vector for the random line formed
            return: weights of line vector
        
        """
        slope = (v2[1] - v1[1])/(v2[0] - v1[0])
        intercept = v2[1] - slope*v2[0]
        return np.array([intercept, slope, -1])
    
    def generate_data(self):
        """This function classifies randomly generated N points based on random line
        args:
               N (int) - Number of points generated
               weights (numpy array, size 3) - weights for the random line
        Return:
               data (list of length N) - randomly generated points"""
        self.data = []
        self.X = []
        self.y = []
        
        for i in range(self.N):
            x = np.concatenate((np.array([1]), np.random.uniform(-1, 1, 2 )))
            if np.dot(self.weights_line, x)> 0:
                self.data.append((x, 1))
                self.X.append(x)
                self.y.append([1])
            else:
                self.data.append((x, -1))
                self.X.append(x)
                self.y.append([-1])
        self.X = np.array(self.X)
        self.y = np.array(self.y)
        return self

    def classify(self):
        """This function implements the regression algorithm 
           args:
               data (tuple(numpy array of size 2, int))- 
               w (numpy array, size 3) - weights for the initial zero vector
           Return:
               w (numpy array, size 3) - weights for the final line
        """  
        self.w = np.linalg.inv(np.dot(self.X.T, self.X)).dot(self.X.T).dot(self.y)
        self.w = self.w.reshape(3)
        return self
    
    def insample_error(self):
        misclassified_list = []
        for (x,y) in self.data:
            if np.sign(np.dot(self.w, x)) != y:
                misclassified_list.append((x,y))
        return len(misclassified_list)/len(self.data)    
    
    def label_data(self, data):
        """This function classifies data based on weights of line
           args:
               data (tuple(numpy array of size 2, int))- 
               w (numpy array, size 3) - weights for the line
           Return:
               misclassified_list (list) - list of misclassified points
        """    
        misclassified_list = []
        for (x,y) in data:
            if np.sign(np.dot(self.w, x)) != y:
                misclassified_list.append((x,y))
        return misclassified_list
    
    def classify_perceptron(self, data):
        """This function implements the perceptron learning algorithm 
           args:
               data (tuple(numpy array of size 2, int))- 
               w (numpy array, size 3) - weights for the initial zero vector
           Return:
               w (numpy array, size 3) - weights for the final line
        """  
        misclassified_list = self.label_data(data)
        self.steps = 1
        while len(misclassified_list)!= 0:
            (xi,yi) = random.choice(misclassified_list)
            self.w = self.w + xi*yi  
            misclassified_list = self.label_data(data)
            self.steps +=1
        return self 

In [37]:
insample_error_list = [] 
outsample_error_list = []
iterations = 500
for i in range(iterations):
    x = Regression(100).generate_data().classify()
    insample_error_list.append(x.insample_error())
    y = Regression(1000, get_line = False, weights_line=x.w).generate_data().classify()
    outsample_error_list.append(y.insample_error())
    if i%100==0:
        print ("{:.2f}".format(100*(i+1)/iterations), '% -> ', end='')
print ('100 %')
print(np.mean(insample_error_list), np.mean(outsample_error_list ))

0.20 % -> 20.20 % -> 40.20 % -> 60.20 % -> 80.20 % -> 100 %
0.03974 0.030386


In [44]:
iterations = 1000
steps_list = [] 
for i in range(iterations):
    x = Regression(10).generate_data().classify()
    y = x.classify_perceptron(x.data)
    steps_list.append(y.steps)
    if i%100==0:
        print ("{:.2f}".format(100*(i+1)/iterations), '% -> ', end='')
print ('100 %')
print(np.mean(steps_list))    

0.10 % -> 10.10 % -> 20.10 % -> 30.10 % -> 40.10 % -> 50.10 % -> 60.10 % -> 70.10 % -> 80.10 % -> 90.10 % -> 100 %
5.216
