In [1]:
import pandas as pd
import numpy as np
import collections

In [2]:
def construct_pandas_frame(html, attributes):
    '''
    Creats a pandas dataframe from a csv like data format from a csv
    Also assumes that the header is not in the csv representation nor the index name
    :param: html - string for the location of the csv like data on website
    :param: attributes - list of strings of the given data set in order
    :returns: pandas dataframe
    '''
    df = pd.read_csv(html, header = None)
    df.columns = attributes
    return df
    
    

In [3]:
class Perceptron(object):
    def __init__(self, eta, iters):
        '''
        simple constructor function
        :param: _iter - int, number of iterations
        :param: eta - int, acts as a scalar how much to change weights by
        '''
        self.eta = eta
        self.iters = iters
    
    def learn(self, row_vectors, output_vectors):
        '''
        Moves through each row of attributes, and finds a prediction.
        Then if necessary, updates the weights to see if 
        '''
        #Generate random number for the length of all rows
        generator = np.random.RandomState(1)
        
        #Because the output_vector and row_vector sizes are equal we just pick one to find size
        self.weights = generator.normal(loc=0.0, scale=.01, size = len(row_vectors[0])+1)
        for iter in range(self.iters):
            error = 0 #initializes error counter to be zero of iter
            
            for row_vector, output_vector in zip(row_vectors, output_vectors):
               
                #create a prediction using the weights and given row_vector
                prediction = self.predict(row_vector)
                error = error + np.where(output_vector == prediction,0,1) 
                for j in range(len(row_vectors[0])):
                    self.weights[j] = self.weights[j] + self.eta * (output_vector - prediction) * row_vector[j]
            print(f"error: {error}, weights {self.weights}")
            
    def predict(self, row_vector):
        '''
        Takes a row_vector and uses dot product across weights, if output is positive
        scales the prediction to be 1, else zeros
        :param: row_vector - vector, that contains attribute data about single sample
        :returns: a prediction as a 1 or -1
        '''
        input = np.dot(row_vector, self.weights[1:] + self.weights[0])
        prediction = np.where(input>=0, 1, -1)
        return prediction
        

In [4]:
html = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
attributes = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
df = construct_pandas_frame(html, attributes)


In [5]:
x = df.iloc[0:100,[0,2]].values #row vector
y = df.iloc[0:100,4].values #output vectors
y = np.where(y=='Iris-setosa',1,-1)
verify_count = collections.Counter(y) # This is just a quick check we have 50 setosa, 50 not setosa
verify_count
len(x[0])
#model = perceptron(eta = 0.1, iter =10)
#model.learn(x,y)

2

In [6]:
model = Perceptron(eta= .1, iters = 10)
model.learn(x,y)

error: 1, weights [-1.38375655 -0.94611756 -0.00528172]
error: 3, weights [-0.78375655 -1.32611756 -0.00528172]
error: 3, weights [-0.18375655 -1.70611756 -0.00528172]
error: 3, weights [ 0.41624345 -2.08611756 -0.00528172]
error: 3, weights [ 0.89624345 -2.52611756 -0.00528172]
error: 2, weights [ 0.51624345 -3.18611756 -0.00528172]
error: 3, weights [ 1.11624345 -3.56611756 -0.00528172]
error: 3, weights [ 1.71624345 -3.94611756 -0.00528172]
error: 3, weights [ 2.31624345 -4.32611756 -0.00528172]
error: 4, weights [  1.81624345e+00  -5.64611756e+00  -5.28171752e-03]
