In [73]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
class Perceptron:
    def __init__(self, eta=0.01, n_iter=10): # eta : learning rate
        self.eta    = eta
        self.n_iter = n_iter
    
    def fit(self, X, y): # X: data matrix, y: target values
        n_rows, n_factors = shape(X)
        self.weights = np.zeros(n_factors + 1)
        self.errors = [] # keep track of error count per iter
        
        for i in n_iter:
            error_count = 0 
            for row, target_value in zip(X, y): # zip matches each row with the target_value
                update = self.eta * (target - self.predict(row)) # when the prediction and target match, the elem in update is 0; otherwise update causes the weights to move in the opposite direction of the prediction                
                self.weight += update * np.insert(row, 0, 1) # add 1 to the front of row, so that the constant also gets updated # syntax: np.insert(array, index, same_type_values_to_be_inserted) => vector (matrix will be flattened)
                error_count += int(update != 0), # increment error count if prediction is wrong
            self.errors.append(error_count)
        return self
    
    # return 1 or -1
    def predict(self, row):
        return np.where(self.net_input(row) >= 0, 1, -1) # syntax: np.where(cond, value_when_true, value_when_false) => array; if net > 0, assign positive class
    
    # returns a float value
    def net_input(self, row):
        return np.dot(row, self.weight[1:]) + self.weight[0] # dot product plus constant
        

In [97]:
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
df.columns = ['sepal_length','sepal_width', 'petal_length','petal_width', 'class'] # or set names = ['a','b','c','d','e'] when creating df
df.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [105]:
y = np.where(df.iloc[0:100, 4] == 'Iris-setosa', 1, -1) # syntax: iloc for accessing by index, loc by accessing by values - which can be set by pd.set_index, pd.set_headers
X = df.iloc[0:100, 0:2].values # values return array instead of dataframe
plt.scatter()

array([[5.1, 3.5],
       [4.9, 3. ],
       [4.7, 3.2],
       [4.6, 3.1],
       [5. , 3.6],
       [5.4, 3.9],
       [4.6, 3.4],
       [5. , 3.4],
       [4.4, 2.9],
       [4.9, 3.1],
       [5.4, 3.7],
       [4.8, 3.4],
       [4.8, 3. ],
       [4.3, 3. ],
       [5.8, 4. ],
       [5.7, 4.4],
       [5.4, 3.9],
       [5.1, 3.5],
       [5.7, 3.8],
       [5.1, 3.8],
       [5.4, 3.4],
       [5.1, 3.7],
       [4.6, 3.6],
       [5.1, 3.3],
       [4.8, 3.4],
       [5. , 3. ],
       [5. , 3.4],
       [5.2, 3.5],
       [5.2, 3.4],
       [4.7, 3.2],
       [4.8, 3.1],
       [5.4, 3.4],
       [5.2, 4.1],
       [5.5, 4.2],
       [4.9, 3.1],
       [5. , 3.2],
       [5.5, 3.5],
       [4.9, 3.1],
       [4.4, 3. ],
       [5.1, 3.4],
       [5. , 3.5],
       [4.5, 2.3],
       [4.4, 3.2],
       [5. , 3.5],
       [5.1, 3.8],
       [4.8, 3. ],
       [5.1, 3.8],
       [4.6, 3.2],
       [5.3, 3.7],
       [5. , 3.3],
       [7. , 3.2],
       [6.4, 3.2],
       [6.9,