In [65]:
%matplotlib inline

In [83]:
import pandas as pd
from numba import jit
import seaborn as sns
from collections import defaultdict
import numpy as np

In [149]:
iris_train = pd.read_csv("../data/iris/iris_train.data", names = ["sepal_length","sepal_width","petal_length","petal_width","label"])
iris_test = pd.read_csv("../data/iris/iris_test.data", names = ["sepal_length","sepal_width","petal_length","petal_width","label"])

In [150]:
class Transformation(object):
    def __init__(self, df_train):
        self.cols_stats = defaultdict(lambda: {})
        cols = list(df_train.columns)
        cols.remove("label")
        self.cols = cols
        for col in cols:
            self.cols_stats[col]['mean'] = df_train[col].mean()
            self.cols_stats[col]['std'] = df_train[col].std(ddof=0)
        
    def transform(self, df_orig):
        df = df_orig.copy(deep = True)
        for col in self.cols:
            df[col] = (df[col] - self.cols_stats[col]['mean']) / self.cols_stats[col]['std']
        return df        

In [151]:
trans = Transformation(iris_train)
iris_train_z = trans.transform(iris_train)
iris_test_z = trans.transform(iris_test)

In [None]:
sns.pairplot(iris_test_z, hue="label")

In [238]:
class Perceptron(object):
    def __init__(self, df_train, df_test, label = "label", max_iterations = 5, eta = .01):
        self.df_train = df_train.copy(deep = True)
        self.df_test = df_test.copy(deep = True)
        self.target =  np.array(self.df_train.label == "Iris-setosa")
        self.max_iterations = max_iterations
        self.eta = eta
        self.predictors = list(self.df_train.columns)
        self.predictors.remove(label)
        np.random.seed(2)
        self.weights = np.random.rand(len(self.predictors) + 1) * 2 - 1
        self.df_train.drop('label', axis=1, inplace=True)
        self.df_test['label'] = self.df_test['label'] == "Iris-setosa"
        
    def response(self, x):
        y = self.weights.dot(x)
        return 1 if y >= 0 else 0

    def train(self):
        stop = False
        iterations = 0
        while not stop:
            for index, row in self.df_train.iterrows():
                input = np.append(np.array(row), 1)
                self.weights += self.eta * (self.target[index] - self.response(input)) * input
            print "after", self.weights, self.test_error()
            iterations += 1
            if iterations > self.max_iterations:
                stop = True
            
    def test_error(self):
        predictions = []
        for index, row in self.df_test.iterrows():
            input = np.append(np.array(row[:4]), 1)
            predictions.append(self.response(input))
        error = sum(predictions != self.df_test.label)
        return error * 1.0 / len(self.df_test)
        

In [239]:
clf = Perceptron(iris_train_z, iris_test_z)

In [240]:
clf.weights

array([-0.1280102 , -0.94814754,  0.09932496, -0.12935521, -0.1592644 ])

In [241]:
clf.train()

after [-0.2364591  -0.5375032  -0.18986885 -0.39531671 -0.0692644 ] 0.266666666667
after [-0.22169132 -0.31147836 -0.29179982 -0.48368952 -0.0592644 ] 0.0666666666667
after [-0.20154554 -0.24786972 -0.30825228 -0.49772404 -0.0692644 ] 0.0666666666667
after [-0.18139976 -0.18426108 -0.32470474 -0.51175855 -0.0792644 ] 0.0333333333333
after [-0.16387792 -0.14678938 -0.33160637 -0.51899897 -0.0992644 ] 0.0
after [-0.15587599 -0.12397419 -0.33574148 -0.52261919 -0.1092644 ] 0.0
