In [2]:
import numpy as np
np.set_printoptions(threshold=10000,suppress=True)
import pandas as pd
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split

In [23]:
class Adaline(object):
    def __init__(self, n_iterations=100, learning_rate=0.01, random_state=1):
        self.n_iterations=n_iterations
        self.learning_rate=learning_rate
        self.random_state=random_state
        self.coef_ = None

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.coef_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        for _ in range(self.n_iterations):
            activation_function_output = self.activation_function(self.net_input(X))
            errors = y - activation_function_output
            self.coef_[1:] = self.coef_[1:] + self.learning_rate*X.T.dot(errors)
            self.coef_[0] = self.coef_[0] + self.learning_rate*errors.sum()

    def net_input(self, X):
        weighted_sum = np.dot(X, self.coef_[1:]) + self.coef_[0]
        return weighted_sum

    def activation_function(self, X):
        return X

    def predict(self, X):
        return np.where(self.activation_function(self.net_input(X)) >= 0.0, 1, 0)

    def reset(self):
        rgen = np.random.RandomState(self.random_state)
        self.coef_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])

    def score(self, X, y):
        if self.coef_ is None:
            self.reset()
        missclassified_data_count = 0
        for xi, target in zip(X, y):
            output = self.predict(xi)
            if (target != output):
                missclassified_data_count+=1
        total_data_count=len(X)
        self.score_ = (total_data_count - missclassified_data_count)/total_data_count
        return self.score_

In [32]:
breast = pd.read_csv('./breast-cancer-wisconsin.txt', header=None, delim_whitespace=True)
X = breast.iloc[:,:-1].values
Y = breast.iloc[:,-1].values
Y = np.where(Y == 2, 0, 1)
model = Adaline(n_iterations=10, learning_rate=0.1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

model2 = Adaline(n_iterations=100)
print(model2.score(x_test, y_test))

0.39285714285714285
0.34285714285714286
