# Binary classification using perceptron

1. Load data
2. Explore data
3. Prepare data
4. Split data
5. Feature scaling
6. Choose model and (hyper) parameters
7. Train model
8. Evaluate model
9. Make predictions (unlabeled data)


In [None]:
import os, sys, pathlib
# Make the working directory, its parent, and the shared utilities folder
# (a sibling of the working directory) importable.
UTILS_FOLDER = 'S00 - Utils'
curPath = os.getcwd()
parPath = pathlib.Path(curPath).parent
utilPath = str(parPath / UTILS_FOLDER)
sys.path.extend([curPath, str(parPath), utilPath])

## Load libraries


In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from utils import plot_ds

## Load data


In [None]:
# Load data from SKLearn
# load_iris() is called with its default arguments; the returned object is
# inspected in the following cells (dir, DESCR, feature_names, target_names).
dataObj = datasets.load_iris()

In [None]:
# List the attribute names available on the loaded dataset object.
dir(dataObj)

In [None]:
# Print the description text that ships with the dataset object.
print(dataObj.DESCR)

In [None]:
# Show the feature (column) names and the class names of the dataset.
print(dataObj.feature_names)
print(dataObj.target_names)

In [None]:
# Tabular view of the dataset: one column per feature, with the class label
# appended as the last column, "target".
df = pd.DataFrame(
    dataObj.data,
    columns=dataObj.feature_names,
).assign(target=dataObj.target)
df.head()

## Explore data


In [None]:
# Histogram of every column of df (the feature columns plus "target").
df.hist(figsize=(10, 10))
# NOTE(review): display is not imported in this file — presumably the IPython
# built-in. Called with no arguments, apparently so the cell does not echo the
# return value of hist(); confirm.
display()

In [None]:
# Box plot of the first four columns only (the features; "target" was appended
# last and is excluded by the 0:4 slice).
df.iloc[:, 0:4].boxplot(figsize=(10, 8))
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing the plot's return value.
display()

## Prepare data

- Choose only petal length and petal width as features
- Eliminate one class of flower


In [None]:
# Choose subsets of columns
# Keep columns at positions 2, 3 and 4: the two petal features selected in
# this section plus the "target" column appended when df was built.
df2 = df.iloc[:, [2, 3, 4]]

In [None]:
# Preview the first rows of the reduced frame.
df2.head()

In [None]:
# Count how many rows belong to each class label.
df2["target"].value_counts()

In [None]:
# Filtering: keep only the rows whose label is 0 or 1, so the problem becomes
# binary. The copy makes df3 independent of df2.
filt = df2["target"].isin([0, 1])
df3 = df2.loc[filt].copy()
df3["target"].value_counts()

In [None]:
# Histogram of each column of the two-class subset.
df3.hist(figsize=(10, 8))
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing hist()'s return value.
display()

In [None]:
# Scatter plot of the two selected features against each other.
df3.plot.scatter(x="petal length (cm)", y="petal width (cm)")
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing the Axes object.
display()

In [None]:
# Feature matrix: the first two columns of df3; label vector: its third column.
X = df3.iloc[:, 0:2].to_numpy()
y = df3.iloc[:, 2].to_numpy()

## Split data

- Train and test data
- Stratified splitting


In [None]:
# Hold out 30% of the rows for testing. random_state fixes the shuffle and
# stratify=y is passed so the split is stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1, stratify=y
)

## Standardize data


In [None]:
# Scaler with default settings; it is fitted on the training split below.
sc = StandardScaler()

In [None]:
# List the attribute names available on the (not yet fitted) scaler.
dir(sc)

In [None]:
# Fit the scaler on the training split only; X_test is not used here.
sc.fit(X_train)

In [None]:
# Mean and Variance ([STD]^2)
# Printed from the fitted scaler's mean_ and var_ attributes.
print(sc.mean_)
print(sc.var_)

In [None]:
# Apply the scaler fitted on X_train to both splits, so train and test share
# the same transformation.
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [None]:
# Raw and standardized training features side by side (column-wise join).
np.concatenate((X_train, X_train_std), axis=1)

In [None]:
# Same side-by-side array as a labelled DataFrame: raw features first, then
# their standardized counterparts.
cols = ["X_1", "X_2", "X_std_1", "X_std_2"]
temp = pd.DataFrame(np.hstack((X_train, X_train_std)), columns=cols)
temp.head()

In [None]:
# Box plot of the raw and standardized training columns held in temp.
temp.boxplot()
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing the Axes object.
display()

## Choose model and (hyper) parameters


In [None]:
# Values for the Perceptron keyword arguments of the same names, which are
# passed to the constructor in the next cell.
eta0, random_state, n_iter_no_change = 0.01, 1, 10

In [None]:
# Build the (still unfitted) perceptron from the hyperparameters chosen above.
# verbose=1 is passed so that training reports its progress.
ppn = Perceptron(
    eta0=eta0,
    random_state=random_state,
    verbose=1,
    n_iter_no_change=n_iter_no_change,
)

## Train model


In [None]:
# Training
ppn.fit(X_train_std, y_train)

In [None]:
# Model weights and bias
W = ppn.coef_
w0 = ppn.intercept_
print(f"Weights are {W}.")
print(f"Bias is {w0}.")

## Evaluate model


In [None]:
# Prediction from test data
y_pred = ppn.predict(X_test_std)

In [None]:
# Show the predicted labels for the test split.
y_pred

### Manual


In [None]:
# Net input computed by hand for every standardized test row: z = x · w + w0.
# W (the model's coef_) is flattened with reshape(-1) so the number of features
# is not hard-coded; w0 (the intercept) is broadcast across the rows.
Z = X_test_std.dot(W.reshape(-1)) + w0

print(Z)

In [None]:
# Threshold the net input: rows with a strictly positive value become 1,
# all others 0.
phi = Z
(phi > 0).astype(int)

In [None]:
# Misclassification from the test samples
sumMiss = (y_test != y_pred).sum()

In [None]:
# Accuracy score from the test samples
accuracyScore = accuracy_score(y_test, y_pred)

In [None]:
# Report both evaluation figures computed in the previous cells.
print(f"Misclassified examples: {sumMiss}")
print(f"Accuracy score: {accuracyScore}")

In [None]:
# plot_ds is imported from the project's utils module. It receives the
# standardized train/test features, their labels, and the fitted model.
# NOTE(review): presumably draws the samples together with the model's
# decision regions — confirm against utils.plot_ds.
plot_ds(X_train_std, X_test_std, y_train, y_test, ppn)

## Make predictions

- Unlabeled data


In [None]:
# Observed minimum and maximum of each of the two (unscaled) feature columns.
X1_min, X2_min = X[:, :2].min(axis=0)
X1_max, X2_max = X[:, :2].max(axis=0)

In [None]:
# Draw 50 synthetic, unlabeled samples uniformly within the observed range of
# each feature. A seeded generator is used so the samples (and the predictions
# made from them below) are reproducible across runs, in line with the fixed
# seeds used for the split and the model.
rng = np.random.default_rng(1)
X1_new = rng.uniform(low=X1_min, high=X1_max, size=(50, 1))
X2_new = rng.uniform(low=X2_min, high=X2_max, size=(50, 1))
# Join the two (50, 1) columns into one (50, 2) feature matrix.
X_new = np.concatenate((X1_new, X2_new), axis=1)
temp = pd.DataFrame(X_new, columns=["X1", "X2"])
temp.head()

In [None]:
# Box plot of the two generated feature columns.
temp.boxplot()
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing the Axes object.
display()

In [None]:
# Scatter plot of the generated samples, one feature per axis.
temp.plot.scatter(x="X1", y="X2")
# NOTE(review): display is not imported here — presumably the IPython built-in,
# used with no arguments to keep the cell from echoing the Axes object.
display()

In [None]:
# Reuse the scaler fitted on the training split; it is not re-fitted on the
# new data.
X_new_std = sc.transform(X_new)

In [None]:
# Predict labels for the standardized new samples and print them.
y_new = ppn.predict(X_new_std)
print(y_new)

In [None]:
# Plot using the training data only: both test arguments are passed as None.
# NOTE(review): X_new_std and y_new computed above are not passed to plot_ds —
# confirm whether the new predictions were meant to be shown here.
plot_ds(X_train_std, None, y_train, None, ppn)