In [None]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt

# scikit-learn imports
from sklearn import datasets                          # to import iris dataset
from sklearn.model_selection import train_test_split  # to split data into training/test sets
from sklearn.preprocessing import StandardScaler      # for feature scaling
from sklearn.linear_model import Perceptron           # to train perceptron model
from sklearn.metrics import accuracy_score            # to calculate accuracy metrics

# import plot_decision_regions function
from helper import plot_decision_regions

In [None]:
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# X keeps only feature columns 2 and 3 (petal length and petal width, per the
# axis labels used when plotting); y holds the integer class labels.
X, y = iris.data[:, [2, 3]], iris.target

# Show which distinct class labels occur in y.
print('Class labels:', np.unique(y))

In [None]:
# Hold out 30% of the samples as a test set.
#   stratify=y     -> split so the class proportions of y are preserved
#   random_state=1 -> fixed seed so the shuffle is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

In [None]:
# Sanity-check the stratified split: print the per-class sample counts for the
# full label vector and for each split so the proportions can be compared.
for split_name, split_labels in (('y', y), ('y_train', y_train), ('y_test', y_test)):
    print(f'Labels counts in {split_name}:', np.bincount(split_labels))

In [None]:
# Fit the scaler on the training features only, so the scaling parameters
# are estimated without looking at the test split.
sc = StandardScaler().fit(X_train)

# Apply that same fitted transformation to both splits.
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

In [None]:
# Configure the perceptron: up to 40 passes over the training data, a
# learning rate (eta0) of 0.1, and a fixed seed for reproducibility.
ppn = Perceptron(
    max_iter=40,
    eta0=0.1,
    random_state=1,
)

# Train on the standardized training features.
ppn.fit(X_train_std, y_train)

In [None]:
# Predict class labels for the standardized test features.
y_pred = ppn.predict(X_test_std)

# Report how many test samples were assigned the wrong label.
print(f'Misclassified samples: {np.count_nonzero(y_test != y_pred)}')

In [None]:
# Test-set accuracy, computed by comparing the true labels with the
# predictions already stored in y_pred (via sklearn.metrics.accuracy_score).
print(f'Accuracy: {accuracy_score(y_true=y_test, y_pred=y_pred):.2f}')

In [None]:
# Test-set accuracy again, this time via ppn.score, which takes the
# standardized test features and the true labels directly.
print(f'Accuracy: {ppn.score(X=X_test_std, y=y_test):.2f}')

In [None]:
# Stack the standardized splits with the training rows first, so the test
# samples occupy the trailing rows of the combined arrays.
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# Plot the decision regions of the trained perceptron over all samples.
# The test-row indices are derived from the split sizes rather than being
# hard-coded as range(105, 150), so they stay correct if the dataset or
# test_size changes. For the current split this is still range(105, 150).
# NOTE(review): test_idx is presumably used by the helper to highlight the
# test samples -- confirm against helper.plot_decision_regions.
plt.figure(figsize=(10, 10))
plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(len(y_train), len(y_combined)))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()