In [13]:
import warnings
import numpy as np
import matplotlib.pyplot as plt



from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report, f1_score, precision_score, recall_score

In [2]:
iris = load_iris()

print("Key Values:\n", list(iris.keys()))
print("\nFeatue names: \n", list(iris.feature_names))
print("\nTarget names:\n", list(iris.target_names))

Key Values:
 ['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename']

Featue names: 
 ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

Target names:
 ['setosa', 'versicolor', 'virginica']


In [3]:
X = iris["data"][:,(2,3)]
y = (iris["target"]==2).astype(int)

print(X.shape)
print(y.shape)

(150, 2)
(150,)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
scaler = StandardScaler()

scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
%%time
warnings.filterwarnings('ignore')

param_grid = {'alpha' : [0.05, 0.01, 0.001],
              'penalty' : ["l2", "l1"],
              'learning_rate' : ["constant", "optimal", "invscaling", "adaptive"],
              'eta0' : [0.1, 0.01, 0.001],
              'max_iter' : [500, 1000, 3000],
              'tol' : [1e-3, 1e-5, 1e-8]}

sgd_clf = SGDClassifier(loss='log')
sgd_clf_cv = GridSearchCV(sgd_clf, param_grid, scoring = 'f1_micro', cv=3, verbose=1, n_jobs=-1)
sgd_clf_cv.fit(X_train, y_train)

params_optimal = sgd_clf_cv.best_params_

print("\nBest Score (F1 micro): ", sgd_clf_cv.best_score_)
print("\nOptimal Hyperparameter Values: ", params_optimal)


Fitting 3 folds for each of 648 candidates, totalling 1944 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 560 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 1944 out of 1944 | elapsed:    4.8s finished



Best Score (F1 micro):  0.9666666666666667

Optimal Hyperparameter Values:  {'alpha': 0.05, 'eta0': 0.1, 'learning_rate': 'invscaling', 'max_iter': 3000, 'penalty': 'l2', 'tol': 0.001}
CPU times: user 2.53 s, sys: 158 ms, total: 2.69 s
Wall time: 5.89 s


In [9]:
sgd = SGDClassifier(loss = 'log', **params_optimal)
sgd.fit(X_train, y_train)

SGDClassifier(alpha=0.05, eta0=0.1, learning_rate='invscaling', loss='log',
              max_iter=3000)

In [37]:
y_train_predicted = sgd.predict(X_train)
print("Train Accuracy: ", np.mean(y_train_predicted==y_train))
print("\nTrain Confusion Matrix: \n", confusion_matrix(y_train, y_train_predicted))

y_test_predicted = sgd.predict(X_test)
print("\nTest Accuracy: {:5.3f}".format(np.mean(y_test == y_test_predicted)))
print("\nTest Confusion Matrix: \n", confusion_matrix(y_test, y_test_predicted))
print("\nTest Precision = %.2f"%precision_score(y_test, y_test_predicted))
print("Test Recall = ", recall_score(y_test, y_test_predicted))
print("Test F1 Score = %.2f"%f1_score(y_test, y_test_predicted))
print("\nClassification Report: \n", classification_report(y_test, y_test_predicted))

Train Accuracy:  0.975

Train Confusion Matrix: 
 [[78  3]
 [ 0 39]]

Test Accuracy: 0.967

Test Confusion Matrix: 
 [[18  1]
 [ 0 11]]

Test Precision = 0.92
Test Recall =  1.0
Test F1 Score = 0.96

Classification Report: 
               precision    recall  f1-score   support

           0       1.00      0.95      0.97        19
           1       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

