In [1]:
import warnings
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

In [2]:
iris = load_iris()

print(iris.keys())
print(iris.feature_names)
print(iris.target_names)
print(iris.data.shape)

#print(iris.DESCR)


dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']
(150, 4)


In [3]:
X = iris["data"][:, (2, 3)]  # petal length, petal width
y = iris["target"]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [5]:
%%time

warnings.filterwarnings('ignore')


param_grid = {'alpha': [0.05, 0.01, 0.001],
              'penalty' : ["l2"],
              'learning_rate': ["constant", "optimal", "invscaling", "adaptive"], 
              'max_iter':[500, 1000, 3000, 7000],
              'eta0': [0.1, 0.01, 0.001],
              'tol': [1e-3, 1e-5, 1e-8],
              'loss': ['hinge', 'log', 'modified_huber']}

  
sgd_clf = SGDClassifier()

sgd_clf_cv = GridSearchCV(sgd_clf, param_grid, scoring='accuracy', cv=3, verbose=1, n_jobs=-1)
sgd_clf_cv.fit(X_train, y_train)

params_optimal = sgd_clf_cv.best_params_

print("Best Score (accuracy): %f" % sgd_clf_cv.best_score_)
print("Optimal Hyperparameter Values: ", params_optimal)
print("\n")

Fitting 3 folds for each of 1296 candidates, totalling 3888 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 3810 tasks      | elapsed:   20.1s


Best Score (accuracy): 0.975000
Optimal Hyperparameter Values:  {'alpha': 0.001, 'eta0': 0.01, 'learning_rate': 'optimal', 'loss': 'hinge', 'max_iter': 500, 'penalty': 'l2', 'tol': 0.001}


CPU times: user 2.32 s, sys: 109 ms, total: 2.43 s
Wall time: 20.7 s


[Parallel(n_jobs=-1)]: Done 3888 out of 3888 | elapsed:   20.6s finished


In [6]:
sgd = SGDClassifier(**params_optimal)
sgd.fit(X_train, y_train)

SGDClassifier(alpha=0.001, eta0=0.01, max_iter=500)

In [8]:
print("\nNo. of Iterations:", sgd.n_iter_ )

print("\nWeight Coefficients:\n", sgd.coef_ )

print("\nWeight Intercept:\n", sgd.intercept_ )


y_test_predict = sgd.predict(X_test)
#print(y_test_predict)

accuracy_score_test = np.mean(y_test_predict == y_test)
print("\nTest Accuracy: ", accuracy_score_test)

# Confusion Matrix
print("\nTest Confusion Matrix (Test Data):\n", confusion_matrix(y_test, y_test_predict))


No. of Iterations: 39

Weight Coefficients:
 [[-10.11586678  -5.45377165]
 [  2.2839089   -9.00732609]
 [ 10.0271207   10.60362867]]

Weight Intercept:
 [ 29.52254559  -6.01173558 -65.87214375]

Test Accuracy:  0.8

Test Confusion Matrix (Test Data):
 [[10  0  0]
 [ 0  3  6]
 [ 0  0 11]]


In [9]:
%%time

warnings.filterwarnings('ignore')

param_grid = {'alpha': [0.05, 0.01, 0.001],
              'penalty' : ["l2", "l1"],
              'learning_rate': ["constant", "optimal", "invscaling", "adaptive"], 
              'max_iter':[100, 500, 1000, 3000, 7000],
              'eta0': [0.01, 0.001],
              'tol': [1e-3, 1e-5, 1e-8],
              'loss': ['hinge', 'log', 'modified_huber']}


sgd_clf = SGDClassifier(early_stopping=True)

sgd_clf_cv = GridSearchCV(sgd_clf, param_grid, scoring='accuracy', cv=3, verbose=1, n_jobs=-1)
sgd_clf_cv.fit(X_train, y_train)

params_optimal = sgd_clf_cv.best_params_

print("Best Score (accuracy): %f" % sgd_clf_cv.best_score_)
print("Optimal Hyperparameter Values: ", params_optimal)
print("\n")

Fitting 3 folds for each of 2160 candidates, totalling 6480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 200 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 1400 tasks      | elapsed:   11.2s
[Parallel(n_jobs=-1)]: Done 3400 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 6200 tasks      | elapsed:   50.3s


Best Score (accuracy): 0.950000
Optimal Hyperparameter Values:  {'alpha': 0.05, 'eta0': 0.001, 'learning_rate': 'optimal', 'loss': 'hinge', 'max_iter': 3000, 'penalty': 'l2', 'tol': 0.001}


CPU times: user 5.31 s, sys: 181 ms, total: 5.5 s
Wall time: 55.3 s


[Parallel(n_jobs=-1)]: Done 6480 out of 6480 | elapsed:   55.2s finished


In [10]:

sgd_early_stopping = SGDClassifier(**params_optimal, early_stopping=True)


sgd_early_stopping.fit(X_train, y_train)

SGDClassifier(alpha=0.05, early_stopping=True, eta0=0.001, max_iter=3000)

In [11]:
print("\nNo. of Iterations:", sgd_early_stopping.n_iter_ )


print("\nWeight Coefficients:\n", sgd_early_stopping.coef_ )

print("\nWeight Intercept:\n", sgd_early_stopping.intercept_ )


y_test_predict = sgd_early_stopping.predict(X_test)
#print(y_test_predict)

accuracy_score_test = np.mean(y_test_predict == y_test)
print("\nTest Accuracy: ", accuracy_score_test)

# Confusion Matrix
print("\nTest Confusion Matrix (Test Data):\n", confusion_matrix(y_test, y_test_predict))


No. of Iterations: 6

Weight Coefficients:
 [[-1.26131563 -0.4722317 ]
 [ 0.44481179 -0.65503106]
 [ 0.67635766  1.13944939]]

Weight Intercept:
 [ 3.54596945 -1.7012629  -5.70425702]

Test Accuracy:  1.0

Test Confusion Matrix (Test Data):
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [12]:
print("\nWeight Coefficients (Without Early Stopping):\n", sgd.coef_ )
print("\nWeight Coefficients (Early Stopping):\n", sgd_early_stopping.coef_)

print("\n")
print("\nNo. of Iterations (Without Early Stopping):", sgd.n_iter_ )
print("\nNo. of Iterations (Early Stopping):", sgd_early_stopping.n_iter_ )


Weight Coefficients (Without Early Stopping):
 [[-10.11586678  -5.45377165]
 [  2.2839089   -9.00732609]
 [ 10.0271207   10.60362867]]

Weight Coefficients (Early Stopping):
 [[-1.26131563 -0.4722317 ]
 [ 0.44481179 -0.65503106]
 [ 0.67635766  1.13944939]]



No. of Iterations (Without Early Stopping): 39

No. of Iterations (Early Stopping): 6
