In [49]:
from argparse import Namespace
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets 
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [50]:
# Central experiment configuration. Later cells read every tunable value from
# `args` rather than hard-coding it.
args = Namespace(**{
    "seed": 1234,        # random_state for the train/test split and the classifier
    "train_size": 0.75,  # train_size handed to train_test_split
    "test_size": 0.25,   # test_size handed to train_test_split
    "num_iter": 100,     # max_iter handed to the SGD classifier
})

In [51]:
# Load the iris dataset (as_frame=True requests pandas containers) and report
# which feature columns it provides.
dataset = datasets.load_iris(as_frame=True)
print("Features:", dataset["feature_names"])

# Hold out part of the data for evaluation. Fixing random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    dataset["data"],
    dataset["target"],
    train_size=args.train_size,
    test_size=args.test_size,
    random_state=args.seed,
)

Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [52]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to the test split.
X_scaler = StandardScaler()
standardized_X_train = X_scaler.fit_transform(X_train)
standardized_X_test = X_scaler.transform(X_test)
# y_train / y_test are intentionally left unscaled: they are passed to the
# classifier as-is.

In [53]:
# Logistic-regression classifier trained with stochastic gradient descent and
# no regularization term.
#
# Parameter spellings follow current scikit-learn (requires >= 1.1):
# - loss="log_loss" replaces the old "log" name, which was deprecated in 1.1
#   and removed in 1.3.
# - penalty=None replaces the string "none", which current releases reject
#   during parameter validation.
log_reg = SGDClassifier(
    loss="log_loss",
    penalty=None,
    max_iter=args.num_iter,
    random_state=args.seed,
)
# Kept as the last expression so the notebook displays the fitted estimator.
log_reg.fit(X=standardized_X_train, y=y_train)

SGDClassifier(loss='log', max_iter=100, penalty='none', random_state=1234)

In [54]:
# Predict on both splits with the fitted classifier, then compare each set of
# predictions with its true labels.
pred_train, pred_test = (
    log_reg.predict(features)
    for features in (standardized_X_train, standardized_X_test)
)
train_acc, test_acc = (
    accuracy_score(y_train, pred_train),
    accuracy_score(y_test, pred_test),
)
print("train acc: {0:.2f}, test acc: {1:.2f}".format(train_acc, test_acc))

train acc: 0.96, test acc: 0.97


In [55]:
# Tabulate the test-set predictions against the true test labels.
cm = confusion_matrix(y_true=y_test, y_pred=pred_test)
# Bare expression so the notebook renders the matrix as the cell output.
cm

array([[13,  0,  0],
       [ 0, 15,  0],
       [ 0,  1,  9]])