In [4]:
%run ..\convention.ipynb



# Task 

Implement Batch Gradient Descent with early stopping for Softmax Regression
(without using Scikit-Learn).

# Solution

In [33]:
from sklearn.base import BaseEstimator, TransformerMixin
def softmax(arr):
    res = np.exp(arr - arr.max(axis = 1, keepdims = True))
    return res / res.sum(axis = 1, keepdims = True)

class SoftmaxRegression(BaseEstimator, TransformerMixin):
    def __init__(self, alpha = 0, n_iter = 200, warn_start = False, RandomState = 0, learning_rate = .001):
        self.alpha = alpha
        self.warn_start = warn_start
        self.RandomState = RandomState
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.coefs_ = None
        self.intercept_ = None
    def fit(self, X, y):
        M, N = X.shape
        C = len(np.unique(y))
        y = pd.get_dummies(y).values
        
        if self.warn_start == False or self.coefs_ is None:
            r = np.random.RandomState(self.RandomState)
            self.coefs_ = np.random.rand(C, N)
            self.intercept_ = np.random.rand(C)
        
        W, b = self.coefs_, self.intercept_
        for _ in range(self.n_iter):
            z = X @ W.T + b
            a = softmax(z)
            error = a - y
            dW = np.array([np.sum(X * error[:, [i]], axis = 0) for i in range(C)])
            b -= self.learning_rate * (self.alpha * b + error.sum(0))
            W -= self.learning_rate * (self.alpha * W + dW)
        
        self.coefs_ = W
        self.intercept_ = b
        self.cost = -np.sum(y * np.log(a + 1e-10)) #cross-entropy
        return self
    
    def predict(self, X):
        return softmax(X @ self.coefs_.T + self.intercept_).argmax(axis = 1)
        
        
        
        
    
        
        
            
        
        

In [16]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state = 42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [61]:
from sklearn.metrics import accuracy_score
clf = SoftmaxRegression(warn_start = True, n_iter = 1, learning_rate = 0.1)
prev_test_score = 0
while True:
    best_coefs_ = clf.coefs_
    best_intercept_ = clf.intercept_
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    cur_test_score = accuracy_score(y_pred, y_test)
    print(clf.cost, '|', cur_test_score)    
    if cur_test_score < prev_test_score: break
    prev_test_score = cur_test_score

clf.coefs_ = best_coefs_
clf.intercept_ = best_intercept_

116.01424929841798 | 0.7894736842105263
144.78743791474085 | 0.8947368421052632
38.03764969782503 | 0.9736842105263158
20.296143984362466 | 1.0
13.259509375107395 | 1.0
11.769200722462479 | 1.0
11.638915411681355 | 0.9736842105263158


In [63]:
from sklearn.metrics import accuracy_score
y_pred = clf.predict(X_test)
print('Test score: %.2f' % accuracy_score(y_pred, y_test))
print('Train score: %.2f' % accuracy_score(clf.predict(X_train), y_train))

Test score: 0.97
Train score: 0.96


# Scikit learning implementation

In [64]:
from sklearn.linear_model import LogisticRegression
#by default, LogisticRegression is implemented in skclearn by One-vs-All method
#to use Softmax regression, set the keyword multi_class = 'multinomial'

clf = LogisticRegression(multi_class='multinomial', solver = 'lbfgs', C = 10)
clf.fit(X_train, y_train)


LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [65]:
'Train score: {}'.format(clf.score(X_train, clf.predict(X_train)))

'Train score: 1.0'

In [66]:
'Test score: {}'.format(clf.score(X_test, clf.predict(X_test)))

'Test score: 1.0'