# Softmax Regression

In [1]:
import numpy as np

class SoftmaxRegression:
    """Multinomial logistic (softmax) regression fitted by batch gradient descent.

    After ``fit`` the instance exposes:

    * ``p``    -- number of input features,
    * ``k``    -- number of classes (largest label + 1),
    * ``beta`` -- weight matrix of shape ``(p + 1, k)``; row 0 is the intercept.
    """

    def __init__(self, learning_rate=0.1, epochs=1000):
        """Store the step size and the number of full-batch gradient steps."""
        self.epochs = epochs
        self.learning_rate = learning_rate

    def fit(self, X, y):
        """Estimate ``beta`` from training data.

        Args:
            X: array-like of shape ``(N, p)`` with the training features.
            y: array-like of ``N`` non-negative, integer-valued class labels
               (``0 .. k-1``). Float arrays holding whole numbers are accepted.

        Raises:
            ValueError: if ``X`` is not 2-D, the data is empty, the number of
                labels differs from the number of rows, or the labels are not
                non-negative integers.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional, got shape {}".format(X.shape)
            )
        N, self.p = X.shape
        if N == 0:
            raise ValueError("cannot fit on an empty data set")

        # Flatten so that a column vector of labels does not broadcast into
        # an (N, N) fancy index further down.
        y = np.asarray(y).ravel()
        if y.shape[0] != N:
            raise ValueError(
                "X has {} samples but y has {} labels".format(N, y.shape[0])
            )
        if not np.issubdtype(y.dtype, np.integer):
            # Labels are used as column indices, which requires an integer
            # dtype; accept e.g. float arrays only if they hold whole numbers.
            y_int = y.astype(int)
            if not np.array_equal(y_int, y):
                raise ValueError("class labels must be integer-valued")
            y = y_int
        if y.min() < 0:
            # A negative index would silently wrap around to another class.
            raise ValueError("class labels must be non-negative")

        self.k = int(y.max()) + 1
        self.beta = np.random.rand(self.p + 1, self.k)
        X = self._pad_ones(X)
        rows = np.arange(N)

        for _ in range(self.epochs):
            # Cross-entropy gradient w.r.t. the logits: probs - one_hot(y).
            err = self._predict_probs(X)
            err[rows, y] -= 1.0
            # (p+1, N) @ (N, k) -> (p+1, k), averaged over the batch.
            grad = X.T.dot(err) / N
            self.beta -= self.learning_rate * grad

    def _predict_probs(self, X):
        """Return row-wise softmax of ``X @ beta`` for an already padded ``X``."""
        logits = np.dot(X, self.beta)
        # Softmax is invariant to a per-row shift; subtracting the row maximum
        # keeps every exponent <= 0 so np.exp cannot overflow.
        logits -= np.max(logits, axis=1, keepdims=True)
        exp_logits = np.exp(logits)
        return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``.

        Args:
            X: array-like of shape ``(M, p)`` with the same ``p`` used in ``fit``.

        Raises:
            ValueError: if ``X`` is not 2-D or its column count differs from ``p``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.p:
            raise ValueError(
                "X must have shape (n_samples, {}), got {}".format(
                    self.p, X.shape
                )
            )
        X = self._pad_ones(X)
        return np.argmax(self._predict_probs(X), axis=1)

    def _pad_ones(self, X):
        """Prepend a column of ones to ``X`` so ``beta[0]`` acts as the intercept."""
        N = X.shape[0]
        return np.concatenate([np.ones([N, 1]), X], axis=1)

In [2]:
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

In [3]:
# Fetch the iris data set as a (features, labels) pair.
iris_data = load_iris(return_X_y=True)
y = iris_data[1]
# Rescale the feature matrix with sklearn's `normalize` (default settings).
X = normalize(iris_data[0])
# Keep 20% of the samples aside as the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [4]:
# Fit the classifier on the training split.
clf = SoftmaxRegression(learning_rate=0.1, epochs=10000)
clf.fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("accuracy: {:f}".format(accuracy))

accuracy: 0.966667
