In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression

In [2]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer

train = fetch_20newsgroups(subset='train', categories=['alt.atheism', 'talk.religion.misc'])
vectorizer = CountVectorizer(stop_words="english", min_df=5)
vectors = np.asarray(vectorizer.fit_transform(train.data).todense())

In [288]:
def softmax(z):
    return np.exp(z) / np.exp(z).sum(1)[:, np.newaxis]

def crossentropy(x, y):
    ce = np.log((x * y).sum(1))
    return -ce.sum()

class SoftmaxRegression(object):
    def __init__(self, lr=0.1, epochs=10):
        self.lr = lr
        self.epochs = epochs

    def fit(self, X, y):
        self.w = np.random.uniform(low=-1., high=1., size=(X.shape[1], y.shape[1]))
        self.b = np.random.uniform(low=-1., high=1., size=(y.shape[1]))

        for _ in range(self.epochs):
            # print(self.w)
            z = X.dot(self.w) + self.b
            z = softmax(z)
            loss = crossentropy(z, y)

            # Calculate gradients
            dW = X.T.dot(z - y) / X.shape[0]
            db = z.sum(0) / X.shape[0]

            self.w = self.w - self.lr * dW
            self.b = self.b - self.lr * db
            print(loss/X.shape[0])
            print(np.argmax(z, 1))

    def predict_proba(self, X):
        z = X.dot(self.w) + self.b
        z = softmax(z)
        return z

In [289]:
x = np.random.uniform(low=-1., high=1., size=(5, 6))
y = np.array([[0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0]])

In [290]:
lr = SoftmaxRegression(epochs=200)
lr.fit(x, y)

2.429505242892496
[5 5 3 5 2]
2.388285287814952
[5 5 3 5 2]
2.3481697336451846
[5 5 3 5 2]
2.3091130084688345
[5 5 3 5 2]
2.2710716801991024
[5 5 3 5 2]
2.2340044319300416
[5 5 3 5 2]
2.1978720200720794
[5 5 3 5 2]
2.162637219102411
[5 5 3 5 2]
2.1282647562048944
[5 5 3 5 2]
2.094721238558601
[5 5 3 5 2]
2.061975075570371
[5 5 3 5 2]
2.029996397937942
[5 5 3 5 2]
1.9987569750761136
[5 5 3 5 2]
1.9682301321358815
[5 5 3 5 2]
1.9383906675911184
[5 5 1 5 2]
1.909214772154104
[5 5 1 5 2]
1.880679949604609
[5 5 1 5 2]
1.852764939972251
[5 5 1 5 2]
1.8254496453936362
[5 5 1 5 2]
1.7987150588701304
[5 5 1 5 2]
1.772543196075135
[5 5 1 5 2]
1.7469170302982548
[5 5 1 5 2]
1.7218204305649514
[5 5 1 5 2]
1.6972381029317911
[5 5 1 5 2]
1.6731555349273386
[5 5 1 5 2]
1.6495589430854405
[4 5 1 5 2]
1.6264352234998132
[4 5 1 5 2]
1.6037719053153432
[4 5 1 5 2]
1.581557107061569
[4 5 1 5 2]
1.55977949572658
[4 5 1 5 2]
1.538428248464616
[4 5 1 5 2]
1.5174930168274214
[4 5 1 5 2]
1.4969638934076162
[4 

In [292]:
np.argmax(lr.predict_proba(x), 1)

array([1, 2, 1, 0, 4])