In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression

In [2]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer

train = fetch_20newsgroups(subset='train', categories=['alt.atheism', 'talk.religion.misc'])
vectorizer = CountVectorizer(stop_words="english", min_df=5)
vectors = np.asarray(vectorizer.fit_transform(train.data).todense())

In [285]:
def softmax(z):
    return np.exp(z) / np.exp(z).sum(1)[:, np.newaxis]

def crossentropy(x, y):
    ce = np.log((x * y).sum(1))
    return -ce.sum()

class SoftmaxRegression(object):
    def __init__(self, lr=0.1, epochs=10):
        self.lr = lr
        self.epochs = epochs

    def fit(self, X, y):
        self.w = np.random.uniform(low=-1., high=1., size=(X.shape[1], y.shape[1]))
        self.b = np.random.uniform(low=-1., high=1., size=(y.shape[1]))

        for _ in range(self.epochs):
            # print(self.w)
            z = X.dot(self.w) + self.b
            z = softmax(z)
            loss = crossentropy(z, y)

            # Calculate gradients
            dW = X.T.dot(z - y) / X.shape[0]
            db = z.sum(0) / X.shape[0]

            self.w = self.w - self.lr * dW
            self.b = self.b - self.lr * db
            print(loss/X.shape[0])
            print(np.argmax(z, 1))

    def predict(self, X):
        pass

In [286]:
x = np.random.uniform(low=-1., high=1., size=(5, 6))
y = np.array([[0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0]])

In [287]:
lr = SoftmaxRegression(epochs=200)
lr.fit(x, y)

1.9699347503231415
[1 5 5 5 5]
1.9334417623534967
[1 5 5 5 5]
1.8979835304269848
[1 5 5 5 5]
1.863523493915006
[1 5 5 5 5]
1.8300263181252838
[1 5 5 5 5]
1.797457909445131
[1 5 5 5 5]
1.7657854225738845
[1 5 5 5 5]
1.7349772604175342
[1 5 5 5 5]
1.7050030672188292
[1 5 5 5 5]
1.675833715491743
[1 5 5 5 5]
1.647441287323597
[1 5 1 5 5]
1.6197990506033737
[1 2 1 5 5]
1.5928814307315293
[1 2 1 5 5]
1.5666639783644807
[1 2 1 5 5]
1.5411233337449315
[1 2 1 5 5]
1.5162371881658228
[1 2 1 5 5]
1.4919842431095987
[1 2 1 5 2]
1.4683441675943336
[1 2 1 5 2]
1.4452975542432083
[1 2 1 5 2]
1.4228258745732816
[1 2 1 5 2]
1.4009114339733455
[1 2 1 5 2]
1.3795373268091655
[1 2 1 5 2]
1.358687392058119
[1 2 1 5 2]
1.3383461698349763
[1 2 1 5 2]
1.3184988591272926
[1 2 1 5 2]
1.2991312770136734
[1 2 1 5 2]
1.2802298195920756
[1 2 1 5 2]
1.2617814247994363
[1 2 1 5 2]
1.2437735372591834
[1 2 1 5 2]
1.2261940752504943
[1 2 1 5 2]
1.2090313998532376
[1 2 1 5 2]
1.192274286285953
[1 2 1 5 2]
1.175911897421

In [51]:
np.array([[1, 2, 3], [4, 5, 6]]).sum(0) #- np.array([[1, 0, 0]])

array([5, 7, 9])

In [146]:
np.array([1, 2, 3]).dot(np.array([1, 2, 3]))

14

In [201]:
softmax(x)

array([[0.07218434, 0.2013218 , 0.23452418, 0.07656565, 0.07224773,
        0.3431563 ],
       [0.0835608 , 0.12328371, 0.07127313, 0.17164807, 0.06835536,
        0.48187892],
       [0.07647032, 0.20040759, 0.1249502 , 0.1257835 , 0.2753365 ,
        0.19705189],
       [0.24958671, 0.09553723, 0.21197873, 0.24724828, 0.06897457,
        0.12667447],
       [0.33066292, 0.14937373, 0.11404556, 0.09154738, 0.15063097,
        0.16373943]])