In [1]:
# Install a conda package in the current Jupyter kernel
import sys

In [None]:
!conda install --yes --prefix {sys.prefix} numpy

In [2]:
import numpy as np

class softmax_layer:
    """Parameter-free softmax activation layer.

    The layer receives scores laid out as ``(samples, classes)`` and returns
    probabilities laid out as ``(classes, samples)`` -- the output is the
    transpose of the input layout.
    """

    def dim_in(self):
        """Return 0: this layer places no constraint on its input width."""
        return 0

    def dim_out(self):
        """Return 0: this layer does not change the width seen by later layers."""
        return 0

    def update_weigths(self, gradient, learning_rate):
        """Pass ``gradient`` through unchanged; there are no weights to update.

        ``learning_rate`` is accepted for interface parity with other layers
        and is ignored.
        """
        return gradient

    def __call__(self, x):
        """Apply a row-wise softmax to ``x``.

        Args:
            x: 2-D numpy array of scores, one row per sample.

        Returns:
            Array of shape ``(x.shape[1], x.shape[0])`` whose columns each
            sum to 1.
        """
        # Softmax is invariant to subtracting a constant from every score of a
        # sample, so shift each row by its maximum. The largest exponent is
        # then exp(0) == 1, which keeps np.exp from overflowing to inf (and
        # the division from producing nan) for large scores.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(shifted)
        # exp.T is (classes, samples); the per-sample sums broadcast over the
        # trailing (samples) axis.
        return exp.T / np.sum(exp, axis=1)

In [3]:
class linear_layer:
    """Affine layer that maps an input ``x`` to ``x.T.dot(A) + B``.

    ``A`` has shape ``(dim_in, dim_out)`` and ``B`` has shape ``(dim_out,)``;
    both are initialised with ``np.random.rand``.
    """

    def __init__(self, dim_in, dim_out):
        """Create the layer with randomly initialised weights and bias."""
        self._dim_in, self._dim_out = dim_in, dim_out
        # A is drawn before B; keep this order so that a seeded run produces
        # the same initial parameters.
        self._A = np.random.rand(dim_in, dim_out)
        self._B = np.random.rand(dim_out)

    def dim_in(self):
        """Return the input width this layer was built with."""
        return self._dim_in

    def dim_out(self):
        """Return the output width this layer was built with."""
        return self._dim_out

    def __call__(self, x):
        """Return ``x.T.dot(A) + B``.

        ``x`` is transposed before the product, so it is expected with the
        ``dim_in`` axis first.
        """
        projected = np.dot(x.T, self._A)
        return projected + self._B

    def update_weigths(self, gradient, learning_rate):
        """Take one gradient-descent step on the weight matrix.

        Only ``A`` is modified; the bias ``B`` is left untouched. The incoming
        ``gradient`` is returned unchanged so it can be handed to the next
        layer in the backward sweep.
        """
        step = learning_rate * gradient
        self._A = self._A - step
        return gradient

In [4]:
def cross_entropy_loss(y_true, y_predicted, epsilon=1e-10):
    """Return the cross-entropy between targets and predictions.

    Args:
        y_true: target array, same shape as ``y_predicted``.
        y_predicted: predicted probabilities; its second axis is used as the
            count the summed loss is divided by.
        epsilon: predictions are clipped to ``[epsilon, 1 - epsilon]`` so the
            logarithm never sees 0.

    Returns:
        The summed ``-y_true * log(prediction)`` divided by
        ``y_predicted.shape[1]``.
    """
    clipped = np.clip(y_predicted, epsilon, 1. - epsilon)
    n_columns = clipped.shape[1]
    log_likelihood = np.sum(y_true * np.log(clipped))
    return -log_likelihood / n_columns

In [5]:
class sequential_model:
    """A stack of layers applied one after another.

    Each layer must provide ``dim_in()``, ``dim_out()``,
    ``update_weigths(gradient, learning_rate)`` and ``__call__(x)``.
    A layer reporting a dimension of 0 is treated as dimension-agnostic and
    is skipped by the compatibility check.
    """

    def __init__(self, *layers, learning_rate=0.1):
        """Store the layers after checking that adjacent widths agree.

        Args:
            *layers: layer objects, in forward order.
            learning_rate: step size handed to every layer on update.

        Raises:
            ValueError: if a layer's non-zero ``dim_in()`` differs from the
                most recent non-zero ``dim_out()`` before it.
        """
        self._learning_rate = learning_rate
        self._layers = []
        last_dim_out = 0
        for layer in layers:
            dim_in = layer.dim_in()
            if last_dim_out != 0 and dim_in != 0 and last_dim_out != dim_in:
                # A bare `raise` here has no active exception to re-raise and
                # would surface as an unrelated RuntimeError; raise an
                # explicit error that carries the diagnostic instead.
                raise ValueError(
                    'dimension dont match layer out dim {} , next layer dim in {}'.format(last_dim_out, dim_in)
                )
            self._layers.append(layer)
            dim_out = layer.dim_out()
            # Dimension-agnostic layers (dim_out == 0) do not reset the width
            # that the next sized layer is checked against.
            if dim_out != 0:
                last_dim_out = dim_out

    def update_weigths(self, gradient):
        """Propagate ``gradient`` through the layers from last to first.

        Each layer receives the value returned by the layer after it.
        """
        for layer in reversed(self._layers):
            gradient = layer.update_weigths(gradient, self._learning_rate)

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        for layer in self._layers:
            x = layer(x)
        return x

In [6]:
# Two-layer classifier: a 2 -> 2 linear layer followed by a softmax.
model = sequential_model(linear_layer(2, 2), softmax_layer())

In [230]:
!conda install --yes --prefix {sys.prefix} scipy

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.





  current version: 4.9.1
  latest version: 4.9.2

Please update conda by running

    $ conda update -n base -c defaults conda




In [7]:
import scipy.io
# Load the MATLAB file; the path is relative to the notebook's working directory.
mat = scipy.io.loadmat('SwissRollData.mat')

In [8]:
# Pull the two arrays used below out of the loaded file.
# NOTE(review): 'Yt' is used as the model input and 'Ct' as the targets --
# confirm these are the intended (training) variables of the dataset.
X = mat['Yt']
Y = mat['Ct']
print(X.shape)

(2, 20000)


In [9]:
# Forward pass with the freshly initialised (untrained) model.
Y_predicted = model(X)
# Targets and predictions must have the same shape for the loss below.
print(Y.shape)
print(Y_predicted.shape)
# Transposed so that each printed row is one sample.
print(Y_predicted.T)

(2, 20000)
(2, 20000)
[[0.4050652  0.5949348 ]
 [0.40266708 0.59733292]
 [0.40659561 0.59340439]
 ...
 [0.43022843 0.56977157]
 [0.42512818 0.57487182]
 [0.4395     0.5605    ]]


In [10]:
# Loss of the untrained model, as a baseline.
print(cross_entropy_loss(Y, Y_predicted))

0.7123076369326017


In [11]:
def softmax_gradient(X, Y_true, Y_predicted):
    """Return the averaged gradient ``X . (Y_predicted - Y_true)^T / M``.

    Args:
        X: input array; its second axis length ``M`` is the averaging count.
        Y_true: target array, same shape as ``Y_predicted``.
        Y_predicted: model output, with ``M`` entries along its second axis.

    Returns:
        Array of shape ``(X.shape[0], Y_predicted.shape[0])``.
    """
    M = X.shape[1]
    # Compute the prediction error once and reuse it.
    error = Y_predicted - Y_true
    return 1/M * X.dot(error.T)

In [12]:
def accuracy(Y_true, Y_predicted):
    """Return the fraction of columns whose arg-max row matches.

    Both arrays are compared column by column: the index of the largest
    entry along axis 0 is taken as the label, and the number of matching
    labels is divided by ``Y_true.shape[1]``.
    """
    true_labels = np.argmax(Y_true, axis=0)
    predicted_labels = np.argmax(Y_predicted, axis=0)
    n_correct = np.sum(true_labels == predicted_labels)
    return n_correct / Y_true.shape[1]

In [13]:
# Gradient and accuracy of the untrained model, for reference before training.
print(softmax_gradient(X, Y, Y_predicted))
print(accuracy(Y, Y_predicted))

def SGD(model, X, Y, epoch):
    """Train ``model`` for ``epoch`` gradient steps on the whole of ``X``.

    Every iteration runs a forward pass over all of ``X``, computes the
    gradient from those predictions, and updates the model. Loss and accuracy
    are recorded from the predictions made *before* that iteration's update.

    Returns:
        ``(loss, accuracy)``: two lists with one entry per iteration.
    """
    loss_history, accuracy_history = [], []
    for _ in range(epoch):
        predictions = model(X)
        model.update_weigths(softmax_gradient(X, Y, predictions))
        accuracy_history.append(accuracy(Y, predictions))
        loss_history.append(cross_entropy_loss(Y, predictions))
    return loss_history, accuracy_history

[[ 0.00666356 -0.00666356]
 [-0.01158237  0.01158237]]
0.49465


In [14]:
# Train for 100 iterations; the returned (loss, accuracy) lists are shown as the cell output.
SGD(model, X, Y, 100)

([0.7123076369326017,
  0.7122721407473724,
  0.712237492960379,
  0.712203673312955,
  0.7121706620270722,
  0.7121384397941416,
  0.7121069877640637,
  0.7120762875345245,
  0.7120463211405312,
  0.7120170710441819,
  0.7119885201246648,
  0.7119606516684864,
  0.7119334493599155,
  0.7119068972716474,
  0.711880979855678,
  0.7118556819343861,
  0.7118309886918178,
  0.7118068856651703,
  0.711783358736471,
  0.7117603941244454,
  0.7117379783765726,
  0.7117160983613225,
  0.7116947412605726,
  0.7116738945621982,
  0.7116535460528337,
  0.7116336838108019,
  0.7116142961992061,
  0.7115953718591822,
  0.7115768997033067,
  0.7115588689091603,
  0.7115412689130374,
  0.711524089403804,
  0.7115073203168991,
  0.7114909518284754,
  0.7114749743496753,
  0.7114593785210442,
  0.7114441552070708,
  0.7114292954908569,
  0.711414790668913,
  0.711400632246074,
  0.711386811930536,
  0.7113733216290086,
  0.7113601534419814,
  0.711347299659103,
  0.7113347527546674,
  0.711322505383207

In [15]:
# Re-evaluate the model after training to compare against the initial predictions and loss.
Y_predicted = model(X)
print(Y_predicted.T)
print(cross_entropy_loss(Y, Y_predicted))

[[0.4057926  0.5942074 ]
 [0.40430412 0.59569588]
 [0.40658706 0.59341294]
 ...
 [0.4093695  0.5906305 ]
 [0.40665841 0.59334159]
 [0.41478353 0.58521647]]
0.7109540630450099
