In [None]:
import numpy as np

# 1 Softmax
### $\sigma(z_i)=\frac{e^{z_i}}{\sum_{j=1}^K e^{z_j}}$ for $i=1,\dots,K$ and $z=(z_1,\dots,z_K)\in \mathbb{R}^K$

### Python Implementation

In [None]:
def softmax(L):
    """Return the softmax of the scores in ``L``.

    Each score is exponentiated and divided by the sum of all exponentials,
    so the returned values are positive and add up to 1. The normalisation
    runs over every element of ``L``, whatever its shape.

    Args:
        L: Array-like of real-valued scores (list, tuple or ndarray).

    Returns:
        ndarray with the same shape as ``L`` holding the softmax values.
        An empty input yields an empty array.
    """
    scores = np.asarray(L)
    if scores.size == 0:
        # Nothing to normalise; exp() of an empty array is also empty.
        return np.exp(scores)
    # Softmax is invariant to adding a constant to every score. Shifting by
    # the maximum makes the largest exponent exp(0) = 1, so np.exp can no
    # longer overflow to inf (which would turn the result into nan).
    exp_l = np.exp(scores - scores.max())
    return np.divide(exp_l, exp_l.sum())

### Usage

In [None]:
# Three example scores. The call is the last expression in the cell, so the
# notebook displays its return value.
L = [5,6,7]
softmax(L)

# 2 Sigmoid

### Python Implementation

In [None]:
# Activation (sigmoid) function
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    Args:
        x: Scalar or array-like (list, tuple, ndarray) of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray shaped like
        ``x``, with every value in the range [0, 1].
    """
    x = np.asarray(x)  # lets plain Python lists through; unary minus alone would not
    # exp() of a non-positive number can only underflow to 0, never overflow,
    # so both branches below stay finite even for huge |x|.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

def sigmoid_v2(X):
    """Alternative sigmoid written with explicit NumPy calls.

    Args:
        X: Scalar or array-like of real numbers.

    Returns:
        ``1 / (1 + exp(-X))`` computed element-wise.
    """
    negated = np.dot(-1, X)          # scalar-times-array product, i.e. -X
    denominator = 1 + np.exp(negated)
    return np.divide(1, denominator)

### Usage

In [None]:
# Sample inputs spanning large negative, zero and large positive values.
# The call is the cell's last expression, so its result is displayed.
X = np.array([-100, -10, 0, 10, 100])
sigmoid(X)

# 3 Sigmoid derivative

### Python Implementation

In [None]:
def sigmoid_prime(x):
    """Derivative of the sigmoid, expressed as ``s * (1 - s)`` with ``s = sigmoid(x)``.

    Args:
        x: Input accepted by ``sigmoid``.

    Returns:
        The element-wise product ``sigmoid(x) * (1 - sigmoid(x))``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

### Usage

# 4 Prediction Formula
$\hat{y}=\sigma(Wx+b)=\sigma(w_1x_1+w_2x_2+\dots+w_nx_n+b)$

### Python Implementation

In [None]:
# Output (prediction) formula
def predict(features, weights, bias):
    """Apply the sigmoid to the linear score ``features @ weights + bias``.

    Args:
        features: Input values passed as the first operand of ``np.matmul``.
        weights: Weights passed as the second operand of ``np.matmul``.
        bias: Offset added to the matrix product.

    Returns:
        ``sigmoid`` of the linear score.
    """
    linear_score = np.matmul(features, weights) + bias
    return sigmoid(linear_score)

### Usage

In [None]:
# All-zero weights, features and bias, so the linear score passed to the
# sigmoid is 0.
weights =[0,0]
features=[0,0]

bias=0

# Last expression in the cell: the prediction is displayed.
predict(features, weights, bias)

# 5 Log Loss Function

### Python Implementation

In [None]:
def error_formula(y, predicted):
    """Per-sample Bernoulli log-likelihood ``y*log(p) + (1-y)*log(1-p)``.

    The value is returned without a leading minus sign, so it is the
    negative of the usual log loss; negate (and sum) it to get a loss.

    Args:
        y: Scalar or array-like of target labels.
        predicted: Scalar or array-like of predicted probabilities that
            broadcasts against ``y``.

    Returns:
        The element-wise log-likelihood (always <= 0 for labels in [0, 1]).
    """
    y = np.asarray(y)
    # A saturated prediction (exactly 0 or 1) would give 0 * log(0) = nan.
    # Clipping by machine epsilon keeps every logarithm finite while leaving
    # ordinary probabilities unchanged.
    eps = np.finfo(float).eps
    p = np.clip(predicted, eps, 1 - eps)
    return y * np.log(p) + (1 - y) * np.log(1 - p)

### Usage

In [None]:
# Two samples with two features each, labelled 1 and 0.
# y is kept 1-D so it lines up element-wise with the 1-D output of predict();
# a (2, 1) column would broadcast against that output into a 2x2 matrix that
# pairs every label with every prediction.
y = np.array([1, 0])

weights  = np.array([1,1])
features = np.array([[1,1],[0.1, 0.5]])

bias = -1

predicted = predict(features, weights, bias)

# Last expression in the cell: one log-likelihood value per sample is displayed.
error_formula(y,predicted)

# 6 Cross Entropy
### $\text{Cross-Entropy} = -\sum_{i=1}^n \left[ y_i\ln(p_i)+(1-y_i)\ln(1-p_i) \right]$

### Python Implementation

In [None]:
def cross_entropy(y, predicted):
    """Total cross-entropy: the negated sum of ``error_formula`` over all entries.

    Args:
        y: Target labels, forwarded to ``error_formula``.
        predicted: Predicted probabilities, forwarded to ``error_formula``.

    Returns:
        A single number, ``-sum(error_formula(y, predicted))``.
    """
    per_sample = error_formula(y, predicted)
    return -np.sum(per_sample)
    
def cross_entropy_v2(Y, P):
    """Self-contained cross-entropy ``-sum(Y*log(P) + (1-Y)*log(1-P))``.

    Args:
        Y: Array-like of target labels.
        P: Array-like of predicted probabilities that broadcasts against ``Y``.

    Returns:
        The summed cross-entropy as a single float.
    """
    # np.float_ was removed in NumPy 2.0; asarray(dtype=float) performs the
    # same float64 conversion on every NumPy version.
    Y = np.asarray(Y, dtype=float)
    P = np.asarray(P, dtype=float)
    # Keep probabilities strictly inside (0, 1) so that a saturated
    # prediction cannot produce 0 * log(0) = nan.
    eps = np.finfo(float).eps
    P = np.clip(P, eps, 1 - eps)
    return -np.sum(Y * np.log(P) + (1-Y) * np.log(1-P))

### Usage

In [None]:
# Two samples with two features each, labelled 1 and 0.
# y is kept 1-D so it lines up element-wise with the 1-D output of predict();
# a (2, 1) column would broadcast into a 2x2 matrix and the sum would then
# cover four label/prediction pairs instead of two.
y = np.array([1, 0])

weights  = np.array([1,1])
features = np.array([[1,1],[0.1, 0.5]])

bias = -1

predicted = predict(features, weights, bias)

# Last expression in the cell: the total cross-entropy is displayed.
cross_entropy(y,predicted)

In [None]:
# Gradient descent step
def update_weights(x, y, weights, bias, learnrate):
    """Perform one gradient-descent update of the weights and the bias.

    The prediction error ``y - predict(x, weights, bias)`` is scaled by the
    learning rate; that step is multiplied by ``x`` and added to the weights,
    and added as-is to the bias.

    Args:
        x: Input features.
        y: Target label.
        weights: Current weights.
        bias: Current bias.
        learnrate: Step-size multiplier applied to the error.

    Returns:
        Tuple ``(weights, bias)`` holding the updated values; the arguments
        themselves are not modified in place.
    """
    residual = y - predict(x, weights, bias)
    step = learnrate * residual
    return weights + step * x, bias + step

In [None]:
# Seeded generator: the initial weights, and therefore the displayed result,
# are identical on every Restart & Run All.
rng = np.random.default_rng(42)

x=np.array([1,1])
weights = rng.normal(scale=1 / 2**.5, size=2)
y=np.array([1])
bias=1
# Last expression in the cell: the updated (weights, bias) pair is displayed.
update_weights(x,y,weights, bias, 0.001)