In [2]:
import torch

#### Loss Functions

In [3]:
def mean_squared_error(y_pred, y_true):
    """Return the mean of the squared element-wise differences.

    Args:
        y_pred: tensor of predictions.
        y_true: tensor of targets, broadcastable against ``y_pred``.

    Returns:
        A 0-dim tensor: ``mean((y_pred - y_true) ** 2)`` over every element.
    """
    residual = y_pred - y_true
    return torch.square(residual).mean()

In [4]:
def mean_absolute_errors(y_pred, y_true):
    """Return the mean of the absolute element-wise differences.

    Args:
        y_pred: tensor of predictions.
        y_true: tensor of targets, broadcastable against ``y_pred``.

    Returns:
        A 0-dim tensor: ``mean(|y_pred - y_true|)`` over every element.
    """
    abs_residual = (y_pred - y_true).abs()
    return abs_residual.mean()

In [5]:
def binary_cross_entropy(y_pred, y_true, eps=1e-15):
    """Binary cross-entropy averaged over every element.

    Computes ``-mean(y_true * log(p) + (1 - y_true) * log(1 - p))`` where
    ``p`` is ``y_pred`` clamped into ``[eps, 1 - eps]``.

    Args:
        y_pred: tensor of predicted probabilities.
        y_true: tensor of targets, broadcastable against ``y_pred``.
        eps: clamp margin that keeps both logarithms finite. It is raised to
            the machine epsilon of ``y_pred``'s dtype when smaller than that.

    Returns:
        A 0-dim tensor holding the mean loss.
    """
    if not torch.is_floating_point(y_pred):
        # finfo() below only accepts floating dtypes.
        y_pred = y_pred.to(torch.get_default_dtype())

    # With the default eps, ``1 - 1e-15`` rounds to exactly 1.0 in float32, so
    # a prediction of 1.0 survived the clamp and produced
    # ``0 * log(0) = NaN``. Never let eps drop below the dtype's resolution.
    eps = max(eps, torch.finfo(y_pred.dtype).eps)

    y_pred = torch.clamp(y_pred, eps, 1. - eps)
    return - torch.mean(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))

In [6]:
def categorical_cross_entropy(y_true, y_pred, eps=1e-15):
    """Per-row categorical cross-entropy term, averaged along ``dim=1``.

    Computes ``-mean(y_true * log(p), dim=1)`` where ``p`` is ``y_pred``
    clamped from below at ``eps``.

    Note the argument order is ``(y_true, y_pred)``, the reverse of the other
    loss functions in this notebook; it is kept so existing callers still work.

    NOTE(review): the reduction along ``dim=1`` is a mean, not a sum, so the
    value is the textbook cross-entropy divided by the size of ``dim=1``.
    Kept as-is for compatibility -- confirm this scaling is intended.

    Args:
        y_true: target tensor with at least two dimensions.
        y_pred: predicted probabilities, broadcastable against ``y_true``.
        eps: lower clamp for ``y_pred`` so ``log`` never sees 0.

    Returns:
        A tensor with ``dim=1`` reduced away.
    """
    # Without the clamp a zero probability on a zero-target class gives
    # ``0 * log(0) = 0 * -inf = NaN`` and poisons the whole row.
    y_pred = torch.clamp(y_pred, min=eps)
    return - torch.mean(y_true * torch.log(y_pred), dim=1)

#### Activations

In [7]:
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: input tensor.

    Returns:
        A tensor of the same shape as ``x``.
    """
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

In [8]:
def tanh(x):
    """Apply the hyperbolic tangent element-wise.

    Uses the identity ``tanh(x) = sign(x) * (1 - e^(-2|x|)) / (1 + e^(-2|x|))``.
    The exponent is never positive, so the exponential cannot overflow and the
    result saturates cleanly at +/-1 for large ``|x|``.

    The previous form ``(e^-x - e^x) / (e^-x + e^x)`` had its numerator
    reversed (it returned ``-tanh(x)``) and evaluated ``inf / inf = NaN`` once
    ``e^|x|`` overflowed.

    Args:
        x: input tensor.

    Returns:
        A tensor of the same shape as ``x`` with values in ``[-1, 1]``.
    """
    decay = torch.exp(-2 * torch.abs(x))
    return torch.sign(x) * (1 - decay) / (1 + decay)

In [9]:
def softmax(x):
    """Softmax along ``dim=1``.

    Args:
        x: tensor with at least two dimensions; ``dim=1`` is normalised.

    Returns:
        A tensor shaped like ``x`` whose entries along ``dim=1`` sum to 1.
    """
    # Subtracting the per-row maximum leaves the result unchanged
    # mathematically but keeps exp() from overflowing on large inputs.
    shifted = x - torch.max(x, dim=1, keepdim=True).values
    exp_x = torch.exp(shifted)
    # keepdim=True keeps the reduced axis so the division broadcasts across
    # dim=1. Without it an (N, C) input was divided by an (N,) vector, which
    # raises when N != C and silently normalises the wrong axis when N == C.
    return exp_x / torch.sum(exp_x, dim=1, keepdim=True)

In [10]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0.

    Args:
        x: input tensor.

    Returns:
        A tensor shaped like ``x`` with negative entries replaced by 0.
    """
    zero = torch.tensor(0)
    return torch.maximum(x, zero)

In [11]:
def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: ``x`` where ``x >= 0`` and ``alpha * x`` elsewhere.

    The branch is selected by the sign of ``x``. The previous
    ``max(x, alpha * x)`` form gives the same result for ``alpha <= 1`` but
    picks the wrong branch for ``alpha > 1``.

    Args:
        x: input tensor.
        alpha: slope applied to negative inputs. Now optional; every existing
            call that passes it explicitly behaves as before for
            ``alpha <= 1``.

    Returns:
        A tensor shaped like ``x``.
    """
    scaled = alpha * x
    # Cast x to the dtype of the scaled branch so both torch.where operands
    # agree even when x is an integer tensor.
    return torch.where(x >= 0, x.to(scaled.dtype), scaled)

In [13]:
def swish(x):
    """Swish activation: ``x`` multiplied element-wise by ``sigmoid(x)``.

    Relies on the ``sigmoid`` function defined elsewhere in this notebook.

    Args:
        x: input tensor.

    Returns:
        A tensor shaped like ``x``.
    """
    gate = sigmoid(x)
    return x * gate

In [14]:
def exponent_linear_unit(x, alpha=0.1):
    """Exponential linear unit, applied element-wise.

    Returns ``x`` where ``x >= 0`` and ``alpha * (exp(x) - 1)`` elsewhere.

    The previous implementation branched with a Python ``if`` on the tensor,
    which raises for any input holding more than one element. The branch is
    now chosen per element with ``torch.where``.

    Args:
        x: input tensor of any shape.
        alpha: scale of the negative branch.

    Returns:
        A tensor shaped like ``x``.
    """
    # torch.where evaluates both branches everywhere. Clamping at 0 keeps the
    # exponential from overflowing on large positive entries whose negative
    # branch is discarded anyway.
    negative_branch = alpha * torch.expm1(torch.clamp(x, max=0))
    return torch.where(x >= 0, x.to(negative_branch.dtype), negative_branch)

In [15]:
def scaled_exponent_linear_unit(x, alpha=0.1, lambda_p=1.2):
    """Scaled exponential linear unit, applied element-wise.

    Returns ``lambda_p * x`` where ``x >= 0`` and
    ``lambda_p * alpha * (exp(x) - 1)`` elsewhere.

    The previous implementation branched with a Python ``if`` on the tensor,
    which raises for any input holding more than one element. The branch is
    now chosen per element with ``torch.where``.

    NOTE(review): the defaults (alpha=0.1, lambda_p=1.2) are not the
    self-normalising SELU constants (about 1.6733 and 1.0507). They are kept
    unchanged for compatibility -- confirm they are intended.

    Args:
        x: input tensor of any shape.
        alpha: scale of the negative branch.
        lambda_p: overall scale applied to both branches.

    Returns:
        A tensor shaped like ``x``.
    """
    # Clamp before the exponential so the discarded positive entries cannot
    # overflow to inf inside torch.where.
    negative_branch = alpha * torch.expm1(torch.clamp(x, max=0))
    return lambda_p * torch.where(x >= 0, x.to(negative_branch.dtype), negative_branch)

#### Optimizer

In [16]:
def compute_gradients(y_pred, y_true, x):
    """Gradients of the mean squared error for the model ``y_pred = w * x + b``.

    For ``L = mean((y_pred - y_true) ** 2)``:

        dL/dw = 2 * mean((y_pred - y_true) * x)
        dL/db = 2 * mean(y_pred - y_true)

    The previous version multiplied by ``-2`` while using the residual
    ``y_pred - y_true``, which flips the sign and turns a ``w - lr * dw``
    update into gradient ascent. It also multiplied by ``x`` outside the mean,
    so ``dw`` came back shaped like ``x`` instead of as a single value.

    Args:
        y_pred: tensor of model predictions.
        y_true: tensor of targets, broadcastable against ``y_pred``.
        x: tensor of inputs, broadcastable against ``y_pred``.

    Returns:
        ``(dw, db)`` as 0-dim tensors.
    """
    residual = y_pred - y_true

    dw = 2 * torch.mean(residual * x)
    db = 2 * torch.mean(residual)

    return dw, db

In [17]:
def gradient_descent(y_true, x, w, b, learning_rate, iterations):
    """Run a fixed number of gradient-descent steps on ``y_pred = w * x + b``.

    Each iteration predicts with the current parameters, obtains ``(dw, db)``
    from ``compute_gradients`` (defined elsewhere in this notebook) and moves
    ``w`` and ``b`` by ``-learning_rate`` times those values.

    The per-iteration loss that used to be computed here was never read or
    returned, so that dead computation has been removed; the update sequence
    is unchanged.

    Args:
        y_true: target tensor.
        x: input tensor.
        w: initial weight.
        b: initial bias.
        learning_rate: step size applied to both parameters.
        iterations: number of update steps; 0 returns ``w`` and ``b`` as given.

    Returns:
        The final ``(w, b)`` pair.
    """
    for _ in range(iterations):
        y_pred = w * x + b

        dw, db = compute_gradients(y_pred, y_true, x)
        w = w - learning_rate * dw
        b = b - learning_rate * db

    return w, b

#### Logistic Regression

In [19]:
class LogisticRegression:
    """Logistic regression trained with full-batch gradient descent.

    The parameters are plain tensors updated by hand (no autograd):

    * ``W`` -- weights of shape ``(in_featurs, n_labels)``, drawn from a
      standard normal distribution.
    * ``b`` -- bias of shape ``(n_labels,)``, initialised to zeros.

    Fixes relative to the previous version: the model output is now
    ``sigmoid(X @ W + b)`` (it was the element-wise ``W * X + b`` with no
    sigmoid), the cross-entropy carries its leading minus sign and clamps its
    input, the gradients have the shapes of ``W`` and ``b`` instead of being
    collapsed to scalars, and the unused per-iteration loss is gone.
    """

    def __init__(self, in_featurs, n_labels):
        # The parameter name ``in_featurs`` (sic) is kept so keyword callers
        # keep working.
        self.W = torch.randn(in_featurs, n_labels)
        self.b = torch.zeros(n_labels)

    def sigmoid(self, x):
        """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise."""
        return 1 / (1 + torch.exp(-x))

    def cross_entropy(self, y_pred, y_true, eps=1e-7):
        """Mean binary cross-entropy between probabilities and targets.

        ``y_pred`` is clamped into ``[eps, 1 - eps]`` so both logarithms stay
        finite. The result is non-negative; the previous version omitted the
        leading minus sign and returned the negated loss.
        """
        y_pred = torch.clamp(y_pred, eps, 1. - eps)
        return - torch.mean(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred))

    def compute_gradients(self, y_pred, y_true, x):
        """Gradients of the mean cross-entropy with respect to ``W`` and ``b``.

        With ``y_pred = sigmoid(x @ W + b)`` the derivative of the loss with
        respect to the pre-activation is ``(y_pred - y_true) / numel``
        (ignoring the clamp in ``cross_entropy``), hence:

            dw = x.T @ (y_pred - y_true) / numel    # shape of W
            db = sum(y_pred - y_true, dim=0) / numel  # shape of b

        Args:
            y_pred: probabilities of shape ``(n_samples, n_labels)``.
            y_true: targets with the same number of elements as ``y_pred``.
            x: inputs of shape ``(n_samples, in_featurs)``.
        """
        # Reshape so a flat (n_samples,) target cannot silently broadcast
        # against an (n_samples, 1) prediction into an (n, n) matrix.
        df = y_pred - y_true.reshape(y_pred.shape)
        n_terms = df.numel()

        dw = x.T @ df / n_terms
        db = torch.sum(df, dim=0) / n_terms

        return dw, db

    def backward(self, learning_rate, dw, db):
        """Apply one gradient-descent step to ``W`` and ``b``."""
        self.W = self.W - learning_rate * dw
        self.b = self.b - learning_rate * db

    def forward(self, X, y, learning_rate, iterations):
        """Train for ``iterations`` full-batch steps and return ``(W, b)``.

        Despite the name this is the training loop: each step predicts
        ``sigmoid(X @ W + b)``, computes the gradients and updates the
        parameters in place on the instance.

        Args:
            X: inputs of shape ``(n_samples, in_featurs)``.
            y: targets with ``n_samples * n_labels`` elements.
            learning_rate: step size.
            iterations: number of update steps.
        """
        for _ in range(iterations):
            y_pred = self.sigmoid(X @ self.W + self.b)
            dw, db = self.compute_gradients(y_pred, y, X)
            self.backward(learning_rate, dw, db)

        return self.W, self.b

#### Linear Regression

In [20]:
class LinearRegression:
    """Linear regression trained with full-batch gradient descent.

    The parameters are plain tensors updated by hand (no autograd):

    * ``W`` -- weights of shape ``(in_features, out_features)``.
    * ``b`` -- bias of shape ``(out_features,)``.

    Both are drawn from a standard normal distribution.

    Fixes relative to the previous version: the model output is now
    ``X @ W + b`` (it was the element-wise ``W * X + b``), the gradients are
    the true derivatives of the mean squared error (the old ones had the
    wrong sign and multiplied by ``x`` outside the mean), and the unused
    per-iteration loss is gone.
    """

    def __init__(self, in_features, out_features):
        self.W = torch.randn(in_features, out_features)
        self.b = torch.randn(out_features)

    def mean_squared_error(self, y_pred, y_true):
        """Mean of the squared element-wise differences over every element."""
        return torch.mean(torch.square(y_pred - y_true))

    def compute_gradients(self, y_pred, y_true, x):
        """Gradients of ``mean_squared_error`` with respect to ``W`` and ``b``.

        With ``y_pred = x @ W + b`` and the loss averaged over all ``numel``
        entries of ``y_pred``:

            dw = 2 * x.T @ (y_pred - y_true) / numel      # shape of W
            db = 2 * sum(y_pred - y_true, dim=0) / numel  # shape of b

        Args:
            y_pred: predictions of shape ``(n_samples, out_features)``.
            y_true: targets with the same number of elements as ``y_pred``.
            x: inputs of shape ``(n_samples, in_features)``.
        """
        # Reshape so a flat (n_samples,) target cannot silently broadcast
        # against an (n_samples, 1) prediction into an (n, n) matrix.
        df = y_pred - y_true.reshape(y_pred.shape)
        n_terms = df.numel()

        dw = 2 * (x.T @ df) / n_terms
        db = 2 * torch.sum(df, dim=0) / n_terms

        return dw, db

    def backward(self, dw, db, learning_rate):
        """Apply one gradient-descent step to ``W`` and ``b``."""
        self.W = self.W - learning_rate * dw
        self.b = self.b - learning_rate * db

    def forward(self, X, y, learning_rate, iterations):
        """Train for ``iterations`` full-batch steps and return ``(W, b)``.

        Despite the name this is the training loop: each step predicts
        ``X @ W + b``, computes the gradients and updates the parameters in
        place on the instance.

        Args:
            X: inputs of shape ``(n_samples, in_features)``.
            y: targets with ``n_samples * out_features`` elements.
            learning_rate: step size.
            iterations: number of update steps.
        """
        for _ in range(iterations):
            y_pred = X @ self.W + self.b
            dw, db = self.compute_gradients(y_pred, y, X)
            self.backward(dw, db, learning_rate)

        return self.W, self.b