In [None]:
import numpy as np
from functools import wraps
from enum import Enum
from tqdm import tqdm

In [None]:
def conditional_generator(func):
    """Let a generator method run either lazily or eagerly, chosen per instance.

    The decorated method must be a generator function defined on an object
    that exposes an ``is_generator`` attribute.

    * If ``self.is_generator`` is truthy, the generator object is returned
      untouched so the caller can step through it.
    * Otherwise the generator is exhausted immediately and the last value it
      yielded is returned (``None`` if it yielded nothing at all).
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        if self.is_generator:
            return func(self, *args, **kwargs)
        # Pre-bind the result: a generator that never yields (for example a
        # loop configured to run zero times) would otherwise leave `values`
        # unbound and make the return statement raise UnboundLocalError.
        values = None
        for values in func(self, *args, **kwargs):
            pass
        return values
    return wrapper

In [None]:
def _softmax(x):
    """Softmax over the last axis of ``x``.

    Reducing over the last axis makes the function usable both for a batch of
    rows (2-D input) and for a single output vector (1-D input). The maximum
    along that axis is subtracted before exponentiating so that large inputs
    do not overflow to ``inf`` and produce ``nan``; the shift cancels out in
    the ratio.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    ex = np.exp(shifted)
    return ex / np.sum(ex, axis=-1, keepdims=True)


def _softmax_deriv(x):
    """Placeholder derivative: softmax is not supported as a hidden activation."""
    raise Exception("activation function softmax only works on output")


class Activation(Enum):
    """Activation functions available to the networks in this notebook.

    Each member's ``value`` is a ``(function, derivative)`` pair. The
    SIGMOID and TANH derivatives are written in terms of the activation's
    *output* (``x`` is the already-activated value), not its input.
    """

    RELU = (lambda x: (x > 0) * x,
            lambda x: x > 0)

    SIGMOID = (lambda x: 1 / (1 + np.exp(-x)),
               lambda x: x * (1 - x))

    TANH = (lambda x: np.tanh(x),
            lambda x: 1 - (x ** 2))

    # The derivative slot always raises: softmax may only be used on the output layer.
    SOFTMAX = (_softmax, _softmax_deriv)

In [None]:
class BasicNeuralNetwork:
    """Fully connected network with a single hidden layer, trained in mini-batches.

    Parameters
    ----------
    hid_size : int
        Number of hidden units.
    epochs : int
        Number of passes over the training data performed by ``fit``.
    alpha : float
        Factor applied to every coefficient update.
    batch_size : int
        Number of consecutive samples taken per update step.
    skip_remaining : bool
        When true, a trailing batch smaller than ``batch_size`` is skipped.
    hid_activation, out_activation : object or None
        Objects whose ``value`` is a ``(function, derivative)`` pair (such as
        the members of ``Activation``); ``None`` leaves the layer linear.
        Only the function of ``out_activation`` is used.
    dropout : float
        Probability with which each hidden unit is *kept* during training;
        kept units are scaled by ``1 / dropout``. ``1`` disables dropout.
    as_probs : bool
        When true, ``predict`` passes its output through softmax unless the
        output activation already is ``Activation.SOFTMAX``.
    is_generator : bool
        Read by the ``conditional_generator`` decorator applied to ``fit``.
    random_seed : int or None
        Seed for the ``numpy`` random generator used for initialisation and
        dropout masks.
    """

    def __init__(self, hid_size, epochs=3, alpha=1e-3, batch_size=32, skip_remaining=True,
                 hid_activation=None, out_activation=None, dropout=1,
                 as_probs=False, is_generator=False, random_seed=None):
        self.hid_size = hid_size
        self.epochs = epochs
        self.alpha = alpha
        self.batch_size = batch_size
        self.skip_remaining = skip_remaining
        self.hid_activation = hid_activation
        self.out_activation = out_activation
        self.dropout = dropout
        self.as_probs = as_probs
        self.is_generator = is_generator
        self.random_seed = random_seed

        if self.hid_activation is not None:
            self.__hid_activation_fun, self.__hid_activation_deriv = self.hid_activation.value
        if self.out_activation is not None:
            self.__out_activation_fun, _ = self.out_activation.value
        if self.as_probs and self.out_activation is not Activation.SOFTMAX:
            self.__to_probs, _ = Activation.SOFTMAX.value

        self.__rng = np.random.default_rng(seed=self.random_seed)
    
    @conditional_generator
    def fit(self, train_samples, train_labels):
        """Train the network, yielding once after every epoch.

        The coefficients are re-initialised on every call. ``train_samples``
        and ``train_labels`` must have the same length; the number of input
        features and outputs is taken from ``len(train_samples.T)`` and
        ``len(train_labels.T)``.
        """
        assert(len(train_samples) == len(train_labels))

        # initialize coefficients with values between -0.1 and 0.1
        self.coeffs_in_to_hid = 0.2 * self.__rng.random((len(train_samples.T), self.hid_size)) - 0.1
        self.coeffs_hid_to_out = 0.2 * self.__rng.random((self.hid_size, len(train_labels.T))) - 0.1

        for _ in range(self.epochs):
            for i in range(0, len(train_samples), self.batch_size):
                samples = train_samples[i:i + self.batch_size]
                labels = train_labels[i:i + self.batch_size]
                batch_size = len(samples)
                if batch_size < self.batch_size and self.skip_remaining:
                    continue

                # 1 keeps a hidden unit (probability `dropout`), 0 drops it
                dropout_mask = self.__rng.choice((0, 1), size=(batch_size, self.hid_size), 
                                                 p=(1 - self.dropout, self.dropout))

                # forward pass
                layers_in = samples
                layers_hid = layers_in.dot(self.coeffs_in_to_hid)
                if self.hid_activation is not None:
                    layers_hid = self.__hid_activation_fun(layers_hid)
                # Keep the un-dropped activations: the derivatives are defined
                # on the activation output, so evaluating them on the masked and
                # rescaled values would give wrong gradients when dropout < 1.
                layers_hid_dropped = layers_hid * (dropout_mask * (1 / self.dropout))
                layers_out = layers_hid_dropped.dot(self.coeffs_hid_to_out)
                if self.out_activation is not None:
                    layers_out = self.__out_activation_fun(layers_out)

                # backward pass
                deltas_out = labels - layers_out
                deltas_hid = deltas_out.dot(self.coeffs_hid_to_out.T)
                if self.hid_activation is not None:
                    deltas_hid *= self.__hid_activation_deriv(layers_hid)
                deltas_hid *= dropout_mask

                self.coeffs_hid_to_out += layers_hid_dropped.T.dot(deltas_out) * self.alpha
                self.coeffs_in_to_hid += layers_in.T.dot(deltas_hid) * self.alpha
            yield

    def predict(self, samples):
        """Return the network output for ``samples``; requires a prior ``fit``.

        No dropout is applied. If ``as_probs`` is set and the output
        activation is not already softmax, the output is passed through
        softmax before being returned.
        """
        assert(len(samples.T) == len(self.coeffs_in_to_hid))

        # predictions for test samples are made in one batch
        layers_in = samples
        layers_hid = layers_in.dot(self.coeffs_in_to_hid)
        if self.hid_activation is not None:
            layers_hid = self.__hid_activation_fun(layers_hid)
        layers_out = layers_hid.dot(self.coeffs_hid_to_out)
        if self.out_activation is not None:
            layers_out = self.__out_activation_fun(layers_out)
        if self.as_probs and self.out_activation is not Activation.SOFTMAX:
            layers_out = self.__to_probs(layers_out)

        return layers_out

    def evaluate(self, preds, labels):
        """Return ``(loss, accuracy)`` for predictions against labels.

        ``loss`` is the sum of squared differences divided by the number of
        predictions; ``accuracy`` is the fraction of rows whose ``argmax``
        matches between ``preds`` and ``labels``.
        """
        assert(len(preds) == len(labels))
        assert(len(preds.T) == len(labels.T) == len(self.coeffs_hid_to_out.T))
        errors = ((labels - preds) ** 2).sum(axis=0)
        loss = sum(errors) / len(preds)
        n_correct = sum([np.argmax(pred) == np.argmax(label)
                        for pred, label in zip(preds, labels)])
        accuracy = n_correct / len(preds)
        return loss, accuracy

In [None]:
class BasicSparseNeuralNetwork:
    """Single-hidden-layer network for sparse inputs, trained one sample at a time.

    Every sample is read through ``sample.T[0]`` (row indices into the
    input-to-hidden coefficients) and, when ``factor_words_freq`` is set,
    ``sample.T[1]`` (a weight per index). Judging by the parameter names these
    are presumably (word id, word frequency) pairs -- confirm against the
    code that builds the samples.

    Parameters
    ----------
    dict_size : int
        Number of rows of the input-to-hidden coefficient matrix.
    hid_size : int
        Number of hidden units.
    epochs : int
        Number of passes over the training data performed by ``fit``.
    alpha : float
        Factor applied to every coefficient update.
    hid_activation, out_activation : object or None
        Objects whose ``value`` is a ``(function, derivative)`` pair (such as
        the members of ``Activation``); ``None`` leaves the layer linear.
        Only the function of ``out_activation`` is used.
    dropout : float
        Probability with which each hidden unit is *kept* during training;
        kept units are scaled by ``1 / dropout``. ``1`` disables dropout.
    as_probs : bool
        When true, ``predict`` passes its output through softmax unless the
        output activation already is ``Activation.SOFTMAX``.
    factor_words_freq : bool
        When true the selected coefficient rows are weighted by
        ``sample.T[1]``; otherwise they are simply summed.
    is_generator : bool
        Read by the ``conditional_generator`` decorator applied to ``fit``.
    random_seed : int or None
        Seed for the ``numpy`` random generator used for initialisation and
        dropout masks.
    """
    
    def __init__(self, dict_size, hid_size, epochs=3, alpha=1e-3,
                 hid_activation=None, out_activation=None, dropout=1, as_probs=False,
                 factor_words_freq=True, is_generator=False, random_seed=None):
        self.dict_size = dict_size
        self.hid_size = hid_size
        self.epochs = epochs
        self.alpha = alpha
        self.hid_activation = hid_activation
        self.out_activation = out_activation
        self.dropout = dropout
        self.as_probs = as_probs
        self.factor_words_freq = factor_words_freq
        self.is_generator = is_generator
        self.random_seed = random_seed

        if self.hid_activation is not None:
            self.__hid_activation_fun, self.__hid_activation_deriv = self.hid_activation.value
        if self.out_activation is not None:
            self.__out_activation_fun, _ = self.out_activation.value
        if self.as_probs and self.out_activation is not Activation.SOFTMAX:
            self.__to_probs, _ = Activation.SOFTMAX.value

        self.__rng = np.random.default_rng(seed=self.random_seed)

    def _input_to_hidden(self, layer_in):
        """Project one sparse sample onto the hidden layer (before activation)."""
        selected = self.coeffs_in_to_hid[layer_in.T[0]]
        if self.factor_words_freq:
            return layer_in.T[1].dot(selected)
        return selected.sum(axis=0)

    @conditional_generator
    def fit(self, train_samples, train_labels):
        """Train the network sample by sample, yielding once after every epoch.

        The coefficients are re-initialised on every call. ``train_samples``
        and ``train_labels`` must have the same length; the number of outputs
        is taken from ``len(train_labels.T)``.
        """
        assert(len(train_samples) == len(train_labels))

        # initialize coefficients using values between -0.1 and 0.1
        self.coeffs_in_to_hid = 0.2 * self.__rng.random((self.dict_size, self.hid_size)) - 0.1
        self.coeffs_hid_to_out = 0.2 * self.__rng.random((self.hid_size, len(train_labels.T))) - 0.1

        for _ in range(self.epochs):
            for sample, label in zip(train_samples, train_labels):
                # 1 keeps a hidden unit (probability `dropout`), 0 drops it
                dropout_mask = self.__rng.choice((0, 1), size=self.hid_size, 
                                                 p=(1 - self.dropout, self.dropout))

                # forward pass
                layer_in = sample
                layer_hid = self._input_to_hidden(layer_in)
                if self.hid_activation is not None:
                    layer_hid = self.__hid_activation_fun(layer_hid)
                # Keep the un-dropped activations: the derivatives are defined
                # on the activation output, so evaluating them on the masked and
                # rescaled values would give wrong gradients when dropout < 1.
                layer_hid_dropped = layer_hid * (dropout_mask * (1 / self.dropout))
                layer_out = layer_hid_dropped.dot(self.coeffs_hid_to_out)
                if self.out_activation is not None:
                    layer_out = self.__out_activation_fun(layer_out)

                # backward pass
                deltas_out = label - layer_out
                deltas_hid = deltas_out.dot(self.coeffs_hid_to_out.T)
                if self.hid_activation is not None:
                    deltas_hid *= self.__hid_activation_deriv(layer_hid)
                deltas_hid *= dropout_mask

                self.coeffs_hid_to_out += np.outer(layer_hid_dropped, deltas_out) * self.alpha
                if self.factor_words_freq:
                    # each row entered the hidden layer scaled by its weight,
                    # so its update is scaled by the same weight
                    deltas_in = np.outer(layer_in.T[1], deltas_hid)
                else:
                    deltas_in = deltas_hid
                # np.add.at accumulates over repeated indices, whereas
                # `coeffs[idx] += ...` would apply only one update per row
                np.add.at(self.coeffs_in_to_hid, layer_in.T[0], deltas_in * self.alpha)
            yield

    def predict(self, samples):
        """Return an array with one output row per sample; requires a prior ``fit``.

        No dropout is applied. If ``as_probs`` is set and the output
        activation is not already softmax, the stacked outputs are passed
        through softmax before being returned.
        """
        preds = []
        for sample in samples:
            layer_hid = self._input_to_hidden(sample)
            if self.hid_activation is not None:
                layer_hid = self.__hid_activation_fun(layer_hid)
            layer_out = layer_hid.dot(self.coeffs_hid_to_out)
            if self.out_activation is not None:
                layer_out = self.__out_activation_fun(layer_out)

            preds.append(layer_out)
        preds = np.array(preds)
        if self.as_probs and self.out_activation is not Activation.SOFTMAX:
            preds = self.__to_probs(preds)
        return preds

    def evaluate(self, preds, labels):
        """Return ``(loss, accuracy)`` for predictions against labels.

        ``loss`` is the sum of squared differences divided by the number of
        predictions; ``accuracy`` is the fraction of rows whose ``argmax``
        matches between ``preds`` and ``labels``.
        """
        assert(len(preds) == len(labels))
        assert(len(preds.T) == len(labels.T) == len(self.coeffs_hid_to_out.T))
        errors = ((labels - preds) ** 2).sum(axis=0)
        loss = sum(errors) / len(preds)
        n_correct = sum([np.argmax(pred) == np.argmax(label)
                        for pred, label in zip(preds, labels)])
        accuracy = n_correct / len(preds)
        return loss, accuracy