In [1]:
import numpy as np
import copy
from sklearn.datasets import make_blobs, make_moons
from typing import List, NoReturn

In [2]:
class Module:
    """Common interface for the layers defined in this notebook.

    Subclasses are expected to override ``forward`` and ``backward``.
    ``update`` is optional: layers without trainable state can inherit the
    do-nothing default.
    """

    def forward(self, x):
        """Compute the layer output for ``x``; must be overridden."""
        raise NotImplementedError

    def backward(self, d):
        """Propagate ``d`` back through the layer; must be overridden."""
        raise NotImplementedError

    def update(self, alpha):
        """Apply a parameter step of size ``alpha``; a no-op by default."""
        return None

# Linear

In [3]:
class Linear(Module):
    """Fully connected layer computing ``y = x @ W + b``.

    Accepts either a batch of shape ``(N, in_features)`` or a single sample
    of shape ``(in_features,)``.
    """

    def __init__(self, in_features: int, out_features: int):
        """Create the layer with randomly initialised parameters.

        Args:
            in_features: size of each input sample.
            out_features: size of each output sample.
        """
        # np.random.sample draws uniformly from [0, 1): weights start
        # non-negative and biases start non-positive.
        self.W = np.random.sample((in_features, out_features))
        self.b = -np.random.sample(out_features)

        # Input of the most recent forward pass and the gradients computed
        # by the most recent backward pass.
        self.X = None
        self.grad_loss_w = None
        self.grad_loss_b = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Apply the affine map and remember ``x`` for the backward pass."""
        self.X = x
        return x @ self.W + self.b

    def backward(self, d: np.ndarray) -> np.ndarray:
        """Store parameter gradients and return the gradient w.r.t. the input.

        Args:
            d: gradient of the loss with respect to this layer's output,
                shaped like the output of the preceding ``forward`` call.

        Returns:
            Gradient of the loss with respect to the layer input.
        """
        # Treat a single sample as a one-row batch. With 1-D operands
        # ``X.T @ d`` would be an inner product (a scalar or a shape error)
        # rather than the (in_features, out_features) outer product.
        x_rows = np.atleast_2d(self.X)
        d_rows = np.atleast_2d(d)

        # Gradient with respect to W (summed over the batch).
        self.grad_loss_w = x_rows.T @ d_rows

        # Gradient with respect to b (summed over the batch).
        self.grad_loss_b = d_rows.sum(axis=0)

        # Gradient with respect to x.
        return d @ self.W.T

    def update(self, alpha: float) -> None:
        """Take one gradient-descent step of size ``alpha``.

        The stored gradients are sums over the batch, so the step is divided
        by the batch size, taken from the most recent forward input.
        """
        # Normalise by the batch size; a 1-D input counts as one sample.
        N = self.X.shape[0] if self.X.ndim > 1 else 1
        step = alpha / N

        self.W = self.W - step * self.grad_loss_w
        self.b = self.b - step * self.grad_loss_b

# ReLU

In [12]:
class ReLU(Module):
    """Rectified linear unit, evaluated arithmetically as ``(x + |x|) / 2``."""

    def __init__(self):
        # Input of the most recent forward pass; backward builds its mask
        # from it.
        self.X = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Return ``x`` where it is positive and 0 elsewhere.

        The input is kept on ``self.X`` for the backward pass.
        """
        self.X = x
        # x + |x| is 2x for positive entries and 0 for non-positive ones.
        doubled = x + np.abs(x)
        return doubled / 2

    def backward(self, d) -> np.ndarray:
        """Scale the incoming gradient ``d`` by the ReLU derivative.

        The derivative is 1 where the stored input was positive and 0
        elsewhere (including exactly at 0).
        """
        sgn = np.sign(self.X)
        # sign is -1/0/+1, so (sgn + |sgn|) / 2 maps +1 -> 1 and the rest -> 0.
        mask = (sgn + np.abs(sgn)) / 2
        return d * mask
    

# Softmax

In [13]:
class Softmax(Module):
    """Softmax output layer whose ``backward`` is fused with cross-entropy."""

    def __init__(self):
        # Input and output of the most recent forward pass.
        self.X = None
        self.prob_matrix = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Turn scores into probabilities, one distribution per sample.

        A 1-D input is treated as a single sample. For higher-rank input the
        leading axis indexes samples and each sample is normalised over all
        of its remaining entries.

        Returns:
            Array of the same shape as ``x``; it is also stored on
            ``self.prob_matrix`` for the backward pass.
        """
        # Vectorised over the batch instead of looping over rows in Python.
        axis = None if x.ndim <= 1 else tuple(range(1, x.ndim))
        y = self.softmax(x, axis=axis)
        self.X = x
        self.prob_matrix = y
        return y

    def softmax(self, x, axis=None):
        """Numerically stabilised softmax.

        Args:
            x: array of scores.
            axis: axis or axes to normalise over. ``None`` (the default)
                normalises over the whole array.
        """
        # Subtracting the maximum leaves the result unchanged but keeps
        # every exponent <= 0, so np.exp cannot overflow.
        shifted = x - np.max(x, axis=axis, keepdims=True)
        expo = np.exp(shifted)
        return expo / np.sum(expo, axis=axis, keepdims=True)

    def backward(self, d) -> np.ndarray:
        """Return ``probabilities - d``.

        NOTE: unlike the other layers, ``d`` is not an upstream gradient.
        ``MLPClassifier.fit`` passes the one-hot targets here, which makes the
        result the gradient of softmax combined with cross-entropy with
        respect to the softmax input.
        """
        return self.prob_matrix - d

# MLPClassifier

In [6]:
class MLPClassifier:
    """Feed-forward classifier trained with mini-batch gradient descent.

    A ``Softmax`` layer is appended to the supplied modules, so the network
    outputs class probabilities.
    """

    def __init__(self, modules: List[Module], epochs: int = 40, alpha: float = 0.01):
        """Store the network and its training hyper-parameters.

        Args:
            modules: layers applied in order, without the final softmax.
            epochs: number of passes over the training data in ``fit``.
            alpha: step size passed to each layer's ``update``.
        """
        # Build a new list so the caller's list is not mutated by the
        # appended output layer.
        self.modules = list(modules) + [Softmax()]
        self.epochs = epochs
        self.alpha = alpha

    def fit(self, X: np.ndarray, y: np.ndarray, batch_size=32) -> None:
        """Train the network on ``X`` with integer class labels ``y``.

        Args:
            X: samples, one per row.
            y: class labels; the number of classes is taken to be
                ``max(y) + 1``.
            batch_size: number of samples per gradient step.
        """
        # Keep features and labels side by side so one shuffle moves both.
        data = np.c_[X, y]

        # Number of classes.
        cnt = int(np.max(y)) + 1

        for _ in range(self.epochs):
            np.random.shuffle(data)

            for start in range(0, len(data), batch_size):
                batch_plus = data[start:start + batch_size]
                batch = batch_plus[:, :-1]
                labels = batch_plus[:, -1].astype(int)

                # One-hot encode the labels of this batch.
                elems = batch.shape[0]
                one_hot = np.zeros((elems, cnt), dtype=int)
                one_hot[np.arange(elems), labels] = 1

                # Forward pass; every layer caches what its backward needs.
                self.predict_proba(batch)

                # Backward pass: the softmax layer takes the targets
                # (softmax + cross-entropy); the rest take gradients.
                d = self.modules[-1].backward(one_hot)
                for layer in self.modules[-2::-1]:
                    d = layer.backward(d)

                # Update every module. Layers without parameters inherit the
                # no-op Module.update, so no assumption about the layout
                # (e.g. strictly alternating Linear/activation) is needed.
                for layer in self.modules:
                    layer.update(self.alpha)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Run ``X`` through every layer and return class probabilities."""
        for layer in self.modules:
            X = layer.forward(X)
        return X

    def predict(self, X) -> np.ndarray:
        """Return the index of the most probable class for each sample."""
        p = self.predict_proba(X)
        # The last axis holds the classes; this equals axis=1 for a batch
        # and also works for a single 1-D sample.
        return np.argmax(p, axis=-1)

In [7]:
# Quick run on random data: build a small network and fit it once.
# Nothing is evaluated or displayed in this cell.
p = MLPClassifier(
    [
        Linear(4, 64),
        ReLU(),
        Linear(64, 64),
        ReLU(),
        Linear(64, 2),
    ]
)

X = np.random.randn(50, 4)
# Class 0 when x0 > x2^2 or x3^3 > 0.5, class 1 otherwise.
y = [int(not (row[0] > row[2] ** 2 or row[3] ** 3 > 0.5)) for row in X]
p.fit(X, y)

# Test

In [10]:
# Two-moons benchmark: train and test sets are drawn independently.
X, y = make_moons(400, noise=0.075)
X_test, y_test = make_moons(400, noise=0.075)

# NOTE(review): the reported number is the best of 25 independently
# initialised runs, selected on the test set, so it is an optimistic estimate.
best_acc = 0
for _ in range(25):
    p = MLPClassifier(
        [
            Linear(2, 64),
            ReLU(),
            Linear(64, 64),
            ReLU(),
            Linear(64, 2),
        ]
    )
    p.fit(X, y)
    best_acc = max((p.predict(X_test) == y_test).mean(), best_acc)
print("Accuracy", best_acc)

Accuracy 0.9625


In [11]:
# Three-blob benchmark: train and test sets are drawn independently around
# the same centers.
X, y = make_blobs(400, 2, centers=[[0, 0], [2.5, 2.5], [-2.5, 3]])
X_test, y_test = make_blobs(400, 2, centers=[[0, 0], [2.5, 2.5], [-2.5, 3]])

# NOTE(review): the reported number is the best of 25 independently
# initialised runs, selected on the test set, so it is an optimistic estimate.
best_acc = 0
for _ in range(25):
    p = MLPClassifier(
        [
            Linear(2, 64),
            ReLU(),
            Linear(64, 64),
            ReLU(),
            Linear(64, 3),
        ]
    )
    p.fit(X, y)
    best_acc = max((p.predict(X_test) == y_test).mean(), best_acc)
print("Accuracy", best_acc)

Accuracy 0.955
