In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# newer releases no longer accept the old names. Prefer the new name and fall
# back to the legacy one so this cell runs on both old and new Matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [61]:
class AdaLineSGD:
    """ADALINE-style linear model trained with mini-batch gradient descent.

    The model computes ``x @ W + b`` and minimises the half squared error
    ``0.5 * (y - p) ** 2`` averaged over each mini-batch.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    batch_size : int
        Maximum number of samples per mini-batch (must be >= 1).
    n_epoch : int
        Number of passes over the training data performed by ``fit``.

    Attributes
    ----------
    W : ndarray of shape (F, 1) or None
        Weight column vector; ``None`` until parameters are initialised.
    b : ndarray of shape (1,) or None
        Bias term; ``None`` until parameters are initialised.
    """

    def __init__(self, lr=1e-1, batch_size=32, n_epoch=500):
        self.lr = lr
        self.batch_size = batch_size
        self.n_epoch = n_epoch
        self.W = None
        self.b = None

    def _init_params(self, n_features):
        """Draw fresh small random parameters for ``n_features`` inputs."""
        self.W = np.random.normal(loc=0.0, scale=0.01, size=(n_features, 1))
        self.b = np.random.normal(loc=0.0, scale=0.01, size=(1,))

    def fit(self, xs, ys):
        """Train from scratch, yielding the mean training loss of each epoch.

        This is a generator: no training happens until it is iterated, e.g.
        ``losses = list(model.fit(xs, ys))``.

        Parameters
        ----------
        xs : array-like of shape (N, F)
            Training inputs.
        ys : array-like of shape (N,) or (N, 1)
            Regression targets.

        Yields
        ------
        float
            Sample-weighted average of the mini-batch losses for the epoch
            (``0.0`` when there are no samples).

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1.
        """
        xs = np.asarray(xs)
        ys = np.asarray(ys)
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        N, F = xs.shape
        # Ceiling division so the trailing partial batch is trained on too
        # (and so N < batch_size still yields one batch per epoch).
        n_step = -(-N // self.batch_size)
        self._init_params(F)

        for _ in range(self.n_epoch):
            # Shuffle *all* sample indices, not just the first
            # n_step * batch_size of them.
            indices = np.random.permutation(N)
            total_loss = 0.0
            for it in range(n_step):
                s = it * self.batch_size
                batch = indices[s:s + self.batch_size]
                loss = self.partial_fit(xs[batch], ys[batch])
                # Weight by batch size so a short last batch does not skew
                # the epoch average.
                total_loss += loss * len(batch)
            yield total_loss / N if N else 0.0

    def partial_fit(self, x_batch, y_batch):
        """Apply one gradient step on a single mini-batch.

        Parameters are initialised on first use if ``fit`` has not been
        called yet.

        Parameters
        ----------
        x_batch : array-like of shape (B, F)
        y_batch : array-like of shape (B,) or (B, 1)

        Returns
        -------
        float
            Mean half squared error of the batch, measured before the update.
        """
        x_batch = np.asarray(x_batch)
        B, F = x_batch.shape
        if self.W is None or self.b is None:
            self._init_params(F)
        # Force a column vector so (B,) and (B, 1) targets behave the same
        # and never broadcast against the [B, 1] predictions into [B, B, 1].
        y_batch = np.asarray(y_batch).reshape(B, 1)  # [B, 1]
        p_batch = self.forward(x_batch)  # [B, 1]
        residual = p_batch - y_batch  # [B, 1]
        l_batch = 0.5 * np.sum(residual ** 2, axis=1)  # [B]
        loss = l_batch.mean()  # float

        # Gradient of the batch-mean loss w.r.t. the predictions.
        dP = residual / B  # [B, 1]
        dW = x_batch.T @ dP  # [F, 1]
        db = dP.sum(axis=0)  # [1]

        self.W = self.W - self.lr * dW
        self.b = self.b - self.lr * db
        return float(loss)

    def forward(self, x_batch):
        """Return the raw linear outputs ``x_batch @ W + b`` with shape [B, 1]."""
        return np.asarray(x_batch) @ self.W + self.b

    def predict_proba(self, xs):
        """Return the raw linear outputs as a flat array of shape [N].

        Despite the name, the values are unbounded linear activations; no
        squashing function is applied.
        """
        return self.forward(xs).squeeze(axis=1)

    def predict(self, xs):
        """Return 0/1 labels by thresholding the linear output at 0.5."""
        prob = self.predict_proba(xs)
        return np.where(prob < 0.5, 0, 1)

    
# Synthetic, noise-free regression problem: y = 3*x0 + 4*x1, so a correct fit
# should recover W close to [[3], [4]] and b close to 0.
np.random.seed(0)  # fixed seed so repeated runs print the same values
xs = np.random.rand(512, 2)
ys = xs[:, 0] * 3 + xs[:, 1] * 4

clf = AdaLineSGD(lr=1e-1, batch_size=16, n_epoch=50)
# fit() is a generator; consuming it with list() is what actually runs training.
losses = list(clf.fit(xs, ys))
print(losses[-10:])
print()

# Compare targets with the model's raw linear outputs on the first samples.
ps = clf.predict_proba(xs)
print(ys[:10])
print(ps[:10])
print()

print(clf.W)
print(clf.b)

# Sanity check: the learned parameters match the generating coefficients.
assert np.allclose(clf.W.ravel(), [3.0, 4.0], atol=1e-2)
assert np.allclose(clf.b, 0.0, atol=1e-2)

[4.6175667786415375e-07, 3.305484858857643e-07, 2.3691816982199495e-07, 1.686538402088523e-07, 1.194190991984521e-07, 8.574614180393907e-08, 6.127896012904877e-08, 4.3583812656525643e-08, 3.110112272069053e-08, 2.227411162907789e-08]

[3.27377676 4.23510884 3.47594833 4.55908043 5.63942627 4.1167708
 4.6614786  1.81238954 5.34861495 4.17503194]
[3.27374915 4.23499918 3.47589438 4.55900723 5.63921601 4.11678868
 4.66129473 1.81258707 5.34838859 4.17504587]

[[2.99948524]
 [3.99957281]]
[0.00050392]
