In [138]:
import torch, numpy as np, pandas as pd
import torchvision.datasets as ds

In [139]:
ds = ds.MNIST(root= './data',download=True)

In [140]:
x = ds.data.reshape(ds.data.shape[0],ds.data.shape[1]*ds.data.shape[2])

In [141]:
x = x/255

In [142]:
y = ds.targets.reshape(ds.targets.shape[0],1)

In [143]:
x.shape

torch.Size([60000, 784])

In [157]:
def params_init(n_units):
    l = len(n_units)
    
    weights = [ (torch.rand( n_units[i], n_units[i+1] ) -0.3) /n_units[i+1]*4  for  i in range(l-1)]
    baises =  [ (torch.rand(n_units[i+1])-0.5  ) *0.1   for  i in range(l-1)]
    for p in weights+baises: p.requires_grad_()
    return weights, baises   
    

In [146]:
def predict(x,w,b):
    l = len(w)
    res = x
    for i in range(l):
        res = (res @ w[i] ) +b[i]
        if i != (l-1):
            res = torch.clip(res,0.)
    print(res)
    return torch.sigmoid(res)
        

In [149]:
def cost(y,pred):
    m = y.shape[0]
    cost = torch.sum(  (y*torch.log(pred)) + ((1-y)*torch.log(1-pred))  )*(-1/m)
    return cost

In [159]:
def update_params(w,b,lr):
    for p in w+b:
        p.sub_(p.grad*lr)
        p.grad.zero_()

In [171]:
def loss(x, w,b, y): 
    return torch.abs(predict(x, w,b)-y).mean()


In [178]:
def one_epoch(w,b, lr):
    cost = loss(x, w,b, y)
    cost.backward()
    with torch.no_grad(): update_params(w,b, lr)
    print(f"{cost:.3f}", end="; ")

In [174]:
def train_model(epochs=30, lr=0.01):
    torch.manual_seed(442)
    w,b = params_init([x.shape[1],64,32,16,1])
    for i in range(epochs): one_epoch(w,b, lr=lr)
    return w,b

In [180]:
w,b = train_model(400,1e-3)

tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 

3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
        [31.1483],
        ...,
        [35.6147],
        [32.7582],
        [33.5121]], grad_fn=<AddBackward0>)
3.651; tensor([[43.7742],
        [48.5430],
 