In [1]:
pip install fastbook

Collecting fastbook
  Downloading fastbook-0.0.29-py3-none-any.whl.metadata (13 kB)
Collecting graphviz (from fastbook)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting transformers (from fastbook)
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting datasets (from fastbook)
  Downloading datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting ipywidgets<8 (from fastbook)
  Downloading ipywidgets-7.8.5-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting sentencepiece (from fastbook)
  Downloading sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (10 kB)
Collecting ipython-genutils~=0.2.0 (from ipywidgets<8->fastbook)
  Downloading ipython_genutils-0.2.0-py2.py3-none-any.whl.metadata (755 bytes)
Collecting widgetsnbextension~=3.6.10 (from ipywidgets<8->fastbook)
  Downloading widgetsnbextension-3.6.10-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting jupyterlab-widgets<3,>=1.0.0 (from ipywidgets<8->fastbook)
  Downloading 

In [1]:
from fastai.vision.all import *
from fastbook import *


In [2]:
path = untar_data(URLs.MNIST)

In [3]:
Path.BASE_PATH = path

In [4]:
path.ls()

[Path('training'), Path('testing')]

In [5]:
(path/'testing').ls()

# store all the digit files into a dict
digits = [ '0','1','2','3','4','5','6','7','8','9']
files_by_digit = {}
for d in digits:
    files_by_digit[d] = (path/'training'/d).ls().sorted()

valid_by_digit = {}
for d in digits:
    valid_by_digit[d] = (path/'testing'/d).ls().sorted()


stacked_img_tensors = []

for d, files in files_by_digit.items():
    img_tensor = [tensor(Image.open(o)) for o in files]
    stacked_img_tensors.append(torch.stack(img_tensor).float()/255)
    print(d, " tensor stack completed")

valid_stacked_img_tensors = []

for d, files in valid_by_digit.items():
    img_tensor = [tensor(Image.open(o)) for o in files]
    valid_stacked_img_tensors.append(torch.stack(img_tensor).float()/255)
    print(d, " tensor stack completed")


#calculates the mean absolute error
def mnist_distance(a,b): return (a-b).abs().mean((-1,-2))


0  tensor stack completed
1  tensor stack completed
2  tensor stack completed
3  tensor stack completed
4  tensor stack completed
5  tensor stack completed
6  tensor stack completed
7  tensor stack completed
8  tensor stack completed
9  tensor stack completed
0  tensor stack completed
1  tensor stack completed
2  tensor stack completed
3  tensor stack completed
4  tensor stack completed
5  tensor stack completed
6  tensor stack completed
7  tensor stack completed
8  tensor stack completed
9  tensor stack completed


In [6]:

train_x = torch.cat(stacked_img_tensors).view(-1, 28*28)
train_y = tensor(
    sum(([int(d)] * len(files_by_digit[d]) for d in digits), [])
).unsqueeze(1)
train_x.shape,train_y.shape

(torch.Size([60000, 784]), torch.Size([60000, 1]))

In [7]:
dset = list(zip(train_x,train_y))
x,y = dset[0]
x.shape,y.shape

(torch.Size([784]), torch.Size([1]))

In [8]:
valid_x = torch.cat(valid_stacked_img_tensors).view(-1, 28*28)
valid_y = tensor(
    sum(([int(d)] * len(valid_by_digit[d]) for d in digits), [])
).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y))

In [9]:
#initalize random weights per pixel function
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()
weights = init_params((28*28,1))
bias = init_params(1)

In [10]:
def linear1(xb): return xb@weights + bias
preds = linear1(train_x)
preds

tensor([[ -9.2751],
        [-23.0076],
        [-15.8010],
        ...,
        [ -8.8975],
        [-10.6736],
        [ -6.4075]], grad_fn=<AddBackward0>)

In [11]:
corrects = (preds>0.0).float() == train_y
corrects

tensor([[ True],
        [ True],
        [ True],
        ...,
        [False],
        [False],
        [False]])

In [13]:
corrects.float().mean().item()

0.09515000134706497

In [14]:
def sigmoid(x): return 1/(1+torch.exp(-x))

In [15]:
def mnist_loss(predictions, targets):
    predictions = predictions.sigmoid()
    return torch.where(targets==1, 1-predictions, predictions).mean()

In [16]:
dl = DataLoader(dset, batch_size=256)
xb,yb = first(dl)
valid_dl = DataLoader(valid_dset, batch_size=256)

In [17]:
batch = train_x[:4]
batch.shape

torch.Size([4, 784])

In [18]:
def train_epoch(model, lr, params):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()

In [19]:
def batch_accuracy(xb, yb):
    preds = xb.sigmoid()
    correct = (preds>0.5) == yb
    return correct.float().mean()

In [20]:
def validate_epoch(model):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in valid_dl]
    return round(torch.stack(accs).mean().item(), 4)

In [21]:
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [33]:
lr = 2.
params = weights,bias
validate_epoch(linear1)

TypeError: train_epoch() takes 1 positional argument but 3 were given

In [24]:
for i in range(50):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

0.1748 0.1795 0.1818 0.1838 0.1848 0.1857 0.1866 0.188 0.1885 0.1889 0.1896 0.1898 0.1907 0.191 0.1914 0.1915 0.1921 0.1924 0.1926 0.1934 0.1937 0.1937 0.1937 0.1939 0.194 0.194 0.1941 0.1944 0.1946 0.1951 0.1954 0.1954 0.1955 0.1956 0.1957 0.1958 0.1958 0.1958 0.1962 0.1962 0.1963 0.1964 0.1966 0.1968 0.197 0.1971 0.1971 0.1971 0.1973 0.1974 

In [25]:
linear_model = nn.Linear(28*28,1)
w,b = linear_model.parameters()
w.shape,b.shape

(torch.Size([1, 784]), torch.Size([1]))

In [49]:
class BasicOptim:
    def __init__(self,params,lr): self.params,self.lr = list(params),lr

    def step(self, *args, **kwargs):
        for p in self.params: p.data -= p.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        for p in self.params: p.grad = None

            
opt = BasicOptim(linear_model.parameters(), 1)

In [50]:
def train_epoch(model):
    for xb,yb in dl:
        calc_grad(xb, yb, model)
        opt.step()
        opt.zero_grad()

In [51]:
validate_epoch(linear_model)

0.203

In [52]:
def train_model(model, epochs):
    for i in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end=' ')

In [53]:
train_model(linear_model, 50)

0.2008 0.2004 0.2004 0.2004 0.2005 0.2005 0.2005 0.2005 0.2006 0.2006 0.2006 0.2006 0.2006 0.2006 0.2006 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2007 0.2008 0.2008 0.2008 0.2008 0.2009 0.2009 0.2009 0.2009 0.2009 0.2009 