In [None]:
from datasets import load_dataset
import torchvision.transforms.functional as TF
import torch
from torch import nn,tensor
from miniai.datasets import *
import torch.nn.functional as F
from operator import itemgetter

In [None]:
# x / y hold the column names used to index the dataset's image and label fields
x,y = 'image','label'
name = 'fashion_mnist'
# load_dataset returns a DatasetDict; the repr below shows its 'train' and 'test' splits
dsr = load_dataset(name)
dsr

Found cached dataset fashion_mnist (/Users/arun/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

### Practice: Python iterators and generators

In [None]:
%load_ext memory_profiler

In [None]:
# sum of "n" integers
# store each int in-memory
def firstn(n):
    """Build and return the list [0, 1, 2, ...] of every integer below `n`.

    The whole list is materialised before returning, so every element lives
    in memory at the same time.
    """
    collected = []
    counter = 0
    while True:
        # stop as soon as the counter is no longer below the bound
        if not counter < n:
            return collected
        collected.append(counter)
        counter += 1

%time sum_first_n = sum(firstn(100000000))
%memit sum_first_n = sum(firstn(100000000))

sum_first_n

CPU times: user 9.18 s, sys: 2.4 s, total: 11.6 s
Wall time: 12.5 s
peak memory: 1543.41 MiB, increment: 1252.23 MiB


4999999950000000

In [None]:
# make it lazy by writing an iterator class (an object implementing __iter__ and __next__)
class Firstn:
    """Iterator that hands out 0, 1, 2, ... below `n`, one value per step.

    Only the bound `n` and the running counter `num` are stored, so no list
    of values is ever built.
    """

    def __init__(self, n):
        self.n = n
        self.num = 0

    def __iter__(self):
        # the object is its own iterator, so it can be consumed only once
        return self

    def next(self):
        """Return the current counter and advance it; raise StopIteration at the bound."""
        if not self.num < self.n:
            raise StopIteration()
        value = self.num
        self.num = value + 1
        return value

    def __next__(self):
        # the iterator protocol entry point simply forwards to next()
        return self.next()

In [None]:
%time sum_first_n = sum(Firstn(100000000))
%memit sum_first_n = sum(Firstn(100000000))

sum_first_n

CPU times: user 12.5 s, sys: 41.9 ms, total: 12.6 s
Wall time: 12.6 s
peak memory: 360.19 MiB, increment: 0.00 MiB


4999999950000000

In [None]:
# memory reduction: ratio of the list version's memory figure to the iterator version's
# NOTE(review): 1113.02 and 0.03 do not match the %memit increments displayed above
# (1252.23 MiB and 0.00 MiB) -- presumably copied from an earlier run; confirm or recompute
round(1113.02/0.03)

37101

In [None]:
# defining a class is a lot of boilerplate code
# a generator function is a shortcut for writing the same iterator
def gfirstn(n):
    """Generator function yielding 0, 1, 2, ... for every integer below `n`.

    Values are produced one at a time on demand; nothing is accumulated.
    """
    current = 0
    while True:
        # finishing the generator here is what ends the caller's iteration
        if not current < n:
            return
        yield current
        current += 1

In [None]:
%time sum_first_n = sum(gfirstn(100000000))
%memit sum_first_n = sum(gfirstn(100000000))

sum_first_n

CPU times: user 4.83 s, sys: 18.3 ms, total: 4.85 s
Wall time: 4.86 s
peak memory: 359.00 MiB, increment: 0.01 MiB


4999999950000000

In [None]:
# as sequence using __getitem__
class Sfirstn:
    """Sequence-style view of the integers 0, 1, ..., below `n` via `__getitem__`.

    Python falls back to calling `__getitem__(0)`, `__getitem__(1)`, ... when an
    object has no `__iter__`, stopping at the first `IndexError`. The item is
    therefore computed from the requested index `k` rather than from a hidden
    counter, which makes direct indexing correct (`s[3] == 3`) and lets the
    object be iterated any number of times.
    """

    def __init__(self,n):
        # only the exclusive upper bound is needed; no iteration state is kept
        self.n = n

    def __getitem__(self,k):
        """Return `k` when `0 <= k < n`.

        Raises:
            IndexError: if `k` is outside the valid range; this is the signal
                the legacy sequence-iteration protocol uses to stop.
        """
        if 0 <= k < self.n:
            return k
        raise IndexError(f'index {k} out of range for Sfirstn({self.n})')

In [None]:
%time sum_first_n = sum(Sfirstn(100000000))
%memit sum_first_n = sum(Sfirstn(100000000))

sum_first_n

CPU times: user 9.78 s, sys: 22.8 ms, total: 9.8 s
Wall time: 9.84 s
peak memory: 372.71 MiB, increment: 0.00 MiB


4999999950000000

In [None]:
# callable object with builtin iter function
class Cfirstn:
    """Callable counter: each call returns the next integer below `n`.

    The notebook drives it through the two-argument form `iter(obj, None)`,
    which keeps calling the object until it stops producing values.
    """

    def __init__(self, n):
        self.n = n
        self.num = 0

    def __call__(self):
        # once the bound is reached every further call raises StopIteration
        if not self.num < self.n:
            raise StopIteration()
        value = self.num
        self.num = value + 1
        return value

In [None]:
%time sum_first_n = sum(iter(Cfirstn(100000000), None))
%memit sum_first_n = sum(iter(Cfirstn(100000000), None))

sum_first_n

CPU times: user 10.3 s, sys: 36.2 ms, total: 10.3 s
Wall time: 10.3 s
peak memory: 376.36 MiB, increment: 0.00 MiB


4999999950000000

In [None]:
# nested generator functions
def gthrd():
    """Innermost generator: yields the string 'hey' two times."""
    yield 'hey'
    yield 'hey'
        
def gscnd():
    """Middle generator: relays everything gthrd() yields, a single time."""
    yield from gthrd()

def gfrst():
    """Outer generator: runs gscnd() to exhaustion twice, back to back."""
    yield from gscnd()
    yield from gscnd()

In [None]:
# gfrst delegates to gscnd twice; each gscnd delegates to gthrd once, and
# gthrd yields 'hey' twice, so the loop prints four lines
t = gfrst()
for i in t:
    print(i)

hey
hey
hey
hey


## Create DataLoaders

In [None]:
@inplace
def transformi(b):
    """Replace every entry of the batch's image column with a flattened tensor.

    `x` is the module-level column name ('image'). Each entry is converted
    with `TF.to_tensor` and then flattened to one dimension. Nothing is
    returned here; the `inplace` decorator comes from miniai (not shown) and
    presumably hands the mutated batch back to the caller -- confirm.
    """
    flat_images = []
    for img in b[x]:
        flat_images.append(torch.flatten(TF.to_tensor(img)))
    b[x] = flat_images

# register transformi as the dataset's transform
# (presumably applied on item access by the datasets library -- confirm)
dsrt = dsr.with_transform(transformi)

# batch size handed to the DataLoaders
bs = 50
dls = DataLoaders.from_dd(dsrt, batch_size=bs)

# pull one training batch to inspect the input / target shapes
xb,yb = next(iter(dls.train))
xb.shape,yb.shape

## Basic Learner 

In [None]:
from torch import optim
import fastcore.all as fc

In [None]:
class Learner:
    """Minimal training loop around a model, a set of dataloaders and a loss.

    Args:
        model: the `nn.Module` to optimise.
        dls: object exposing `.train` (and `.valid`, when validation is used)
            iterables that yield `(inputs, targets)` batches.
        lr: learning rate passed to the optimiser.
        loss_func: callable `loss_func(preds, targets)` returning a scalar loss.
        opt_func: optimiser constructor, called as `opt_func(params, lr=lr)`.
    """
    def __init__(self, model, dls, lr, loss_func, opt_func=optim.SGD):
        # saves every constructor argument as an attribute of the same name
        fc.store_attr()

    def calc_stats(self):
        """Record correct-prediction count, summed loss and size of the current batch."""
        n = len(self.xb)
        # argmax over the class dimension gives one predicted label per sample;
        # without `dim` it would collapse the whole batch to a single flat index
        self.accs.append((self.preds.argmax(dim=1)==self.yb).float().sum())
        # detach so the stored value carries no autograd history
        self.losses.append(self.loss.detach()*n)
        self.ns.append(n)

    def one_batch(self):
        """Run the forward pass on `self.batch`, and the optimiser step when training."""
        self.xb,self.yb = self.batch
        self.preds = self.model(self.xb)
        self.loss = self.loss_func(self.preds, self.yb)
        if self.model.training:
            self.loss.backward()
            self.opt.step()
            self.opt.zero_grad()
        self.calc_stats()

    def one_epoch(self, train):
        """Run one pass over the training (`train=True`) or validation data and print averages."""
        # train() propagates the mode to every sub-module; assigning `.training`
        # on the container alone would leave the children unchanged
        self.model.train(train)
        self.dl = self.dls.train if train else self.dls.valid
        # start from empty stats so the printed figures describe this pass only
        self.ns,self.accs,self.losses = [],[],[]
        # no autograd graph is needed when evaluating
        with torch.set_grad_enabled(train):
            for self.batch in self.dl:
                self.one_batch()
        ns = sum(self.ns) or 1
        # float() also copes with the plain int 0 that sum() returns for an empty loader
        avg_acc = float(sum(self.accs))/ns
        avg_loss = float(sum(self.losses))/ns
        print(f'train:{train}, acc:{avg_acc:.3}, loss:{avg_loss:.3}')

    def fit(self, n_epochs, valid=False):
        """Train for `n_epochs` epochs.

        Args:
            n_epochs: number of passes over `dls.train`.
            valid: when True, also run a pass over `dls.valid` after each
                training epoch. Defaults to False (training only).
        """
        self.n_epochs = n_epochs
        self.opt = self.opt_func(self.model.parameters(), lr=self.lr)
        for self.epoch in range(self.n_epochs):
            self.one_epoch(True)
            if valid:
                self.one_epoch(False)

In [None]:
# n: size of a flattened 28*28 input, nh: hidden units, nout: number of output scores
n,nh,nout = 28*28,50,10
# two linear layers with a ReLU in between
model = nn.Sequential(nn.Linear(n,nh), nn.ReLU(), nn.Linear(nh, nout))

In [None]:
model

Sequential(
  (0): Linear(in_features=784, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=10, bias=True)
)

In [None]:
learner = Learner(model, dls, lr=0.1, loss_func=F.cross_entropy)

In [None]:
learner.fit(5)

In [None]:
t = tensor([[1, 2, 3.]])

In [None]:
m = nn.Sequential(nn.Linear(3, 3), nn.ReLU(), nn.Linear(3, 2))

In [None]:
t.shape

torch.Size([1, 3])

In [None]:
m(t)

tensor([[ 0.3255, -0.7221]], grad_fn=<AddmmBackward0>)

In [None]:
xb[:1].shape

torch.Size([1, 784])

In [None]:
# scratch check: the first Linear layer is built with in_features=g, so a
# random (1, g) input passes through the small network
g = 150
m = nn.Sequential(nn.Linear(g, 3), nn.ReLU(), nn.Linear(3, 2))
k = torch.randn(1, g)
m(k)