In [1]:
import torch
from torch import nn,optim

In [2]:
def vgg_block(num_convs,in_channels,out_channles):
    """Assemble a single VGG stage.

    The stage is ``num_convs`` repetitions of (3x3 convolution with padding 1,
    ReLU) followed by one 2x2 max-pool with stride 2.

    Args:
        num_convs: number of conv + ReLU pairs in the stage.
        in_channels: channel count fed to the first convolution.
        out_channles: channel count produced by every convolution (and
            consumed by every convolution after the first).

    Returns:
        An ``nn.Sequential`` holding the layers in order.
    """
    # Input width of each conv: the first sees `in_channels`, the rest see
    # the stage's own output width.
    conv_inputs = ([in_channels] + [out_channles] * num_convs)[:num_convs]

    stages = []
    for src_channels in conv_inputs:
        stages += [
            nn.Conv2d(src_channels, out_channles, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)
        

In [3]:
# One (num_convs, out_channels) pair per VGG block, in network order.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

In [4]:
def vgg(conv_arch,in_channels=1,num_classes=10,input_size=224):
    """Build a VGG-style network: stacked conv blocks plus a 3-layer classifier.

    Args:
        conv_arch: iterable of ``(num_convs, out_channels)`` pairs, one per
            block, passed in order to ``vgg_block``.
        in_channels: channel count of the input image (default 1, the value
            that was previously hard-coded).
        num_classes: width of the final linear layer (default 10, as before).
        input_size: height/width of the square input image (default 224).
            Used only to size the first fully connected layer.

    Returns:
        An ``nn.Sequential`` of the conv blocks, ``nn.Flatten``, and the
        classifier ``Linear(4096) -> ReLU -> Dropout(0.5) -> Linear(4096) ->
        ReLU -> Dropout(0.5) -> Linear(num_classes)``.

    Raises:
        ValueError: if ``input_size`` is too small to survive the pooling of
            every block (the feature map would have zero spatial extent).
    """
    conv_blks=[]
    for num_convs,out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs,in_channels,out_channels))
        in_channels=out_channels

    # Every block ends in a 2x2/stride-2 max-pool and its convs preserve the
    # spatial size, so k blocks shrink each side to input_size // 2**k.
    # With the defaults (5 blocks, 224 px) this is the original 7.
    feature_side=input_size // 2 ** len(conv_blks)
    if feature_side < 1:
        raise ValueError(
            'input_size=%r is too small for %d pooling blocks'
            % (input_size,len(conv_blks))
        )
    # `in_channels` now holds the last block's output width (or the image's
    # channel count when conv_arch is empty), so no loop variable leaks out.
    flat_features=in_channels*feature_side*feature_side

    return nn.Sequential(
        *conv_blks,nn.Flatten(),
        nn.Linear(flat_features,4096),nn.ReLU(),nn.Dropout(0.5),
        nn.Linear(4096,4096),nn.ReLU(),nn.Dropout(0.5),
        nn.Linear(4096,num_classes)
    )
    

In [5]:
# Instantiate the network described by `conv_arch`.
net = vgg(conv_arch)

In [6]:
# Sanity-check the architecture: feed one random 1x224x224 image through the
# network one top-level module at a time and print each module's output shape.
X=torch.randn(size=(1,1,224,224))
# Only shapes are needed, so skip autograd bookkeeping for this forward pass.
with torch.no_grad():
    for blk in net:
        X=blk(X)
        print(blk.__class__.__name__,' output shape :',X.shape)

Sequential  output shape : torch.Size([1, 64, 112, 112])
Sequential  output shape : torch.Size([1, 128, 56, 56])
Sequential  output shape : torch.Size([1, 256, 28, 28])
Sequential  output shape : torch.Size([1, 512, 14, 14])
Sequential  output shape : torch.Size([1, 512, 7, 7])
Flatten  output shape : torch.Size([1, 25088])
Linear  output shape : torch.Size([1, 4096])
ReLU  output shape : torch.Size([1, 4096])
Dropout  output shape : torch.Size([1, 4096])
Linear  output shape : torch.Size([1, 4096])
ReLU  output shape : torch.Size([1, 4096])
Dropout  output shape : torch.Size([1, 4096])
Linear  output shape : torch.Size([1, 10])


In [10]:
def evaluate_acc_gpu(net,data_iter,device=None):
    """Compute classification accuracy of ``net`` over ``data_iter``.

    Args:
        net: the model to evaluate. If it is not an ``nn.Module`` the function
            returns ``None`` without evaluating (unchanged legacy behaviour).
        data_iter: iterable yielding ``(X, y)`` batches of inputs and labels.
        device: device to evaluate on. When ``None``, CUDA is used if
            available, otherwise the CPU. An explicit value is honoured, so
            the model is not silently moved away from the caller's device.

    Returns:
        Fraction of correctly classified samples, as a Python float, computed
        over all samples (not as a mean of per-batch accuracies).

    Raises:
        ValueError: if ``data_iter`` yields no samples.

    Side effects:
        Moves ``net`` to ``device`` and leaves it in eval mode.
    """
    if not isinstance(net,nn.Module):
        return None
    # Only pick a device when the caller did not specify one.
    if device is None:
        device='cuda' if torch.cuda.is_available() else 'cpu'
    net.to(device)
    net.eval()

    num_correct=0
    num_samples=0
    # Evaluation needs no gradients; avoid building the autograd graph.
    with torch.no_grad():
        for X,y in data_iter:
            X,y=X.to(device),y.to(device)
            preds=net(X).argmax(dim=1).reshape(y.shape)
            # Tensor-level sum instead of Python's element-by-element sum().
            num_correct+=(preds==y).sum().item()
            num_samples+=y.shape[0]

    if num_samples==0:
        raise ValueError('data_iter yielded no samples; accuracy is undefined')
    # Sample-weighted accuracy stays correct when the last batch is smaller.
    return num_correct/num_samples
        
    

In [13]:
def train_ch6(net,train_iter,test_iter,num_epochs,lr,device):
    """Train ``net`` with plain SGD and cross-entropy, reporting metrics per epoch.

    Args:
        net: the ``nn.Module`` to train; its Linear/Conv2d weights are
            re-initialised with Xavier-uniform before training.
        train_iter: iterable of ``(X, y)`` training batches, iterated once
            per epoch.
        test_iter: iterable of ``(X, y)`` batches passed to
            ``evaluate_acc_gpu`` after every epoch.
        num_epochs: number of passes over ``train_iter``.
        lr: learning rate for ``optim.SGD``.
        device: device the model and every batch are moved to.

    Raises:
        ValueError: if ``train_iter`` yields no samples in an epoch.

    Prints one line per epoch with the training loss, training accuracy
    (both averaged over samples) and test accuracy. Returns nothing.
    """
    def init_weight(m):
        # Exact type match on purpose: subclasses keep their own initialisation.
        if type(m) in (nn.Linear,nn.Conv2d):
            torch.nn.init.xavier_uniform_(m.weight)
    net.apply(init_weight)
    print('training on : ',device)
    net.to(device)

    optimizer=optim.SGD(net.parameters(),lr=lr)
    loss=nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        loss_total=0.0
        num_correct=0
        num_samples=0
        # Evaluation at the end of the previous epoch may have left eval mode on.
        net.train()
        for X,y in train_iter:
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            batch_size=y.shape[0]
            # The loss is a per-batch mean; weight it by batch size so the
            # epoch figure is a true per-sample mean even with a short last batch.
            loss_total+=l.item()*batch_size
            # Tensor-level sum instead of Python's element-by-element sum().
            num_correct+=(y_hat.argmax(dim=1).reshape(y.shape)==y).sum().item()
            num_samples+=batch_size

        if num_samples==0:
            raise ValueError('train_iter yielded no samples; cannot compute epoch metrics')
        test_acc=evaluate_acc_gpu(net,test_iter,device)
        print('epoch : ',epoch ,' train loss : ',loss_total/num_samples,' train acc : ', num_correct/num_samples,'test acc : ',test_acc)