In [34]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim
from torch.hub import load_state_dict_from_url
import torch.nn.functional as F

In [4]:
vgg_type={
    'A':[32,    'M',64,      'M',128,128,      'M',256,256,         'M',256,256,         'M'],
    'B':[64,    'M',128,     'M',256,256,      'M',512,512,         'M',512,512,         'M'],
    'C':[64,64, 'M',128,128, 'M',256,256,      'M',512,512,         'M',512,512,         'M'],
    'D':[64,64, 'M',128,128, 'M',256,256,      'M',512,512,512,     'M',512,512,512,     'M'],
    'E':[64,64, 'M',128,128, 'M',256,256,256,  'M',512,512,512,512, 'M',512,512,512,512, 'M'],
}

In [18]:
class VGG(nn.Module):
    def __init__(self,features,num_classes=10,init_weight=False):
        super(VGG,self).__init__()
        
        self.features=features
        self.avgpool=nn.AdaptiveAvgPool2d((7,7))
        self.classifier=nn.Sequential(
            nn.Linear(256*7*7,4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096,4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096,num_classes)
        )
        
    def forward(self,x):
        x=self.features(x)
        x=self.avgpool(x)
        x=torch.flatten(x,1)
        x=self.classifier(x)
            
        return x

In [19]:
def vgg11(pretrained=False,progress=True,**kwargs):
    return _vgg('vgg11','A',pretrained,progress,**kwargs)

In [20]:
def _vgg(arch,vgg_type_name,pretrained,progress,**kwargs):
    if pretrained:
        kwargs['init_weight']=False
    model=VGG(make_layers(vgg_type[vgg_type_name]),**kwargs)
    
    if pretrained:
        state_dict=load_state_dict_from_url(model_urls[arch],progress=progress)
        model.load_state_dict(state_dict)
    return model

In [21]:
def make_layers(vgg_arch):
    layers=[]
    in_channels=1
    for i in vgg_arch:
        if i=='M':
            layers+=[nn.MaxPool2d(kernel_size=2,stride=2)]
        else:
            conv2d=nn.Conv2d(in_channels,i,kernel_size=3,padding=1)
            layers+=[conv2d,nn.ReLU(inplace=True)]
            in_channels=i
    return nn.Sequential(*layers)

In [22]:
device=torch.device("cuda" if torch.cuda.is_available else "cpu")
device

device(type='cuda')

In [10]:
transform_config=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

In [12]:
train_dataset=datasets.FashionMNIST('/home/ubuntu/gpu_work',download=True,train=True,transform=transform_config)
test_dataset=datasets.FashionMNIST('/home/ubuntu/gpu_work',download=True,train=False,transform=transform_config)

In [13]:
BATCH_SIZE=256
train_loader=torch.utils.data.DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle=True)
test_loader=torch.utils.data.DataLoader(test_dataset,batch_size=BATCH_SIZE,shuffle=True)

In [23]:
model=vgg11().to(device)
optimizer=optim.Adam(params=model.parameters(),lr=0.0001)
loss_fn=nn.CrossEntropyLoss()

In [24]:
def train(model,train_loader,optimizer,epoch,device):
    model.train
    for batch_ids, (data,label) in enumerate(train_loader):
        label=label.type(torch.LongTensor)
        data,label=data.to(device), label.to(device)
        
        optimizer.zero_grad()
        model_output=model(data)
        loss=loss_fn(model_output,label)
        loss.backward()
        optimizer.step()
        
        if (batch_ids+1)%50 == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,batch_ids*len(data),len(train_loader.dataset),
                100.*batch_ids/len(train_loader),loss.item()))

In [25]:
def test(model,test_loader,device):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for data,label in test_loader:
            data,label=data.to(device),label.to(device)
            y_hat=model(data)
            _,y_pred=torch.max(y_hat,1)
            correct+=(y_pred==label).sum().item()
            test_loss+=F.nll_loss(y_hat,label,reduction='sum').item()
        test_loss/=len(test_dataset)
        print("\n Test Set: Average loss: {:.0f}, Accuracy:{}/{} ({:.0f}%)".format(
            test_loss,correct,len(test_dataset),100.*correct/len(test_dataset)))
        print("="*50)

In [35]:
if __name__=='__main__':
    seed=42
    EPOCHS=10
    
    for epoch in range(1,EPOCHS+1):
        train(model,train_loader,optimizer,epoch,device)
        test(model,test_loader,device)


 Test Set: Average loss: -8, Accuracy:8825/10000 (88%)

 Test Set: Average loss: -11, Accuracy:8966/10000 (90%)

 Test Set: Average loss: -10, Accuracy:9003/10000 (90%)

 Test Set: Average loss: -11, Accuracy:9113/10000 (91%)

 Test Set: Average loss: -13, Accuracy:9205/10000 (92%)

 Test Set: Average loss: -13, Accuracy:9147/10000 (91%)

 Test Set: Average loss: -14, Accuracy:9247/10000 (92%)

 Test Set: Average loss: -17, Accuracy:9256/10000 (93%)

 Test Set: Average loss: -19, Accuracy:9222/10000 (92%)

 Test Set: Average loss: -21, Accuracy:9279/10000 (93%)


In [32]:
import gc
#del model
gc.collect()
torch.cuda.empty_cache()