In [4]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F

In [57]:
class GoogleLeNet(nn.Module):
    """GoogLeNet-style (Inception) classifier with optional auxiliary heads.

    The stem takes single-channel input (``conv1`` is built with
    ``in_channels=1``) and the final ``fc`` layer maps 1024 pooled features
    to ``num_classes`` scores.

    Args:
        aux_logic: When true, two ``Inception_Aux`` heads are created and, in
            training mode, their outputs are returned next to the main output.
        init_weight: Accepted by the constructor but never read by this
            class, so no custom weight initialisation is performed.
        num_classes: Output size of the main and auxiliary classifiers.
    """

    def __init__(self, aux_logic=True, init_weight=True, num_classes=10):
        super().__init__()

        self.aux_logic = aux_logic

        # Debug trace of the train/eval flag at construction time.
        print('self.training is True' if self.training else 'self.training is False')

        # --- Stem: plain convolutions and pooling ---
        self.conv1 = BasicConv2d(1, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 64, kernel_size=1, stride=1, padding=0)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # --- Stage 3 ---
        self.inception3a = Inception_Block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_Block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # --- Stage 4 ---
        self.inception4a = Inception_Block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_Block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_Block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_Block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_Block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)

        # --- Stage 5 ---
        self.inception5a = Inception_Block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_Block(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary heads read the 512-channel output of inception4a and the
        # 528-channel output of inception4d (see forward()).
        if aux_logic:
            self.aux1 = Inception_Aux(512, num_classes)
            self.aux2 = Inception_Aux(528, num_classes)

        # --- Main classifier head ---
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            ``(main, aux1, aux2)`` when ``aux_logic`` is set and the module is
            in training mode; otherwise only the main output.
        """
        # The auxiliary heads are only evaluated while training.
        use_aux = self.aux_logic and self.training

        # Stem
        x = self.maxpool1(self.conv1(x))
        x = self.conv3(self.conv2(x))
        x = self.maxpool2(x)

        # Stage 3
        x = self.inception3b(self.inception3a(x))
        x = self.maxpool3(x)

        # Stage 4, with the auxiliary taps after 4a and 4d
        x = self.inception4a(x)
        aux1 = self.aux1(x) if use_aux else None

        for stage in (self.inception4b, self.inception4c, self.inception4d):
            x = stage(x)
        aux2 = self.aux2(x) if use_aux else None

        x = self.maxpool4(self.inception4e(x))

        # Stage 5
        x = self.inception5b(self.inception5a(x))

        # Main head: global pooling -> flatten -> dropout -> linear
        x = torch.flatten(self.avgpool(x), 1)
        x = self.fc(self.dropout(x))

        if use_aux:
            return x, aux1, aux2
        return x

In [58]:
class BasicConv2d(nn.Module):
    """A 2-D convolution immediately followed by a ReLU activation.

    Args:
        in_channels: Number of input channels passed to ``nn.Conv2d``.
        out_channels: Number of output channels passed to ``nn.Conv2d``.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        conv_out = self.conv(x)
        return self.relu(conv_out)

In [59]:
class Inception_Block(nn.Module):
    """Four parallel branches whose outputs are concatenated along dim 1.

    Args:
        in_channels: Channels of the incoming feature map.
        out_1x1: Output channels of the 1x1 branch.
        red_3x3: Channels after the 1x1 reduction in the 3x3 branch.
        out_3x3: Output channels of the 3x3 branch.
        red_5x5: Channels after the 1x1 reduction in the 5x5 branch.
        out_5x5: Output channels of the 5x5 branch.
        out_1x1pool: Output channels of the max-pool + 1x1 branch.
    """

    def __init__(self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool):
        super().__init__()

        # Branch 1: single 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, out_1x1, kernel_size=(1, 1))

        # Branch 2: 1x1 reduction, then a padded 3x3 convolution.
        reduce_3x3 = BasicConv2d(in_channels, red_3x3, kernel_size=(1, 1))
        conv_3x3 = BasicConv2d(red_3x3, out_3x3, kernel_size=(3, 3), padding=(1, 1))
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then a padded 5x5 convolution.
        reduce_5x5 = BasicConv2d(in_channels, red_5x5, kernel_size=(1, 1))
        conv_5x5 = BasicConv2d(red_5x5, out_5x5, kernel_size=(5, 5), padding=(2, 2))
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: stride-1 padded 3x3 max-pool, then a 1x1 projection.
        pool_3x3 = nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), ceil_mode=True)
        pool_proj = BasicConv2d(in_channels, out_1x1pool, kernel_size=(1, 1))
        self.branch4 = nn.Sequential(pool_3x3, pool_proj)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)

In [60]:
class Inception_Aux(nn.Module):
    """Auxiliary classifier head: pool -> 1x1 conv -> two linear layers.

    ``fc1`` expects 2048 flattened features; with the 128-channel conv output
    that corresponds to 16 spatial positions after pooling.

    Args:
        in_channels: Channels of the feature map this head is attached to.
        num_classes: Number of output scores.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.7)
        self.pool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return the auxiliary class scores for feature map ``x``."""
        # Spatial reduction followed by the 1x1 channel projection.
        reduced = self.conv(self.pool(x))
        flat = torch.flatten(reduced, 1)

        # Hidden layer with ReLU, dropout, then the output layer.
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

In [61]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [69]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise the single channel.
# NOTE(review): 0.1307 / 0.3081 look like the commonly quoted MNIST statistics;
# confirm they are the intended values for the dataset used in this notebook.
transform_config = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [70]:
# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 256

# FashionMNIST train / test splits, downloaded on first use and preprocessed
# with `transform_config`.
# NOTE(review): the dataset root is a hardcoded absolute path.
train_dataset = datasets.FashionMNIST(
    root='/home/ubuntu/gpu_work',
    train=True,
    transform=transform_config,
    download=True,
)
test_dataset = datasets.FashionMNIST(
    root='/home/ubuntu/gpu_work',
    train=False,
    transform=transform_config,
    download=True,
)


In [71]:
# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
# Evaluation only aggregates loss and accuracy over the whole split, so the
# sample order is irrelevant; keep it fixed so evaluation is deterministic and
# does not draw from the random number generator.
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False
)

In [65]:
# Build the network with its default arguments and move it to `device`.
model = GoogleLeNet()
model = model.to(device)

# Adam over all model parameters, and the classification loss used by train().
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()

self.training is True


In [66]:
def train(model,train_loader,optimizer,epoch,device,criterion=None,aux_weight=0.3,log_interval=50):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to optimise. Its forward pass may return either a single
            tensor of class scores or a tuple/list whose first element is the
            main output and whose remaining elements are auxiliary outputs.
        train_loader: Iterable of ``(data, label)`` batches. Its ``dataset``
            attribute is only read when a progress line is printed.
        optimizer: Optimiser already bound to ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
        device: Device the batches are moved to.
        criterion: Loss callable ``criterion(scores, label)``. When omitted,
            the notebook-level ``loss_fn`` is used, as before.
        aux_weight: Weight applied to the sum of the auxiliary losses.
        log_interval: Print progress every this many batches (positive int).

    Returns:
        None. Progress is reported on stdout.
    """
    if criterion is None:
        # Keep existing call sites working: fall back to the global loss.
        criterion=loss_fn

    model.train()
    samples_seen=0
    for batch_ids,(data,label) in enumerate(train_loader):
        data=data.to(device)
        # The loss expects integer class indices.
        label=label.to(device).long()

        optimizer.zero_grad()
        output=model(data)
        # A model without auxiliary heads returns a bare tensor; do not assume
        # a 3-tuple.
        if isinstance(output,(tuple,list)):
            main_output,aux_outputs=output[0],list(output[1:])
        else:
            main_output,aux_outputs=output,[]

        loss=criterion(main_output,label)
        if aux_outputs:
            loss=loss+aux_weight*sum(criterion(aux,label) for aux in aux_outputs)
        loss.backward()
        optimizer.step()

        # Count the batch that was just processed so the progress line
        # reports the real number of samples seen so far.
        samples_seen+=len(data)

        if (batch_ids+1) % log_interval ==0:
            total_samples=len(train_loader.dataset)
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,samples_seen,total_samples,
                100.*samples_seen/total_samples,loss.item()))

In [67]:
def test(model,test_loader,device):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for data,label in test_loader:
            data,label=data.to(device),label.to(device)
            y_hat=model(data)
            test_loss+=F.nll_loss(y_hat,label,reduction='sum').item()
            _,y_pred=torch.max(y_hat,1)
            correct+=(y_pred==label).sum().item()
            
        test_loss/=len(test_dataset)
        print("\n Test Set: Average loss: {:.0f}, Accuracy:{}/{} ({:.0f}%)".format(
            test_loss,correct,len(test_dataset),100.*correct/len(test_dataset)))
        print("="*50)

In [72]:
if __name__=='__main__':
    seed=42
    EPOCHS=10

    # Apply the seed (it was previously assigned but never used) so that
    # randomness drawn from PyTorch's global generator during the run is
    # repeatable. The model is built in an earlier cell, so its initial
    # weights are not covered by this seed.
    torch.manual_seed(seed)

    # One training pass followed by one evaluation pass per epoch.
    for epoch in range(1,EPOCHS+1):
        train(model,train_loader,optimizer,epoch,device)
        test(model,test_loader,device)


 Test Set: Average loss: -7, Accuracy:7364/10000 (74%)

 Test Set: Average loss: -7, Accuracy:7829/10000 (78%)

 Test Set: Average loss: -8, Accuracy:8164/10000 (82%)

 Test Set: Average loss: -9, Accuracy:8343/10000 (83%)

 Test Set: Average loss: -9, Accuracy:8522/10000 (85%)

 Test Set: Average loss: -9, Accuracy:8605/10000 (86%)

 Test Set: Average loss: -10, Accuracy:8689/10000 (87%)

 Test Set: Average loss: -10, Accuracy:8779/10000 (88%)

 Test Set: Average loss: -10, Accuracy:8831/10000 (88%)

 Test Set: Average loss: -11, Accuracy:8873/10000 (89%)


In [47]:
# Manual memory cleanup cell: run Python's garbage collector, then ask
# PyTorch to release the cached CUDA memory it is not currently using.
import gc
# Deleting the model is left disabled, so the model object itself is kept.
#del model
gc.collect()
torch.cuda.empty_cache()