In [18]:
import torch
import torchvision
from torchvision import transforms as T
import torch.nn.functional as F
import torch.nn as nn


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [19]:
class AlexNet(torch.nn.Module):
    def __init__(self, num_classes = 10):
        super(AlexNet, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride =4, padding = 0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size  = 3, stride = 2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride = 1, padding = 2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(384),
            nn.ReLU()
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(384),
            nn.ReLU()
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256, 4096),
            nn.ReLU())
        
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
    


In [20]:

def get_loss_function():
    loss_function = torch.nn.CrossEntropyLoss()
    return loss_function


In [21]:

def get_optimizer(net, lr, wd, momentum):
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, weight_decay=wd, momentum=momentum)
    return optimizer



In [22]:

def train(net,data_loader,optimizer,loss_function, device='cuda:0'):
    
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
    net.train() # Strictly needed if network contains layers which has different behaviours between train and test
    
    for batch_idx, (inputs, targets) in enumerate(data_loader): 
        # Load data into GPU
        inputs = inputs.to(device)
        targets = targets.to(device) 
        # Forward pass
        outputs = net.forward(inputs) 
        # Apply the loss
        loss = loss_function(outputs,targets) # Reset the optimizer
        # Backward pass
        loss.backward()
        # Update parameters
        optimizer.step()
        optimizer.zero_grad()
        samples+=inputs.shape[0]
        cumulative_loss += loss.item()
        _, predicted = outputs.max(1)
        cumulative_accuracy += predicted.eq(targets).sum().item()
    
    return cumulative_loss/samples, cumulative_accuracy/samples*100



In [23]:
def test(net, data_loader, cost_function, device='cuda:0'):
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.
    net.eval() # Strictly needed if network contains layers which has different behaviours between train and test
    
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            # Load data into GPU inputs = inputs.to(device)
            targets = targets.to(device)
             # Forward pass
            outputs = net.forward(inputs)
            _, predicted = outputs.max(1)
            cumulative_accuracy += predicted.eq(targets).sum().item()
    
    return cumulative_loss/samples, cumulative_accuracy/samples*100


In [24]:
def get_data(batch_size, test_batch_size=256):
    # Prepare data transformations and then combine them sequentially
    transform = list() 
    transform.append(T.ToTensor())
    # converts Numpy to Pytorch Tensor
    transform.append(T.Normalize(mean=[0.5], std=[0.5]))
    # Normalizes the Tensors between [-1, 1]
    transform = T.Compose(transform)
    # Composes the above transformations into one.
    # Load data 
    full_training_data = torchvision.datasets.CIFAR10('./data', train=True, transform=transform, download=True)
    test_data = torchvision.datasets.CIFAR10('./data', train=False, transform=transform, download=True)
    # Create train and validation splits
    num_samples = len(full_training_data)
    training_samples = int(num_samples*0.5+1)
    validation_samples = num_samples - training_samples
    training_data, validation_data = torch.utils.data.random_split(full_training_data, [training_samples,validation_samples])
    # Initialize dataloaders 
    train_loader = torch.utils.data.DataLoader(training_data, batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(validation_data, test_batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_data, test_batch_size, shuffle=False)
    return train_loader, val_loader, test_loader


In [25]:
def main(batch_size=128, device='cuda:0', learning_rate=0.01, weight_decay=0.000001, momentum=0.9, epochs=50):
    train_loader, val_loader, test_loader = get_data(batch_size)
    # TODO for defining AlexNet
    net = AlexNet()
    optimizer = get_optimizer(net, learning_rate, weight_decay, momentum)
    loss_function = get_loss_function() 

    for e in range(epochs):
        train_loss, train_accuracy = train(net, train_loader, optimizer, loss_function)
        val_loss, val_accuracy = test(net, val_loader, loss_function)
        print('Epoch: {:d}'.format(e+1))
        print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss,train_accuracy))
        print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
        print('-----------------------------------------------------')
        print('After training:')
        train_loss, train_accuracy = test(net, train_loader, loss_function)
        val_loss, val_accuracy = test(net, val_loader, loss_function)
        test_loss, test_accuracy = test(net, test_loader, loss_function)
        print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_accuracy))
        print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss, val_accuracy))
        print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
        print('-----------------------------------------------------')


In [27]:
main()

Files already downloaded and verified
Files already downloaded and verified


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same