In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BaselineNet(nn.Module):
    """Plain five-layer CNN baseline for image classification.

    Every convolution is 3x3 with padding 1 and is followed by ReLU and a 2x2
    max-pool, so the spatial size is divided by 32 overall. The classifier head
    expects a 128 x 14 x 14 feature map, which means inputs must have shape
    ``(N, 3, 448, 448)``.

    Args:
        num_classes (int, optional): Number of output logits. Defaults to 200.
    """

    # Number of features entering the fully connected layer (channels * height * width).
    _FLAT_FEATURES = 128 * 14 * 14

    def __init__(self, num_classes=200):
        super().__init__()

        # Convolutional feature extractor: 3 -> 64 -> 128 -> 256 -> 256 -> 128 channels
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 128, kernel_size=3, padding=1)

        # Shared 2x2 max pooling applied after every convolution
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: hidden layer, dropout, output logits
        self.fc = nn.Linear(self._FLAT_FEATURES, 512)
        self.dropout = nn.Dropout(0.5)
        self.out = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for a batch ``x``.

        Raises:
            RuntimeError: If the flattened feature size does not match the
                fully connected layer (i.e. the input is not 448x448).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))

        # Flatten everything except the batch dimension. Unlike view(-1, F), this
        # cannot silently merge several samples into one row when the spatial
        # size is wrong; the mismatch surfaces as an error in self.fc instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc(x))
        x = self.dropout(x)
        x = self.out(x)
        return x

In [8]:
import os

import pandas as pd
from torchvision.datasets import VisionDataset
from torchvision.datasets.folder import default_loader
from torchvision.datasets.utils import download_file_from_google_drive
import torchvision.transforms as transforms
from skimage import io
import numpy as np
from PIL import Image


# Training-time preprocessing: resize to 550x550, take a random 448x448 crop
# (after padding each side by 8 pixels), flip horizontally at random, then
# convert to a tensor and normalize each channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(550, 550)),
        transforms.RandomCrop(size=448, padding=8),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

class Cub2011(VisionDataset):
    """`CUB-200-2011 <http://www.vision.caltech.edu/visipedia/CUB-200-2011.html>`_ Dataset.

    Indexing the dataset returns a dict ``{"image": img, "target": target}`` where
    ``img`` is an RGB PIL image (or whatever ``transform`` returns for it) and
    ``target`` is the zero-based class index.

        Args:
            root (string): Root directory of the dataset.
            train (bool, optional): If True, creates dataset from training set, otherwise
               creates from test set.
            transform (callable, optional): A function/transform that takes in a PIL image
               and returns a transformed version. E.g, ``transforms.RandomCrop``
            target_transform (callable, optional): A function/transform that takes in the
               target and transforms it.
            download (bool, optional): If true, downloads the dataset from the internet and
               puts it in root directory. If dataset is already downloaded, it is not
               downloaded again.

        Raises:
            RuntimeError: If the metadata cannot be loaded or an image listed in it is
               missing on disk.
    """
    # Image directory, relative to ``root``.
    base_folder = 'CUB_200_2011/images'
    # url = 'http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz'
    # Google Drive id, archive name and expected MD5 used by ``_download``.
    file_id = '1hbzc_P1FuxMkcabkgn9ZKinBwW683j45'
    filename = 'CUB_200_2011.tgz'
    tgz_md5 = '97eceeb196236b17998738112f37df78'

    def __init__(self, root, train=True, transform=None, target_transform=None, download=False):
        super(Cub2011, self).__init__(root, transform=transform, target_transform=target_transform)

        self.train = train
        if download:
            self._download()

        # Also populates self.data / self.class_names as a side effect.
        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted. You can use download=True to download it')

    def _load_metadata(self):
        """Read the dataset's text files into ``self.data`` and ``self.class_names``.

        ``self.data`` holds one row per image (``img_id``, ``filepath``, ``target``,
        ``is_training_img``), restricted to the split selected by ``self.train``.
        """
        images = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'images.txt'), sep=' ',
                             names=['img_id', 'filepath'])
        image_class_labels = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'image_class_labels.txt'),
                                         sep=' ', names=['img_id', 'target'])
        train_test_split = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'train_test_split.txt'),
                                       sep=' ', names=['img_id', 'is_training_img'])

        data = images.merge(image_class_labels, on='img_id')
        self.data = data.merge(train_test_split, on='img_id')

        # Only the second column (the name) of classes.txt is kept.
        class_names = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'classes.txt'),
                                  sep=' ', names=['class_name'], usecols=[1])
        self.class_names = class_names['class_name'].to_list()
        if self.train:
            self.data = self.data[self.data.is_training_img == 1]
        else:
            self.data = self.data[self.data.is_training_img == 0]

    def _check_integrity(self):
        """Return True if the metadata loads and every listed image file exists."""
        try:
            self._load_metadata()
        except Exception:
            # Best effort: any failure to read the metadata is reported as
            # "not found or corrupted" by the caller.
            return False

        # Iterating the column directly avoids the per-row Series that
        # DataFrame.iterrows() would build.
        for rel_path in self.data['filepath']:
            filepath = os.path.join(self.root, self.base_folder, rel_path)
            if not os.path.isfile(filepath):
                # Report the first missing file to help diagnose the failure.
                print(filepath)
                return False
        return True

    def _download(self):
        """Download and unpack the archive into ``root`` unless it is already present."""
        import tarfile

        if self._check_integrity():
            print('Files already downloaded and verified')
            return

        download_file_from_google_drive(self.file_id, self.root, self.filename, self.tgz_md5)

        # NOTE(review): extractall trusts member paths inside the archive; the
        # expected MD5 is passed to the download helper above, so this relies on
        # that check.
        with tarfile.open(os.path.join(self.root, self.filename), "r:gz") as tar:
            tar.extractall(path=self.root)

    def __len__(self):
        """Number of images in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the ``idx``-th sample as ``{"image": ..., "target": ...}``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = self.data.iloc[idx]
        path = os.path.join(self.root, self.base_folder, sample.filepath)
        target = sample.target - 1  # Targets start at 1 by default, so shift to 0

        # Force three channels so that any non-RGB file (grayscale, palette, ...)
        # still matches the 3-channel normalization and model input. The context
        # manager closes the underlying file once the pixel data has been copied.
        with Image.open(path) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return {"image":img, "target":target}


# Deterministic evaluation pipeline: same resize and normalization as the
# training transform, but a center crop and no flipping. Without a transform
# the test set would yield raw PIL images, which cannot be batched by the
# default DataLoader collate function nor fed to the model.
test_transform = transforms.Compose([
        transforms.Resize((550, 550)),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

train_dataset = Cub2011('./cub2011', train=True, download=False, transform=transform)
test_dataset = Cub2011('./cub2011', train=False, download=False, transform=test_transform)

In [34]:
from torch.utils.data import Dataset, DataLoader
# Batch iterators over the two splits; only the training split is shuffled.
trainloader = DataLoader(
    dataset=train_dataset,
    batch_size=200,
    shuffle=True,
    num_workers=0,
)
testloader = DataLoader(
    dataset=test_dataset,
    batch_size=200,
    shuffle=False,
    num_workers=0,
)

In [None]:
from tqdm import tqdm

def _run_epoch(model, loader, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` a gradient step is taken after every batch; without
    one the pass only evaluates (the caller sets ``model.eval()`` and
    ``torch.no_grad()``). Both returned values are averaged per sample.
    """
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for batch in tqdm(loader):
        images = batch['image'].to(device)
        targets = batch['target'].to(device)

        if optimizer is not None:
            optimizer.zero_grad()
        logits = model(images)
        loss = F.cross_entropy(logits, targets)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # Accumulate plain Python numbers only, so no autograd graph or batch
        # of activations is kept alive across iterations.
        batch_size = targets.size(0)
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(dim=-1) == targets).sum().item()
        n_seen += batch_size

    n_seen = max(n_seen, 1)  # an empty loader yields (0.0, 0.0) instead of dividing by zero
    return loss_sum / n_seen, n_correct / n_seen


def train(model, trainloader, testloader, epochs, lr):
    """Train ``model`` with Adam and evaluate it on ``testloader`` after every epoch.

    Both loaders must yield dicts with an ``'image'`` tensor batch and a
    ``'target'`` tensor of class indices. Batches are moved to the device that
    holds the model's parameters.

    Args:
        model: Network mapping an image batch to class logits.
        trainloader: Iterable of training batches.
        testloader: Iterable of evaluation batches.
        epochs (int): Number of passes over ``trainloader``.
        lr (float): Adam learning rate.

    Returns:
        dict: Per-epoch lists under the keys ``'train_acc'``, ``'test_acc'``,
        ``'train_loss'`` and ``'test_loss'``. Accuracies are fractions in
        ``[0, 1]``; losses are per-sample mean cross-entropy values.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    device = next(model.parameters()).device

    history = {'train_acc': [], 'test_acc': [], 'train_loss': [], 'test_loss': []}

    for epoch in range(epochs):
        model.train()
        train_loss, train_acc = _run_epoch(model, trainloader, device, optimizer)

        model.eval()
        with torch.no_grad():
            test_loss, test_acc = _run_epoch(model, testloader, device)

        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)

        # "Loss" is the training loss of this epoch.
        print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.
              format(epoch, train_loss, train_acc, test_acc), end="\r")

    return history

# Build the baseline network and train it for 100 epochs at learning rate 0.01.
model = BaselineNet()
train(
    model,
    trainloader,
    testloader,
    epochs=100,
    lr=0.01,
)

  0%|          | 0/30 [00:00<?, ?it/s]