In [18]:
class Base:
    """
    Generic training harness for an image classifier.

    Owns the model, loss, optimizer and LR scheduler, runs the
    train/validate loop, keeps per-epoch loss/accuracy histories, saves the
    best checkpoints and exports the learning curves as PNG files.

    Subclasses must override ``load_data`` so that it populates
    ``self.train_loader``, ``self.test_loader`` and ``self.class_names``.
    """

    def __init__(
        self,
        root_path: str = ".",
        project_path: str = "vis_lang",
        variant_name: str = "base",
        epochs: int = 200,
    ):
        """
        :param root_path: Directory under which all outputs are written
        :param project_path: Sub-directory of ``root_path`` for this project
        :param variant_name: Sub-directory of the project for this experiment
        :param epochs: Number of epochs run by ``train_model``
        """
        self.root_path = root_path
        self.project_path = project_path
        self.variant_name = variant_name
        self.epochs = epochs

        variant_dir = os.path.join(
            self.root_path, self.project_path, self.variant_name
        )
        self.checkpoints_path = os.path.join(variant_dir, "models")
        self.plots_path = os.path.join(variant_dir, "plots")
        # The directories themselves (not only their parents) must exist
        # before checkpoints and plots are written into them.
        os.makedirs(self.checkpoints_path, exist_ok=True)
        os.makedirs(self.plots_path, exist_ok=True)

        self.class_names = None
        self.train_loader = None
        self.test_loader = None
        self.model = None
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        self.train_losses = []
        self.train_accuracy = []
        self.test_losses = []
        self.test_accuracy = []
        self.best_accuracy = 0
        self.min_loss = np.inf

    def load_data(self):
        """
        Populate the data loaders. Must be overridden by subclasses.
        :return: None
        """
        raise NotImplementedError

    def set_up_model_architecture(self, num_features_in_last_layer: int):
        """
        Build a ResNet-18 whose final layer has the requested output width
        and move it to the training device.
        :param num_features_in_last_layer: Output size of the final layer
        :return: None
        """
        model = models.resnet18()
        # A torchvision-style ResNet exposes its classifier as `fc`;
        # assigning the new layer to any other attribute would only register
        # an unused module and leave the original output width in place.
        # Fall back to `linear` for model definitions that use that name.
        head_attr = "fc" if hasattr(model, "fc") else "linear"
        setattr(
            model,
            head_attr,
            nn.Linear(in_features=512, out_features=num_features_in_last_layer),
        )
        self.model = model
        # Same device the batches are sent to during training/validation.
        self.model.to(get_device())

    def init_model_helpers(self, criterion):
        """
        Create the loss, the SGD optimizer and the multi-step LR scheduler.
        Requires ``self.model`` to be set.
        :param criterion: Loss class (not an instance); called with no args
        :return: None
        """
        self.criterion = criterion()
        self.optimizer = optim.SGD(
            self.model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4
        )
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[150, 250, 350], gamma=0.1
        )
        self.criterion.to(get_device())

    def train_single_epoch(self, epoch_idx):
        """
        Run one optimisation pass over ``self.train_loader`` and append the
        epoch's mean batch loss and accuracy (in percent) to
        ``self.train_losses`` and ``self.train_accuracy``.
        :param epoch_idx: Index of the epoch
        :return: None
        """
        train_loss = 0.0
        total = 0
        correct = 0
        device = get_device()  # constant for the whole epoch

        start_time = time.time()

        self.model.train()
        for inputs, labels in self.train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            total += labels.size(0)
            correct += self.num_correct_preds(outputs, labels)

        # Loss is averaged over batches, accuracy over samples.
        epoch_loss = train_loss / len(self.train_loader)
        epoch_accuracy = correct * 100 / total

        elapsed = time.time() - start_time
        print(
            f"Epoch {epoch_idx} || Loss: {epoch_loss:7.3f} || Accuracy: {epoch_accuracy:6.2f}% || Time: {elapsed:6.2f}"
        )

        self.train_losses.append(epoch_loss)
        self.train_accuracy.append(epoch_accuracy)

    def num_correct_preds(self, outputs, labels):
        """
        Count the predictions that match the labels, taking the arg-max over
        dimension 1 of ``outputs`` as the predicted class.
        :param outputs: Model outputs for one batch
        :param labels: Ground-truth class indices for the same batch
        :return: Number of correct predictions as a Python int
        """
        _, predicted = outputs.max(1)
        return predicted.eq(labels).sum().item()

    def validate_single_epoch(self, epoch_idx):
        """
        Evaluate the model on ``self.test_loader`` without gradients, append
        the epoch's mean batch loss and accuracy (in percent) to
        ``self.test_losses`` and ``self.test_accuracy``, and save a
        checkpoint whenever the accuracy or the loss improves on the best
        value seen so far.
        :param epoch_idx: Index of the epoch
        :return: None
        """
        test_loss = 0.0
        total = 0
        correct = 0
        device = get_device()

        self.model.eval()
        with torch.no_grad():
            for inputs, labels in self.test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = self.model(inputs)
                test_loss += self.criterion(outputs, labels).item()

                total += labels.size(0)
                correct += self.num_correct_preds(outputs, labels)

        epoch_loss = test_loss / len(self.test_loader)
        epoch_accuracy = correct * 100 / total

        state = {
            "net": self.model.state_dict(),
            "acc": epoch_accuracy,
            "epoch": epoch_idx,
            "loss": epoch_loss,
        }
        if self.best_accuracy < epoch_accuracy:
            self.best_accuracy = epoch_accuracy
            self._save_checkpoint(state, "cifar10_base_best_acc.pth")

        if self.min_loss > epoch_loss:
            self.min_loss = epoch_loss
            self._save_checkpoint(state, "cifar10_base_best_loss.pth")

        self.test_losses.append(epoch_loss)
        self.test_accuracy.append(epoch_accuracy)

    def _save_checkpoint(self, state, file_name):
        """Announce and write ``state`` to ``file_name`` in the checkpoint dir."""
        print(
            f"Saving model with acc: {state['acc']:7.3f}, loss: {state['loss']:6.2f}, epoch: {state['epoch']}"
        )
        torch.save(state, os.path.join(self.checkpoints_path, file_name))

    def train_model(self):
        """
        Run the full schedule: for each epoch train, validate and step the
        LR scheduler; afterwards print the metric histories and export the
        plots.
        :return: None
        """
        print("Started Training")

        for epoch in range(self.epochs):
            self.train_single_epoch(epoch)
            self.validate_single_epoch(epoch)
            self.scheduler.step()

        print("Finished Training")

        print("Training Loss: ", self.train_losses)
        print("Training Accuracy: ", self.train_accuracy)
        print("Test Loss: ", self.test_losses)
        print("Test Accuracy: ", self.test_accuracy)

        self.export_plots()

    def export_plots(self):
        """
        Save one PNG per metric history (train/test loss and accuracy) into
        ``self.plots_path``.
        :return: None
        """
        print(f"Saving plots at {self.plots_path}")

        curves = (
            (self.train_losses, "Train Loss", "train_loss.png"),
            (self.test_losses, "Test Loss", "test_loss.png"),
            (self.train_accuracy, "Train Acc", "train_acc.png"),
            (self.test_accuracy, "Test Acc", "test_acc.png"),
        )
        for values, y_label, file_name in curves:
            fig = plt.figure()
            plt.plot(values)
            plt.xlabel("Epochs")
            plt.ylabel(y_label)
            fig.savefig(os.path.join(self.plots_path, file_name))
            # Release the figure; otherwise every export leaves four figures
            # alive in pyplot's global registry.
            plt.close(fig)


# Utils


def make_dirs(path: str):
    """
    Create the directory portion of ``path``, including missing ancestors.

    The final component of ``path`` is treated as a file name and is not
    created. Nothing happens when ``path`` has no directory portion.
    Already-existing directories are not an error.
    """
    parent, _ = os.path.split(path)
    if parent == "":
        return
    os.makedirs(parent, exist_ok=True)


def get_device():
    """Return the first CUDA device when CUDA is available, else the CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")

In [19]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets

import os

In [20]:
class Cifar10(Base):
    """CIFAR-10 variant of ``Base``: supplies the data loaders and class names."""

    def __init__(self, root_path, variant_name="cifar10_base", epochs=200):
        """
        :param root_path: Directory holding the dataset (under ``data``) and outputs
        :param variant_name: Experiment sub-directory name
        :param epochs: Number of training epochs
        """
        super().__init__(
            root_path=root_path, variant_name=variant_name, epochs=epochs
        )

    def load_data(self):
        """
        Download CIFAR-10 if necessary and set ``self.train_loader``,
        ``self.test_loader`` and ``self.class_names``.
        :return: None
        """
        # One normalisation shared by both splits: evaluating with different
        # statistics than the model was trained with would feed it inputs on
        # a different scale.
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)
        )

        transform_train = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]
        )
        transform_test = transforms.Compose(
            [
                transforms.ToTensor(),
                normalize,
            ]
        )

        dataset_args = {
            "root": os.path.join(self.root_path, "data"),
            "download": True,
        }
        dataloader_args = {"batch_size": 128, "num_workers": 2}

        train_dataset = datasets.CIFAR10(
            **dataset_args,
            train=True,
            transform=transform_train,
        )
        self.class_names = train_dataset.classes
        self.train_loader = torch.utils.data.DataLoader(
            train_dataset, shuffle=True, **dataloader_args
        )

        test_dataset = datasets.CIFAR10(
            **dataset_args,
            train=False,
            transform=transform_test,
        )
        # No shuffling for evaluation: the reported loss is a mean of
        # per-batch means, so a fixed batch composition keeps it reproducible.
        self.test_loader = torch.utils.data.DataLoader(
            test_dataset, shuffle=False, **dataloader_args
        )


In [21]:
# Root directory for the dataset download and all experiment outputs.
root_path = "/nethome/bdevnani3/raid"

# Train the plain CIFAR-10 classifier end to end: load the data, build a
# 10-output network, attach the cross-entropy loss, then run the schedule.
cifar10 = Cifar10(root_path=root_path, epochs=200)
cifar10.load_data()
cifar10.set_up_model_architecture(num_features_in_last_layer=10)
cifar10.init_model_helpers(criterion=nn.CrossEntropyLoss)
cifar10.train_model()

Files already downloaded and verified
Files already downloaded and verified
Started Training


KeyboardInterrupt: 

In [23]:
import gensim.downloader
import torch
from base import get_device
import numpy as np


class Cifar10Emb(Cifar10):
    """
    CIFAR-10 variant that scores predictions against word embeddings: a
    prediction is counted as correct when the class whose word vector is
    closest to the model output is the labelled class.
    """

    def __init__(self, root_path, variant_name="cifar10_emb", epochs=200):
        """
        :param root_path: Directory holding the dataset and outputs
        :param variant_name: Experiment sub-directory name
        :param epochs: Number of training epochs
        """
        super().__init__(
            root_path=root_path, variant_name=variant_name, epochs=epochs
        )

    # Declared static because it takes no instance state; without the
    # decorator, calling it as ``self.find_closest_words(lookup, x)`` would
    # bind the instance to ``word_lookup`` and shift every argument by one.
    @staticmethod
    def find_closest_words(
            word_lookup: torch.Tensor, x: torch.Tensor, mode: str = "l2"
    ) -> torch.Tensor:
        """
        Given a size [N, c] lookup table (N classes, c channels per vector) and a set of [M, c] vectors to look up,
        returns a size [M] vector of indices from 0 to N-1 containing the closest vector in the lookup for that input.

        Modes:
            l2     - Computes pairwise L2 distance and chooses the lowest one.
            cossim - Computes pairwise cosine similarity, and chooses the most similar. (Not implemented)

        Raises NotImplementedError for any mode other than "l2".
        """
        N, c = word_lookup.shape
        M, c2 = x.shape

        assert c == c2, "The lookup should have the same number of channels as the input."

        if mode == "l2":
            # Broadcast to [M, N, c], reduce the channel axis to squared
            # distances [M, N], then pick the nearest class per input.
            return (
                ((word_lookup[None, :, :] - x[:, None, :]) ** 2).sum(dim=-1).argmin(dim=-1)
            )
        else:
            raise NotImplementedError

    def init_word_lookup(self):
        """
        Attach an [N, c] tensor of class-name word vectors to the model as
        ``self.model.word_lookup``. Requires ``self.model`` and
        ``self.class_names`` to be set. Does nothing if the lookup already
        exists.
        """
        # We only need to lazily initialize this once. Don't reinitialize it if it's already been initialized.
        if getattr(self.model, "word_lookup", None) is not None:
            return

        word_vectors = gensim.downloader.load(name="word2vec-google-news-300")

        # Note: we store the word lookup in the model, not the dataset because
        #   1.) The word lookup should be on the same device as the model
        #   2.) If using multiple GPUs, the model will get duplicated to each device, but the dataset won't
        #   3.) The word model (i.e., textual feature encoder) is a property of the model not the dataset
        self.model.word_lookup = torch.from_numpy(
            np.stack([word_vectors[_class] for _class in self.class_names])
        ).to(get_device())

    def num_correct_preds(self, outputs, labels):
        """
        Count the outputs whose nearest class word vector matches the label.
        :param outputs: Model outputs for one batch, same channel count as the lookup
        :param labels: Ground-truth class indices for the same batch
        :return: Number of correct predictions as a Python int
        """
        return (self.find_closest_words(self.model.word_lookup, outputs) == labels).sum().item()

ModuleNotFoundError: No module named 'gensim'