In [1]:
from torch.nn import Conv2d, MaxPool2d, Dropout, Linear, ReLU, CrossEntropyLoss
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm
import torchvision
import pathlib
import pickle
import torch
import os
import gc

In [2]:
class BevDataset(Dataset):
  def __init__(self, root, size=224, split='train'):
    self.split = split
    postfix = split
    root = os.path.join(root, 'bev_classification', 'images')
    self.dataset_folder = torchvision.datasets.ImageFolder(os.path.join(root, postfix) ,transform = transforms.Compose([transforms.Resize((size,size)),transforms.ToTensor()]))

  def __getitem__(self,index):
    img = self.dataset_folder[index]
    path = self.dataset_folder.imgs[index]
    return img[0], img[1], path[0]
  
  def __len__(self):
    return len(self.dataset_folder)
  
# class BevTestDataset(Dataset):
#   def __init__(self, root, size=224, split='train'):
#     postfix = split
#     root = os.path.join(root, 'bev_classification', 'images')
#     self.dataset_folder = torchvision.datasets.ImageFolder(os.path.join(root, postfix) ,transform = transforms.Compose([transforms.Resize((size,size)),transforms.ToTensor()]))

#   def __getitem__(self,index):
#     img = self.dataset_folder[index]
#     path = self.dataset_folder.imgs[index]
#     return img[0], img[1], path[0]
  
#   def __len__(self):
#     return len(self.dataset_folder)

In [3]:
class ImageClassifier(nn.Module):
    def __init__(self, dropout=0.2):
        super(ImageClassifier, self).__init__()
        output = 99
        self.dropout = Dropout(dropout)
        self.fc1 = Linear(512*32*32, output)
        self.fc2 = Linear(output, output)
        self.conv1 = Conv2d(3, 64, (3,3), padding=(1,1))
        self.conv2 = Conv2d(64, 128, (3,3), padding=(1,1))
        self.conv3 = Conv2d(128, 256, (3,3), padding=(1,1))
        self.conv4 = Conv2d(256, 512, (3,3), padding=(1,1))
        
        self.net = nn.Sequential(
            # Image size = 512 x 512 x 3
            self.conv1, 
            ReLU(),
            MaxPool2d(kernel_size=2, stride=2),

            # Image size = 256 x 256 x 64
            self.conv2, 
            ReLU(),
            Dropout(dropout),
            MaxPool2d(kernel_size=2, stride=2),

            # Image size = 128 x 128 x 128
            self.conv3, 
            ReLU(),
            MaxPool2d(kernel_size=2, stride=2),

            # Image size = 64 x 64 x 256
            self.conv4,
            ReLU(),
            Dropout(dropout),
            MaxPool2d(kernel_size=2, stride=2)
            # Image size = 32 x 32 x 512
        )

        self.initialize_weights()


    def initialize_weights(self):
        gain = 2**(1/2)
        nn.init.xavier_normal_(self.fc1.weight, gain=gain)
        nn.init.xavier_normal_(self.fc2.weight, gain=gain)
        nn.init.xavier_normal_(self.conv1.weight, gain=gain)
        nn.init.xavier_normal_(self.conv2.weight, gain=gain)
        nn.init.xavier_normal_(self.conv3.weight, gain=gain)
        nn.init.xavier_normal_(self.conv4.weight, gain=gain)


    def forward(self, X):
        output = self.net(X)
        _, c, h, w = output.size()
        output = output.view(-1, c*h*w)
        output = self.dropout(self.fc1(output))
        return self.fc2(output)

In [4]:
# Hyper parameters
# With batch_size 50, there will be 1776 iterations over the dataset per epoch
num_chunks = 10
batch_size = 5
num_epochs = 2
lr = 5e-3

In [5]:
train_losses = []
train_accuracy = []

val_losses = [0]
val_accuracy = [0]

model_path = 'model/mps-model.pkl'

if not pathlib.Path('model').exists():
    pathlib.Path('model').mkdir()

def train():
    try:
        gc.collect()
        device = torch.device('mps')
        if not pathlib.Path(model_path).exists():
            model = ImageClassifier().to(device)
        else:
            print('Model Found!')
            print('Loading model...')
            with open(model_path, 'rb') as f:
                model = pickle.load(f).to(device)

        # TODO: make sure to split the data into 10 samples and train on each 
        for i in range(num_chunks):
            print()
            train_loader = DataLoader(BevDataset('.', chunk=i), batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(BevDataset('.', split='val', chunk=i), batch_size=batch_size, shuffle=True)
            
            # Only have 10 validation checks per epoch
            val_check = len(train_loader) // 5

            criterion = CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=lr)

            loop = tqdm(total=len(train_loader)*num_epochs, position=0)

            for epoch in range(num_epochs):
                train_step_losses = []
                train_step_accuracy = []
                for batch, (x, y_truth) in enumerate(train_loader):
                    x, y_truth = x.to(device), y_truth.to(device)

                    optimizer.zero_grad()

                    y_hat = model(x)

                    accuracy = (y_hat.argmax(1) == y_truth).float().mean()
                    train_accuracy.append((batch*epoch, accuracy.item()))

                    loss = criterion(y_hat, y_truth)
                    train_losses.append(loss.item())

                    loss.backward()
                    optimizer.step()

                    if (batch + 1) % val_check == 0:
                        print('Validation check')
                        val_loop = tqdm(total=len(val_loader), position=0)

                        val_batch_loss = []
                        val_batch_accuracy = []
                        for batch, (x, y_truth) in enumerate(val_loader):
                            x, y_truth = x.to(device), y_truth.to(device)

                            optimizer.zero_grad()

                            y_hat = model(x)

                            accuracy = (F.softmax(y_hat,1).argmax(1) == y_truth).float().mean()
                            val_batch_accuracy.append((batch*epoch, accuracy.item()))

                            loss = criterion(y_hat, y_truth)
                            val_batch_loss.append(loss.item())

                            loss.backward()
                            optimizer.step()
                            val_loop.update(1)
                            val_loop.set_description(f'val batch: {batch} val accuracy: {accuracy*100:.2f}% val loss: {loss:.4f}')

                        val_losses.append(sum(val_batch_loss) / len(val_batch_loss))
                        val_accuracy.append(sum(val_batch_accuracy) / len(val_batch_accuracy))

                    loop.update(1)
                    loop.set_description(f'epoch: {epoch+1} batch: {batch} accuracy: {accuracy*100:.2f}% val accuracy {val_accuracy[-1]*100:.2f}% loss: {loss:.4f} val loss: {val_losses[-1]:.4f}')
                
                print('Saving model...')
                with open(model_path, 'wb') as f:
                    pickle.dump(model, f)
                print('Model saved.')
    except KeyboardInterrupt:
        print('Saving model...')
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        print('Model saved.')


# train()


In [6]:
import torchvision.models as models

class VGGIntermediate(nn.Module):
    def __init__(self):
        super(VGGIntermediate, self).__init__()
        self.vgg = models.vgg19(pretrained=True)
        self.set_up_vgg()

    def set_up_vgg(self):
        for param in self.vgg.parameters():
            param.requires_grad = False

        num_features = self.vgg.classifier[-1].in_features  # Get the number of input features for the final layer
        self.vgg.classifier[-1] = Linear(num_features, 99)
        # Optionally, you may want to initialize the new layer weights
        # Initialize weights with Xavier initialization
        torch.nn.init.xavier_uniform_(self.vgg.classifier[-1].weight)
        # Optionally, initialize biases to zeros
        torch.nn.init.zeros_(self.vgg.classifier[-1].bias)

    def forward(self, X):
        return self.vgg(X)

In [7]:
# Hyper parameters
# With batch_size 50, there will be 1776 iterations over the dataset per epoch
batch_size = 8
num_epochs = 5
lr = 1e-4

In [8]:
train_losses = [0]
train_accuracy = [0]

val_losses = [0]
val_accuracy = [0]

model_path = 'model/mps-model.pkl'

In [9]:

def fine_tune():
    try:
        gc.collect()
        device = torch.device('mps')
        if not pathlib.Path(model_path).exists():
            model = VGGIntermediate().to(device)
        else:
            print('Model Found!')
            print('Loading model...')
            with open(model_path, 'rb') as f:
                model = pickle.load(f).to(device)

        print()
        train_loader = DataLoader(BevDataset('.'), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(BevDataset('.', split='val'), batch_size=batch_size, shuffle=True)
        
        # Only have 10 validation checks per epoch
        val_check = len(train_loader) // 5

        criterion = CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        loop = tqdm(total=len(train_loader)*num_epochs, position=0)

        for epoch in range(num_epochs):
            train_step_losses = []
            train_step_accuracy = []
            for batch, (x, y_truth, _) in enumerate(train_loader):
                x, y_truth = x.to(device), y_truth.to(device)

                optimizer.zero_grad()

                y_hat = model(x)

                accuracy = (y_hat.argmax(1) == y_truth).float().mean()
                train_step_accuracy.append(accuracy.item())

                loss = criterion(y_hat, y_truth)
                train_step_losses.append(loss.item())

                loss.backward()
                optimizer.step()

                if (batch + 1) % val_check == 0:
                    print('Validation check')
                    val_loop = tqdm(total=len(val_loader), position=0)

                    val_batch_loss = []
                    val_batch_accuracy = []
                    for batch, (x, y_truth, _) in enumerate(val_loader):
                        x, y_truth = x.to(device), y_truth.to(device)

                        optimizer.zero_grad()

                        y_hat = model(x)

                        accuracy = (F.softmax(y_hat,1).argmax(1) == y_truth).float().mean()
                        val_batch_accuracy.append(accuracy.item())

                        loss = criterion(y_hat, y_truth)
                        val_batch_loss.append(loss.item())

                        loss.backward()
                        optimizer.step()
                        val_loop.update(1)
                        val_loop.set_description(f'val batch: {batch} val accuracy: {accuracy*100:.2f}% val loss: {loss:.4f}')

                    val_losses.append(sum(val_batch_loss) / len(val_batch_loss))
                    val_accuracy.append(sum(val_batch_accuracy) / len(val_batch_accuracy))

                    train_losses.append(sum(train_step_losses) / len(train_step_losses))
                    train_accuracy.append(sum(train_step_accuracy) / len(train_step_accuracy))

                loop.update(1)
                loop.set_description(f'epoch: {epoch+1} batch: {batch} accuracy: {train_accuracy[-1]*100:.2f}% val accuracy {val_accuracy[-1]*100:.2f}% loss: {train_losses[-1]:.4f} val loss: {val_losses[-1]:.4f}')
            
            print('Saving model...')
            with open(model_path, 'wb') as f:
                pickle.dump(model, f)
            print('Model saved.')
    except KeyboardInterrupt:
        print('Saving model...')
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        print('Model saved.')

# fine_tune()

In [10]:
gc.collect()

device = torch.device('mps')
print('Model Found!')
print('Loading model...')
with open(model_path, 'rb') as f:
    model = pickle.load(f).to(device)
test_dataset = BevDataset('.', split='test')
test_loader = DataLoader(BevDataset('.', split='test'), batch_size=batch_size, shuffle=True)
test_classes = test_dataset.dataset_folder.classes

Model Found!
Loading model...


In [11]:
def write_predictions(y_hat, paths, f):
    class_nums = F.softmax(y_hat,1).argmax(1)
    # print(class_nums, paths)
    class_labels = [test_classes[c] for c in class_nums]
    # print(class_labels)
    for path, label in zip(paths, class_labels):
        f.write(f'{path},{label}\n')

In [14]:
def write_five_predictions(y_hat, paths, f):
    for i in range(y_hat.size()[0]):
        class_nums = torch.topk(y_hat[i,:].flatten(), 5).indices
        class_labels = [str(test_classes[c]) for c in class_nums]
        f.write(f'{paths[i]},{",".join(class_labels)}')

In [15]:
with open('test_edited.txt', 'w') as f, open('test_edited_five.txt', 'w') as f_f:
    loop = tqdm(total=len(test_loader), position=0)
    for x, _, path in test_loader:
        y_hat = model(x.to(device))
        write_predictions(y_hat, path, f)
        write_five_predictions(y_hat, path, f_f)
        loop.update(1)

  0%|          | 0/1238 [01:37<?, ?it/s]


./bev_classification/images/test/049000000443/075ea662-010c-4b0b-abf1-8ab84352e04a.jpg,049000000443,078000082401,049000040869,049000000450,049000004632
./bev_classification/images/test/049000007640/8a6ee392-78c1-4f89-a2f2-dcb610fde1d1.jpg,049000007640,049000009774,012000001598,078000082401,049000024708
./bev_classification/images/test/016571910303/68e73415-cfe8-44f7-90c9-0c9727fe432d.jpg,078000003864,611269332827,049000004632,016571910310,070847037989
./bev_classification/images/test/016571910310/c455acc6-da2e-4c5c-9650-97a0c00a1816.jpg,016571910310,012000004520,016571910303,818094005784,858176002171
./bev_classification/images/test/049000000443/01ba7035-339b-4f41-aeca-08d0850e4e98.jpg,049000000443,049000040869,049000050103,078000082401,049000000450
./bev_classification/images/test/078000003864/5e2f1d0f-15b9-4d74-8ab4-8b861a84ee7d.jpg,049000024692,078000003864,049000024685,012000504051,012000171956
./bev_classification/images/test/818094005791/f9e7a646-7ff6-40e8-957b-57167499d681.jpg,0