# Microsoft-Vision-ResNet50 based Hybrid Model using a Fully-Connected Classifier Head

In [2]:

# Project root on the local machine. It is written without a leading slash
# because the `%cd` lines below prepend one themselves.
FOLDERNAME = 'home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Put the project directory on the import path so that the Python
# interpreter can load python files from within it.
# NOTE(review): this absolute path duplicates FOLDERNAME - keep the two in sync.
import sys
sys.path.append('/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier')

# Run the dataset helper script from the datasets directory, then return to
# the project root. Per the original note the script downloads CIFAR-10 if it
# doesn't already exist; the script itself is not visible here - TODO confirm.
%cd /$FOLDERNAME/datasets/
!bash get_datasets.sh
%cd /$FOLDERNAME

/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier/datasets
/home/ubuntu/Vision-Classifiers/Microsoft-Vision-Classifier


In [3]:

import numpy as np
import matplotlib.pyplot as plt
from data_utils import get_CIFAR10_data


%load_ext autoreload
%autoreload 2

def calculate_error(x, y):
  """Return the largest element-wise relative error between ``x`` and ``y``.

  Each element's error is ``|x - y| / max(1e-8, |x| + |y|)``; the floor of
  ``1e-8`` keeps the division defined when both values are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [4]:
import time
import torch
import torchvision
import numpy as np
import torch.nn as nn
from torch import Tensor
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
import progressbar
from progressbar import progressbar


In [5]:
import microsoftvision

In [6]:
class Wrangling:
    """Image preprocessing callable passed as ``transform`` to the datasets.

    The pipeline resizes to 224, centre-crops to 224x224, converts to a
    tensor, and normalises each channel with fixed mean/std values. The call
    then reverses the order of the first (channel) axis.
    NOTE(review): the reversed mean/std order plus the channel flip looks like
    an RGB -> BGR conversion expected by the backbone - confirm against the
    microsoftvision documentation.
    """

    def __init__(self):
        steps = [
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229]),
        ]
        self.wrangling = transforms.Compose(steps)

    def __call__(self, x):
        """Apply the pipeline to ``x`` and return it with channels reversed."""
        normalised = self.wrangling(x)
        return normalised[[2, 1, 0], :, :]

In [7]:

# Load the CIFAR-10 arrays and report the shape of every entry.
data = get_CIFAR10_data()
for split_name, array in data.items():
    print(f"{split_name}: {array.shape}")

X_train: (49000, 3, 32, 32)
y_train: (49000,)
X_val: (1000, 3, 32, 32)
y_val: (1000,)
X_test: (1000, 3, 32, 32)
y_test: (1000,)


In [8]:


# Both splits share one download directory and use the same preprocessing.
cifar_root = './path'
train_dataset = CIFAR10(cifar_root, train=True, download=True, transform=Wrangling())
test_dataset = CIFAR10(cifar_root, train=False, download=True, transform=Wrangling())

Files already downloaded and verified
Files already downloaded and verified


In [9]:
# Load the ResNet-50 backbone from the microsoftvision package with its
# pretrained weights; it is used below purely as a feature extractor.
model = microsoftvision.models.resnet50(pretrained=True)

Loading Microsoft Vision pretrained model
Model already downloaded.


In [10]:
# Switch the backbone to inference mode and move it to the GPU. Both calls
# return the module itself, so the chained expression still displays it.
model.eval().cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [24]:
def extractor(dataset, model, batch_size=128, num_workers=8):
    """Run ``model`` over every sample of ``dataset`` and collect its outputs.

    Args:
        dataset: Dataset yielding ``(input, label)`` pairs that the default
            ``DataLoader`` collation can batch.
        model: Callable applied to each input batch. It is used as-is; the
            caller is expected to have put it in ``eval()`` mode already.
        batch_size: Number of samples per forward pass (default 128).
        num_workers: Number of ``DataLoader`` worker processes (default 8).

    Returns:
        A ``(features, labels)`` tuple of CPU tensors, each concatenated
        along the first dimension in the order the loader produced them.
    """
    # Feed batches to whichever device the model lives on. Objects without
    # parameters fall back to CUDA, the previously hard-coded behaviour.
    params = model.parameters() if hasattr(model, 'parameters') else iter(())
    first_param = next(iter(params), None)
    target_device = first_param.device if first_param is not None else torch.device('cuda')

    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)
    all_features = []
    all_labels = []

    with torch.no_grad():
        for point, labels in progressbar(loader):
            features = model(point.to(target_device))

            # Move every batch to host memory immediately so the full feature
            # matrix is never held on the accelerator.
            all_features.append(features.cpu())
            # Labels are not consumed by the model, so they stay on the CPU.
            all_labels.append(labels.cpu())

    return torch.cat(all_features), torch.cat(all_labels)

In [25]:
# Push both CIFAR-10 splits through the backbone once and keep the resulting
# feature/label tensors. This is the slow step of the notebook: the recorded
# run below took roughly five minutes for train and one minute for test.
train_features, train_labels = extractor(train_dataset, model)
test_features, test_labels = extractor(test_dataset, model)

100% (391 of 391) |######################| Elapsed Time: 0:04:51 Time:  0:04:51
100% (79 of 79) |########################| Elapsed Time: 0:00:58 Time:  0:00:58


In [26]:

# Show the dtypes that came out of the extraction step.
print(train_labels.dtype)
print(train_features.dtype)

# Labels are stored as float32 in the dataset; the training loop converts
# them back to integer class indices before computing the loss.
train_labels = train_labels.float()
train = TensorDataset(train_features, train_labels)

# Shuffled mini-batches of 30 pre-computed feature vectors.
train_loader = DataLoader(train, shuffle=True, batch_size=30)

torch.int64
torch.float32


In [28]:
# Keep the float cast so the label dtype seen by later cells is unchanged.
test_labels = test_labels.to(dtype=torch.float)
# Both values are already tensors, so they are handed to the dataset
# directly (as in the training cell) instead of being re-wrapped with the
# legacy `Tensor(...)` constructor.
test = TensorDataset(test_features, test_labels)
# No shuffling: the test set is evaluated in stored order, 10 samples at a time.
test_loader = DataLoader(test, batch_size=10)

In [29]:
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
    """Classifier head trained on the pre-computed backbone features.

    Maps a 2048-dimensional input through a 1024-unit hidden layer with ReLU
    and dropout (p=0.5) to 10 output scores (raw logits, no softmax).
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(2048, 1024)
        self.dropout_layer = nn.Dropout(p=0.5)
        self.out = nn.Linear(1024, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 class scores for a batch of feature vectors ``x``."""
        hidden = self.fc(x)
        hidden = self.relu(hidden)
        hidden = self.dropout_layer(hidden)
        return self.out(hidden)
# Instantiate the classifier head and move its parameters to the GPU.
network =  Network().cuda()

In [30]:
# Cross-entropy over the 10 class scores; Adam updates only the classifier
# head's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=network.parameters(), lr=1e-4)


In [32]:
# Number of samples per split, used to average the running statistics.
dataset_sizes = dict(train=len(train_labels), test=len(test_labels))

In [33]:
device = 'cuda'
def train_model(model, criterion, optimizer, num_epochs=20, data_loader=None):
    """Train ``model`` for ``num_epochs`` epochs and report per-epoch metrics.

    Args:
        model: Module to optimise; it is switched to ``train()`` mode at the
            start of every epoch.
        criterion: Loss called as ``criterion(outputs, labels)`` with integer
            class-index labels.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over the data (default 20).
        data_loader: Iterable of ``(inputs, labels)`` batches. Defaults to
            the notebook-level ``train_loader`` when omitted.

    Returns:
        The same ``model`` object, trained in place.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    if data_loader is None:
        data_loader = train_loader

    # Send batches to the device the model already lives on; only a model
    # without parameters falls back to the notebook-level ``device``.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    since = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        for inputs, labels in progressbar(data_loader):
            inputs = inputs.to(target_device)
            # The loss expects integer class indices, whatever dtype the
            # dataset stored the labels in.
            labels = labels.to(device=target_device, dtype=torch.long)

            optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_count = inputs.size(0)
            # ``loss`` is a batch mean, so weight it by the batch size to get
            # a per-sample average at the end of the epoch.
            running_loss += loss.item() * batch_count
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_count

        if samples_seen == 0:
            raise ValueError('data_loader produced no samples')

        # Average over the samples actually seen rather than a global size
        # table, so the function works with any loader.
        epoch_loss = running_loss / samples_seen
        epoch_acc = running_corrects / samples_seen
        print(' Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model

In [34]:
# Fit the classifier head on the cached features for five epochs.
model_ft = train_model(
    model=network,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=5,
)

  2% (43 of 1667) |                      | Elapsed Time: 0:00:00 ETA:   0:00:06

Epoch 0/4
----------


100% (1667 of 1667) |####################| Elapsed Time: 0:00:05 Time:  0:00:05
  2% (43 of 1667) |                      | Elapsed Time: 0:00:00 ETA:   0:00:05

 Loss: 0.2617 Acc: 0.9121
Epoch 1/4
----------


100% (1667 of 1667) |####################| Elapsed Time: 0:00:05 Time:  0:00:05
  2% (43 of 1667) |                      | Elapsed Time: 0:00:00 ETA:   0:00:06

 Loss: 0.1963 Acc: 0.9319
Epoch 2/4
----------


100% (1667 of 1667) |####################| Elapsed Time: 0:00:05 Time:  0:00:05
  2% (43 of 1667) |                      | Elapsed Time: 0:00:00 ETA:   0:00:06

 Loss: 0.1792 Acc: 0.9371
Epoch 3/4
----------


100% (1667 of 1667) |####################| Elapsed Time: 0:00:05 Time:  0:00:05
  2% (43 of 1667) |                      | Elapsed Time: 0:00:00 ETA:   0:00:06

 Loss: 0.1663 Acc: 0.9403
Epoch 4/4
----------


100% (1667 of 1667) |####################| Elapsed Time: 0:00:05 Time:  0:00:05


 Loss: 0.1538 Acc: 0.9456
Training complete in 0m 29s


In [35]:
# Score the trained classifier head on the held-out test features.
correct = 0
total = 0

# Training leaves the model in train mode; switch to inference mode so
# dropout is disabled while measuring accuracy.
model_ft.eval()

with torch.no_grad():
    # Unpack batches directly so the loop does not overwrite the `data`
    # dictionary created earlier in the notebook.
    for images, labels in progressbar(test_loader):
        images = images.to(device)
        # Compare predictions against integer class indices, matching the
        # conversion done in the training loop.
        labels = labels.to(device=device, dtype=torch.long)
        outputs = model_ft(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Hybrid Model Performance Accuracy:', 100 * correct / total)

100% (1000 of 1000) |####################| Elapsed Time: 0:00:00 Time:  0:00:00


Accuracy of the network on the test images: 93 %
