In [1]:
from ConceptBottleneck.CUB.models import *
import torch

Load a test picture of a dog and apply the preprocessing transformation

In [2]:
from PIL import Image
import torch.utils.model_zoo as model_zoo
from torchvision import transforms
# Load the sample image. Forcing RGB guarantees exactly three channels, so the
# per-channel Normalize below also works for grayscale or RGBA files.
input_image = Image.open('docs/dog.jpg').convert('RGB')
# Resize, centre-crop to 299 x 299 (the input size Inception v3 expects),
# convert to a tensor and normalise each channel with the given mean / std.
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(input_image)
# Add a leading batch dimension: the models below take N x 3 x 299 x 299 input.
input_batch = input_tensor.unsqueeze(0)

## Default model from PyTorch

In [3]:
# Pretrained Inception v3 from the torchvision hub, switched to inference mode.
model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True).eval()

# Move both the model and the input to the GPU when one is available.
if torch.cuda.is_available():
    model.to('cuda')
    input_batch = input_batch.to('cuda')

with torch.no_grad():
    output = model(input_batch)

# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The scores are unnormalized; a softmax over them yields probabilities.
probabilities = torch.softmax(output[0], dim=0)

Using cache found in C:\Users\cheng/.cache\torch\hub\pytorch_vision_v0.10.0


tensor([ 1.5696e-01,  1.1051e-01, -4.4363e-01, -6.1477e-02, -2.0094e-01,
         1.6820e-01,  5.8117e-01,  6.0334e-02, -7.7651e-02, -1.1192e+00,
        -2.1800e-01, -5.2624e-01, -1.1816e-01,  6.6277e-02,  8.4988e-01,
        -5.3259e-02, -4.7113e-01, -2.5980e-03,  2.7504e-01,  1.7060e-01,
         4.4231e-01, -3.8615e-01,  1.3482e-01, -5.2201e-01, -3.3516e-04,
        -2.3492e-01, -2.3264e-01, -7.1883e-03,  1.6899e-02, -1.2724e-01,
         2.0508e-01, -6.9888e-03,  1.3074e+00, -2.3181e-01,  9.8966e-01,
        -7.7782e-01,  4.8900e-01, -4.2243e-01, -2.6523e-01, -1.5005e-01,
         3.5147e-01, -1.7462e-01,  1.0251e-01,  4.2223e-01, -1.4342e-01,
         1.3578e-02, -4.6477e-01,  3.5966e-01,  1.5194e-01,  2.0217e-01,
        -1.3763e-01,  1.4986e-01,  2.8415e-01, -3.7086e-01,  9.7689e-01,
         7.9543e-01, -7.4032e-01, -7.8852e-02, -9.6364e-02,  1.2193e-01,
        -1.2712e-01,  3.6825e-01, -4.5354e-01, -7.4801e-01, -7.9354e-02,
         2.1688e-01,  2.7018e-01,  1.0089e+00,  4.0

## Default concept bottleneck model (X → C, input to concepts)

In [4]:
# Reference X -> C model from the ConceptBottleneck package.
modelCBMxtoc = ModelXtoC(pretrained = True, freeze = True, num_classes = 200, use_aux = True, n_attributes = 112, expand_dim = 0, three_class = False)
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    modelCBMxtoc.to('cuda')
modelCBMxtoc.eval()
# Inference only: no_grad avoids building an autograd graph for the forward pass.
with torch.no_grad():
    cbm_concept_logits = modelCBMxtoc(input_batch)
# Number of returned outputs.
len(cbm_concept_logits)

112

## Self-designed model

In [3]:
import os
from CUB.template_model import BasicConv2d, InceptionA, InceptionAux, InceptionD, InceptionE, InceptionC, InceptionB, FC
from torch.nn import Parameter
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo

In [4]:
class InceptionF(nn.Module):
    """Concept filter block: a single 3x3 ``BasicConv2d`` producing 320 output channels."""

    def __init__(self, in_channels):
        """
        Args:
        in_channels: number of channels of the incoming feature map
        """
        super().__init__()
        # The attribute keeps the name ``convo`` so parameter names are unchanged.
        self.convo = BasicConv2d(in_channels, 320, kernel_size=(3, 3))

    def forward(self, x):
        """Apply the filter convolution to ``x`` and return its output."""
        filtered = self.convo(x)
        return filtered

In [5]:


# Where the Inception v3 weights (ported from TensorFlow) can be found:
#   'downloaded'          - optional local copy of the checkpoint
#   'inception_v3_google' - official download URL
model_urls = dict(
    downloaded='pretrained/inception_v3_google-1a9a5a14.pth',
    inception_v3_google='https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
)

def inception_v3(pretrained, freeze, **kwargs):
    """Inception v3 model architecture from
    `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.

    .. note::
        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
        N x 3 x 299 x 299, so ensure your images are sized accordingly.

    Args:
        pretrained (bool): If True, build the model and load the pretrained Inception v3 weights
            (from the local 'downloaded' checkpoint if that file exists, otherwise from the URL).
        freeze (bool): Only used when ``pretrained`` is True. If True, every parameter is frozen
            except those whose name contains 'fc' and the ``ConceptFilter`` layers.
        **kwargs: Forwarded to :class:`Inception3`. When ``pretrained`` is True and
            ``transform_input`` is not given, it defaults to True.

    Returns:
        The constructed :class:`Inception3` model.
    """
    if not pretrained:
        # NOTE: ``freeze`` is ignored for a randomly initialised model.
        return Inception3(**kwargs)

    kwargs.setdefault('transform_input', True)
    model = Inception3(**kwargs)

    # Prefer the local checkpoint; fall back to downloading the official one.
    # map_location='cpu' lets the checkpoint load on machines without a GPU;
    # the values are copied into the model's own tensors afterwards.
    local_path = model_urls.get('downloaded')
    if local_path and os.path.exists(local_path):
        state_dict = torch.load(local_path, map_location='cpu')
    else:
        state_dict = model_zoo.load_url(model_urls['inception_v3_google'], map_location='cpu')
    model.load_partial_state_dict(state_dict)

    if freeze:  # only finetune the layers that have no pretrained weights
        for name, param in model.named_parameters():
            # The ConceptFilter layers are not part of the pretrained checkpoint
            # (randomly initialised), so freezing them would leave them untrained.
            if 'fc' not in name and 'ConceptFilter' not in name:  # and 'Mixed_7c' not in name:
                param.requires_grad = False
    return model

class Inception3(nn.Module):
    """Inception v3 backbone with per-task FC heads and optional per-concept filters.

    With ``conceptFilter`` enabled (and ``n_attributes > 0``) the final feature map
    is passed through one ``InceptionF`` block per attribute, and each result is
    pooled and scored by the matching head in ``all_fc2``. Otherwise the feature
    map is pooled once and scored by every head in ``all_fc``.
    """

    def __init__(self, num_classes, aux_logits=True, transform_input=False, n_attributes=0, bottleneck=False, expand_dim=0, three_class=False, connect_CY=False, conceptFilter = True):
        """
        Args:
        num_classes: number of main task classes
        aux_logits: whether to also output auxiliary logits
        transform input: whether to invert the transformation by ImageNet (should be set to True later on)
        n_attributes: number of attributes to predict
        bottleneck: whether to make X -> A model
        expand_dim: if not 0, add an additional fc layer with expand_dim neurons
        three_class: whether to count not visible as a separate class for predicting attribute
        connect_CY: whether to add an FC layer from the attribute predictions to the class prediction
        conceptFilter: whether forward() uses the per-attribute InceptionF filters and the all_fc2 heads
        """
        super().__init__()
        self.aux_logits = aux_logits
        self.transform_input = transform_input
        self.n_attributes = n_attributes
        self.bottleneck = bottleneck
        self.conceptFilter = conceptFilter
        self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3)
        self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1)
        self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1)
        self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3)
        self.Mixed_5b = InceptionA(192, pool_features=32)
        self.Mixed_5c = InceptionA(256, pool_features=64)
        self.Mixed_5d = InceptionA(288, pool_features=64)
        self.Mixed_6a = InceptionB(288)
        self.Mixed_6b = InceptionC(768, channels_7x7=128)
        self.Mixed_6c = InceptionC(768, channels_7x7=160)
        self.Mixed_6d = InceptionC(768, channels_7x7=160)
        self.Mixed_6e = InceptionC(768, channels_7x7=192)
        if aux_logits:
            self.AuxLogits = InceptionAux(768, num_classes, n_attributes=self.n_attributes, bottleneck=bottleneck, \
                                                expand_dim=expand_dim, three_class=three_class, connect_CY=connect_CY)
        self.Mixed_7a = InceptionD(768)
        self.Mixed_7b = InceptionE(1280)
        self.Mixed_7c = InceptionE(2048)

        # One filter block per attribute, used to filter the concepts out.
        # NOTE(review): these are built even when conceptFilter is False; each is a
        # 2048 -> 320 3x3 convolution, so this is a large number of unused parameters.
        self.ConceptFilter = torch.nn.ModuleList([InceptionF(2048) for _ in range(n_attributes)])
        # Separate fc layer for each prediction task. If main task is involved, it's always the first fc in the list.
        # all_fc scores the pooled 2048-channel backbone features, all_fc2 the pooled 320-channel filter outputs.
        self.all_fc = self._make_heads(2048, num_classes, expand_dim)
        self.all_fc2 = self._make_heads(320, num_classes, expand_dim)

        if connect_CY:
            self.cy_fc = FC(n_attributes, num_classes, expand_dim)
        else:
            self.cy_fc = None

        # Imported once here rather than once per module inside the loop.
        import scipy.stats as stats

        # Truncated-normal init for conv / linear weights, constants for batch norm.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                stddev = m.stddev if hasattr(m, 'stddev') else 0.1
                X = stats.truncnorm(-2, 2, scale=stddev)
                values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
                values = values.view(m.weight.size())
                with torch.no_grad():
                    m.weight.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_heads(self, in_features, num_classes, expand_dim):
        """Build the list of FC heads that take ``in_features`` inputs."""
        heads = nn.ModuleList()
        if self.n_attributes > 0:
            if not self.bottleneck:  # multitasking: class head first
                heads.append(FC(in_features, num_classes, expand_dim))
            for _ in range(self.n_attributes):
                heads.append(FC(in_features, 1, expand_dim))
        else:
            heads.append(FC(in_features, num_classes, expand_dim))
        return heads

    def forward(self, x):
        """Run the network on a batch of images.

        Returns:
            A list with one output tensor per head. In training mode with
            ``aux_logits`` enabled, a tuple ``(outputs, aux_outputs)`` is returned
            instead, for both the concept-filter and the standard path.
        """
        if self.transform_input:
            # Convert inputs normalised with the ImageNet mean / std into inputs
            # normalised as (pixel - 0.5) / 0.5.
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        # N x 3 x 299 x 299
        x = self.Conv2d_1a_3x3(x)
        # N x 32 x 149 x 149
        x = self.Conv2d_2a_3x3(x)
        # N x 32 x 147 x 147
        x = self.Conv2d_2b_3x3(x)
        # N x 64 x 147 x 147
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # N x 64 x 73 x 73
        x = self.Conv2d_3b_1x1(x)
        # N x 80 x 73 x 73
        x = self.Conv2d_4a_3x3(x)
        # N x 192 x 71 x 71
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        # N x 192 x 35 x 35
        x = self.Mixed_5b(x)
        # N x 256 x 35 x 35
        x = self.Mixed_5c(x)
        # N x 288 x 35 x 35
        x = self.Mixed_5d(x)
        # N x 288 x 35 x 35
        x = self.Mixed_6a(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6b(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6c(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6d(x)
        # N x 768 x 17 x 17
        x = self.Mixed_6e(x)
        # N x 768 x 17 x 17
        use_aux = self.training and self.aux_logits
        if use_aux:
            out_aux = self.AuxLogits(x)
        # N x 768 x 17 x 17
        x = self.Mixed_7a(x)
        # N x 1280 x 8 x 8
        x = self.Mixed_7b(x)
        # N x 2048 x 8 x 8
        x = self.Mixed_7c(x)
        # N x 2048 x 8 x 8
        if self.conceptFilter and self.n_attributes > 0:
            out = self._concept_filter_heads(x)
        else:
            # Without attributes there are no concept filters to run, so the
            # standard heads are used instead of returning an empty list.
            # Adaptive average pooling
            x = F.adaptive_avg_pool2d(x, (1, 1))
            # N x 2048 x 1 x 1
            x = F.dropout(x, training=self.training)
            # N x 2048 x 1 x 1
            x = x.view(x.size(0), -1)
            # N x 2048
            out = [fc(x) for fc in self.all_fc]
            if self.n_attributes > 0 and not self.bottleneck and self.cy_fc is not None:
                attr_preds = torch.cat(out[1:], dim=1)
                out[0] += self.cy_fc(attr_preds)
        if use_aux:
            # Auxiliary logits are returned on both paths so callers can always
            # unpack (outputs, aux_outputs) while training.
            return out, out_aux
        return out

    def _concept_filter_heads(self, x):
        """Filter the feature map once per attribute, then pool and score each result."""
        # NOTE(review): when bottleneck is False, all_fc2[0] is the class head, so
        # zip() pairs it with the first concept filter and the last attribute head
        # is never used - confirm the intended pairing.
        out = []
        for fc, feat in zip(self.all_fc2, self._concept_filters(x)):
            # feat presumably has 320 channels (InceptionF output; all_fc2 heads take 320 inputs)
            feat = F.adaptive_avg_pool2d(feat, (1, 1))
            feat = F.dropout(feat, training=self.training)
            feat = feat.view(feat.size(0), -1)
            # N x 320
            out.append(fc(feat))
        return out

    def _concept_filters(self, x):
        """Return the outputs of all concept filters applied to the same feature map ``x``."""
        return [concept_filter(x) for concept_filter in self.ConceptFilter]

    def load_partial_state_dict(self, state_dict):
        """
        If dimensions of the current model doesn't match the pretrained one (esp for fc layer), load whichever weights that match.

        Entries are skipped when the name is unknown to this model, when the name
        contains 'fc', or when the tensor shape differs from the model's own tensor.
        """
        own_state = self.state_dict()
        with torch.no_grad():
            for name, param in state_dict.items():
                if name not in own_state or 'fc' in name:
                    continue
                if isinstance(param, Parameter):
                    param = param.data
                if own_state[name].shape != param.shape:
                    # Mismatched layer: keep the current initialisation instead of raising.
                    continue
                own_state[name].copy_(param)


In [6]:
# Self-designed model: pretrained and frozen backbone, X -> concepts bottleneck
# with 112 attributes, using the per-concept filters.
modelNew = inception_v3(
    pretrained=True,
    freeze=True,
    num_classes=200,
    n_attributes=112,
    expand_dim=0,
    three_class=False,
    bottleneck=True,
    conceptFilter=True,
)

In [7]:
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    modelNew.to('cuda')
modelNew.eval()
# Inference only: no_grad avoids building an autograd graph for the forward pass.
with torch.no_grad():
    new_concept_logits = modelNew(input_batch)
# Number of returned outputs.
len(new_concept_logits)

112

In [101]:
# Show the raw outputs of the self-designed concept-filter model for the test image
# (a list of tensors, one per head).
modelNew(input_batch)

[tensor([[-34.3326]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[1.5820]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-8.7992]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-10.0736]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-22.8786]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[26.2267]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-11.8767]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[3.2959]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-20.7309]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-33.6099]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[4.0887]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-40.7761]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-24.7010]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[1.1098]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-11.4188]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[11.6811]],

In [97]:
# Show the raw outputs of the reference X -> C model for the same test image, for comparison.
modelCBMxtoc(input_batch)

[tensor([[-0.9830]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-1.4407]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[1.4971]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[1.2671]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-0.0709]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[0.9529]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-2.9429]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[0.0055]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[0.8997]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-1.6917]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-0.3093]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-1.2598]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[0.9011]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[-0.0315]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[3.5385]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor([[1.7489]], device='cuda