In [1]:
from __future__ import print_function
import os
import numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import ImageFolder
import torchvision
from PIL import Image
import numpy as np
from torchvision import datasets, transforms
from torch.autograd import Variable
from time import time
from torchsummary import summary


In [2]:
use_gpu = torch.cuda.is_available()
device = torch.cuda.set_device(2)
    
os.makedirs("./mnist",exist_ok=True)

batch_size = 16

train_mnist = datasets.MNIST(root="./mnist", train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
test_mnist = datasets.MNIST(root="./mnist", train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))

train_loader = torch.utils.data.DataLoader(train_mnist,
    batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_mnist,
    batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)

dataloders = {'train':train_loader,'test':test_loader}
dataset_sizes = {'train':len(train_mnist),'test':len(test_mnist)}
dataset_sizes

{'train': 60000, 'test': 10000}

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64*5*5, 256)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x) 
        x = self.maxpool2(x)
        x = x.view(-1,64*5*5)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x
model = Net().cuda()
optimizer = optim.Adamax(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [4]:
from tqdm import tqdm
def train_model(model, criterion, optimizer, num_epochs=10):
    since = time()
    best_model_wts = model.state_dict()
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode
            
            running_loss = 0.0
            running_corrects = 0.0
            
            for data in tqdm(dataloders[phase]):
                inputs, labels = data
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    print(inputs.shape)
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                optimizer.zero_grad()
                outputs = model(inputs)

                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()   
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)
            
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / float(dataset_sizes[phase])

            print('{} Loss: {:.10f} Acc: {:.10f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
                state = {'model':model.state_dict(),'optim':optimizer.state_dict()}
#                 torch.save(state,'mnist0127_test.pth')

    time_elapsed = time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [5]:
# model = train_model(model, criterion, optimizer,num_epochs=10)

In [6]:
data_transforms = transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])

train_path = "image_data/training/"
test_path = "image_data/testing/"

train_datasets = datasets.ImageFolder(train_path,data_transforms)
train_dataloders = torch.utils.data.DataLoader(train_datasets, batch_size=32,
                                             shuffle=True, num_workers=4)
test_datasets = datasets.ImageFolder(test_path,data_transforms)
test_dataloders = torch.utils.data.DataLoader(test_datasets, batch_size=32,
                                             shuffle=True, num_workers=4)

causal_dataloders = {'train':train_dataloders,'test':test_dataloders}
causal_dataset_sizes = {'train':len(train_datasets),'test':len(test_datasets)}



## Intervene Model

In [7]:
new_model = Net()
checkpoint = torch.load("mnist0127_test.pth")
new_model.load_state_dict(checkpoint['model'])
new_model.cuda()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=256, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [8]:
f_mod = list(new_model.children())[:2]
f_mod = nn.Sequential(*f_mod)
f_mod.cuda()
summary(f_mod,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             320
              ReLU-2           [-1, 32, 26, 26]               0
Total params: 320
Trainable params: 320
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.33
Params size (MB): 0.00
Estimated Total Size (MB): 0.33
----------------------------------------------------------------


In [9]:
r_mod = list(new_model.children())[2:6]
r_mod = nn.Sequential(*r_mod)
cls_mod = list(new_model.children())[6:len(list(new_model.children()))]
cls_mod = nn.Sequential(*cls_mod)
for param in r_mod.parameters():
    param.requires_grad = False
for param in cls_mod.parameters():
    param.requires_grad = False

In [10]:

class vgg_auto(nn.Module):
    def __init__(self):
        super(vgg_auto, self).__init__()
        self.f_mod = f_mod
        self.r_mod = r_mod
        self.cls_mod = cls_mod
        self.ae1 = nn.Conv2d(in_channels=32,out_channels=16,kernel_size=(1,1))
        self.ae2 = nn.Conv2d(in_channels=16,out_channels=64,kernel_size=(1,1))
        self.ae3 = nn.Conv2d(in_channels=64,out_channels=32,kernel_size=(1,1))
        self.zero_out = 5
#         self.prob_zero = np.random.uniform(0,1,1)
        
    def forward(self, x):
        x = self.f_mod(x)
        x = self.ae1(x)
        x[:,self.zero_out,:,:] = 0
        x = self.ae2(x)
        x = self.ae3(x)
        x = self.r_mod(x)
        x = x.view(-1,64*5*5)
        x = self.cls_mod(x)
        return x

In [11]:
use_gpu = True
causal_model = vgg_auto()
causal_model.cuda()
criterion_kl = nn.KLDivLoss()
optimizer_c = optim.Adam(filter(lambda p: p.requires_grad,causal_model.parameters()), lr=0.0001)

In [12]:
def train_causal_model(model, c_model, criterion, optimizer, num_epochs=10):
    since = time()
    best_model_wts = model.state_dict()
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            test_loss = 10

            for data in causal_dataloders[phase]:
                inputs,labels = data
                inputs = inputs[:,0,:,:].unsqueeze(1)
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                else:
                    inputs = Variable(inputs)
                optimizer.zero_grad()
                outputs = c_model(inputs)
                outputs = F.log_softmax(outputs,-1)
                _, preds = torch.max(outputs.data, 1)
                
                score = model(inputs)
                score = F.softmax(score,-1)
                score = score.detach()
                
                loss = criterion(outputs, score)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                
                running_loss += loss.item()
            
            epoch_loss = running_loss / causal_dataset_sizes[phase]

            print('{} Loss: {:.10f} '.format(phase, epoch_loss))

            # deep copy the model
            if phase == 'test' and test_loss > epoch_loss:
                test_loss = epoch_loss
                best_model_wts = model.state_dict()
                state = {'model':model.state_dict(),'optim':optimizer.state_dict()}
                torch.save(state,'causal_mnist0127.pth')

    time_elapsed = time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [13]:
train_causal_model(new_model,causal_model,criterion_kl,optimizer_c,num_epochs=10)

Epoch 0/9
----------
train Loss: 0.0003425949 
test Loss: 0.0000304242 
Epoch 1/9
----------
train Loss: 0.0000173706 
test Loss: 0.0000140618 
Epoch 2/9
----------
train Loss: 0.0000079639 
test Loss: 0.0000129163 
Epoch 3/9
----------
train Loss: 0.0000053032 
test Loss: 0.0000076108 
Epoch 4/9
----------
train Loss: 0.0000041351 
test Loss: 0.0000063364 
Epoch 5/9
----------
train Loss: 0.0000035303 
test Loss: 0.0000064362 
Epoch 6/9
----------
train Loss: 0.0000028948 
test Loss: 0.0000058344 
Epoch 7/9
----------
train Loss: 0.0000026937 
test Loss: 0.0000051751 
Epoch 8/9
----------
train Loss: 0.0000023610 
test Loss: 0.0000048938 
Epoch 9/9
----------
train Loss: 0.0000022458 
test Loss: 0.0000052664 
Training complete in 2m 20s


Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=256, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

## Load Test Image

In [24]:
from PIL import Image
test_image = "image_data/sub50_test/3/10728.png"
new_model.eval()

img = Image.open(test_image)
trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
img = trans(img)
img = img.unsqueeze_(0)
img = img.cuda()

score = new_model(img).view(-1)
print(score)

tensor([-33.7162, -15.0072, -15.1428,  39.4012, -23.2148,  -2.2375,
        -36.6115,  -6.7545,   0.3046,  -6.8058], device='cuda:2')


In [25]:
causal_score = causal_model(img).view(-1)
print(causal_score)

causal_effect = causal_score-score
prob = F.softmax(score,-1)

ece = torch.dot(causal_effect,prob)

tensor([-30.6017, -13.7269, -13.7403,  36.8555, -21.8020,  -2.3955,
        -34.3664,  -6.4334,   0.2144,  -6.8351], device='cuda:2')


In [26]:
ece

tensor(-2.5457, device='cuda:2')