In [1]:
import os
import glob
from PIL import Image

import pickle
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision as tv
from tqdm import tqdm
import random

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


Bad key "text.kerning_factor" on line 4 in
/home/christoph/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
ROOT_DIR = "/home/christoph/Desktop/ma/data/gtsrb/"
IMG_SIZE = 64
BATCH_SIZE = 128
LATENT_DIMS = 64
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
train_csv = pd.read_csv(ROOT_DIR + "Train.csv")
valid_csv = pd.read_csv(ROOT_DIR + "Test.csv")

train_files = train_csv[["Path", "ClassId"]]
valid_files = valid_csv[["Path", "ClassId"]]

In [4]:
tfms = tv.transforms.Compose([tv.transforms.Resize((IMG_SIZE, IMG_SIZE)), tv.transforms.ToTensor()])

In [5]:
filenames = [os.path.join(dirpath,filename) for dirpath, _, filenames in os.walk(ROOT_DIR + "Train/") for filename in filenames if filename.endswith('.png')]

In [6]:
file_arr = []
for i in tqdm(range(len(filenames)-30000)):
    if i%10 == 0:
        image = Image.open(filenames[i])
        tens = tfms(image)
        conv_filename = filenames[i].split("gtsrb/")[-1]
        class_id = int(train_files[train_files["Path"] == conv_filename]["ClassId"].astype(int))
        tens_id_arr = [tens, class_id]
        file_arr.append(tens_id_arr)
    
random.shuffle(file_arr)

100%|██████████| 9209/9209 [00:02<00:00, 3373.15it/s]


In [7]:
class TSDataset(Dataset):
    def __init__(self, files, transform=None):
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        x = self.files[idx][0]
        label = self.files[idx][1]
            
        return x, label

In [8]:
training_data = TSDataset(file_arr, tfms)

In [9]:
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)

In [10]:
nclasses = 43

In [11]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
        # CNN layers
        self.conv1 = nn.Conv2d(3, 100, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(100)
        self.conv2 = nn.Conv2d(100, 150, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(150)
        self.conv3 = nn.Conv2d(150, 250, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(250)
        self.conv_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(250*2*2, 350)
        self.fc2 = nn.Linear(350, nclasses)

        self.localization = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
            )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 4 * 4, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
            )
   
        # Initialize the weights/bias with identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))


    # Spatial transformer network forward function
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 4 * 4)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)
        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x

    def forward(self, x):
        # transform the input
        x = self.stn(x)

        # Perform forward pass
        x = self.bn1(F.max_pool2d(F.leaky_relu(self.conv1(x)),2))
        x = self.conv_drop(x)
        x = self.bn2(F.max_pool2d(F.leaky_relu(self.conv2(x)),2))
        x = self.conv_drop(x)
        x = self.bn3(F.max_pool2d(F.leaky_relu(self.conv3(x)),2))
        x = self.conv_drop(x)
        x = x.view(-1, 250*2*2)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [12]:
# CVAE architecture

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class UnFlatten(nn.Module):
    def forward(self, input, size=1024):
        return input.view(input.size(0), size, 1, 1)

class CVAE(nn.Module):
    def __init__(self, image_channels=3, h_dim=1024, z_dim=32):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),
            nn.ReLU(),
            Flatten()
        )
        
        self.signclass_embedding = nn.Embedding(43, 20)
        
        self.h2mu = nn.Linear(h_dim, z_dim)
        self.h2sigma = nn.Linear(h_dim, z_dim)
        self.z2h = nn.Linear(z_dim + 20, h_dim)
        
        self.decoder = nn.Sequential(
            UnFlatten(),
            nn.ConvTranspose2d(h_dim, 128, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, kernel_size=6, stride=2),
            nn.Sigmoid(),
        )
        
        
    def reparameterize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.randn(*mu.size()).to(DEVICE)
        z = mu + std * eps
        return z
    
    def bottleneck(self, h, label):
        mu = self.h2mu(h)
        logvar = self.h2sigma(h)

        z = self.reparameterize(mu, logvar)
        
        return z, mu, logvar
    
    def extract_model(self):
        return self.signclass_embedding, self.z2h, self.decoder
        
    def encode(self, x, label):
        return self.bottleneck(self.encoder(x), label)[0]

    def decode(self, z):
        return self.decoder(self.z2h(z))
    
    def forward(self, x, label):
        h = self.encoder(x)
        z_small, mu, logvar = self.bottleneck(h, label)
        
        signclass = self.signclass_embedding(label.long())
        signclass = signclass.squeeze(dim=1)
        z_small_cat = torch.cat([z_small, signclass], dim=1)
        z = self.z2h(z_small_cat)
        return self.decoder(z), mu, logvar, z_small, z

In [13]:
# Ensemble architecture (combining cvae and classifier)

class Ensemble(nn.Module):
    def __init__(self, embeddings, upscaler, decoder, classifier):
        super(Ensemble, self).__init__()
        self.embeddings = embeddings
        self.upscaler = upscaler
        self.decoder = decoder
        self.classifier = classifier
        self.downscaler = nn.MaxPool2d(2, stride=2)
        
    def forward(self, z, label):
        enc_label = self.embeddings(label.long())
        enc_label = enc_label.squeeze(dim=1)
        x = torch.cat((z, enc_label), dim=1)
        x = self.upscaler(x)
        x = self.decoder(x)
        x = self.downscaler(x)
        x = self.classifier(x)
        return x
    
    def get_img(self, z, label):
        enc_label = self.embeddings(label.long())
        x = torch.cat((z, enc_label), dim=1)
        x = self.upscaler(x)
        x = self.decoder(x)
        return x

In [14]:
# Load pre-trained models for classifier and cvae

classifier = Classifier()
classifier.eval()
cvae = CVAE()

classifier.load_state_dict(torch.load("gtsrb_benchmark.pth"))
cvae.load_state_dict(torch.load("ccvae_32_signs_extract_50.pth"))

classifier.to(DEVICE)
cvae.to(DEVICE);

In [15]:
# Load cvae and classifier into ensemble

embeddings, upscaler, decoder = cvae.extract_model()
ensemble = Ensemble(embeddings, upscaler, decoder, classifier)
ensemble.to(DEVICE);

In [89]:
np_arr = []
for data, label in train_dataloader:
        if torch.cuda.is_available():
            data, label = data.cuda(), label.type(torch.FloatTensor).unsqueeze(dim=1).cuda()

        recon_batch, mu, logvar, _, _ = cvae(data, label)  
        to_np = mu.detach().cpu().numpy()
        np_arr.append(to_np)

In [69]:
stacked_tensors = np.vstack(np_arr)

In [30]:
with open('/home/christoph/Desktop/gtsrb_32dimv2.npy', 'wb') as f:
    a = np.save(f, stacked_tensors)

In [16]:
with open('/home/christoph/Desktop/gtsrb_32dim.npy', 'rb') as f:
    tensors = np.load(f)

In [17]:
### PCA ###

In [18]:
from sklearn.decomposition import PCA

In [19]:
### Pipeline ###

In [21]:
with open('/home/christoph/Desktop/gtsrb_pca.pkl', 'rb') as f:
    pca = pickle.load(f);

In [22]:
with open('/home/christoph/Desktop/gtsrb_feats.npy', 'rb') as f:
    embs = np.load(f)

In [23]:
ranges = [-1, -0.5, 0, 0.5, 1]

In [24]:
selected_class = 5
ce_loss = nn.CrossEntropyLoss()

cvae_data, cvae_labels = next(iter(train_dataloader))
cvae_data, cvae_labels = cvae_data.to(DEVICE), cvae_labels.to(DEVICE)
cvae_labels = cvae_labels.unsqueeze(dim=1)
cvae_data.shape, cvae_labels.shape

(torch.Size([128, 3, 64, 64]), torch.Size([128, 1]))

In [25]:
recon_batch, mu, logvar, data, z = cvae(cvae_data, cvae_labels)
mu[0].shape

torch.Size([32])

In [None]:
yilog(yihat)

In [26]:
def add_noise_1d(z_orig, label, ranges, dim):
    noisy_data = []
    probs = []
    for step, value in enumerate(ranges):
        noise = torch.zeros(1, 5).to(DEVICE)
        noise[0, dim] = value
        z_compressed = z_orig.unsqueeze(dim=0) + noise
        
        z_compressed = z_compressed.to("cpu")
        
        z = pca.inverse_transform(z_compressed)
        
        z = torch.Tensor(z).to(DEVICE)

        pred_logits = ensemble(z, label.unsqueeze(dim=0)) 
        loss = ce_loss(pred_logits, label)
        norm_vals = abs(value)
        loss_normed = loss*100/torch.exp(torch.Tensor([norm_vals]).to(DEVICE))
        pred_probs = F.softmax(pred_logits)

        pred = pred_probs.max(1, keepdim=True)[1][0]
        prob = pred_probs.max(1, keepdim=True)[0]

        img = ensemble.get_img(z, label)
        img = img.squeeze(dim=0)
        img = img.detach().cpu().numpy()
        img = img.transpose(1, 2, 0)

        noisy_data.append(img)
        probs.append(prob.item())
            
    return [noisy_data, np.round(probs, 4)]

In [27]:
test = torch.Tensor(embs[0]).to(DEVICE)

In [104]:
### Widget ###

In [29]:
import ipywidgets as widgets
import numpy as np
from PIL import Image
from ipywidgets import interact

In [30]:
def z_to_img(idx):
    grid_data = add_noise(mu[idx], cvae_labels[idx], np.linspace(-3, 3, 10), 1)
    [row, col], img, prob, loss_normed = grid_data[0]
    img = img.detach().cpu().numpy()
    img = img.transpose(1, 2, 0)
    return img

In [31]:
def display_image(x):
    x_scaled = np.uint8(255 * (x - x.min()) / x.ptp())
    return Image.fromarray(x_scaled)

In [89]:
a = widgets.FloatText()
display(a)

def display_sequence(images, probs):
    def _show(range_step=(0, len(images)-1)):
        a.value = probs[range_step]
        return display_image(images[range_step])
    return interact(_show)

x = torch.Tensor(embs[40]).to(DEVICE)
images, probs = add_noise_1d(x, cvae_labels[100], np.linspace(-5, 5, 100), 1)
    
# prob = probs[0]    

display_sequence(images, probs);

FloatText(value=0.0)



interactive(children=(IntSlider(value=49, description='range_step', max=99), Output()), _dom_classes=('widget-…

In [126]:
# dim-0: nächtlich-grell
# dim-1: klein-groß und Winkel
# dim-2: Kontrast
# dim-3: Ausleuchtung
# dim-4: Winkel(?) und Ausleuchtung