In [1]:
import os
import glob
from PIL import Image

import pickle
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision as tv
from tqdm import tqdm
import random

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


Bad key "text.kerning_factor" on line 4 in
/home/christoph/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
ROOT_DIR = "/home/christoph/Desktop/ma/data/gtsrb/"
IMG_SIZE = 64
BATCH_SIZE = 128
LATENT_DIMS = 64
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
train_csv = pd.read_csv(ROOT_DIR + "Train.csv")
valid_csv = pd.read_csv(ROOT_DIR + "Test.csv")

train_files = train_csv[["Path", "ClassId"]]
valid_files = valid_csv[["Path", "ClassId"]]

In [4]:
tfms = tv.transforms.Compose([tv.transforms.Resize((IMG_SIZE, IMG_SIZE)), tv.transforms.ToTensor()])

In [21]:
filenames = [os.path.join(dirpath,filename) for dirpath, _, filenames in os.walk(ROOT_DIR + "Train/") for filename in filenames if filename.endswith('.png')]

In [22]:
file_arr = []
for i in tqdm(range(len(filenames)-30000)):
    if i%10 == 0:
        image = Image.open(filenames[i])
        tens = tfms(image)
        conv_filename = filenames[i].split("gtsrb/")[-1]
        class_id = int(train_files[train_files["Path"] == conv_filename]["ClassId"].astype(int))
        tens_id_arr = [tens, class_id]
        file_arr.append(tens_id_arr)
    
random.shuffle(file_arr)

100%|██████████| 9209/9209 [00:03<00:00, 2882.12it/s]


In [23]:
class TSDataset(Dataset):
    def __init__(self, files, transform=None):
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        x = self.files[idx][0]
        label = self.files[idx][1]
            
        return x, label

In [24]:
training_data = TSDataset(file_arr, tfms)

In [25]:
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)

In [26]:
# Classifier architecture

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 43)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv3(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [27]:
# CVAE architecture

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class UnFlatten(nn.Module):
    def forward(self, input, size=1024):
        return input.view(input.size(0), size, 1, 1)

class CVAE(nn.Module):
    def __init__(self, image_channels=3, h_dim=1024, z_dim=32):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),
            nn.ReLU(),
            Flatten()
        )
        
        self.signclass_embedding = nn.Embedding(43, 20)
        
        self.h2mu = nn.Linear(h_dim, z_dim)
        self.h2sigma = nn.Linear(h_dim, z_dim)
        self.z2h = nn.Linear(z_dim + 20, h_dim)
        
        self.decoder = nn.Sequential(
            UnFlatten(),
            nn.ConvTranspose2d(h_dim, 128, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, kernel_size=6, stride=2),
            nn.Sigmoid(),
        )
        
        
    def reparameterize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        eps = torch.randn(*mu.size()).to(DEVICE)
        z = mu + std * eps
        return z
    
    def bottleneck(self, h, label):
        mu = self.h2mu(h)
        logvar = self.h2sigma(h)

        z = self.reparameterize(mu, logvar)
        
        return z, mu, logvar
    
    def extract_model(self):
        return self.signclass_embedding, self.z2h, self.decoder
        
    def encode(self, x, label):
        return self.bottleneck(self.encoder(x), label)[0]

    def decode(self, z):
        return self.decoder(self.z2h(z))
    
    def forward(self, x, label):
        h = self.encoder(x)
        z_small, mu, logvar = self.bottleneck(h, label)
        
        signclass = self.signclass_embedding(label.long())
        signclass = signclass.squeeze(dim=1)
        z_small_cat = torch.cat([z_small, signclass], dim=1)
        z = self.z2h(z_small_cat)
        return self.decoder(z), mu, logvar, z_small, z

In [28]:
# Ensemble architecture (combining cvae and classifier)

class Ensemble(nn.Module):
    def __init__(self, embeddings, upscaler, decoder, classifier):
        super(Ensemble, self).__init__()
        self.embeddings = embeddings
        self.upscaler = upscaler
        self.decoder = decoder
        self.classifier = classifier
        
    def forward(self, z, label):
        enc_label = self.embeddings(label.long())
        enc_label = enc_label.squeeze(dim=1)
        x = torch.cat((z, enc_label), dim=1)
        x = self.upscaler(x)
        x = self.decoder(x)
        x = self.classifier(x)
        return x
    
    def get_img(self, z, label):
        enc_label = self.embeddings(label.long())
        x = torch.cat((z, enc_label), dim=1)
        x = self.upscaler(x)
        x = self.decoder(x)
        return x

In [29]:
# Load pre-trained models for classifier and cvae

classifier = Classifier()
classifier.eval()
cvae = CVAE()

classifier.load_state_dict(torch.load("gtsrb_classifier_50.pth"))
cvae.load_state_dict(torch.load("ccvae_32_signs_extract_50.pth"))

classifier.to(DEVICE)
cvae.to(DEVICE);

In [30]:
# Load cvae and classifier into ensemble

embeddings, upscaler, decoder = cvae.extract_model()
ensemble = Ensemble(embeddings, upscaler, decoder, classifier)
ensemble.to(DEVICE);

In [31]:
np_arr = []
for data, label in train_dataloader:
        if torch.cuda.is_available():
            data, label = data.cuda(), label.type(torch.FloatTensor).unsqueeze(dim=1).cuda()

        recon_batch, mu, logvar, _, _ = cvae(data, label)  
        to_np = mu.detach().cpu().numpy()
        np_arr.append(to_np)

In [24]:
stacked_tensors = np.vstack(np_arr)

In [30]:
with open('/home/christoph/Desktop/gtsrb_32dim.npy', 'wb') as f:
    a = np.save(f, stacked_tensors)

In [32]:
with open('/home/christoph/Desktop/gtsrb_32dim.npy', 'rb') as f:
    tensors = np.load(f)

In [33]:
### PCA ###

In [34]:
from sklearn.decomposition import PCA

In [35]:
pca = PCA(n_components=5)
pca.fit(tensors)

ValueError: array must not contain infs or NaNs

In [36]:
### Pipeline ###

In [37]:
with open('/home/christoph/Desktop/gtsrb_pca.pkl', 'rb') as f:
    pca = pickle.load(f)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [38]:
with open('/home/christoph/Desktop/gtsrb_feats.npy', 'rb') as f:
    embs = np.load(f)

In [39]:
ranges = [-1, -0.5, 0, 0.5, 1]

In [40]:
selected_class = 5
ce_loss = nn.CrossEntropyLoss()

cvae_data, cvae_labels = next(iter(train_dataloader))
cvae_data, cvae_labels = cvae_data.to(DEVICE), cvae_labels.to(DEVICE)
cvae_labels = cvae_labels.unsqueeze(dim=1)
cvae_data.shape, cvae_labels.shape

(torch.Size([128, 3, 64, 64]), torch.Size([128, 1]))

In [41]:
recon_batch, mu, logvar, data, z = cvae(cvae_data, cvae_labels)
mu[0].shape

torch.Size([32])

In [42]:
def add_noise_1d(z_orig, label, ranges, dim):
    noisy_data = []
    probs = []
    for step, value in enumerate(ranges):
        noise = torch.zeros(1, 5).to(DEVICE)
        noise[0, dim] = value
        z_compressed = z_orig.unsqueeze(dim=0) + noise
        
        z_compressed = z_compressed.to("cpu")
        
        z = pca.inverse_transform(z_compressed)
        
        z = torch.Tensor(z).to(DEVICE)

        pred_logits = ensemble(z, label.unsqueeze(dim=0)) 
        loss = ce_loss(pred_logits, label)
        norm_vals = abs(value)
        loss_normed = loss*100/torch.exp(torch.Tensor([norm_vals]).to(DEVICE))
        pred_probs = F.softmax(pred_logits)

        pred = pred_probs.max(1, keepdim=True)[1][0]
        prob = pred_probs.max(1, keepdim=True)[0]

        img = ensemble.get_img(z, label)
        img = img.squeeze(dim=0)
        img = img.detach().cpu().numpy()
        img = img.transpose(1, 2, 0)

        noisy_data.append(img)
        probs.append(prob.item())
            
    return [noisy_data, np.round(probs, 4)]

In [43]:
test = torch.Tensor(embs[0]).to(DEVICE)

In [44]:
add_noise_1d(test, cvae_labels[2], ranges, 1)



[[array([[[0.33204687, 0.30579707, 0.28167385],
          [0.32451499, 0.29457518, 0.27178654],
          [0.32497332, 0.28840703, 0.2652441 ],
          ...,
          [0.31627873, 0.28804445, 0.26761037],
          [0.31408957, 0.2899756 , 0.27095136],
          [0.32452178, 0.30080447, 0.28263074]],
  
         [[0.3270485 , 0.2968242 , 0.27198952],
          [0.31940758, 0.283434  , 0.25620678],
          [0.31680176, 0.27669784, 0.25042242],
          ...,
          [0.31781214, 0.2849842 , 0.26397783],
          [0.30825415, 0.28324622, 0.26294142],
          [0.32277256, 0.3009061 , 0.27854   ]],
  
         [[0.32860142, 0.29492706, 0.26437327],
          [0.3193335 , 0.28201306, 0.2483052 ],
          [0.3138591 , 0.27738357, 0.24601097],
          ...,
          [0.32049182, 0.29056373, 0.26576623],
          [0.30802056, 0.28264305, 0.25519663],
          [0.3169396 , 0.291118  , 0.2653725 ]],
  
         ...,
  
         [[0.21054415, 0.19001812, 0.18106855],
          [0.2

In [45]:
### Widget ###

In [46]:
import ipywidgets as widgets
import numpy as np
from PIL import Image
from ipywidgets import interact

In [47]:
def z_to_img(idx):
    grid_data = add_noise(mu[idx], cvae_labels[idx], np.linspace(-3, 3, 10), 1)
    [row, col], img, prob, loss_normed = grid_data[0]
    img = img.detach().cpu().numpy()
    img = img.transpose(1, 2, 0)
    return img

In [48]:
def display_image(x):
    x_scaled = np.uint8(255 * (x - x.min()) / x.ptp())
    return Image.fromarray(x_scaled)

In [52]:
a = widgets.FloatText()
display(a)

def display_sequence(images, probs):
    def _show(range_step=(0, len(images)-1)):
        a.value = probs[range_step]
        return display_image(images[range_step])
    return interact(_show)

x = torch.Tensor(embs[0]).to(DEVICE)
images, probs = add_noise_1d(x, cvae_labels[7], np.linspace(-3, 3, 100), 1)
    
prob = probs[0]    

display_sequence(images, probs);

FloatText(value=0.0)



interactive(children=(IntSlider(value=49, description='range_step', max=99), Output()), _dom_classes=('widget-…

In [None]:
# dim-0: dunkel-hell
# dim-1: klein-groß
# dim-2: Kontrast
# dim-3: Ausleuchtung
# dim-4: Winkel(?) und Ausleuchtung