# SVM analysis


#### Extracting features from image patches, predicting their classes, and computing distances to the decision boundaries

In [None]:
import os
import sys  

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import tqdm

from PIL import Image
from matplotlib import gridspec as gridspec
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.externals import joblib
from sklearn.manifold import TSNE
from sklearn.pipeline import Pipeline
from torch.utils.data import DataLoader

from dataset.normalization import denormalize
from dataset.normalization import normalize_image
from dataset.fungus_dataset import FungusDataset
from pipeline.features import extract_features
from pipeline.fisher_vector_transformer import FisherVectorTransformer  

# Location of the pickled, already-fitted model analysed in this notebook.
MODEL_PATH = '/home/dawid_rymarczyk/Downloads/best_model_BoW.pkl'

# The cell used to build an unfitted Pipeline(fisher_vector -> svc) and then
# immediately overwrite it with the loaded object; that dead store is dropped.
# NOTE(security): joblib.load unpickles arbitrary Python objects, so only load
# model files that come from a trusted source.
# NOTE(review): the t-SNE cell reads `pipeline.best_estimator_`, so the pickle
# is presumably a fitted search object wrapping the pipeline rather than a
# bare Pipeline - confirm.
pipeline = joblib.load(MODEL_PATH)

In [None]:
# Class index -> short class code, in index order (0..9).
number_to_fungus_dict = dict(enumerate(
    ('CA', 'CG', 'CL', 'CN', 'CP', 'CT', 'MF', 'SB', 'SC', 'BG')
))

In [None]:
# Evaluation split (train=False) of the fungus data, cut into random
# 250-pixel crops: 8 foreground and 2 background slices per image.
dataset = FungusDataset(
    pngs_dir='./fungus_data_png/pngs_50p/',
    masks_dir='./fungus_data_png/masks_2_3_50p/',
    random_crop_size=250,
    number_of_fg_slices_per_image=8,
    number_of_bg_slices_per_image=2,
    train=False,
)

# One patch per batch, drawn in shuffled order by a single worker process.
loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Walk the loader once and collect, for every patch: its extracted features,
# its true class, the path of the source image and the patch itself.
imgs = []             # patch arrays, one per sample, in the same order as `fm`
paths = []            # source image path of every sample
feature_batches = []  # per-batch feature tensors, concatenated once at the end
label_batches = []    # per-batch class tensors, concatenated once at the end

with torch.no_grad():
    for sample in tqdm.tqdm(loader):
        X = sample['image'].to(device)
        feature_batches.append(extract_features(X, device, None))
        label_batches.append(sample['class'])
        paths.extend(sample['image_path'])
        # Later cells index `imgs[sample_idx]` and transpose it with (1, 2, 0),
        # so keep one array per sample (the batch dimension is split off here).
        imgs.extend(X.cpu().numpy())

    if not feature_batches:
        raise RuntimeError('The data loader produced no samples.')

    # Concatenate once instead of growing the tensors (and re-copying them to
    # NumPy) on every iteration.
    feature_matrix = torch.cat(feature_batches, dim=0)
    labels = torch.cat(label_batches, dim=0)
    fm = feature_matrix.cpu().numpy()

    predicted = pipeline.predict(fm)
    # NOTE(review): the visualisation cells index `distances[sample, class]`,
    # but nothing else in this notebook defines it. The classifier's decision
    # function is assumed to be the intended source (it must return one column
    # per class for the later cells to work) - confirm.
    distances = pipeline.decision_function(fm)

#### Calculating the accuracy per image file

In [None]:
# One row per patch. Build the frame column by column so that predictions and
# labels stay integers; stacking them with the path strings in one array would
# turn every column into strings, which np.bincount cannot count.
df = pd.DataFrame({
    'prediction': np.asarray(predicted).astype(int),
    'label': labels.detach().numpy().astype(int),
    'name': paths,
})

file_pred = []
file_name = []
file_class = []

# Majority vote over the patches of each source image (ties resolve to the
# smallest class index, as np.argmax returns the first maximum). Groups are
# visited in sorted name order.
for name, patches in df.groupby('name', sort=True):
    file_name.append(name)
    file_pred.append(np.argmax(np.bincount(patches['prediction'].values)))
    file_class.append(np.argmax(np.bincount(patches['label'].values)))

file_pred = np.asarray(file_pred)
file_class = np.asarray(file_class)

# Per-class accuracy for classes 0..8 (index 9, 'BG', is left out of the
# per-class report). A class with no files gets NaN instead of dividing 0/0.
cls_acc = {}
for i in range(9):
    n_files = np.sum(file_class == i)
    n_correct = np.sum(file_pred[file_class == i] == i)
    cls_acc[i] = n_correct / n_files if n_files else float('nan')

# Last expression of the cell: per-class accuracies and overall file accuracy.
cls_acc, np.sum(file_pred == file_class) / len(file_class)

#### Presenting image patches with respect to their distance to the decision boundaries

In [None]:
def visualize_corr_uncorr(corr, uncorr):
    """Plot the patches of class ``corr``, ordered by decision value.

    Considers every sample whose true label is ``corr`` and whose prediction
    is either ``corr`` (correct) or ``uncorr`` (confused with that class).
    Each such patch is shown in its own figure, sorted in ascending order of
    the signed value for class ``corr``: ``distances[i, corr]`` for correctly
    classified samples and ``-distances[i, corr]`` for confused ones.

    Reads the notebook globals ``labels``, ``predicted``, ``distances`` and
    ``imgs``. ``distances`` is indexed as ``distances[sample, class]``, so it
    must still be the raw 2-D array, not the DataFrame that the certainty
    cell later rebinds it to.

    Args:
        corr: index of the true class to inspect.
        uncorr: index of the class it may have been confused with.
    """
    # The original referenced an undefined name (`labelki`); the true labels
    # live in the global `labels`.
    true_labels = np.asarray(labels)
    of_class = np.where(true_labels == corr)[0]
    preds_of_class = np.asarray(predicted)[of_class]
    hits = of_class[preds_of_class == corr]
    confused = of_class[preds_of_class == uncorr]

    rows = [
        [idx, true_labels[idx], corr,
         distances[idx, corr], -distances[idx, uncorr]]
        for idx in hits
    ]
    rows += [
        [idx, true_labels[idx], uncorr,
         -distances[idx, corr], distances[idx, uncorr]]
        for idx in confused
    ]

    df = pd.DataFrame(
        rows,
        columns=['idx', 'label', 'predicted', 'dist_corr', 'dist_uncorr'],
    )
    for _, row in df.sort_values(by='dist_corr').iterrows():
        plt.figure(figsize=(5, 5))
        # `imgs` entries are transposed with (1, 2, 0) before display.
        img = denormalize(imgs[int(row['idx'])].copy().transpose((1, 2, 0)))
        plt.imshow(img)
        plt.title("Decision function: " + str(np.round(row['dist_corr'], decimals=3)))
        plt.show()

In [None]:
# Visualise every ordered pair of distinct classes among indices 0..8
# (true class i, confused-with class j).
for i in range(9):
    for j in range(9):
        if i == j:
            continue
        print(number_to_fungus_dict[i], number_to_fungus_dict[j])
        visualize_corr_uncorr(i, j)

#### TSNE visualization

In [None]:
# Apply the first step of the fitted best estimator to the raw features, then
# embed the result in 2-D with t-SNE.
fv = pipeline.best_estimator_.steps[:-1][0][1].transform(fm)
fv_embedded = TSNE(n_components=2).fit_transform(fv)
fv_df = pd.DataFrame(fv_embedded, columns=['x', 'y'])

# `labels` is a torch tensor; convert it to plain Python ints before the
# lookup so the result does not depend on how pandas stores tensor elements.
fv_df['labels'] = [
    FungusDataset.NUMBER_TO_FUNGUS[int(label)]
    for label in labels.detach().cpu().numpy()
]

plt.figure(figsize=(15, 15))
sns.scatterplot(data=fv_df, x='x', y='y', hue='labels', legend="full", hue_order=['CA', 'CG', 'CL', 'CP', 'CT', 'CN', 'MF', 'SB', 'SC', 'BG'])
plt.title('TSNE visualization of fungus representation from Fisher Vector')
plt.show()

#### Visualizing classifier certainty with image patches

In [None]:
# Absolute per-class decision values, one column per class code.
# NOTE: this rebinds `distances` from the raw array to a DataFrame, so cells
# that index `distances[sample, class]` must be run before this one.
distances = pd.DataFrame(np.abs(distances), columns=['CA', 'CG', 'CL', 'CN', 'CP', 'CT', 'MF', 'SB', 'SC', 'BG'])
# `labels` is a torch tensor; store it as a NumPy integer column so the
# comparison below is a plain numeric filter.
distances['labels'] = labels.detach().cpu().numpy()
# Drop samples labelled 9 ('BG'); the original row index is kept.
distances = distances[distances['labels'] != 9]

In [None]:
# Grid of patches: one row per fungus class, 16 patches per row, sampled at
# even steps through the samples sorted by that class's (absolute) distance.
N_COLS = 16
fc = ['CA', 'CG', 'CL', 'CN', 'CP', 'CT', 'MF', 'SB', 'SC']

plt.figure(figsize=(25, 15))
gs1 = gridspec.GridSpec(len(fc), N_COLS)
gs1.update(wspace=0.025, hspace=0.1)  # set the spacing between axes.

for fc_idx, fungus_class in enumerate(fc):
    sorted_dist = distances.sort_values(by=fungus_class)
    n_samples = len(sorted_dist)
    if n_samples == 0:
        continue
    # With fewer than 16 samples `n_samples // 16` would be 0, which is an
    # invalid range step; fall back to taking every sample.
    step = max(n_samples // N_COLS, 1)
    # zip() stops after N_COLS picks, so no separate counter is needed.
    for col, i in zip(range(N_COLS), range(0, n_samples, step)):
        # Same `i + 1` pick as before, clamped so it cannot run past the end.
        fung = sorted_dist.iloc[min(i + 1, n_samples - 1)]
        fung_idx = fung.name  # row label, used as the index into `imgs`
        ax = plt.subplot(gs1[N_COLS * fc_idx + col])
        ax.axis('off')
        ax.imshow(denormalize(imgs[fung_idx].copy().transpose(1, 2, 0)))

# savefig does not create missing directories.
os.makedirs('./results', exist_ok=True)
plt.savefig('./results/svm_analysis.png')