# Imports

In [None]:
import pymongo
import os
import numpy
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn_image as sns_img
from sklearn.manifold import TSNE
from matplotlib.colors import BoundaryNorm
from random import choice
from mpl_toolkits.axes_grid1 import make_axes_locatable


In [None]:
# Font sizes (in points) shared by every figure in this notebook.
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 20

# (rc group, rc keyword, size) triples, applied in the order listed.
_FONT_RC_SETTINGS = (
    ("font", "size", SMALL_SIZE),            # default text size
    ("axes", "titlesize", SMALL_SIZE),       # axes title
    ("axes", "labelsize", MEDIUM_SIZE),      # x and y axis labels
    ("xtick", "labelsize", SMALL_SIZE),      # x tick labels
    ("ytick", "labelsize", SMALL_SIZE),      # y tick labels
    ("legend", "fontsize", SMALL_SIZE),      # legend entries
    ("figure", "titlesize", BIGGER_SIZE),    # figure-level title
)
for _rc_group, _rc_key, _rc_size in _FONT_RC_SETTINGS:
    plt.rc(_rc_group, **{_rc_key: _rc_size})

# Connect to database

In [None]:
# Connect to the local MongoDB instance and fail fast when it is unreachable.
database_url = "mongodb://localhost:27017/"
try:
    client = pymongo.MongoClient(database_url)
    # server_info() sends a command to the server, so an unreachable server
    # surfaces here instead of at the first query further down the notebook.
    client.server_info()
except pymongo.errors.ServerSelectionTimeoutError as err:
    # Same exception type as before, but with a message and the original
    # pymongo error chained so the root cause stays visible in the traceback.
    raise ConnectionError(f"Could not reach MongoDB at {database_url}") from err

# Handles used throughout the notebook.
database = client["MNIST"]
std_dataset = database["dataset"]              # standard pictures
meta_collection = database["metamorphical"]    # metamorphic pictures
# Every distinct value of the "status" field in the metamorphic collection.
meta_statuses = meta_collection.distinct("status")
def get_unique_field(status_field):
    """Return the first leftover key of one metamorphic document with the given status.

    One document whose "status" equals ``status_field`` is fetched with the
    "_id", "label", "size", "image" and "status" fields excluded; the first
    key that remains in the returned document is handed back.

    NOTE(review): if no document has this status, ``find_one`` presumably
    returns None and ``iter`` raises TypeError; if no other key remains,
    ``next`` raises StopIteration.
    """
    excluded_fields = {"_id": 0, "label": 0, "size": 0, "image": 0, "status": 0}
    document = meta_collection.find_one({"status": status_field}, excluded_fields)
    remaining_keys = iter(document)
    return next(remaining_keys)

# Get a picture

In [None]:
def get_picture(query=None):
    """Pick one random picture from the standard dataset.

    Args:
        query: Optional MongoDB filter applied before sampling. ``None`` (the
            default) or an empty dict samples from the whole collection.

    Returns:
        A ``(image, label)`` tuple, where ``image`` is the stored "image"
        field reshaped to 28x28 and ``label`` is the stored "label" field.

    Raises:
        IndexError: If no document matches ``query``.
    """
    pipeline = [
        # The filter used to be ignored entirely; apply it before sampling.
        {"$match": query or {}},
        {"$sample": {"size": 1}},  # Get 1 random document
    ]
    pick = next(std_dataset.aggregate(pipeline), None)
    if pick is None:
        raise IndexError(f"no document in the standard dataset matches {query!r}")
    image = numpy.reshape(pick["image"], (28, 28))
    return image, pick["label"]

def get_meta_picture(query=None):
    """Pick one random document from the metamorphic collection.

    Args:
        query: Optional MongoDB filter applied before sampling. ``None`` (the
            default) or an empty dict samples from the whole collection.

    Returns:
        The sampled document as a dict, with its "image" field replaced by
        the same data reshaped to 28x28.

    Raises:
        IndexError: If no document matches ``query``.
    """
    pipeline = [
        {"$match": query or {}},
        {"$sample": {"size": 1}},  # Get 1 random document
    ]
    pick = next(meta_collection.aggregate(pipeline), None)
    if pick is None:
        raise IndexError(f"no document in the metamorphic collection matches {query!r}")
    pick["image"] = numpy.reshape(pick["image"], (28, 28))
    return pick


# A look into the pictures of both datasets

In [None]:

# 3x3 grid of randomly sampled pictures from the standard dataset.
blues_cmap = sns.color_palette("Blues", as_cmap=True)
fig, axes = plt.subplots(3, 3, figsize=(12, 12), sharex=True, sharey=True)
for panel in axes.flat:  # row-major order, one random picture per panel
    a_pic, a_label = get_picture()
    panel.imshow(a_pic, cmap=blues_cmap)
    panel.set_title(f"Label: {a_label}")
    # Pixel coordinates carry no information here, so hide the ticks.
    panel.set_xticks([])
    panel.set_yticks([])
plt.show()


In [None]:

# 3x3 grid of randomly sampled pictures from the metamorphic collection.
query = {}  # MongoDB filter for the sampled pictures; {} samples from everything
blues_cmap = sns.color_palette("Blues", as_cmap=True)
fig, axes = plt.subplots(3, 3, figsize=(12, 12), sharex=True, sharey=True)
for panel in axes.flat:
    meta_pic = get_meta_picture(query)
    # Single quotes inside the f-string: reusing the enclosing double quotes
    # is only accepted from Python 3.12 on and is a SyntaxError before that.
    title = f"Label: {meta_pic['label']} | Status: {meta_pic['status']}"
    panel.imshow(meta_pic["image"], cmap=blues_cmap)
    panel.set_title(title)
    panel.set_xticks([])
    panel.set_yticks([])
plt.show()

# Standard Dataset
## Distribution

In [None]:
# Handles reused by later cells.
std_results = database["dataset_results"]    # predictions for the standard dataset
palette_color = sns.color_palette('bright')  # one colour per digit

num_labels = 10

# Count how often each true label occurs. Only the "label" field is requested,
# so the 784-value image of every document is not transferred just to be ignored.
label_bins = [0] * num_labels
for item in std_dataset.find({}, {"label": 1}):
    label_bins[item["label"]] += 1

# Count how often each label was predicted.
pred_bins = [0] * num_labels
for item in std_results.find({}, {"predicted": 1}):
    pred_bins[item["predicted"]] += 1

# Side-by-side pie charts: true label shares vs. predicted label shares.
fig, axes = plt.subplots(1, 2, figsize=(14, 7))
fig.suptitle("Training set distribution")
axes[0].pie(label_bins, labels=range(num_labels), colors=palette_color, autopct="%.0f%%")
axes[1].pie(pred_bins, labels=range(num_labels), colors=palette_color, autopct="%.0f%%")
axes[0].set_title("True")
axes[1].set_title("Predicted")
plt.show()


In [None]:
def get_predicted_results(query=None, limit=10000):
    """Join the standard-dataset predictions with their source pictures.

    Every document of ``dataset_results`` is joined to the ``dataset``
    collection (results "id" == dataset "_id") and reduced to the fields
    "predicted", "label" and "image".

    Args:
        query: Optional MongoDB filter on the projected fields. It runs
            before the ``$unwind`` stages, i.e. while "label" and "image" are
            still the arrays produced by ``$lookup``. ``None`` or an empty
            dict keeps everything.
        limit: Maximum number of documents to return (default 10000, the
            previously hard-coded value). ``None`` disables the cap.

    Returns:
        A list of dicts with the keys "predicted", "label" and "image".
    """
    pipeline = [
        {
            "$lookup": {
                "from": "dataset",
                "localField": "id",
                "foreignField": "_id",
                "as": "matches",
            }
        },
        {
            "$project": {
                "_id": 0,
                "predicted": "$predicted",
                "label": "$matches.label",
                "image": "$matches.image",
            }
        },
        {"$match": query or {}},
        # $lookup yields arrays; unwinding turns them back into single values.
        {"$unwind": "$image"},
        {"$unwind": "$label"},
    ]
    if limit is not None:
        pipeline.append({"$limit": limit})
    return list(std_results.aggregate(pipeline))

## Clustering

In [None]:
from matplotlib.colors import ListedColormap
my_cmap = ListedColormap(palette_color.as_hex())
fig, axes = plt.subplots(1, 3, figsize=(21,7))
x = []
y = []
z = []
b = []
%matplotlib inline

predicted_comp_points = get_predicted_results()
for item in predicted_comp_points:
    x.append(item["image"])
    y.append(item["label"])
    z.append(item["predicted"])
    b.append(int(not item["predicted"] == item["label"]))

tsne = TSNE(n_components=2, random_state=42)
np_x = numpy.array(x)
x_tsne = tsne.fit_transform(np_x)
x_vals, y_vals = zip(*x_tsne)

scatter_label= axes[0].scatter(x_vals, y_vals, c=y, cmap=my_cmap, s=10)
scatter_pred = axes[1].scatter(x_vals, y_vals, c=z, cmap=my_cmap, s=10)
scatter_bool = axes[2].scatter(x_vals, y_vals, c=b, cmap="bwr", s=10)
axes[2].legend(["Correct", "Incorrect"])

divider0 = make_axes_locatable(axes[0])
cax0 = divider0.append_axes("left", size="5%", pad=0.05)
divider1 = make_axes_locatable(axes[1])
cax1 = divider1.append_axes("left", size="0%", pad=0.05)
divider2 = make_axes_locatable(axes[2])
cax2 = divider2.append_axes("right", size="0%", pad=0.05)


cbar0 = fig.colorbar(scatter_label, cax=cax0, location="left")
cbar1 = fig.colorbar(scatter_pred, cax=cax1)
cbar2 = fig.colorbar(scatter_bool, cax=cax2)
cax0.yaxis.tick_left()
fig.delaxes(cax1)
fig.delaxes(cax2)

axes[0].set_xticks([]),axes[0].set_yticks([])
axes[1].set_xticks([]),axes[1].set_yticks([])
axes[2].set_xticks([]),axes[2].set_yticks([])
axes[0].set_title("True")
axes[1].set_title("Predicted")
axes[2].set_title("Difference")
fig.suptitle("Testing set groupings")
plt.tight_layout()
plt.show()

## Heatmap of correctly and incorrectly predicted labels

In [None]:
# For every true label, count per pixel how many pictures are "bright" there
# (at least half of the picture's own maximum), split by whether the
# prediction was correct.
queries = [{"label": digit} for digit in range(10)]
for query in queries:
    true_heatmap = numpy.zeros(28 * 28, dtype=int)
    false_heatmap = numpy.zeros(28 * 28, dtype=int)
    for item in get_predicted_results(query):
        pixels = numpy.asarray(item["image"])
        peak = pixels.max()
        if peak == 0:
            # A blank picture has no pixel reaching the threshold; skipping it
            # avoids a division by zero without changing the counts.
            continue
        bright = (pixels / peak) >= 0.5
        # One vectorised add instead of a Python loop over all 784 pixels.
        if item["predicted"] == item["label"]:
            true_heatmap += bright
        else:
            false_heatmap += bright

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(f"True label: {query['label']}")
    axes[0].imshow(true_heatmap.reshape(28, 28))
    axes[0].set_title("Correctly predicted")
    axes[1].imshow(false_heatmap.reshape(28, 28))
    axes[1].set_title("Incorrectly predicted")
    plt.show()



## Heatmap of the falsely predicted ones

In [None]:
# For every predicted label, count per pixel how many *misclassified* pictures
# are "bright" there (at least half of the picture's own maximum).
queries = [{"predicted": digit} for digit in range(10)]
for query in queries:
    false_heatmap = numpy.zeros(28 * 28, dtype=int)
    for item in get_predicted_results(query):
        if item["predicted"] == item["label"]:
            # Only wrong predictions are plotted; the tally of correct ones
            # that used to be built here was never used.
            continue
        pixels = numpy.asarray(item["image"])
        peak = pixels.max()
        if peak == 0:
            # A blank picture has no pixel reaching the threshold; skipping it
            # avoids a division by zero without changing the counts.
            continue
        false_heatmap += (pixels / peak) >= 0.5

    fig, ax = plt.subplots(1, 1, figsize=(12, 6))
    ax.imshow(false_heatmap.reshape(28, 28))
    ax.set_title(f"Incorrectly predicted as {query['predicted']}")
    plt.show()



# Metamorphic
## Distribution

In [None]:
meta_results = database["metamorphical_results"]  # predictions for the metamorphic pictures

num_labels = 10


def _tally(cursor, field):
    """Count, per digit, how many documents of ``cursor`` carry that value in ``field``."""
    bins = [0] * num_labels
    for document in cursor:
        bins[document[field]] += 1
    return bins


# True labels come from the metamorphic pictures, predictions from the results.
label_bins = _tally(meta_collection.find({}, {"label": 1}), "label")
pred_bins = _tally(meta_results.find({}), "predicted")

# Side-by-side pie charts: true label shares vs. predicted label shares.
fig, (true_ax, pred_ax) = plt.subplots(1, 2, figsize=(14,7))
axes = (true_ax, pred_ax)
fig.suptitle("Metamorphic set distribution")
for panel, bins, heading in ((true_ax, label_bins, "True"), (pred_ax, pred_bins, "Predicted")):
    panel.pie(bins, labels=range(0, 10), colors=palette_color, autopct="%.0f%%")
    panel.title.set_text(heading)
plt.show()

In [None]:
def get_predicted_meta_results(query=None, limit=10000):
    """Join the metamorphic predictions with their source pictures.

    Every document of ``metamorphical_results`` is joined to the
    ``metamorphical`` collection (results "id" == metamorphical "_id") and
    reduced to the fields "predicted", "label", "image" and "status".

    Args:
        query: Optional MongoDB filter on the projected fields. It runs
            before the ``$unwind`` stages, i.e. while "label", "image" and
            "status" are still the arrays produced by ``$lookup``. ``None``
            or an empty dict keeps everything.
        limit: Maximum number of documents to return (default 10000, the
            previously hard-coded value). ``None`` disables the cap.

    Returns:
        A list of dicts with the keys "predicted", "label", "image" and
        "status". "status" is never unwound, so it stays an array.
    """
    pipeline = [
        {
            "$lookup": {
                "from": "metamorphical",
                "localField": "id",
                "foreignField": "_id",
                "as": "matches",
            }
        },
        {
            "$project": {
                "_id": 0,
                "predicted": "$predicted",
                "label": "$matches.label",
                "image": "$matches.image",
                "status": "$matches.status",
            }
        },
        {"$match": query or {}},
        # $lookup yields arrays; unwinding turns them back into single values.
        {"$unwind": "$image"},
        {"$unwind": "$label"},
    ]
    if limit is not None:
        pipeline.append({"$limit": limit})
    return list(meta_results.aggregate(pipeline))

## Clustering

In [None]:
# One t-SNE figure for the whole metamorphic set, then one per status.
queries = [{}] + [{"status": field} for field in meta_statuses]
os.makedirs("figures", exist_ok=True)  # savefig does not create directories

for query in queries:
    predicted_comp_points_mean = get_predicted_meta_results(query)
    if not predicted_comp_points_mean:
        print(f"No results for {query}, skipping")
        continue

    # Rebuilt for every query: these lists used to be created once outside the
    # loop, so each figure also contained the points of all earlier queries.
    x = [item["image"] for item in predicted_comp_points_mean]      # image vectors
    y = [item["label"] for item in predicted_comp_points_mean]      # true labels
    z = [item["predicted"] for item in predicted_comp_points_mean]  # predicted labels
    # 1 where the prediction differs from the label, else 0.
    b = [int(item["predicted"] != item["label"]) for item in predicted_comp_points_mean]

    # Project the image vectors to 2-D; the fixed seed keeps the layout reproducible.
    tsne = TSNE(n_components=2, random_state=42)
    np_x = numpy.array(x)
    x_tsne = tsne.fit_transform(np_x)
    x_vals, y_vals = x_tsne[:, 0], x_tsne[:, 1]

    fig, axes = plt.subplots(1, 3, figsize=(21, 7))
    scatter_label = axes[0].scatter(x_vals, y_vals, c=y, cmap=my_cmap, s=10)
    scatter_pred = axes[1].scatter(x_vals, y_vals, c=z, cmap=my_cmap, s=10)
    scatter_bool = axes[2].scatter(x_vals, y_vals, c=b, cmap="bwr", s=10)

    divider0 = make_axes_locatable(axes[0])
    cax0 = divider0.append_axes("left", size="5%", pad=0.05)
    divider1 = make_axes_locatable(axes[1])
    cax1 = divider1.append_axes("left", size="5%", pad=0.05)
    divider2 = make_axes_locatable(axes[2])
    cax2 = divider2.append_axes("right", size="5%", pad=0.05)

    cbar0 = fig.colorbar(scatter_label, label='Color Value', cax=cax0, location="left")
    cbar1 = fig.colorbar(scatter_pred, label='Color Value', cax=cax1)
    cbar2 = fig.colorbar(scatter_bool, label='Truth Value', cax=cax2)
    cax0.yaxis.tick_left()

    # NOTE(review): the middle colorbar is created and removed again right away;
    # presumably because it would duplicate the left one - confirm.
    fig.delaxes(cax1)
    axes[0].set_title("True")
    axes[1].set_title("Predicted")
    axes[2].set_title("Difference")
    fig.suptitle("Metamorphic set groupings" + (f" | {query}" if query else ""))
    plt.tight_layout()
    # t-SNE coordinates have no interpretable scale, so hide the ticks.
    for panel in axes:
        panel.set_xticks([])
        panel.set_yticks([])

    # Build the whole file name first: the old expression applied the
    # conditional to the entire concatenation, producing "figures/meta_<status>"
    # without extension, or "all.eps" outside the figures directory.
    figure_name = next(iter(query.values())) if query else "all"
    # Save before showing: once the figure has been shown, plt.savefig wrote
    # a blank figure instead of this one.
    fig.savefig(f"figures/meta_{figure_name}.eps")
    plt.show()
    plt.close(fig)  # release the figure before the next iteration

## Heatmap of the labels that were correctly and incorrectly predicted

In [None]:

# For every true label, count per pixel how many metamorphic pictures are
# "bright" there (at least half of the picture's own maximum), split by
# whether the prediction was correct.
queries = [{"label": digit} for digit in range(10)]
for query in queries:
    true_heatmap = numpy.zeros(28 * 28, dtype=int)
    false_heatmap = numpy.zeros(28 * 28, dtype=int)
    for item in get_predicted_meta_results(query):
        pixels = numpy.asarray(item["image"])
        peak = pixels.max()
        if peak == 0:
            # A blank picture has no pixel reaching the threshold; skipping it
            # avoids a division by zero without changing the counts.
            continue
        bright = (pixels / peak) >= 0.5
        # One vectorised add instead of a Python loop over all 784 pixels.
        if item["predicted"] == item["label"]:
            true_heatmap += bright
        else:
            false_heatmap += bright

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(f"True label: {query['label']}")
    axes[0].imshow(true_heatmap.reshape(28, 28))
    axes[0].set_title("Correctly predicted")
    axes[1].imshow(false_heatmap.reshape(28, 28))
    axes[1].set_title("Incorrectly predicted")
    plt.show()




## Heatmap of the predicted labels that aren't correct

In [None]:

# For every predicted label, count per pixel how many *misclassified*
# metamorphic pictures are "bright" there (at least half of the picture's
# own maximum).
queries = [{"predicted": digit} for digit in range(10)]
for query in queries:
    false_heatmap = numpy.zeros(28 * 28, dtype=int)
    for item in get_predicted_meta_results(query):
        if item["predicted"] == item["label"]:
            # Only wrong predictions are plotted; the tally of correct ones
            # that used to be built here was never used.
            continue
        pixels = numpy.asarray(item["image"])
        peak = pixels.max()
        if peak == 0:
            # A blank picture has no pixel reaching the threshold; skipping it
            # avoids a division by zero without changing the counts.
            continue
        false_heatmap += (pixels / peak) >= 0.5

    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.imshow(false_heatmap.reshape(28, 28))
    ax.set_title(f"Incorrectly predicted as {query['predicted']}")
    plt.show()




# Won't use

In [None]:
from sklearn import tree
import graphviz
import numpy as np
from dtreeviz import model

np_x = np.array(x)[:1000]
np_y = np.array(z)[:1000]

clf = tree.DecisionTreeClassifier(max_depth=4)
clf = clf.fit(np_x, np_y)
viz = model(clf, X_train=np_x, y_train=np_y,
            feature_names=[f"[{x%28}, {int(x/28)}]" for x in range(0, 784)],
            target_name="label")


%config InlineBackend.figure_format = 'retina' # Make visualizations look good
#%config InlineBackend.figure_format = 'svg' 
%matplotlib inline
plt.Figure(figsize=(15, 15))
viz.view(fancy=False, orientation="TD", fontname="DejaVu Sans")

In [None]:
from lime.lime_image import LimeImageExplainer
from lime.wrappers.scikit_image import SegmentationAlgorithm

# LIME objects consumed by the explanation cell below.
SEGMENTATION_ALGORITHM = "quickshift"
segmenter = SegmentationAlgorithm(SEGMENTATION_ALGORITHM)  # passed as segmentation_fn
explainer = LimeImageExplainer()

In [None]:
def _predict_proba_from_images(images):
    """Adapt ``clf.predict_proba`` to the image batches LIME hands to its classifier.

    NOTE(review): assumes LIME passes a batch shaped (n, 28, 28, 3) whose
    three channels are identical copies of the grayscale picture - confirm
    against the LIME documentation.
    """
    batch = numpy.asarray(images)
    if batch.ndim == 4:
        batch = batch[..., 0]  # keep a single channel
    # The tree was fitted on flattened pictures, so flatten each one again.
    return clf.predict_proba(batch.reshape(len(batch), -1))


# explain_instance explains ONE picture. The old call passed the whole
# (samples x 784) matrix as if it were a single image, together with a
# classifier that cannot take image batches.
sample_image = numpy.asarray(np_x[0], dtype=float).reshape(28, 28)
explanation = explainer.explain_instance(
    sample_image,
    _predict_proba_from_images,
    top_labels=10,
    hide_color=0,
    num_samples=10000,
    segmentation_fn=segmenter,
)
explanation