In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Numpy arrays representing the CIFAR-10 data from the project were saved with varying amount
# of data dropped from each dataset and without embeddings.
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open, so it is used
# as a context manager; each array is read into memory on key access, before the file closes.
with np.load('../data/no_embedding.npz') as no_embeddings:
    np_no_embedding_drop_0 = no_embeddings['no_embedding_no_transform_drop_0']
    np_no_embedding_drop_25 = no_embeddings['no_embedding_no_transform_drop_25']
    np_no_embedding_drop_50 = no_embeddings['no_embedding_no_transform_drop_50']
    np_no_embedding_drop_75 = no_embeddings['no_embedding_no_transform_drop_75']

# Numpy arrays representing the CIFAR-10 data from the project were saved with transforms
# applied and without any data dropped.
# NOTE: after the with-block the archive handle is closed; use the np_* arrays from here on.
with np.load('../data/transforms.npz') as transforms:
    np_grayscale_drop_0 = transforms['no_embedding_grayscale_drop_0']
    np_little_blur_drop_0 = transforms['no_embedding_little_blur_drop_0']
    np_big_blur_drop_0 = transforms['no_embedding_big_blur_drop_0']
    np_rotate_drop_0 = transforms['no_embedding_rotate_180_drop_0']

In [None]:
# Wrap every loaded array in a DataFrame so that a single feature can later be
# selected by its column label when plotting.

# Untransformed data at each drop level
(
    df_no_embedding_drop_0,
    df_no_embedding_drop_25,
    df_no_embedding_drop_50,
    df_no_embedding_drop_75,
) = (
    pd.DataFrame(array)
    for array in (
        np_no_embedding_drop_0,
        np_no_embedding_drop_25,
        np_no_embedding_drop_50,
        np_no_embedding_drop_75,
    )
)

# Transformed data with nothing dropped
(
    df_grayscale_drop_0,
    df_little_blur_drop_0,
    df_big_blur_drop_0,
    df_rotate_drop_0,
) = (
    pd.DataFrame(array)
    for array in (
        np_grayscale_drop_0,
        np_little_blur_drop_0,
        np_big_blur_drop_0,
        np_rotate_drop_0,
    )
)

In [None]:
# Feature (column) indices chosen for inspection: six in total, two taken from
# each colour channel of the image data.
pixels = (
    [23, 685]         # from the R channel
    + [1098, 1765]    # from the G channel
    + [2210, 2906]    # from the B channel
)

In [None]:
# Function to plot histogram of the values of a pixel over a dataset
def create_histplot(x, y, ax, df_data, column, title=None, ylabel=None,
                    binwidth=0.02, xlim=(0, 1), ylim=(0, 0.07)):
    """Draw a probability histogram of one column into one cell of a subplot grid.

    Parameters
    ----------
    x, y : int
        Row and column position of the target axes; the cell used is ``ax[x, y]``.
    ax : 2-D indexable of Axes
        Grid of axes, indexed as ``ax[row, col]``.
    df_data : DataFrame
        Data passed to ``sns.histplot`` as ``data``.
    column : hashable
        Column of ``df_data`` whose values are histogrammed.
    title : str, optional
        Axes title; only applied on the top row (``x < 1``).
    ylabel : str, optional
        Y-axis label; only applied in the first column (``y < 1``). Every other
        column gets an empty y-label.
    binwidth : float, default 0.02
        Histogram bin width passed to ``sns.histplot``.
    xlim, ylim : (low, high), default (0, 1) and (0, 0.07)
        Fixed axis limits, so every cell of the grid shares the same scale.

    Notes
    -----
    The x-label is always cleared; callers that want one (e.g. on the bottom
    row) must set it after this call.
    """
    axis = ax[x, y]
    sns.histplot(data=df_data, x=column, ax=axis, binwidth=binwidth, stat='probability')

    # Identical limits on every cell keep the histograms visually comparable.
    axis.set_xlim(*xlim)
    axis.set_ylim(*ylim)

    # Titles only on the top row, y-labels only down the left-hand column.
    if x < 1:
        axis.set_title(title)
    axis.set_ylabel(ylabel if y < 1 else '')
    axis.set_xlabel('')

In [None]:
# Plot histograms for the six pixel values by transform group

# (dataset, column title) pairs - one grid column each, left to right
transform_groups = [
    (df_no_embedding_drop_0, 'No embedding'),
    (df_grayscale_drop_0, 'Grayscale'),
    (df_little_blur_drop_0, 'Little Blur'),
    (df_big_blur_drop_0, 'Big Blur'),
    (df_rotate_drop_0, 'Rotate 180'),
]

# One row per selected feature, one column per transform; squeeze=False keeps
# the axes grid 2-D so ax[row, col] indexing works for any grid shape.
fig, ax = plt.subplots(len(pixels), len(transform_groups), figsize=(20, 24), squeeze=False)

for i, pixel in enumerate(pixels):
    for j, (df_group, group_title) in enumerate(transform_groups):
        create_histplot(i, j, ax, df_group, pixel, group_title, 'Feature ' + str(pixel))

# create_histplot blanks the x-label on every cell, so label the bottom row here
for j in range(len(transform_groups)):
    ax[-1, j].set_xlabel('Pixel Value')

fig.savefig('../output/pixels_by_transform.png')

In [None]:
# Plot histograms of the pixel values by drop group

# (dataset, column title) pairs - one grid column each, left to right
drop_groups = [
    (df_no_embedding_drop_0, 'No embedding - drop 0%'),
    (df_no_embedding_drop_25, 'No embedding - drop 25%'),
    (df_no_embedding_drop_50, 'No embedding - drop 50%'),
    (df_no_embedding_drop_75, 'No embedding - drop 75%'),
]

# One row per selected feature, one column per drop level; squeeze=False keeps
# the axes grid 2-D so ax[row, col] indexing works for any grid shape.
fig, ax = plt.subplots(len(pixels), len(drop_groups), figsize=(16, 24), squeeze=False)

for i, pixel in enumerate(pixels):
    for j, (df_group, group_title) in enumerate(drop_groups):
        create_histplot(i, j, ax, df_group, pixel, group_title, 'Feature ' + str(pixel))

# create_histplot blanks the x-label on every cell; label the bottom row so the
# figure matches the by-transform figure and can be read on its own
for j in range(len(drop_groups)):
    ax[-1, j].set_xlabel('Pixel Value')

fig.savefig('../output/pixels_by_drop_group.png')