# 1 Clean and Noisy Bases

## 1.1 Imports & Constants

In [1]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

from PIL import Image
from sklearn import feature_extraction

from library import generator

%config InlineBackend.figure_format='retina'

In [2]:
# Location of the Barbara test image on disk.
BARABARA = 'images/portillo/barbara.png'

# Intensity limits unpacked into the imshow calls so that every figure is
# drawn on the same 0-255 gray scale.
COLOR_SCALE = dict(vmin=0, vmax=255)

## 1.2 Utilities

In [3]:
def read_image(address):
    """Load an image file into a float64 NumPy array.

    Parameters
    ----------
    address
        File name, path object or file object; anything accepted by
        ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        The decoded pixel data converted to ``np.float64``. The shape is
        whatever ``np.array`` yields for the opened image.
    """
    # The context manager releases the underlying file as soon as the pixel
    # data has been copied out, instead of leaving the handle to Pillow's lazy
    # loading or to garbage collection.
    with Image.open(address) as image:
        pixels = np.array(image)
    return pixels.astype(np.float64)

def to_grayscale_patches(pixels, patch_size):
    """Cut an image into patches and return them flattened, one per column.

    Parameters
    ----------
    pixels : numpy.ndarray
        Image data handed to ``extract_patches_2d``.
    patch_size : tuple
        Patch dimensions handed to ``extract_patches_2d``.

    Returns
    -------
    numpy.ndarray
        Array with one column per extracted patch; each column holds the
        flattened values of that patch.
    """
    patch_stack = feature_extraction.image.extract_patches_2d(pixels, patch_size)
    # Collapse everything but the leading (patch) axis, then transpose so the
    # patches run along the columns.
    flattened = patch_stack.reshape(len(patch_stack), -1)
    return flattened.T

## 1.3 Figure Components

In [4]:
# Load the clean reference image.
barbara = read_image(BARABARA)

# Draw the noise from an explicitly seeded generator so that a fresh
# "Restart & Run All" reproduces the same noisy image, and therefore the same
# learned bases and statistics in the cells below.
NOISE_SEED = 0
noise_rng = np.random.default_rng(NOISE_SEED)

# Additive zero-mean Gaussian noise with standard deviation 20, one
# independent sample per pixel.
noisy_barbara = barbara + noise_rng.normal(scale=20, size=barbara.shape)

In [5]:
### CLEAN BARBARA

# Draw the clean image on a single axes with the frame hidden and export it
# as a PDF; the figure is closed afterwards so it is not displayed inline.
fig, ax = plt.subplots(figsize=(64, 64))
ax.imshow(barbara, cmap='gray', interpolation='nearest', **COLOR_SCALE)
ax.axis('off')
fig.savefig('01-barbara.pdf', bbox_inches='tight')
plt.close()

In [6]:
### NOISY BARBARA

# Same layout as the clean figure, applied to the noise-corrupted image.
# Values outside the COLOR_SCALE limits are clipped by the color mapping.
fig, ax = plt.subplots(figsize=(64, 64))
ax.imshow(noisy_barbara, cmap='gray', interpolation='nearest', **COLOR_SCALE)
ax.axis('off')
fig.savefig('01-noisy-barbara.pdf', bbox_inches='tight')
plt.close()

In [7]:
## CLEAN BARBARA BASES

ITERATIONS = 100
PATCH_SIZE = 16

# One column per PATCH_SIZE x PATCH_SIZE patch of the clean image.
patches = to_grayscale_patches(barbara, (PATCH_SIZE, PATCH_SIZE))

# Skip the first ITERATIONS iterates produced by the dictionary-learning
# generator and keep the one that follows them.
updates = generator.get_dictionary_learning_iterates(patches)
learned_iterate = next(itertools.islice(updates, ITERATIONS, None))

# Keep the dictionary transposed relative to the yielded iterate; the
# plotting cell reads its columns as the individual bases.
clean_dictionary = learned_iterate.T

# Coefficients of every patch with respect to every basis
# (rows: bases, columns: patches).
sparse_encoding = clean_dictionary.T @ patches

In [8]:
# Rank the bases by the total absolute weight they receive over all patches
# (the L1 norm of each row of the encoding), largest first.
norms = np.abs(sparse_encoding).sum(axis=1)
all_indices = sorted(range(len(norms)), key=lambda row: norms[row], reverse=True)

# Sign of each basis' summed coefficients; multiplying a basis by it draws the
# basis in the polarity of its net contribution.
sum_signs = np.sign(sparse_encoding.sum(axis=1))

ROWS, COLS = 3, 4

fig, axs = plt.subplots(ROWS, COLS, figsize=(64, 48))
fig.subplots_adjust(wspace=0.05, hspace=0.05)
# zip stops at the shorter input, so at most ROWS * COLS of the top-ranked
# bases are drawn, one per axes.
for index, ax in zip(all_indices, axs.flat):
    atom = clean_dictionary[:, index] * sum_signs[index]
    # Stretch the basis linearly onto [0, 255] for display.
    shifted = atom - atom.min()
    scaled = shifted / shifted.max() * 255
    tile = scaled.reshape(PATCH_SIZE, PATCH_SIZE)

    ax.imshow(tile, cmap='gray', **COLOR_SCALE)
    ax.axis('off')
fig.savefig('01-clean-bases.pdf', bbox_inches='tight')
plt.close()

In [9]:
## NOISY BARBARA BASES

ITERATIONS = 100
PATCH_SIZE = 16

# One column per PATCH_SIZE x PATCH_SIZE patch of the noisy image.
patches = to_grayscale_patches(noisy_barbara, (PATCH_SIZE, PATCH_SIZE))

# Skip the first ITERATIONS iterates produced by the dictionary-learning
# generator and keep the one that follows them.
updates = generator.get_dictionary_learning_iterates(patches)
learned_iterate = next(itertools.islice(updates, ITERATIONS, None))

# Keep the dictionary transposed relative to the yielded iterate; the
# plotting cell reads its columns as the individual bases.
noisy_dictionary = learned_iterate.T

# Coefficients of every noisy patch with respect to every basis
# (rows: bases, columns: patches).
sparse_encoding = noisy_dictionary.T @ patches

In [10]:
# Rank the bases by the total absolute weight they receive over all noisy
# patches (the L1 norm of each row of the encoding), largest first.
norms = np.abs(sparse_encoding).sum(axis=1)
all_indices = sorted(range(len(norms)), key=lambda row: norms[row], reverse=True)

# Sign of each basis' summed coefficients; multiplying a basis by it draws the
# basis in the polarity of its net contribution.
sum_signs = np.sign(sparse_encoding.sum(axis=1))

# BASES is not referenced anywhere in this cell; the grid size alone decides
# how many bases are drawn.
BASES = 12
ROWS, COLS = 3, 4

fig, axs = plt.subplots(ROWS, COLS, figsize=(64, 48))
fig.subplots_adjust(wspace=0.05, hspace=0.05)
# zip stops at the shorter input, so at most ROWS * COLS of the top-ranked
# bases are drawn, one per axes.
for index, ax in zip(all_indices, axs.flat):
    atom = noisy_dictionary[:, index] * sum_signs[index]
    # Stretch the basis linearly onto [0, 255] for display.
    shifted = atom - atom.min()
    scaled = shifted / shifted.max() * 255
    tile = scaled.reshape(PATCH_SIZE, PATCH_SIZE)

    ax.imshow(tile, cmap='gray', **COLOR_SCALE)
    ax.axis('off')
fig.savefig('01-noisy-bases.pdf', bbox_inches='tight')
plt.close()

## 1.4 Statistics

In [11]:
# Signal-to-noise ratio, taken here as the mean intensity of the clean image
# divided by the noise standard deviation.
# NOTE(review): 20 has to match the noise scale used when noisy_barbara was
# generated; keep the two values in sync.
noise_stdev = 20
snr_ratio = np.mean(barbara) / noise_stdev
print('SNR:', snr_ratio)

SNR: 5.869637680053711


In [36]:
TOP_BASES = 20

# Ranking of the clean dictionary's bases: re-encode the clean patches and
# order the basis indices by the L1 norm of their coefficient rows, largest
# first.
patches = to_grayscale_patches(barbara, (PATCH_SIZE, PATCH_SIZE))
encoding = clean_dictionary.T @ patches
norms = np.abs(encoding).sum(axis=1)
clean_priorities = sorted(range(len(norms)), key=lambda row: norms[row], reverse=True)

# Same ranking for the noisy dictionary, computed from the noisy patches.
# patches, encoding and norms are overwritten and hold the noisy values once
# this cell has finished.
patches = to_grayscale_patches(noisy_barbara, (PATCH_SIZE, PATCH_SIZE))
encoding = noisy_dictionary.T @ patches
norms = np.abs(encoding).sum(axis=1)
noisy_priorities = sorted(range(len(norms)), key=lambda row: norms[row], reverse=True)

In [39]:
# Entry (row, col) is the absolute inner product between the row-th ranked
# noisy basis and the col-th ranked clean basis.
# NOTE(review): despite the name these are inner products, not angles; they
# equal |cos(angle)| only if the dictionary columns are unit-norm. Confirm
# against the dictionary-learning code.
base_angles = np.zeros((TOP_BASES, TOP_BASES))

for row, col in itertools.product(range(TOP_BASES), repeat=2):
    noisy_atom = noisy_dictionary[:, noisy_priorities[row]]
    clean_atom = clean_dictionary[:, clean_priorities[col]]
    base_angles[row, col] = np.abs(noisy_atom @ clean_atom)

# For every top noisy basis, keep its best match among the top clean bases.
top_angles = [max(matches) for matches in base_angles]

# Summary statistics followed by min / quartiles / max of the best matches.
print(stats.describe(top_angles))
values = np.percentile(top_angles, [0, 25, 50, 75, 100])
print(values)

DescribeResult(nobs=20, minmax=(0.30477075667429465, 0.9999999980213353), mean=0.9078263624272255, variance=0.029335900839996674, skewness=-2.4412666509263987, kurtosis=5.799985946880119)
[0.30477076 0.84714707 0.99414061 0.99933735 1.        ]
