# Incremental PCA Convergence and Timing + Elliptic Envelope Performance 

## Import dependencies

In [None]:
%matplotlib inline

import os
from os.path import dirname, abspath, basename

import json

import glob
import imageio

import pickle

import numpy as np
np.random.seed(13)

import h5py as h5

import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from sklearn.metrics import roc_curve, auc

from scipy.interpolate import CubicSpline


## Load timing and convergence monitoring parameters

In [None]:
# Notebook inputs: the JSON config file to read and the dataset entry to select from it.
user_input = dict(
    config="flag-downsampled-diffraction-patterns-with-incremental-pca.json",
    dataset="3iyf-100K-mixed-hit-99",
)

incremental_pca_config_file = user_input["config"]
dataset_name = user_input["dataset"]

# The config maps dataset names to their parameter dictionaries.
with open(incremental_pca_config_file) as incremental_pca_config_file_handle:
    incremental_pca_config_params = json.load(incremental_pca_config_file_handle)

if dataset_name not in incremental_pca_config_params:
    raise Exception("Dataset {} not in Config file.".format(dataset_name))

dataset_params = incremental_pca_config_params[dataset_name]

# Geometry of the downsampled diffraction patterns.
downsampled_diffraction_pattern_height = dataset_params["downsampledDiffractionPatternHeight"]
downsampled_diffraction_pattern_width = dataset_params["downsampledDiffractionPatternWidth"]

# Dataset size and batching; a trailing partial batch is dropped by the floor division.
num_downsampled_diffraction_patterns = dataset_params["numDiffractionPatterns"]
num_downsampled_diffraction_patterns_to_fit_per_batch = dataset_params["numDownsampledDiffractionPatternsToFitPerBatch"]
num_batches_of_downsampled_diffraction_patterns = (
    num_downsampled_diffraction_patterns
    // num_downsampled_diffraction_patterns_to_fit_per_batch
)

# How often (in batches) projection and convergence were recorded.
num_iters_to_project_downsampled_diffraction_patterns_seen_thus_far = dataset_params["numItersToProjectDownsampledDiffractionPatternsSeenThusFar"]
num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far = dataset_params["numItersToMeasureConvergenceForDownsampledDiffractionPatternsSeenThusFar"]

# Directory holding the per-batch .npy result files read by the cells below.
incremental_pca_results_dir = dataset_params["incrementalPcaResultsDir"]

# Threshold the convergence measure is compared against in the cells below.
minimum_convergence_measure_to_start_flagging_downsampled_diffraction_pattern_outliers = dataset_params["minimumConvergenceMeasureToStartFlaggingDownsampledDiffractionPatternOutliers"]

# Print a tab-aligned summary; each entry is followed by a blank line.
dataset_summary = (
    ("System:\t\t\t\t\t\t\t\t", dataset_name.split("-")[0]),
    ("Height of mixed-hit downsampled diffraction patterns:\t\t", downsampled_diffraction_pattern_height),
    ("Width of mixed-hit downsampled diffraction patterns:\t\t", downsampled_diffraction_pattern_width),
    ("Number of mixed-hit downsampled diffraction patterns:\t\t", num_downsampled_diffraction_patterns),
    ("Number of mixed-hit downsampled diffraction patterns per batch:\t", num_downsampled_diffraction_patterns_to_fit_per_batch),
)
for summary_label, summary_value in dataset_summary:
    print("{}{}\n".format(summary_label, summary_value))


## Show downsampled diffraction patterns

In [None]:
def _show_diffraction_pattern_pair(diffraction_patterns, downsampled_diffraction_patterns, image_idx, kind):
    """Plot one full-resolution pattern next to its downsampled counterpart.

    Parameters
    ----------
    diffraction_patterns : indexable
        Container of full-resolution patterns (here an open HDF5 dataset).
    downsampled_diffraction_patterns : indexable
        Container of downsampled patterns, indexed the same way.
    image_idx : int
        Zero-based index of the pattern; titles show it one-based.
    kind : str
        Title prefix, e.g. "Single-hit" or "Outlier".
    """
    # Read each image once instead of re-reading it for the plot and for each axis label.
    diffraction_pattern = diffraction_patterns[image_idx]
    downsampled_diffraction_pattern = downsampled_diffraction_patterns[image_idx]

    fig, axes = plt.subplots(1, 2, figsize=(9, 9))

    axes[0].matshow(diffraction_pattern, cmap='gray')
    axes[0].set_title("{} Diffraction\nPattern #{}".format(kind, image_idx + 1), pad=20)
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    # Axis labels carry the image size in pixels (width on x, height on y).
    axes[0].set_xlabel(diffraction_pattern.shape[1])
    axes[0].set_ylabel(diffraction_pattern.shape[0])

    axes[1].matshow(downsampled_diffraction_pattern, cmap='gray')
    axes[1].set_title("{} Downsampled\nDiffraction Pattern #{}".format(kind, image_idx + 1), pad=20)
    axes[1].set_xticks([])
    axes[1].set_yticks([])
    # The downsampled size is taken from the config rather than from the array.
    axes[1].set_xlabel(downsampled_diffraction_pattern_width)
    axes[1].set_ylabel(downsampled_diffraction_pattern_height)

    fig.show()


downsampled_diffraction_patterns_h5_file = dataset_params["downsampledDiffractionPatternsH5File"]

# The full-resolution file has the same base name and lives one directory above
# the directory containing the downsampled file.
diffraction_patterns_h5_file = os.path.join(dirname(dirname(abspath(downsampled_diffraction_patterns_h5_file))), basename(downsampled_diffraction_patterns_h5_file))

# Context managers guarantee both HDF5 files are closed even if plotting raises.
with h5.File(downsampled_diffraction_patterns_h5_file, 'r') as downsampled_diffraction_patterns_h5_file_handle, \
        h5.File(diffraction_patterns_h5_file, 'r') as diffraction_patterns_h5_file_handle:

    # Read the mask once; 1 marks a single hit and 0 an outlier.
    single_hits_mask = diffraction_patterns_h5_file_handle["single_hits_mask"][:]
    single_hit_idx = np.where(single_hits_mask == 1)[0]
    outlier_idx = np.where(single_hits_mask == 0)[0]

    # Re-seed so the same three examples of each kind are drawn on every run.
    np.random.seed(13)
    single_hit_random_sample_idx = np.random.choice(single_hit_idx, 3, replace=False)
    outlier_random_sample_idx = np.random.choice(outlier_idx, 3, replace=False)

    for image_idx in single_hit_random_sample_idx:
        _show_diffraction_pattern_pair(
            diffraction_patterns_h5_file_handle["diffraction_patterns"],
            downsampled_diffraction_patterns_h5_file_handle["downsampled_diffraction_patterns"],
            image_idx,
            "Single-hit",
        )

    for image_idx in outlier_random_sample_idx:
        _show_diffraction_pattern_pair(
            diffraction_patterns_h5_file_handle["diffraction_patterns"],
            downsampled_diffraction_patterns_h5_file_handle["downsampled_diffraction_patterns"],
            image_idx,
            "Outlier",
        )


## Convergence measure for downsampled diffraction patterns processed thus far

In [None]:
# Load the convergence measure recorded every
# `num_iters_to_measure_convergence_...` batches (starting at twice that
# interval), stop at the first value below the configured threshold, and plot
# the measure against the batch number.
convergence_measures_for_downsampled_diffraction_patterns_thus_far = []
batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far = np.arange(
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far * 2,
    num_batches_of_downsampled_diffraction_patterns + 1,
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
)

# Batch numbers whose file was actually found. Plotting against these (rather
# than the first N candidate batch numbers) keeps x and y aligned even when a
# result file is missing in the middle of the range.
loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far = []

# Default to the last candidate batch if the threshold is never reached.
batch_number_converged = batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far[-1]
for batch_number in batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far:
    convergence_measure_for_downsampled_diffraction_patterns_thus_far_file = os.path.join(
        incremental_pca_results_dir,
        "incremental-pca-convergence-measure-for-all-downsampled-diffraction-patterns-seen-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(convergence_measure_for_downsampled_diffraction_patterns_thus_far_file):
        continue

    convergence_measure_for_downsampled_diffraction_patterns_thus_far = np.load(convergence_measure_for_downsampled_diffraction_patterns_thus_far_file)
    convergence_measures_for_downsampled_diffraction_patterns_thus_far.append(convergence_measure_for_downsampled_diffraction_patterns_thus_far)
    loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far.append(batch_number)

    if convergence_measure_for_downsampled_diffraction_patterns_thus_far < minimum_convergence_measure_to_start_flagging_downsampled_diffraction_pattern_outliers:
        batch_number_converged = batch_number
        break

num_batches_processed_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far = len(loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far)

if num_batches_processed_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far == 0:
    # Fail with an actionable message instead of an opaque "min() arg is an empty sequence".
    raise ValueError("No convergence measure files found in {}".format(incremental_pca_results_dir))

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Jensen-Shannon Divergence", fontsize=20)

# Batch numbers are visited in ascending order, so first/last are min/max.
ax.set_xlim(
    loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far[0],
    loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far[-1],
)

# Integer tick positions. The earlier explicit set_xticks(...) call was dropped
# because set_major_locator replaced its ticks immediately.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(
    loaded_batch_numbers_for_convergence_measures_for_downsampled_diffraction_patterns_thus_far,
    convergence_measures_for_downsampled_diffraction_patterns_thus_far,
)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


In [None]:
# Display the batch number at which the convergence loop stopped: the first
# batch below the threshold, or the last candidate batch if none was.
batch_number_converged


## Time taken to update Incremental PCA with a new batch of downsampled diffraction patterns

In [None]:
# Load the time taken to update Incremental PCA with each batch, for batches
# 1..batch_number_converged, and plot it against the batch number.
times_taken_to_update_incremental_pca_with_new_batch = []
batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch = np.arange(1, batch_number_converged + 1)

# Batch numbers whose timing file was actually found. Plotting against these
# keeps x and y aligned even when a file is missing in the middle of the range.
loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch = []

for batch_number in batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch:
    time_taken_to_update_incremental_pca_with_new_batch_file = os.path.join(
        incremental_pca_results_dir,
        "incremental-pca-time-taken-to-update-incremental-pca-with-new-batch-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(time_taken_to_update_incremental_pca_with_new_batch_file):
        continue

    time_taken_to_update_incremental_pca_with_new_batch = np.load(time_taken_to_update_incremental_pca_with_new_batch_file)
    times_taken_to_update_incremental_pca_with_new_batch.append(time_taken_to_update_incremental_pca_with_new_batch)
    loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch.append(batch_number)

# Kept under its original name because the throughput cell reads it.
num_batches_processed_for_times_taken_to_update_incremental_pca_with_new_batch = len(loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch)

if num_batches_processed_for_times_taken_to_update_incremental_pca_with_new_batch == 0:
    # Fail with an actionable message instead of an opaque "min() arg is an empty sequence".
    raise ValueError("No Incremental PCA update timing files found in {}".format(incremental_pca_results_dir))

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Time taken to update Incremental PCA (seconds)", fontsize=20)

# Batch numbers are visited in ascending order, so first/last are min/max.
ax.set_xlim(
    loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch[0],
    loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch[-1],
)
ax.set_ylim(min(times_taken_to_update_incremental_pca_with_new_batch), max(times_taken_to_update_incremental_pca_with_new_batch))

# Integer tick positions. The earlier explicit set_xticks(...) call was dropped
# because set_major_locator replaced its ticks immediately.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(
    loaded_batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch,
    times_taken_to_update_incremental_pca_with_new_batch,
)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


In [None]:
# Throughput of the Incremental PCA updates: the number of patterns in the
# batches whose timing file was loaded, divided by the summed update time.
# The loaded batch count is used rather than the full dataset size.
num_downsampled_diffraction_patterns_timed = (
    num_batches_processed_for_times_taken_to_update_incremental_pca_with_new_batch
    * num_downsampled_diffraction_patterns_to_fit_per_batch
)
total_time_taken_to_update_incremental_pca = np.sum(times_taken_to_update_incremental_pca_with_new_batch)

# "%d" truncates the rate to a whole number of patterns per second.
print(
    "%d diffraction patterns fit to Incremental PCA per second"
    % (num_downsampled_diffraction_patterns_timed / total_time_taken_to_update_incremental_pca)
)


## Time taken to project the downsampled diffraction patterns seen thus far

In [None]:
# Load the time taken to project the downsampled diffraction patterns seen so
# far, recorded every `num_iters_to_project_...` batches up to
# batch_number_converged, and plot it against the batch number.
times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far = []
batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far = np.arange(
    num_iters_to_project_downsampled_diffraction_patterns_seen_thus_far,
    batch_number_converged + 1,
    num_iters_to_project_downsampled_diffraction_patterns_seen_thus_far,
)

# Batch numbers whose timing file was actually found. Plotting against these
# keeps x and y aligned even when a file is missing in the middle of the range.
loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far = []

for batch_number in batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far:
    time_taken_to_project_downsampled_diffraction_patterns_seen_thus_far_file = os.path.join(
        incremental_pca_results_dir,
        "incremental-pca-time-taken-to-project-downsampled-diffraction-patterns-seen-thus-far-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(time_taken_to_project_downsampled_diffraction_patterns_seen_thus_far_file):
        continue

    time_taken_to_project_downsampled_diffraction_patterns_seen_thus_far = np.load(time_taken_to_project_downsampled_diffraction_patterns_seen_thus_far_file)
    times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far.append(time_taken_to_project_downsampled_diffraction_patterns_seen_thus_far)
    loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far.append(batch_number)

num_batches_processed_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far = len(loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far)

if num_batches_processed_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far == 0:
    # Fail with an actionable message instead of an opaque "min() arg is an empty sequence".
    raise ValueError("No projection timing files found in {}".format(incremental_pca_results_dir))

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Time taken to project diffraction patterns (seconds)", fontsize=20)

# Batch numbers are visited in ascending order, so first/last are min/max.
ax.set_xlim(
    loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far[0],
    loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far[-1],
)
ax.set_ylim(min(times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far), max(times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far))

# Integer tick positions. The earlier explicit set_xticks(...) call was dropped
# because set_major_locator replaced its ticks immediately.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(
    loaded_batch_numbers_for_times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far,
    times_taken_to_project_downsampled_diffraction_patterns_seen_thus_far,
)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


## Time taken to compute convergence measure for downsampled diffraction patterns seen thus far

In [None]:
# Load the time taken to compute the convergence measure, recorded every
# `num_iters_to_measure_convergence_...` batches (starting at twice that
# interval) up to batch_number_converged, and plot it against the batch number.
times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns = []
batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns = np.arange(
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far * 2,
    batch_number_converged + 1,
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
)

# Batch numbers whose timing file was actually found. Plotting against these
# keeps x and y aligned even when a file is missing in the middle of the range.
loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns = []

for batch_number in batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns:
    time_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns_file = os.path.join(
        incremental_pca_results_dir,
        "time-taken-to-compute-convergence-measure-using-all-downsampled-diffraction-patterns-seen-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(time_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns_file):
        continue

    time_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns = np.load(time_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns_file)
    times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns.append(time_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns)
    loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns.append(batch_number)

num_batches_processed_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns = len(loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns)

if num_batches_processed_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns == 0:
    # Fail with an actionable message instead of an opaque "min() arg is an empty sequence".
    raise ValueError("No convergence-measure timing files found in {}".format(incremental_pca_results_dir))

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Time taken to compute convergence (seconds)", fontsize=20)

# Batch numbers are visited in ascending order, so first/last are min/max.
ax.set_xlim(
    loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns[0],
    loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns[-1],
)
ax.set_ylim(min(times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns), max(times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns))

# Integer tick positions. The earlier explicit set_xticks(...) call was dropped
# because set_major_locator replaced its ticks immediately.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(
    loaded_batch_numbers_for_times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns,
    times_taken_to_compute_convergence_measure_using_all_downsampled_diffraction_patterns,
)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


In [None]:
# Load the time taken to compute the Incremental PCA reconstruction MSE,
# recorded every `num_iters_to_measure_convergence_...` batches, and plot it
# against the batch number.
times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns = []

# NOTE(review): this overwrites the converged batch number computed by the
# convergence cell with a hard-coded value, so every candidate batch up to
# 10000 is scanned. Presumably 10000 is the total number of batches for this
# dataset — confirm, and consider using num_batches_of_downsampled_diffraction_patterns.
batch_number_converged = 10000
batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns = np.arange(
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
    batch_number_converged + 1,
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
)

# Batch numbers whose timing file was actually found. Plotting against these
# keeps x and y aligned even when a file is missing in the middle of the range.
loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns = []

for batch_number in batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns:
    # The file name (including the "usings" spelling) must match what is on disk.
    time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns_file = os.path.join(
        incremental_pca_results_dir,
        "time-taken-to-compute-reconstruction-mse-for-downsampled-diffraction-patterns-seen-usings-last-n-iters-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns_file):
        continue

    time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns = np.load(time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns_file)
    times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns.append(time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns)
    loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns.append(batch_number)

num_batches_processed_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns = len(loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns)

if num_batches_processed_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns == 0:
    # Fail with an actionable message instead of an opaque "min() arg is an empty sequence".
    raise ValueError("No reconstruction MSE timing files found in {}".format(incremental_pca_results_dir))

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Time taken to compute reconstruction MSE (seconds)", fontsize=20)

# Batch numbers are visited in ascending order, so first/last are min/max.
# The y-limits are left to matplotlib's autoscaling.
ax.set_xlim(
    loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns[0],
    loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns[-1],
)

# Integer tick positions. The earlier explicit set_xticks(...) call was dropped
# because set_major_locator replaced its ticks immediately.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(
    loaded_batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns,
    times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns,
)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


In [None]:
# Print the last reconstruction-MSE timing file path built by the loading loop
# (the path is built for every candidate batch, whether or not the file exists).
print(time_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns_file)


In [None]:
# Number of reconstruction-MSE timing files that were found and loaded.
len(times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns)

In [None]:
# Scratch arithmetic: how many values are in 200, 210, ..., 10000 (step 10).
# NOTE(review): presumably the expected number of timing files — confirm.
(10000 - 200) // 10 + 1

In [None]:
# Display the directory the per-batch .npy result files are read from.
incremental_pca_results_dir


In [None]:
# Load the Incremental PCA reconstruction MSEs recorded every
# `num_iters_to_measure_convergence_...` batches, stopping at the first file
# whose last value is below the configured threshold. Then render one PNG frame
# per loaded file: the MSE curve up to that batch on the left, and the absolute
# slope of the curve at its latest point on the right.
incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far = []
batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far = np.arange(
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
    num_batches_of_downsampled_diffraction_patterns + 1,
    num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far,
)
num_batches_processed_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far = 0

# Default to the last candidate batch if the threshold is never reached.
batch_number_converged = batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far[-1]
for batch_number in batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far:
    incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far_file = os.path.join(
        incremental_pca_results_dir,
        "incremental-pca-reconstruction-mse-for-downsampled-diffraction-patterns-seen-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.npy".format(
            dataset_name,
            downsampled_diffraction_pattern_height,
            downsampled_diffraction_pattern_width,
            num_downsampled_diffraction_patterns,
            num_downsampled_diffraction_patterns_to_fit_per_batch,
            batch_number,
        ),
    )
    if not os.path.exists(incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far_file):
        continue

    # Each file holds a sequence of MSE values; only its last entry is compared to the threshold.
    incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far = np.load(incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far_file)
    incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far.append(incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far)
    num_batches_processed_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far += 1

    if incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far[-1] < minimum_convergence_measure_to_start_flagging_downsampled_diffraction_pattern_outliers:
        batch_number_converged = batch_number
        break

# Frames are written into this directory; create it up front so savefig does
# not fail on a fresh checkout.
output_plot_file_template = "incremental-pca-convergence-plots/incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far_batch_number={:05d}.png"
os.makedirs(os.path.dirname(output_plot_file_template), exist_ok=True)

# Batch numbers paired with the loaded files, in order.
# NOTE(review): assumes the i-th loaded file holds i + 1 MSE values so that x
# and y have equal length in each frame — confirm against the producer.
processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors = batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far[:num_batches_processed_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far]

derivatives = []

for i in range(num_batches_processed_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far):

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(22, 11))

    ax[0].set_xlabel("Batch number", fontsize=20)
    ax[0].set_ylabel("Incremental PCA Reconstruction MSE", fontsize=20)

    # Fixed axis limits so every frame of the animation shares the same view.
    ax[0].set_xlim(
        processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[0],
        processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[-1],
    )
    ax[0].set_ylim(0.5, 1.5)

    # Integer tick positions. The earlier explicit set_xticks(...) call was
    # dropped because set_major_locator replaced its ticks immediately.
    ax[0].xaxis.set_major_locator(MaxNLocator(integer=True))

    x = processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[:i + 1]
    y = incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far[i]
    ax[0].plot(x, y)

    ax[0].set_title("Incremental PCA Reconstruction MSEs for Batch Number={}".format(processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[i]), fontsize=20, pad=20)

    # A slope needs at least two points, so the first frame has an empty right panel.
    if i > 0:
        # Backward finite difference of a cubic-spline fit at the latest batch.
        # NOTE(review): CubicSpline can evaluate the derivative directly via
        # cs(x[-1], 1); the finite difference is kept because the title reports Delta.
        cs = CubicSpline(x, y)
        delta = 1e-10
        derivative = (cs(x[-1]) - cs(x[-1] - delta)) / delta
        derivatives.append(derivative)

        ax[1].set_xlabel("Batch number", fontsize=20)
        ax[1].set_ylabel("Absolute Derivative of the Reconstruction MSE", fontsize=20)
        ax[1].set_ylim(0.0, 0.01)
        ax[1].set_xlim(
            processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[0],
            processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[-1],
        )
        ax[1].xaxis.set_major_locator(MaxNLocator(integer=True))
        # One derivative exists for every frame after the first, hence x[1:].
        ax[1].plot(x[1:], np.abs(derivatives))
        ax[1].set_title("Absolute Derivative at Batch Number={}\n(Absolute Derivative={}, Delta={})".format(processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[i], np.abs(derivative), delta), fontsize=20, pad=20)
        plt.setp(ax[1].get_xticklabels(), fontsize=20)
        plt.setp(ax[1].get_yticklabels(), fontsize=20)

    plt.setp(ax[0].get_xticklabels(), fontsize=20)
    plt.setp(ax[0].get_yticklabels(), fontsize=20)

    fig.tight_layout()
    # Save and close this specific figure rather than relying on the implicit
    # current figure, so hundreds of frames do not accumulate in memory.
    fig.savefig(output_plot_file_template.format(processed_batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors[i]))
    plt.close(fig)
    

In [None]:
# Assemble every PNG frame found in the plots directory into one animated GIF
# written to that same directory.
img_dir = "incremental-pca-convergence-plots"

# Input frames: all PNG files directly inside img_dir.
input_img_file_pattern = os.path.join(img_dir, "*.png")
input_img_files = glob.glob(input_img_file_pattern)

# Output GIF path.
output_img_file = "incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far_animated.gif"
output_img_path = os.path.join(img_dir, output_img_file)

# Read the frames in sorted file-name order (glob itself gives no ordering guarantee).
input_imgs = [
    imageio.imread(frame_file)
    for frame_file in sorted(input_img_files)
]

# Write the frames out as an animated GIF.
imageio.mimsave(output_img_path, input_imgs)


In [None]:
# Notebook inspection: display the largest entry of the batch-number sequence
# used for the reconstruction-error plots.
max(batch_numbers_for_incremental_pca_mean_squared_reconstruction_errors_for_diffraction_patterns_seen_thus_far)

In [None]:
# Notebook inspection: echo the current value of this variable.
# NOTE(review): it is not assigned anywhere in the cells visible here — presumably set by an earlier cell; confirm.
incremental_pca_mean_squared_reconstruction_error_for_diffraction_patterns_seen_thus_far_using_last_n_iters_file

In [None]:
# Notebook inspection: load and display the array stored in one specific .npy
# file (hard-coded absolute path, run-5, batch_number=210).
np.load("/reg/data/ana03/scratch/deebanr/3iyf-100K-mixed-hit-99/dataset/incremental-pca/09-24-20/run-5/time-taken-to-compute-reconstruction-mse-for-downsampled-diffraction-patterns-seen-usings-last-n-iters-dataset_name=3iyf-100K-mixed-hit-99-downsampled_shape=128x128-num_diffraction_patterns=100000-batch_size=10-batch_number=210.npy")


In [None]:
# Notebook inspection: load and display the array stored in one specific .npy
# file (hard-coded absolute path, run-5, batch_number=210).
np.load("/reg/data/ana03/scratch/deebanr/3iyf-100K-mixed-hit-99/dataset/incremental-pca/09-24-20/run-5/incremental-pca-reconstruction-mse-for-downsampled-diffraction-patterns-seen-using-last-n-iters-dataset_name=3iyf-100K-mixed-hit-99-downsampled_shape=128x128-num_diffraction_patterns=100000-batch_size=10-batch_number=210.npy")


In [None]:
# Notebook inspection: display how many entries this collection currently holds.
len(incremental_pca_mean_squared_reconstruction_errors_for_all_downsampled_diffraction_patterns)


In [None]:
# Read every downsampled diffraction pattern into memory and flatten each one
# into a row, giving a (num_downsampled_diffraction_patterns, -1) matrix.
# NOTE(review): downsampled_diffraction_patterns_h5_file must already be set
# when this cell runs — the only assignment visible nearby is in a later cell.
# The context manager guarantees the HDF5 file is closed even if the read or
# the reshape raises.
with h5.File(downsampled_diffraction_patterns_h5_file, 'r') as downsampled_diffraction_patterns_h5_file_handle:
    downsampled_diffraction_patterns = downsampled_diffraction_patterns_h5_file_handle["downsampled_diffraction_patterns"]

    # [:] copies the dataset into a NumPy array, so the result stays usable
    # after the file is closed (the dataset object itself does not)
    all_downsampled_diffraction_patterns = downsampled_diffraction_patterns[:].reshape((num_downsampled_diffraction_patterns, -1))


In [None]:
# Directory that holds the Incremental PCA outputs for the selected dataset
incremental_pca_results_dir = dataset_params["incrementalPcaResultsDir"]

# Model snapshot to inspect. Pinned to batch 1700 here instead of reading
# dataset_params["newBatchNumberToLoadIncrementalPcaModel"].
new_batch_number_to_load_incremental_pca_model = 1700

# Path of the pickled model for that batch number
incremental_pca_model_file = os.path.join(
    incremental_pca_results_dir,
    "incremental-pca-incremental-pca-model-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number={}.pkl".format(
        dataset_name,
        downsampled_diffraction_pattern_height,
        downsampled_diffraction_pattern_width,
        num_downsampled_diffraction_patterns,
        num_downsampled_diffraction_patterns_to_fit_per_batch,
        new_batch_number_to_load_incremental_pca_model,
    ),
)

# NOTE: unpickling can execute arbitrary code; only load model files that come
# from a trusted run of this pipeline.
with open(incremental_pca_model_file, 'rb') as incremental_pca_model_file_handle:
    incremental_pca = pickle.load(incremental_pca_model_file_handle)

# Mean and component matrix stored on the loaded model
(
    incremental_pca_mean_for_diffraction_patterns_seen_thus_far,
    incremental_pca_components_for_diffraction_patterns_seen_thus_far,
) = (incremental_pca.mean_, incremental_pca.components_)

# Subtract the model mean from every flattened pattern and project onto the
# components. The centred matrix is left as an unnamed temporary on purpose so
# it can be freed as soon as the product has been computed.
current_projection_for_all_downsampled_diffraction_patterns = np.dot(
    all_downsampled_diffraction_patterns - incremental_pca_mean_for_diffraction_patterns_seen_thus_far,
    incremental_pca_components_for_diffraction_patterns_seen_thus_far.T,
)


In [None]:
fig, axes = plt.subplots(1, 4, figsize=(24, 24))

# Every panel shows one flattened vector reshaped back to image dimensions
panel_image_shape = (downsampled_diffraction_pattern_height, downsampled_diffraction_pattern_width)

# Panel title paired with the flattened vector to draw: the model mean followed
# by rows 0, 1 and 2 of the component matrix
panels = [
    ("Average", incremental_pca_mean_for_diffraction_patterns_seen_thus_far),
    ("PC #1", incremental_pca_components_for_diffraction_patterns_seen_thus_far[0]),
    ("PC #2", incremental_pca_components_for_diffraction_patterns_seen_thus_far[1]),
    ("PC #3", incremental_pca_components_for_diffraction_patterns_seen_thus_far[2]),
]

for panel_axis, (panel_title, flattened_image) in zip(axes, panels):
    panel_axis.matshow(flattened_image.reshape(panel_image_shape), cmap='gray')
    panel_axis.set_title(panel_title, pad=20)

    # Hide the tick marks; the axis labels carry the image width/height instead
    panel_axis.set_xticks([])
    panel_axis.set_yticks([])
    panel_axis.set_xlabel(downsampled_diffraction_pattern_width)
    panel_axis.set_ylabel(downsampled_diffraction_pattern_height)

fig.show()


In [None]:
downsampled_diffraction_patterns_h5_file = dataset_params["downsampledDiffractionPatternsH5File"]

# The file that provides the "single_hits_mask" dataset has the same file name
# as the downsampled file and lives in the parent of the directory that
# contains the downsampled file.
diffraction_patterns_h5_file = os.path.join(dirname(dirname(abspath(downsampled_diffraction_patterns_h5_file))), basename(downsampled_diffraction_patterns_h5_file))

# Read the mask once and release the file straight away; the context manager
# closes the handle even if the read fails.
with h5.File(diffraction_patterns_h5_file, 'r') as diffraction_patterns_h5_file_handle:
    single_hits_mask_for_sampling = diffraction_patterns_h5_file_handle["single_hits_mask"][:]

# Mask value 1 marks a single hit, 0 marks an outlier
single_hit_idx = np.where(single_hits_mask_for_sampling == 1)[0]
outlier_idx = np.where(single_hits_mask_for_sampling == 0)[0]

# Re-seed so the same three examples of each kind are drawn on every run.
# Single hits are sampled first, then outliers, to keep the draw order fixed.
np.random.seed(13)
single_hit_random_sample_idx = np.random.choice(single_hit_idx, 3, replace=False)
outlier_random_sample_idx = np.random.choice(outlier_idx, 3, replace=False)

# Plot the single-hit samples first, followed by the outlier samples
sample_idx_to_plot = np.concatenate((single_hit_random_sample_idx, outlier_random_sample_idx))

with h5.File(downsampled_diffraction_patterns_h5_file, 'r') as downsampled_diffraction_patterns_h5_file_handle:
    for image_idx in sample_idx_to_plot:

        fig, axes = plt.subplots(1, 2, figsize=(9, 9))

        # Left panel: the stored downsampled pattern (titles are 1-based)
        axes[0].matshow(downsampled_diffraction_patterns_h5_file_handle["downsampled_diffraction_patterns"][image_idx], cmap='gray')
        axes[0].set_title("Downsampled\nDiffraction Pattern #{}".format(image_idx + 1), pad=20)
        axes[0].set_xticks([])
        axes[0].set_yticks([])
        axes[0].set_xlabel(downsampled_diffraction_pattern_width)
        axes[0].set_ylabel(downsampled_diffraction_pattern_height)

        # Map the pattern's projection back through the components and add
        # the model mean to obtain its reconstruction
        incremental_pca_reconstruction_for_downsampled_diffraction_pattern_i = np.dot(current_projection_for_all_downsampled_diffraction_patterns[image_idx, :], incremental_pca_components_for_diffraction_patterns_seen_thus_far) + incremental_pca_mean_for_diffraction_patterns_seen_thus_far

        # Right panel: the reconstruction reshaped back to image dimensions
        axes[1].matshow(incremental_pca_reconstruction_for_downsampled_diffraction_pattern_i.reshape((downsampled_diffraction_pattern_height, downsampled_diffraction_pattern_width)), cmap='gray')
        axes[1].set_title("Reconstructed Downsampled\nDiffraction Pattern #{}".format(image_idx + 1), pad=20)
        axes[1].set_xticks([])
        axes[1].set_yticks([])
        axes[1].set_xlabel(downsampled_diffraction_pattern_width)
        axes[1].set_ylabel(downsampled_diffraction_pattern_height)

        fig.show()


In [None]:
# Number of leading batches to include in the timing plot. An earlier variant
# derived this as the minimum of several "batches processed" counters; it is
# pinned to a fixed value here.
num_batches_processed = 10000

num_intervals_to_consider = num_batches_processed // num_iters_to_measure_convergence_for_downsampled_diffraction_patterns_seen_thus_far

# Batch numbers covered by the plot, sliced once and reused below
plotted_batch_numbers = batch_numbers_for_times_taken_to_update_incremental_pca_with_new_batch[:num_batches_processed]

# Work on a copy so the recorded update times are left untouched
times_taken_to_compute_batch_of_downsampled_diffraction_patterns = np.copy(times_taken_to_update_incremental_pca_with_new_batch[:num_batches_processed])

# Add the reconstruction-error timings onto the batches they belong to. Batch
# numbers are converted to 0-based positions by subtracting 1.
# NOTE(review): the first 19 batch numbers are skipped by a hard-coded offset —
# confirm it still lines up with the length of the timing array.
reconstruction_error_timing_positions = batch_numbers_for_times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns[19:] - 1
times_taken_to_compute_batch_of_downsampled_diffraction_patterns[reconstruction_error_timing_positions] += times_taken_to_compute_incremental_pca_mean_squared_reconstruction_error_for_all_downsampled_diffraction_patterns

ax = plt.figure(figsize=(11, 11)).gca()

ax.set_xlabel("Batch number", fontsize=20)
ax.set_ylabel("Time taken to compute a batch", fontsize=20)

# x spans the plotted batch numbers; y runs from zero to the slowest batch
ax.set_xlim(min(plotted_batch_numbers), max(plotted_batch_numbers))
ax.set_ylim(0, np.max(times_taken_to_compute_batch_of_downsampled_diffraction_patterns))

# NOTE: the locator set on the second line takes over from the explicit tick
# positions set on the first; both calls are kept in their original order
ax.set_xticks(plotted_batch_numbers[9::100])
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

ax.plot(plotted_batch_numbers, times_taken_to_compute_batch_of_downsampled_diffraction_patterns)

plt.setp(ax.get_xticklabels(), fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=20)
plt.show()


In [None]:
# #print("%d diffraction patterns fit to Incremental PCA per second" % (num_downsampled_diffraction_patterns / np.sum(times_taken_to_update_incremental_pca_with_new_batch)))
# print((num_batches_processed * num_downsampled_diffraction_patterns_to_fit_per_batch))
# print(times_taken_to_compute_batch_of_downsampled_diffraction_patterns)
# print(np.sum(times_taken_to_compute_batch_of_downsampled_diffraction_patterns))
# print("%d diffraction patterns fit to Incremental PCA per second" % ((num_batches_processed * num_downsampled_diffraction_patterns_to_fit_per_batch) / np.sum(times_taken_to_compute_batch_of_downsampled_diffraction_patterns)))


## Show performance for Elliptic Envelope

In [None]:
# https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
def perf_measure(y_actual, y_hat):
    """Count confusion-matrix entries for binary labels encoded as 0 and 1.

    Parameters
    ----------
    y_actual : sequence or array
        Ground-truth labels. Only the first ``len(y_hat)`` entries are read.
    y_hat : sequence or array
        Predicted labels.

    Returns
    -------
    tuple of int
        ``(TP, FP, TN, FN)``. Positions whose prediction is neither 0 nor 1
        do not contribute to any of the four counts.

    Raises
    ------
    IndexError
        If ``y_actual`` has fewer entries than ``y_hat``.
    """
    y_hat = np.asarray(y_hat)
    y_actual = np.asarray(y_actual)

    num_predictions = len(y_hat)
    if len(y_actual) < num_predictions:
        raise IndexError(
            "y_actual has {} entries but y_hat has {}".format(len(y_actual), num_predictions)
        )

    # Surplus ground-truth entries are ignored, one label per prediction
    y_actual = y_actual[:num_predictions]

    # Element-wise masks replace the per-element Python loop
    predicted_positive = y_hat == 1
    predicted_negative = y_hat == 0
    prediction_correct = y_actual == y_hat

    # int(...) keeps the return values plain Python integers
    TP = int(np.count_nonzero(predicted_positive & prediction_correct))
    FP = int(np.count_nonzero(predicted_positive & ~prediction_correct))
    TN = int(np.count_nonzero(predicted_negative & prediction_correct))
    FN = int(np.count_nonzero(predicted_negative & ~prediction_correct))

    return (TP, FP, TN, FN)


In [None]:
# Load the prediction mask written for the batch number at which Incremental PCA converged
elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_file = os.path.join(incremental_pca_results_dir, "elliptic-envelope-outlier-predictions-for-downsampled-diffraction-patterns-seen-thus-far-dataset_name={}-downsampled_shape={}x{}-num_diffraction_patterns={}-batch_size={}-batch_number_converged={}.hdf5".format(dataset_name, downsampled_diffraction_pattern_height, downsampled_diffraction_pattern_width, num_downsampled_diffraction_patterns, num_downsampled_diffraction_patterns_to_fit_per_batch, batch_number_converged))
elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_key = "elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far"

# The context manager closes the file even if the dataset read raises
with h5.File(elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_file, 'r') as elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_file_handle:
    elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_boolean_array = elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_file_handle[elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_h5_key][:]

# Hard-coded locations of the files that carry the single-hit mask for each dataset
mixed_hits_h5_file_3iyf = "/reg/data/ana03/scratch/deebanr/3iyf-100K-mixed-hit-99/dataset/cspi_synthetic_dataset_diffraction_patterns_3iyf-100K-mixed-hit_uniform_quat_dataset-size=100000_diffraction-pattern-shape=1024x1040.hdf5"

mixed_hits_h5_file_3los_80 = "/reg/data/ana03/scratch/deebanr/3los-10K-mixed-hit-80/dataset/cspi_synthetic_dataset_diffraction_patterns_3los-10K-mixed-hit_uniform_quat_dataset-size=10000_diffraction-pattern-shape=1024x1040.hdf5"
mixed_hits_h5_file_3los_90 = "/reg/data/ana03/scratch/deebanr/3los-10K-mixed-hit-90/dataset/cspi_synthetic_dataset_diffraction_patterns_3los-10K-mixed-hit_uniform_quat_dataset-size=10000_diffraction-pattern-shape=1024x1040.hdf5"
mixed_hits_h5_file_3los_95 = "/reg/data/ana03/scratch/deebanr/3los-10K-mixed-hit-95/dataset/cspi_synthetic_dataset_diffraction_patterns_3los-10K-mixed-hit_uniform_quat_dataset-size=10000_diffraction-pattern-shape=1024x1040.hdf5"
mixed_hits_h5_file_3los_99 = "/reg/data/ana03/scratch/deebanr/3los-10K-mixed-hit-99/dataset/cspi_synthetic_dataset_diffraction_patterns_3los-10K-mixed-hit_uniform_quat_dataset-size=10000_diffraction-pattern-shape=1024x1040.hdf5"

# NOTE(review): the mask is always read from the 3los-99 file, regardless of
# dataset_name — confirm it matches the dataset the predictions were made for.
single_hits_mask_key = "single_hits_mask"
with h5.File(mixed_hits_h5_file_3los_99, 'r') as mixed_hits_h5_file_handle:
    single_hits_mask = mixed_hits_h5_file_handle[single_hits_mask_key][:]

# NOTE(review): perf_measure treats 1 as the positive class in both arrays.
# single_hits_mask uses 1 for single hits; confirm the prediction mask uses the
# same polarity, since its name refers to outliers.
(TP, FP, TN, FN) = perf_measure(single_hits_mask, elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_boolean_array)

# Rows are the actual class, columns the predicted class
single_hit_downsampled_diffraction_pattern_detection_performance_matrix = np.array([[TN, FP], [FN, TP]])

actual = ["Actual outlier", "Actual single-hit"]
predicted = ["Predicted outlier", "Predicted single-hit"]

# Builtin `object` is used as the dtype: the `np.object` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError
confusion_matrix_lookup_table = np.array([["true negatives", "false positives"], ["false negatives", "true positives"]], dtype=object)

fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(single_hit_downsampled_diffraction_pattern_detection_performance_matrix, cmap="viridis")

ax.set_xticks(np.arange(len(predicted)))
ax.set_yticks(np.arange(len(actual)))

ax.set_xticklabels(predicted)
ax.set_yticklabels(actual)

plt.setp(ax.get_yticklabels(), fontsize=15)
plt.setp(ax.get_xticklabels(), rotation=0, rotation_mode="anchor", fontsize=15)

# Annotate every cell with its label and count
for i in range(len(actual)):
    for j in range(len(predicted)):
        # Black text on cells with counts above the fixed 5000 threshold,
        # white text otherwise, to keep the annotation readable
        if single_hit_downsampled_diffraction_pattern_detection_performance_matrix[i, j] > 5000:
            text_color = 'black'
        else:
            text_color = 'white'

        ax.text(j, i, "# of {}\n{}".format(confusion_matrix_lookup_table[i, j], single_hit_downsampled_diffraction_pattern_detection_performance_matrix[i, j]), ha="center", va="center", color=text_color, fontsize=15)

ax.set_title("Single-hit downsampled diffraction pattern\ndetection performance for Elliptic Envelope\nduring the outlier flagging with Incremental PCA\nconverged at batch number {}".format(batch_number_converged), fontsize=15)
fig.tight_layout()
plt.show()


In [None]:
# Notebook inspection: echo which dataset the notebook is currently configured for.
dataset_name

In [None]:
# Labels come from the single-hit mask; the Elliptic Envelope prediction mask
# is passed to roc_curve directly as the score
y_test = single_hits_mask
y_score = elliptic_envelope_outlier_prediction_mask_for_downsampled_diffraction_patterns_seen_thus_far_boolean_array

# Both arrays are flattened before the curve is computed; the thresholds
# returned by roc_curve are not used
micro_false_positive_rate, micro_true_positive_rate, _ = roc_curve(y_test.ravel(), y_score.ravel())

# Results are kept in dicts with a single "micro" entry
fpr = {"micro": micro_false_positive_rate}
tpr = {"micro": micro_true_positive_rate}
roc_auc = {"micro": auc(micro_false_positive_rate, micro_true_positive_rate)}

plt.figure()
lw = 2

# The ROC curve itself, labelled with its area under the curve
plt.plot(fpr['micro'], tpr['micro'], color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc['micro'])

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
