## Extract the HAADFs, equalize their medians, and normalize them to a percentile range (e.g. 1/99%; configurable below)

In [None]:
# imports

import os
import sys
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_dir)

from datetime import datetime
import numpy as np
import tifffile as tf
import time
from functions_EDX import *
import gc

### Choose the directory containing the NPZ files

In [None]:
# Input/output locations and normalization settings.
#
# HomePath is the directory that holds the NPZ folder read by this notebook.
# OutPath is the directory in which the 'HAADFs' folder with the processed
# EM images is created. It can be the same as HomePath (see structure below)
# or a different location, e.g. if storage doesn't allow saving the images
# on the same drive.

# HomePath structure:
# /path/to/directory
#   |-- EMD    (contains the EMD file)
#   |-- NPZ    (contains the NPZ files read by this notebook)
#   |-- HAADFs (the preprocessed data; optional, location is set by OutPath)

HomePath = r"/path/to/directory" 
OutPath = HomePath

# Intensity percentiles used for normalization: values at or below the lower
# percentile end up at 0 and values at or above the upper percentile end up at 1.
lower_percentile = 0.5 # author's note: use 1 for the 30 tileset
upper_percentile = 99.5 # author's note: use 99 for the 30 tileset

In [None]:
print("Start time: ",datetime.now())

# Collect the NPZ tile files in a deterministic (sorted) order. Match on the
# full ".npz" extension so names that merely end in "npz" are not picked up.
npz_dir = os.path.join(HomePath, 'NPZ')
file_names = sorted(name for name in os.listdir(npz_dir) if name.endswith('.npz'))

# Fail early with a clear message: the processing steps need at least one tile
# and would otherwise stop later with a much less obvious error.
if not file_names:
    raise FileNotFoundError("No .npz files found in {}".format(npz_dir))

# Create the export directory (including missing parent folders). Only the
# "already exists" case is tolerated; any other problem (e.g. permissions)
# is raised instead of being reported as "Folder exists.".
try:
    os.makedirs(os.path.join(OutPath, 'HAADFs'))
except FileExistsError:
    print("Folder exists.")

# Tiles and their median intensities, kept in the same order as file_names.
haadfs, medians = [], []

# Take the timestamp once, before any tile is loaded, so that the duration
# printed at the end covers the whole run (and `start` is always defined).
start = datetime.now()

# load HAADFs
for file_name in file_names:
    file_path = os.path.join(HomePath, 'NPZ', file_name)
    # The context manager closes the NPZ archive as soon as the HAADF array
    # has been read, so no file handles stay open across tiles.
    with np.load(file_path) as loaded_file:
        haadf = loaded_file['haadf']
    haadfs.append(haadf)
    # The median is truncated to an integer, as the offset correction expects.
    medians.append(int(np.median(haadf)))

# Equalize the tiles: the correction for each tile is the difference between
# its median and the lowest median of all tiles.
cf = np.asarray(medians) - np.min(medians)

# Work on a single float64 stack (tile, row, column). Converting before the
# subtraction avoids wrap-around in case the tiles are stored as unsigned
# integers (NOTE(review): tile dtype is not visible here - confirm), and the
# in-place operations below avoid extra full-size temporaries.
haadfs = np.asarray(haadfs, dtype=np.float64)
haadfs -= cf[:, np.newaxis, np.newaxis]

# Normalize the whole stack between the configured lower and upper
# percentiles; both are computed in one pass over the data.
p_low, p_high = np.percentile(haadfs, [lower_percentile, upper_percentile])
n_range = p_high - p_low
if not n_range > 0:
    raise ValueError(
        "Cannot normalize HAADFs: percentile range is {} "
        "(lower={}, upper={})".format(n_range, p_low, p_high)
    )
haadfs -= p_low
haadfs /= n_range

# Saturate everything outside the percentile range to 0 and 1.
np.clip(haadfs, 0, 1, out=haadfs)

# 2x2 binning: average every 2x2 pixel neighbourhood of each tile.
n_tiles, height, width = haadfs.shape
binned_h, binned_w = height // 2, width // 2

# Drop a trailing row/column when a dimension is odd; otherwise the reshape
# into 2x2 groups would fail. Even-sized tiles are left untouched.
haadfs = haadfs[:, :binned_h * 2, :binned_w * 2]
haadfs = haadfs.reshape(n_tiles, binned_h, 2, binned_w, 2).mean(axis=4).mean(axis=2)

# Scale to 0..255, convert to 8-bit and invert (bitwise NOT on uint8, i.e. 255 - value).
haadfs = np.invert((haadfs * 255).astype('uint8'))

# Write every processed tile to the 'HAADFs' folder under OutPath, numbered
# by its position in the stack (Tile_00.tiff, Tile_01.tiff, ...).
export_dir = os.path.join(OutPath, 'HAADFs')
for tile_idx, tile in enumerate(haadfs):
    tile_name = 'Tile_%02d.tiff' % (tile_idx)
    tf.imwrite(os.path.join(export_dir, tile_name), tile)

# Report the time elapsed since `start`.
end = datetime.now()
print('Duration: {}'.format(end - start))