# Raw Data Transformation

# Import Libraries

In [1]:
import os
import numpy as np
from tqdm import tqdm
import tifffile as tif
from scipy.ndimage.interpolation import rotate
from tensorflow.keras.utils import to_categorical

# Define Functions

In [2]:
def import_tif(file):
    """Read a TIFF file and return its contents as a float32 array.

    Parameters
    ==========

    file - (str)
        path of the .tif file to read; it is handed directly to
        ``tif.imread``


    Returns
    =======

    the image data stored in the file, cast to ``np.float32``

    """
    return tif.imread(file).astype(np.float32)


def rot90(m, k=1, axis=2):
    """Rotate an array by k*90 degrees about the given axis.

    ``np.rot90`` (with its default arguments) turns an array in the plane of
    its first two axes. To rotate about another axis, that axis is swapped
    with axis 2, the quarter turns are applied, and the swap is undone.

    Parameters
    ==========

    m - (np.array)
        the array to rotate; it needs at least three axes because axis 2 is
        referenced

    k - (int)
        number of quarter turns, forwarded to ``np.rot90``

    axis - (int)
        the axis to rotate about

    Returns
    =======

    the rotated array
    """
    return np.swapaxes(np.rot90(np.swapaxes(m, 2, axis), k), 2, axis)

def rotations24(polycube):
    """Return the 24 axis-aligned orientations of a cube, stacked in one array.

    Six starting orientations are produced with ``rot90`` and each one is
    spun through four quarter turns with ``rotations4``. The six groups of
    four are joined with ``np.concatenate`` in the order listed below.

    Parameters
    ==========

    polycube - (np.array)
        the 3D array to rotate; the groups can only be concatenated when all
        rotations share a shape, so a cubic array is expected

    Returns
    =======

    an array whose first axis indexes the 24 rotated copies
    """
    # (starting orientation, axis the four quarter turns are taken about)
    starting_points = (
        # imagine shape is pointing in axis 0 (up): spin about axis 0
        (polycube, 0),
        # rotated 180 about axis 1, shape now points down axis 0
        (rot90(polycube, 2, axis=1), 0),
        # rotated 90 / 270 about axis 1, shape now points along axis 2
        (rot90(polycube, axis=1), 2),
        (rot90(polycube, -1, axis=1), 2),
        # rotated 90 / 270 about axis 2, shape now points along axis 1
        (rot90(polycube, axis=2), 1),
        (rot90(polycube, -1, axis=2), 1),
    )

    return np.concatenate(
        tuple(rotations4(cube, spin_axis) for cube, spin_axis in starting_points)
    )

def rotations4(polycube, axis):
    """Return a list with the 0, 90, 180 and 270 degree rotations about `axis`.

    Each entry is ``rot90(polycube, i, axis)`` for ``i`` in 0..3, in that
    order.
    """
    return [rot90(polycube, quarter_turns, axis) for quarter_turns in range(4)]


def slice3D_easy(dataset, labels, diff3D, n_projections, label, Nc=64):
    """Append 2D slices of a rotated 3D array, plus their label, to two lists.

    For every angle in ``np.linspace(-45, 45, n_projections, endpoint=False)``
    the volume is rotated in the plane of axes 0 and 1 (linear interpolation,
    same output shape, constant fill) and the slice at index ``int(Nc/2)``
    along axis 1 is taken.

    Parameters
    ==========

    dataset - (list)
        collection the 2D slices are appended to (modified in place)

    labels - (list)
        collection the label is appended to, once per slice (modified in
        place)

    diff3D - (np.array)
        the current 3D diffraction image to take slices from

    n_projections - (int)
        how many evenly spaced angles between -45 (included) and 45
        (excluded) degrees to slice at

    label - (int)
        the label recorded for every slice taken from this volume

    Nc - (int)
        the slice index used is ``int(Nc/2)``; presumably Nc is the edge
        length of the volume so that the slice passes through its centre


    Returns
    =======
    Nothing
    """
    slice_index = int(Nc / 2)

    for angle in np.linspace(-45, 45, n_projections, endpoint=False):
        turned = rotate(diff3D, angle, axes=(0, 1),
                        reshape=False, order=1, mode='constant')

        dataset.append(turned[:, slice_index])
        labels.append(label)

# Obtaining Raw Data Location

In [3]:
# Where the raw pynx_data lives
main_dir = '../raw_data/'

# Obtaining the list of all the reflections (one sub-directory each)
reflection_list = os.listdir(main_dir)

all_files = []

# Expected layout: <main_dir>/<reflection>/<sampling_rate>/tif/<file>
# For each reflection in the list - obtain all the sampling rates - obtain all the files
for reflection in reflection_list:

    sampling_rate_list = sorted(os.listdir(f'{main_dir}{reflection}'))

    for sampling_rate in sampling_rate_list:

        files = os.listdir(f'{main_dir}{reflection}/{sampling_rate}/tif')

        adjusted_files = [f'{main_dir}{reflection}/{sampling_rate}/tif/{f}' for f in files]

        # extend (not append): keeps the list flat, so directories holding
        # different numbers of files no longer produce a ragged nested list
        # that np.asarray/flatten cannot turn into a 1D array of paths
        all_files.extend(adjusted_files)


# 1D array of path strings, in the same order the directories were walked
all_files = np.asarray(all_files)

# Taking The Data And Saving It To Individual .Tif Files

In [4]:
# Setup
save_dataset = []
save_labels = []

# Allow the User to pick which sampling rate they would like to calculate
user_search_field = '_sprate_225'

# Allow the User to set an identifier for the images they are creating
type_of_images = '128x128_2dslices'

# Number of slices taken from each of the 24 cube orientations
number_of_slice_per_90_degree = 18

# Passed to slice3D_easy as Nc (the slice index used is image_size / 2)
image_size = 128

user_directory_name = 'intermediate_dir'

# Filename tag -> class label; tags are tested in this order, first match wins
label_by_tag = (('_none_', 0), ('_edge_', 1), ('_screw_', 2))


# Data Creation
# Keep only the files belonging to the requested sampling rate
new_all_files = [search_file for search_file in all_files
                 if user_search_field in search_file]

for file in tqdm(new_all_files, total=len(new_all_files)):

    # Fresh accumulators for every input file
    save_dataset = []
    save_labels = []

    # Obtaining the correct label for the data. Fail loudly on an unknown
    # filename: previously such a file either raised NameError (first file)
    # or silently inherited the label of the previous file.
    label = next((value for tag, value in label_by_tag if tag in file), None)
    if label is None:
        raise ValueError(
            f'Cannot determine a label for {file!r}: expected one of '
            f'{[tag for tag, _ in label_by_tag]} in the filename'
        )

    # importing the raw data files
    data = import_tif(file)

    # Strip the directory and the extension from the input name
    splitting_filename = file.split('/')
    file_stem = os.path.splitext(splitting_filename[-1])[0]
    first_splitting = ['..', user_directory_name, type_of_images, file_stem + '_2dslices_']

    # Creating savenames for the data
    data_savename = '/'.join(first_splitting) + 'dataset.tif'
    labels_savename = '/'.join(first_splitting) + 'labels.tif'

    print(data_savename)
    # Raw data is a NxNxN cube
    # Obtain all 24 cube rotations for the data
    crystal_rotations = rotations24(data)
    for crystal in crystal_rotations:
        slice3D_easy(save_dataset, save_labels, crystal,
                     number_of_slice_per_90_degree, label, Nc=image_size)

    save_dataset = np.asarray(save_dataset)
    save_labels = np.asarray(save_labels)

    print(np.shape(save_dataset))
    # Saving is disabled here; uncomment to write the slices and labels to disk
    # (the target directory must already exist).
    #tif.imsave(data_savename, save_dataset)
    #tif.imsave(labels_savename, save_labels)

  0%|          | 0/480 [00:00<?, ?it/s]

../intermediate_dir/128x128_2dslices/c1_none_[-110][111][-1-12]_0_to_[100][010][001]__reflection_[1-1-1]_sprate_225_2dslices_dataset.tif


  0%|          | 1/480 [00:53<7:08:16, 53.65s/it]

(432, 128, 128)
../intermediate_dir/128x128_2dslices/c3_screw_[111][11-2][1-10]_0_to_[100][010][001]__reflection_[1-1-1]_sprate_225_2dslices_dataset.tif


KeyboardInterrupt: 

# Concatenating Data Into Train and Test Data

# Define Functions

In [12]:
def get_app_data(slice_dir, sprate=('sprate_225', ), crys_seperate=('c2_',)):
    """List the slice dataset/label files in a directory, split into two groups.

    Files are selected by sampling rate and then divided into those whose
    name contains one of the `crys_seperate` tags (the separated group) and
    those that do not (the main group). All tests are substring tests on the
    file name only, so the directory name never influences the selection.

    Parameters
    ==========

    slice_dir (str) - directory holding the '*_dataset.tif' and '*_labels.tif'
        files; a trailing path separator is optional

    sprate (tuple) - a tuple of strings defining which sample rate names the user would like to obtain data for

    crys_seperate (tuple) - a tuple of strings defining which samples to seperate for the training datasets

    Returns
    =======

    A 4-tuple of lists of file paths, each sorted by file name:

    main dataset files, main label files,
    separated dataset files, separated label files

    """
    names = sorted(os.listdir(slice_dir))

    def _split_by_crystal(suffix):
        """Return (main, separated) paths of the files whose name contains `suffix`."""
        main, separated = [], []
        for name in names:
            if suffix not in name:
                continue
            if not any(rate in name for rate in sprate):
                continue
            is_separated = any(crys in name for crys in crys_seperate)
            # os.path.join copes with slice_dir given with or without a
            # trailing separator
            (separated if is_separated else main).append(os.path.join(slice_dir, name))
        return main, separated

    og_slice_data_list, seperating_data_list = _split_by_crystal('_dataset.tif')
    og_slice_labels_list, seperating_labels_list = _split_by_crystal('_labels.tif')

    return og_slice_data_list, og_slice_labels_list, seperating_data_list, seperating_labels_list

# Repeat Next Sections  4 Times

### Once for the training data creation
### Once for the creation of C2 stand alone dataset
### Once for the creation of C3 stand alone dataset
### Once for the creation of C14 stand alone dataset
#### Make these by changing the crys_seperate variable in the get_app_data function

In [None]:
# Directory holding the per-file 2D slice datasets and labels
og_slice_dir = '../intermediate_dir/128x128_2dslices/'

# get_app_data returns (main data, main labels, separated data, separated labels)
out1 = get_app_data(og_slice_dir)
(og_slice_data_list,
 og_slice_labels_list,
 sep_og_slice_data_list,
 sep_og_slice_labels_list) = out1


In [None]:
og_slice_data = np.asarray([])
rot_slice_data = np.asarray([])
trans_slice_data = np.asarray([])
rot_transslice_data = np.asarray([])


og_slice_labels = np.asarray([])
rot_slice_labels = np.asarray([])
trans_slice_labels = np.asarray([])
rot_transslice_labels = np.asarray([])


# Used To Get Seperate Data - change crys_sep parameter to obtain different testing data
data_data = np.hstack((sep_og_slice_data_list))
data_labels = np.hstack((sep_og_slice_labels_list))


# Used To Get Main Data - set all testing data names into the crys_sep argument and uncomment
# to get the main training data

#data_data = np.hstack((og_slice_data_list))
#data_labels = np.hstack((og_slice_labels_list))


# Every dataset file needs its label file; a length mismatch means the two
# lists are no longer aligned index by index.
if len(data_data) != len(data_labels):
    raise ValueError(
        f'{len(data_data)} dataset files but {len(data_labels)} label files'
    )

# Collect the per-file arrays and join them once after the loop. This avoids
# re-copying the growing array on every iteration and removes the old
# try/except around np.vstack, which reset the accumulated data whenever any
# exception occurred part-way through the run.
data_chunks = []
label_chunks = []

for file, label_file in tqdm(zip(data_data, data_labels), total=len(data_data)):
    #label check: both names must agree once '_labels.tif' / '_dataset.tif' is removed
    lab_name = label_file[:-11]
    if lab_name != file [:-12]:
        print('help')
        print(lab_name)
        print(file [:-12])
        print('')


    # opening data
    d = tif.imread(file)
    l = tif.imread(label_file)

    # Report files containing NaN or negative values
    if np.isnan(d).any() or np.min(d) < 0:
        print(file, np.max(d), np.min(d))

    data_chunks.append(np.asarray(d))

    # Files outside the '128x128_2dslices/' directory get their labels twice,
    # exactly as before.
    label_repeats = 1 if '128x128_2dslices/' in file else 2
    label_chunks.extend([np.ravel(l)] * label_repeats)


# Stack the slices along the first axis; labels become one flat vector
og_slice_data = np.vstack(data_chunks)
og_slice_labels = np.concatenate(label_chunks)

print(og_slice_data.shape, og_slice_labels.shape)

In [None]:
# Save the concatenated slices and labels. Both files go to /testing_data/,
# which is where the noise section loads them from (the dataset path used to
# be misspelled '/testiing_data/').
np.save('/testing_data/test_c2_128x128_dataset_01.npy', og_slice_data)
np.save('/testing_data/test_c2_128x128_labels_01.npy', og_slice_labels)


# Alternatives for the other runs - enable the pair matching the data built above
#np.save('/testing_data/test_c3_128x128_dataset_01.npy', og_slice_data)
#np.save('/testing_data/test_c3_128x128_labels_01.npy', og_slice_labels)

#np.save('/testing_data/test_c14_128x128_dataset_01.npy', og_slice_data)
#np.save('/testing_data/test_c14_128x128_labels_01.npy', og_slice_labels)

#np.save('/training_data/training_128x128_dataset_01.npy', og_slice_data)
#np.save('/training_data/training_128x128_labels_01.npy', og_slice_labels)


# Adding Poisson Noise To Data

# Functions

In [None]:
def add_train_noise(img, VARIABILITY):
    """Add random poisson noise to an image

    The image is scaled so that its maximum corresponds to VARIABILITY
    expected counts, one Poisson sample is drawn per pixel, the result is
    scaled back and finally clipped to the original [min, max] range.

    Parameters
    ==========
    img (np) - 2D numpy image; values must not be negative, otherwise
        np.random.poisson raises ValueError

    VARIABILITY - int - the peak expected count used for np.random.poisson;
        larger values give relatively less noise

    Returns
    =======
    a new float array with the same shape as img (img itself is not modified)

    """

    mins = np.min(img)
    maxs = np.max(img)

    # An all-zero image has nothing to scale: dividing by its maximum would
    # produce NaN rates and make np.random.poisson raise. Its noisy version
    # is simply all zeros.
    if maxs == 0 and mins == 0:
        return np.zeros(np.shape(img), dtype=np.float64)

    noisy = np.random.poisson(img / maxs * VARIABILITY) / VARIABILITY * maxs

    # Keep the noisy image inside the value range of the input
    return np.clip(noisy, mins, maxs)

In [None]:
# Load the slices (X) and their integer labels (y) written by the save cell
X_data = np.load('/testing_data/test_c2_128x128_dataset_01.npy')
y_data = np.load('/testing_data/test_c2_128x128_labels_01.npy')

#X_data = np.load('/testing_data/test_c3_128x128_dataset_01.npy')
#y_data = np.load('/testing_data/test_c3_128x128_labels_01.npy')

#X_data = np.load('/testing_data/test_c14_128x128_dataset_01.npy')
#y_data = np.load('/testing_data/test_c14_128x128_labels_01.npy')

# NOTE(review): the save cell writes the training arrays under
# '/training_data/', not '/testing_data/' - confirm the path before enabling.
#X_data = np.load('/testing_data/training_128x128_dataset_01.npy')
#y_data = np.load('/testing_data/training_128x128_labels_01.npy')


# Peak values handed to add_train_noise; one noisy copy is made per value
noise_peaks = [4000]

# Add noise to the data
new_X_data = []
new_y_data = []

for ids, slices in tqdm(enumerate(X_data), total=len(X_data)):
    slice_label = y_data[ids]

    for value in noise_peaks:
        # work on a copy so the loaded array is left untouched
        new_X_data.append(add_train_noise(slices.copy(), value))
        new_y_data.append(slice_label)


new_X_data = np.asarray(new_X_data)

# One-hot encode the labels into 3 classes
new_y_data = to_categorical(np.asarray(new_y_data), 3)

print(new_X_data.shape, new_y_data.shape)
        


In [None]:
# Save the noisy 128x128 slices and one-hot labels. The file name must match
# the dataset loaded in the noise cell (c2 by default); previously the c2
# data was written under a c14 name.
np.save('/testing_data/noise_test_c2_128x128_dataset_01.npy', new_X_data)
np.save('/testing_data/noise_test_c2_128x128_labels_01.npy', new_y_data)

# Alternatives for the other runs - enable the pair matching the loaded data
#np.save('/testing_data/noise_test_c3_128x128_dataset_01.npy', new_X_data)
#np.save('/testing_data/noise_test_c3_128x128_labels_01.npy', new_y_data)

#np.save('/testing_data/noise_test_c14_128x128_dataset_01.npy', new_X_data)
#np.save('/testing_data/noise_test_c14_128x128_labels_01.npy', new_y_data)

# Save A 64x64 Version Of All Slices For AutoKeras and Training

In [None]:
# Central 64x64 crop of every slice (rows/columns 32..95 of the 128x128 image)
dataset_64 = new_X_data[:, 32:96, 32:96]


# Save only the pair that matches the dataset loaded in the noise cell (c2 by
# default). Previously all four pairs were written from the same arrays, so
# the c3, c14 and training files received c2 data.
np.save('../testing_data/noise_test_c2_dataset_01.npy', dataset_64)
np.save('../testing_data/noise_test_c2_labels_01.npy', new_y_data)

#np.save('../testing_data/noise_test_c3_dataset_01.npy', dataset_64)
#np.save('../testing_data/noise_test_c3_labels_01.npy', new_y_data)

#np.save('../testing_data/noise_test_c14_dataset_01.npy', dataset_64)
#np.save('../testing_data/noise_test_c14_labels_01.npy', new_y_data)

# (label path fixed: it used to read '/..training_data/')
#np.save('../training_data/noise_train_dataset_01.npy', dataset_64)
#np.save('../training_data/noise_train_labels_01.npy', new_y_data)