# Create simualted datasets

For both the training and testing datasets.

The processing steps include:
- Domain randomisation (relrod, spot_spread)
- Multiple phases (6)
- Corruption of data (disabled for this example)
- Adding nose (S&P)
- Adding background intensity

In [1]:
# Packages
%matplotlib qt
import numpy as np
import hyperspy.api as hs
import pyxem as pxm
import diffpy.structure
from matplotlib import pyplot as plt
from tempfile import TemporaryFile
from diffsims.libraries.structure_library import StructureLibrary
from diffsims.generators.diffraction_generator import DiffractionGenerator
from diffsims.generators.library_generator import DiffractionLibraryGenerator, VectorLibraryGenerator
from pyxem.utils.sim_utils import sim_as_signal
import tqdm
import gc
import os



In [2]:
### Variables

# Paths
root = r'C:\Users\Sauron\Documents\jf631\SED_scripts'
root = r'C:\Users\Sauron\PycharmProjects\ml_pyxem\SED_scripts'
structures_path = os.path.join(root, 'nn_models/crystal_phases')
phase_files = ['p4mbm_tetragonal.cif',  'gratia_2h.cif', 'gratia_4h.cif', 'gratia_6h.cif', 'pbi2_2h.cif', 'pb.cif',]

# Calibration values

calibrations = [0.0056, 0.0058,]

# Processing values
n_angle_points = 600

# Domain amplification
simulated_direct_beam_bool = [False,]
relrod_list = [0.002, 0.02, 0.03]
spot_spread_list = [0.006, 0.014, 0.022]

# Simulation microscope values (for azimuthal integration)
detector_size = 515 #px
beam_energy = 200.0 #keV
wavelength = 2.5079e-12 #m
detector_pix_size = 55e-6 #m
from pyxem.detectors import Medipix515x515Detector
detector = Medipix515x515Detector()

# Data corruption
#corrupt_n_times = 2

# Noise addition values (do not change)
include_also_non_noisy_simulation = True
snrs = [0.9, 0.99]
intensity_spikes = [0.25,]

# Cropping and post-processing
cropping_start_k = 0.11 #k units
cropping_stop_k = 1.30 #k_units
cropped_signal_k_points = 147 # To rebin signal, if necessary (when using k_units)

cropping_start_px = 13.55 #pixels
cropping_stop_px = 160.55 #pixels
sqrt_signal = False


# Background parameterisation values (A: pre-exp factor, tau: decay time constant)
a_vals = [0.5, 1., 2., 5.]
tau_vals = [0.5, 1, 1.5]

In [3]:
val = n_angle_points * (len(phase_files) + 1)* len(relrod_list) * len(spot_spread_list) * len(snrs) * len(intensity_spikes)
print('Approx amount of 2D diffraction patterns that will be produced: {}'.format(val))
memory = detector_size**2 * val * 4 / 1e9  #4 bytes per float32 value
print('Approx memory needed: {} GB'.format(memory))

Approx amount of 2D diffraction patterns that will be produced: 75600
Approx memory needed: 80.20404 GB


# Rep 1

In [4]:
calibration = calibrations[0]

## Simulate data for each phase

In [5]:
phase_dict = {}
for phase in phase_files:
    name = phase.split(".")[0]
    phase_dict[name] = diffpy.structure.loadStructure(os.path.join(structures_path, phase))

print('n_phases = {}'.format(len(phase_dict)))

n_phases = 6


In [6]:
def get_random_euler(npoints):
    radius = 1
    np.random.seed(1)
    u = np.random.randint(-100,100+1,size=(npoints,))/100 
    u2 = 2*np.pi*np.random.random(size=(npoints,))
    theta = 2*np.pi*np.random.random(size=(npoints,))
    x = radius*np.sqrt(1-u**2)*np.cos(theta)
    y = radius*np.sqrt(1-u**2)*np.sin(theta)
    z = radius*u 
    phi = np.arccos(z/radius)
    eulerAlpha = u2
    eulerBeta = phi
    eulerGamma = theta
    return np.array([np.rad2deg(eulerAlpha),np.rad2deg(eulerBeta),np.rad2deg(eulerGamma)]).T 


def get_reciprocal_radius(detector_size, calibration):
    half_pattern_size = detector_size // 2
    reciprocal_radius = calibration * half_pattern_size
    return reciprocal_radius


def create_diffraction_library(phase_dict, euler_list,
                                       beam_energy, relrod_length,
                                       calibration, detector_size,
                                       with_direct_beam):

    phase_names = list(phase_dict.keys())
    phases = list(phase_dict.values())
    euler_list_n = [euler_list, ] * len(phase_names)

    sample_lib = StructureLibrary(phase_names, phases, euler_list_n)
    ediff = DiffractionGenerator(beam_energy, relrod_length)
    diff_gen = DiffractionLibraryGenerator(ediff)

    reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
    library = diff_gen.get_diffraction_library(sample_lib,
                                               calibration=calibration,
                                               reciprocal_radius=reciprocal_radius,
                                               half_shape=(detector_size//2, detector_size//2),
                                               with_direct_beam=with_direct_beam)
    return library

In [7]:
%%capture
data = {}
for key, val in phase_dict.items():
    data[key] = []
for with_direct_beam in simulated_direct_beam_bool:
    for relrod_length in tqdm.tqdm(relrod_list):
        for spot_spread in spot_spread_list:

            euler_list = get_random_euler(n_angle_points)

            library = create_diffraction_library(phase_dict, euler_list,
                                                 beam_energy, relrod_length,
                                                 calibration, detector_size,
                                                 with_direct_beam)

            reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
            for euler in euler_list:
                for phase in library.keys():
                    pattern = sim_as_signal(library.get_library_entry(phase=phase,
                                                                      angle=euler)['Sim'],
                                            detector_size, spot_spread, reciprocal_radius)

                    data[phase].append(pattern)

In [8]:
# Stack data
import dask.array as da

for i, value in enumerate(data.values()):
    list_data = da.from_array([x.data for x in value], chunks=(10, detector_size, detector_size))

    if i ==0:
        #list_data = np.expand_dims(list_data, 1)
        training_data = list_data
    else:
        #list_data = np.expand_dims(list_data, 1)
        training_data = da.vstack([training_data, list_data],)

del data
del library
del list_data
gc.collect()

shape = (len(phase_dict.keys()),
         n_angle_points*len(relrod_list)*len(spot_spread_list)*len(simulated_direct_beam_bool),
         detector_size,
         detector_size)

training_data = training_data.reshape(shape)
training_data = pxm.LazyElectronDiffraction2D(training_data)
training_data.set_diffraction_calibration(calibration)
print(training_data)

<LazyElectronDiffraction2D, title: , dimensions: (5400, 6|515, 515)>


## Recenter

In [9]:
shiftList = np.zeros((np.size(training_data.data,0),
                      np.size(training_data.data,1),
                      2,)
                     )

shiftList[:,:,0]=0.5
shiftList[:,:,1]=0.5

shiftList = shiftList.reshape(-1, shiftList.shape[-1]) # Flatten the 2D navigtion axis

training_data.compute()
training_data.align2D(shifts=shiftList,crop=False,fill_value=0., parallel=True)

#name = '2D_simulated_data_{}classes_{}neuler_domainrand_centered_{}cal.hspy'.format(np.size(training_data.data,0),  n_angle_points, calibration)
#training_data.save(os.path.join('2d_simulated_data', name))
print(training_data)



[########################################] | 100% Completed | 12.1s
<ElectronDiffraction2D, title: , dimensions: (5400, 6|515, 515)>


  0%|          | 0/32400 [00:00<?, ?it/s]

## Add background phase (without signal)

Create a blank detector in which noise and a bkg will be added.

In [10]:
# Add phase in the dictionary
phase_dict['bkg_phase'] = []

# Create blank datector
shape_blank = np.shape(training_data,)[1:]
shape_blank = (1,) + shape_blank
blank = pxm.signals.electron_diffraction2d.ElectronDiffraction2D(np.zeros(shape_blank))
training_data = hs.stack([training_data, blank], axis=1)

print(len(phase_dict))
training_data.data.shape


[########################################] | 100% Completed |  3.2s
7


(7, 5400, 515, 515)

## Add noise

In two steps:
- S&P noise
- Poisson noise

In [11]:
def add_noise_to_simulation(simulation_arr, snr, int_salt,):

    import numpy as np

    # Salt and pepper
    def addsalt_pepper(dp_arr, snr, int_min = 0, int_max = int_salt,):

        p0 = snr
        # Add noise
        size = np.shape(dp_arr)
        mask = np.random.choice(a=(0, 1, 2),
                                size=size,
                                p=[p0, (1 - p0) / 2., (1 - p0) / 2.])

        im = dp_arr.copy()
        #im[mask == 1] = int_min # salt noise
        im[mask == 2] = int_max # pepper noise

        return im

    # Add poisson noise on sp noise and normalise
    im = simulation_arr.copy()
    im += np.random.poisson(im)

    max = im.max()
    if max == 0:
        im = im
    else:
        im = im / im.max()

    # Add bright spots randomly accross detector
    im_sp = addsalt_pepper(im, snr,)

    return im_sp

In [12]:
# Map the noise addition function on signal

training_data_noisy = []

# Include the non-corrupted data in the dataset?
if include_also_non_noisy_simulation:
    training_data_noisy.append(training_data)

# Append noisy data
for snr in snrs:
    for int_spike in intensity_spikes:

        signal_noisy = training_data.map(add_noise_to_simulation,
                                         snr=snr, int_salt=int_spike,
                                         inplace=False, parallel=True)

        training_data_noisy.append(signal_noisy)

del training_data
del signal_noisy
gc.collect()

  0%|          | 0/37800 [00:00<?, ?it/s]

  0%|          | 0/37800 [00:00<?, ?it/s]

0

In [13]:
training_data_noisy = hs.stack(training_data_noisy, axis=0)
print(training_data_noisy)

[########################################] | 100% Completed |  2.3s
<ElectronDiffraction2D, title: , dimensions: (16200, 7|515, 515)>


## Integrate radially

In [14]:
camera_length = detector_pix_size / (wavelength * calibration * 1e10)
training_data_noisy.unit = "k_A^-1"
training_data_noisy.set_experimental_parameters(beam_energy=beam_energy)
radial_steps = int(np.ceil((int(detector_size/2) - 1)/2)*2)
training_data_1D = training_data_noisy.get_azimuthal_integral1d(npt_rad=radial_steps,
                                                          center=([detector_size/2,detector_size/2]),
                                                          detector=detector,
                                                          detector_dist=camera_length,
                                                          map_kwargs={'parallel':True})
print(training_data_1D)

del training_data_noisy
gc.collect()

[0, 12.775920463850575]
<ElectronDiffraction1D, title: , dimensions: (16200, 7|256)>


  0%|          | 0/113400 [00:00<?, ?it/s]

1805

## Normalise (and sqrt)

In [15]:
# Sqrt signal (if wanted)
if sqrt_signal:
    training_data_1D.data = np.sqrt(training_data_1D.data)

# Normalise
dpmax = training_data_1D.data.max(2)
training_data_1D_norm = training_data_1D.data/dpmax[:,:,np.newaxis]

# Correct any nan value
nan_mask = np.isnan(training_data_1D_norm)
training_data_1D_norm[nan_mask] = 0

print(training_data_1D_norm.shape)

(7, 16200, 256)


  import sys


## Add simulated background

Approximate background as a $A*exp^{(-tau \: q)}$ value.

In [16]:
def add_background_to_signal1d_array(normalised_1d_sim_data_array, x_axis,
                                     a_val, tau_val, bkg_function='exp_decay'):
    """
    :param normalised_1d_sim_data_array:
        The normalised 1d signal array (nav axis should be (points, phases, q))
    :param x_axis: array of the actual q values
        The A and tau values are optimised for 1/A-1 magnitude
    :return: extended signal with new sets of sim data without and with bakgrounds
    """
    def inv_q(x, A, tau):
        return A * x**(-tau)

    def exp_decay(x, A, tau):
        return A * np.exp(- tau * x)

    if bkg_function == 'exp_decay':
        bkg = exp_decay(x_axis, a_val, tau_val)
    elif bkg_function == 'inv_q':
        bkg = inv_q(x_axis, a_val, tau_val)

    s = normalised_1d_sim_data_array + bkg
    return s


# Expand datasets by copying and adding bkg
training_data_1D_norm_bkg = training_data_1D_norm

# Get the x-axis values from which to calculate bkg
qs = training_data_1D.axes_manager.signal_axes[0].axis

# Add bkg to signal
for a in a_vals:
    for tau in tau_vals:
        bkg_data = add_background_to_signal1d_array(training_data_1D_norm, qs, a, tau)
        training_data_1D_norm_bkg = np.hstack((training_data_1D_norm_bkg, bkg_data))

training_data_1D_norm_bkg.shape

(7, 210600, 256)

## Crop, rebin and renormalise

Crop both in terms of q (rebin but no shift) and pixel values (shift but no rebin).

In [17]:
training_data_1D_norm_bkg = hs.signals.Signal1D(training_data_1D_norm_bkg)

training_data_1D_px = training_data_1D_norm_bkg.deepcopy()

# Recreate .hspy object to crop with k units
scale = training_data_1D.axes_manager.signal_axes[0].scale
offset = training_data_1D.axes_manager.signal_axes[0].offset
training_data_1D_norm_bkg.axes_manager.signal_axes[0].scale = scale
training_data_1D_norm_bkg.axes_manager.signal_axes[0].offset = offset

training_data_1D_q = training_data_1D_norm_bkg.deepcopy()

del training_data_1D
del bkg_data
del training_data_1D_norm
del training_data_1D_norm_bkg
gc.collect()

# In k units:
# Crop in k units
training_data_1D_q.crop_signal1D(cropping_start_k, cropping_stop_k)
# Rebin
scale_rebin = training_data_1D_q.data.shape[-1] / cropped_signal_k_points
scale_rebin
training_data_1D_q = training_data_1D_q.rebin(scale=(1,1,scale_rebin))
# Renormalise data
dpmax = training_data_1D_q.data.max(-1)
training_data_1D_q = training_data_1D_q.data/dpmax[:,:,np.newaxis]

# In pixel units:
# Crop in pixel units
training_data_1D_px.crop_signal1D(cropping_start_px, cropping_stop_px)
# Renormalise data
dpmax = training_data_1D_px.data.max(-1)
training_data_1D_px = training_data_1D_px.data/dpmax[:,:,np.newaxis]

print(training_data_1D_q.shape)
print(training_data_1D_px.shape)



(7, 210600, 147)
(7, 210600, 147)


## NN requirements: reshape and labelling

In [18]:
training_data_1D_q = training_data_1D_q.reshape(-1, training_data_1D_q.shape[-1])
training_data_1D_px = training_data_1D_px.reshape(-1, training_data_1D_px.shape[-1])

print(training_data_1D_q.shape)
print(training_data_1D_px.shape)

(1474200, 147)
(1474200, 147)


In [19]:
# Create labels
n_phases = len(phase_dict)
labels = np.zeros((n_phases, int(training_data_1D_q.shape[0]/n_phases)))
for i in range(n_phases):
    labels[i,:] = i

training_labels = labels.flatten()
training_labels.shape

(1474200,)

In [20]:
# Check for outliers and nan values
where_nan_q = np.argwhere(np.isnan(training_data_1D_q))
where_nan_px = np.argwhere(np.isnan(training_data_1D_px))

training_data_1D_q = np.delete(training_data_1D_q, where_nan_q[:,0], axis = 0)
training_labels_q = np.delete(training_labels, where_nan_q[:,0], axis = 0)

training_data_1D_px = np.delete(training_data_1D_px, where_nan_px[:,0], axis = 0)
training_labels_px = np.delete(training_labels, where_nan_px[:,0], axis = 0)

print(training_data_1D_q.shape, training_labels_q.shape)
print(training_data_1D_px.shape, training_labels_px.shape)

(1468764, 147) (1468764,)
(1468743, 147) (1468743,)


In [21]:
phase_names = list(phase_dict.keys())

print(phase_names)
print(training_labels[-1])

['p4mbm_tetragonal', 'gratia_2h', 'gratia_4h', 'gratia_6h', 'pbi2_2h', 'pb', 'bkg_phase']
6.0


In [22]:
store_train_data = TemporaryFile()
x = training_data_1D_q
y = training_labels_q

np.savez('1D_simulated_data_cal{}_cropK_{}classesInclBkg_{}neuler_domainrand_noisy_bkg'.format(calibration,
                                                                              n_phases,
                                                                        n_angle_points,),
         x=x, y=y, phases=phase_names)

In [23]:
store_train_data = TemporaryFile()
x = training_data_1D_px
y = training_labels_px

np.savez('1D_simulated_data_cal{}_cropPX_{}classesInclBkg_{}neuler_domainrand_noisy_bkg'.format(calibration,
                                                                              n_phases,
                                                                        n_angle_points,),
         x=x, y=y, phases=phase_names)

In [24]:
i = 0
plt.figure()
plt.plot(training_data_1D_px[i], label='px')
plt.plot(training_data_1D_q[i], label='q')
plt.legend()

del training_data_1D_px
del training_data_1D_q
gc.collect()

3088

# Rep 2

In [25]:
calibration = calibrations[1]

## Simulate data for each phase

In [26]:
phase_dict = {}
for phase in phase_files:
    name = phase.split(".")[0]
    phase_dict[name] = diffpy.structure.loadStructure(os.path.join(structures_path, phase))

print('n_phases = {}'.format(len(phase_dict)))

n_phases = 6


In [27]:
def get_random_euler(npoints):
    radius = 1
    np.random.seed(1)
    u = np.random.randint(-100,100+1,size=(npoints,))/100
    u2 = 2*np.pi*np.random.random(size=(npoints,))
    theta = 2*np.pi*np.random.random(size=(npoints,))
    x = radius*np.sqrt(1-u**2)*np.cos(theta)
    y = radius*np.sqrt(1-u**2)*np.sin(theta)
    z = radius*u
    phi = np.arccos(z/radius)
    eulerAlpha = u2
    eulerBeta = phi
    eulerGamma = theta
    return np.array([np.rad2deg(eulerAlpha),np.rad2deg(eulerBeta),np.rad2deg(eulerGamma)]).T


def get_reciprocal_radius(detector_size, calibration):
    half_pattern_size = detector_size // 2
    reciprocal_radius = calibration * half_pattern_size
    return reciprocal_radius


def create_diffraction_library(phase_dict, euler_list,
                                       beam_energy, relrod_length,
                                       calibration, detector_size,
                                       with_direct_beam):

    phase_names = list(phase_dict.keys())
    phases = list(phase_dict.values())
    euler_list_n = [euler_list, ] * len(phase_names)

    sample_lib = StructureLibrary(phase_names, phases, euler_list_n)
    ediff = DiffractionGenerator(beam_energy, relrod_length)
    diff_gen = DiffractionLibraryGenerator(ediff)

    reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
    library = diff_gen.get_diffraction_library(sample_lib,
                                               calibration=calibration,
                                               reciprocal_radius=reciprocal_radius,
                                               half_shape=(detector_size//2, detector_size//2),
                                               with_direct_beam=with_direct_beam)
    return library

In [28]:
%%capture
data = {}
for key, val in phase_dict.items():
    data[key] = []
for with_direct_beam in simulated_direct_beam_bool:
    for relrod_length in tqdm.tqdm(relrod_list):
        for spot_spread in spot_spread_list:

            euler_list = get_random_euler(n_angle_points)

            library = create_diffraction_library(phase_dict, euler_list,
                                                 beam_energy, relrod_length,
                                                 calibration, detector_size,
                                                 with_direct_beam)

            reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
            for euler in euler_list:
                for phase in library.keys():
                    pattern = sim_as_signal(library.get_library_entry(phase=phase,
                                                                      angle=euler)['Sim'],
                                            detector_size, spot_spread, reciprocal_radius)

                    data[phase].append(pattern)

In [29]:
# Stack data
import dask.array as da

for i, value in enumerate(data.values()):
    list_data = da.from_array([x.data for x in value], chunks=(10, detector_size, detector_size))

    if i ==0:
        #list_data = np.expand_dims(list_data, 1)
        training_data = list_data
    else:
        #list_data = np.expand_dims(list_data, 1)
        training_data = da.vstack([training_data, list_data],)

del data
del library
del list_data
gc.collect()

shape = (len(phase_dict.keys()),
         n_angle_points*len(relrod_list)*len(spot_spread_list)*len(simulated_direct_beam_bool),
         detector_size,
         detector_size)

training_data = training_data.reshape(shape)
training_data = pxm.LazyElectronDiffraction2D(training_data)
training_data.set_diffraction_calibration(calibration)
print(training_data)

<LazyElectronDiffraction2D, title: , dimensions: (5400, 6|515, 515)>


## Recenter

In [30]:
shiftList = np.zeros((np.size(training_data.data,0),
                      np.size(training_data.data,1),
                      2,)
                     )

shiftList[:,:,0]=0.5
shiftList[:,:,1]=0.5

shiftList = shiftList.reshape(-1, shiftList.shape[-1]) # Flatten the 2D navigtion axis

training_data.compute()
training_data.align2D(shifts=shiftList,crop=False,fill_value=0., parallel=True)

#name = '2D_simulated_data_{}classes_{}neuler_domainrand_centered_{}cal.hspy'.format(np.size(training_data.data,0),  n_angle_points, calibration)
#training_data.save(os.path.join('2d_simulated_data', name))
print(training_data)



[########################################] | 100% Completed | 12.0s
<ElectronDiffraction2D, title: , dimensions: (5400, 6|515, 515)>


  0%|          | 0/32400 [00:00<?, ?it/s]

## Add background phase (without signal)

Create a blank detector in which noise and a bkg will be added.

In [31]:
# Add phase in the dictionary
phase_dict['bkg_phase'] = []

# Create blank datector
shape_blank = np.shape(training_data,)[1:]
shape_blank = (1,) + shape_blank
blank = pxm.signals.electron_diffraction2d.ElectronDiffraction2D(np.zeros(shape_blank))
training_data = hs.stack([training_data, blank], axis=1)

print(len(phase_dict))
training_data.data.shape


[########################################] | 100% Completed |  3.3s
7


(7, 5400, 515, 515)

## Add noise

In two steps:
- S&P noise
- Poisson noise

In [32]:
def add_noise_to_simulation(simulation_arr, snr, int_salt,):

    import numpy as np

    # Salt and pepper
    def addsalt_pepper(dp_arr, snr, int_min = 0, int_max = int_salt,):

        p0 = snr
        # Add noise
        size = np.shape(dp_arr)
        mask = np.random.choice(a=(0, 1, 2),
                                size=size,
                                p=[p0, (1 - p0) / 2., (1 - p0) / 2.])

        im = dp_arr.copy()
        #im[mask == 1] = int_min # salt noise
        im[mask == 2] = int_max # pepper noise

        return im

    # Add poisson noise on sp noise and normalise
    im = simulation_arr.copy()
    im += np.random.poisson(im)

    max = im.max()
    if max == 0:
        im = im
    else:
        im = im / im.max()

    # Add bright spots randomly accross detector
    im_sp = addsalt_pepper(im, snr,)

    return im_sp

In [33]:
# Map the noise addition function on signal

training_data_noisy = []

# Include the non-corrupted data in the dataset?
if include_also_non_noisy_simulation:
    training_data_noisy.append(training_data)

# Append noisy data
for snr in snrs:
    for int_spike in intensity_spikes:

        signal_noisy = training_data.map(add_noise_to_simulation,
                                         snr=snr, int_salt=int_spike,
                                         inplace=False, parallel=True)

        training_data_noisy.append(signal_noisy)

del training_data
del signal_noisy
gc.collect()

  0%|          | 0/37800 [00:00<?, ?it/s]

  0%|          | 0/37800 [00:00<?, ?it/s]

0

In [34]:
training_data_noisy = hs.stack(training_data_noisy, axis=0)
print(training_data_noisy)

[########################################] | 100% Completed |  2.2s
<ElectronDiffraction2D, title: , dimensions: (16200, 7|515, 515)>


## Integrate radially

In [35]:
camera_length = detector_pix_size / (wavelength * calibration * 1e10)
training_data_noisy.unit = "k_A^-1"
training_data_noisy.set_experimental_parameters(beam_energy=beam_energy)
radial_steps = int(np.ceil((int(detector_size/2) - 1)/2)*2)
training_data_1D = training_data_noisy.get_azimuthal_integral1d(npt_rad=radial_steps,
                                                          center=([detector_size/2,detector_size/2]),
                                                          detector=detector,
                                                          detector_dist=camera_length,
                                                          map_kwargs={'parallel':True})
print(training_data_1D)

del training_data_noisy
gc.collect()

[0, 13.23126531791953]
<ElectronDiffraction1D, title: , dimensions: (16200, 7|256)>


  0%|          | 0/113400 [00:00<?, ?it/s]

1805

## Normalise (and sqrt)

In [36]:
# Sqrt signal (if wanted)
if sqrt_signal:
    training_data_1D.data = np.sqrt(training_data_1D.data)

# Normalise
dpmax = training_data_1D.data.max(2)
training_data_1D_norm = training_data_1D.data/dpmax[:,:,np.newaxis]

# Correct any nan value
nan_mask = np.isnan(training_data_1D_norm)
training_data_1D_norm[nan_mask] = 0

print(training_data_1D_norm.shape)

(7, 16200, 256)


  import sys


## Add simulated background

Approximate background as a $A*exp^{(-tau \: q)}$ value.

In [37]:
def add_background_to_signal1d_array(normalised_1d_sim_data_array, x_axis,
                                     a_val, tau_val, bkg_function='exp_decay'):
    """
    :param normalised_1d_sim_data_array:
        The normalised 1d signal array (nav axis should be (points, phases, q))
    :param x_axis: array of the actual q values
        The A and tau values are optimised for 1/A-1 magnitude
    :return: extended signal with new sets of sim data without and with bakgrounds
    """
    def inv_q(x, A, tau):
        return A * x**(-tau)

    def exp_decay(x, A, tau):
        return A * np.exp(- tau * x)

    if bkg_function == 'exp_decay':
        bkg = exp_decay(x_axis, a_val, tau_val)
    elif bkg_function == 'inv_q':
        bkg = inv_q(x_axis, a_val, tau_val)

    s = normalised_1d_sim_data_array + bkg
    return s


# Expand datasets by copying and adding bkg
training_data_1D_norm_bkg = training_data_1D_norm

# Get the x-axis values from which to calculate bkg
qs = training_data_1D.axes_manager.signal_axes[0].axis

# Add bkg to signal
for a in a_vals:
    for tau in tau_vals:
        bkg_data = add_background_to_signal1d_array(training_data_1D_norm, qs, a, tau)
        training_data_1D_norm_bkg = np.hstack((training_data_1D_norm_bkg, bkg_data))

training_data_1D_norm_bkg.shape

(7, 210600, 256)

## Crop, rebin and renormalise

Crop both in terms of q (rebin but no shift) and pixel values (shift but no rebin).

In [38]:
training_data_1D_norm_bkg = hs.signals.Signal1D(training_data_1D_norm_bkg)

training_data_1D_px = training_data_1D_norm_bkg.deepcopy()

# Recreate .hspy object to crop with k units
scale = training_data_1D.axes_manager.signal_axes[0].scale
offset = training_data_1D.axes_manager.signal_axes[0].offset
training_data_1D_norm_bkg.axes_manager.signal_axes[0].scale = scale
training_data_1D_norm_bkg.axes_manager.signal_axes[0].offset = offset

training_data_1D_q = training_data_1D_norm_bkg.deepcopy()

del training_data_1D
del bkg_data
del training_data_1D_norm
del training_data_1D_norm_bkg
gc.collect()

# In k units:
# Crop in k units
training_data_1D_q.crop_signal1D(cropping_start_k, cropping_stop_k)
# Rebin
scale_rebin = training_data_1D_q.data.shape[-1] / cropped_signal_k_points
scale_rebin
training_data_1D_q = training_data_1D_q.rebin(scale=(1,1,scale_rebin))
# Renormalise data
dpmax = training_data_1D_q.data.max(-1)
training_data_1D_q = training_data_1D_q.data/dpmax[:,:,np.newaxis]

# In pixel units:
# Crop in pixel units
training_data_1D_px.crop_signal1D(cropping_start_px, cropping_stop_px)
# Renormalise data
dpmax = training_data_1D_px.data.max(-1)
training_data_1D_px = training_data_1D_px.data/dpmax[:,:,np.newaxis]

print(training_data_1D_q.shape)
print(training_data_1D_px.shape)



(7, 210600, 147)
(7, 210600, 147)


## NN requirements: reshape and labelling

In [39]:
training_data_1D_q = training_data_1D_q.reshape(-1, training_data_1D_q.shape[-1])
training_data_1D_px = training_data_1D_px.reshape(-1, training_data_1D_px.shape[-1])

print(training_data_1D_q.shape)
print(training_data_1D_px.shape)

(1474200, 147)
(1474200, 147)


In [40]:
# Create labels
n_phases = len(phase_dict)
labels = np.zeros((n_phases, int(training_data_1D_q.shape[0]/n_phases)))
for i in range(n_phases):
    labels[i,:] = i

training_labels = labels.flatten()
training_labels.shape

(1474200,)

In [41]:
# Check for outliers and nan values
where_nan_q = np.argwhere(np.isnan(training_data_1D_q))
where_nan_px = np.argwhere(np.isnan(training_data_1D_px))

training_data_1D_q = np.delete(training_data_1D_q, where_nan_q[:,0], axis = 0)
training_labels_q = np.delete(training_labels, where_nan_q[:,0], axis = 0)

training_data_1D_px = np.delete(training_data_1D_px, where_nan_px[:,0], axis = 0)
training_labels_px = np.delete(training_labels, where_nan_px[:,0], axis = 0)

print(training_data_1D_q.shape, training_labels_q.shape)
print(training_data_1D_px.shape, training_labels_px.shape)

(1468764, 147) (1468764,)
(1468767, 147) (1468767,)


In [42]:
phase_names = list(phase_dict.keys())

print(phase_names)
print(training_labels[-1])

['p4mbm_tetragonal', 'gratia_2h', 'gratia_4h', 'gratia_6h', 'pbi2_2h', 'pb', 'bkg_phase']
6.0


In [43]:
store_train_data = TemporaryFile()
x = training_data_1D_q
y = training_labels_q

np.savez('1D_simulated_data_cal{}_cropK_{}classesInclBkg_{}neuler_domainrand_noisy_bkg'.format(calibration,
                                                                              n_phases,
                                                                        n_angle_points,),
         x=x, y=y, phases=phase_names)

In [44]:
store_train_data = TemporaryFile()
x = training_data_1D_px
y = training_labels_px

np.savez('1D_simulated_data_cal{}_cropPX_{}classesInclBkg_{}neuler_domainrand_noisy_bkg'.format(calibration,
                                                                              n_phases,
                                                                        n_angle_points,),
         x=x, y=y, phases=phase_names)

In [45]:
i = 0
plt.figure()
plt.plot(training_data_1D_px[i], label='px')
plt.plot(training_data_1D_q[i], label='q')
plt.legend()

del training_data_1D_px
del training_data_1D_q
gc.collect()




3705