# Synthetic data creation

In [None]:
import numpy as np
from synthetic_data_creator import DataCreator

## Initialization of the inputs

In [None]:
# Shape (height, width) of every 2D array that represents one instance of the dataset.
resolution = (128, 128)

# Composition of the dataset: number of instances to include for each class.
# All classes share the same count, so the mapping is built from the ordered
# sequence of class names (dict insertion order follows this sequence).
data_structure = dict.fromkeys(
    (
        'None',
        'Pulse',
        'BBRFI',
        'NBRFI',
        'Pulse_BBRFI',
        'Pulse_NBRFI',
        'BBRFI_NBRFI',
        'Pulse_BBRFI_NBRFI',
    ),
    50,
)

# Parameters used to simulate each signal class; edit these values to
# customize the characteristics of the generated signals.
signal_detailes = dict(
    Pulse=dict(
        f_hi=1.53,  # GHz
        f_lo=1.21,  # GHz
        DM=56.758,  # Crab pulsar DM
        n_channels=128,
        t_resol=0.0001024,  # seconds
        amplitude=1,  # amplitude
        sigma=2,  # half-width of the pulse (before scattering)
        location=20,  # position of the pulse in a window (for the first frequency channel)
        noise_amplitude=(0.05, 0.5),
    ),
    BBRFI=dict(
        size_range=(0.2, 15),
        location_range=(0, resolution[1]),  # bounded by the second (width) dimension
        noise_amplitude=(0.05, 0.5),
    ),
    NBRFI=dict(
        size_range=(0.2, 15),
        location_range=(0, resolution[0]),  # bounded by the first (height) dimension
        noise_amplitude=(0.05, 0.5),
    ),
)

## Initialization and creation of the dataset

In [None]:
# Create the DataCreator instance from the three inputs:
#   - resolution:       (height, width) of each 2D instance,
#   - data_structure:   number of instances to generate per class,
#   - signal_detailes:  simulation parameters for each signal class.
creator = DataCreator(resolution, data_structure, signal_detailes)

In [None]:
# Generate the dataset according to the data structure and signal details given to the constructor.
# NOTE(review): presumably this populates `creator.data` and `creator.lablels`, which the
# following cells read - confirm against the DataCreator implementation.
creator.create_dataset()

# Plotting

In [None]:
# Example: plot one random spectrogram from the dataset (no labels passed).
creator.plot_images(creator.data, 1)

In [None]:
# Example: plot one random spectrogram from the dataset together with its label.
# The labels are read from the `creator.lablels` attribute.
creator.plot_images(creator.data, 1, labels=creator.lablels)

In [None]:
# Example: plot three random spectrograms from the dataset together with their labels.
creator.plot_images(creator.data, 3, labels=creator.lablels)

## Shuffling the dataset

In [None]:
# Example: plot 25 random spectrograms, with labels, drawn from the first 50 instances of the dataset.
# Before shuffling, all of them should belong to the 'None' class
# (assumes instances are generated in the order of `data_structure` - TODO confirm).
creator.plot_images(creator.data[:50], 25, creator.lablels[:50])

In [None]:
# Shuffle the created dataset so that instances of the different classes are randomly distributed.
# NOTE(review): presumably data and labels are shuffled together so they stay aligned - confirm.
creator.suffle_data()

In [None]:
# Example: plot 25 random spectrograms, with labels, drawn from the first 50 instances of the dataset.
# After shuffling, this slice should contain a mix of different classes.
creator.plot_images(creator.data[:50], 25, creator.lablels[:50])

# Saving the dataset

In [None]:
# Save the created, shuffled dataset under the name 'test_dataset' in the
# '../2_training/data/datasets/' directory.
creator.save_dataset('test_dataset', '../2_training/data/datasets/')

In [None]:
# Load the saved dataset 'test_dataset.npy' from the '../2_training/data/datasets/'
# directory into the 'data' variable.
data = np.load('../2_training/data/datasets/test_dataset.npy')

In [None]:
# Round-trip check: True if the loaded 'data' array has the same shape and elements
# as 'creator.data', False otherwise.
np.array_equal(data, creator.data)

In [None]:
# Load the labels file 'test_dataset_labels.npy' from the same directory into the 'labels' variable.
labels = np.load('../2_training/data/datasets/test_dataset_labels.npy')

In [None]:
# Round-trip check for the labels: compare the loaded 'labels' array with the
# 'lablels' attribute of the 'creator' object.
# Returns True if both arrays have the same shape and elements, False otherwise.
np.array_equal(labels, creator.lablels)