### Training Data

In [1]:
import matplotlib.pyplot as plt

import numpy as np
import numpy
import torch
import time
import random
from timeit import default_timer as timer
from tqdm.autonotebook import tqdm

from mkidreadoutanalysis.quasiparticletimestream import QuasiparticleTimeStream
from data_engineering import make_dataset, plot_stream_data, train_step, test_step, make_predictions

  from tqdm.autonotebook import tqdm


In [2]:
# Generate the dataset: configure the simulation, seed the RNGs, then let
# make_dataset fill the `pulses` / `no_pulses` lists.

NO_PULSE_FRACTION = 0.2  # share of samples requested without a pulse (0.2 * 1000 -> 200 in the logged run)
NUM_SAMPLES = 1000 # This is approximate: the number of photons in the last iteration of the loop is Poisson distributed
QP_TIME_LENGTH = 0.01 # secs
SAMPLING_FREQ = 2e6 # Hz
FALL_TIME = 30  # forwarded as `tf` to gen_quasiparticle_pulse; units are not visible here -- TODO confirm
EDGE_PAD = FALL_TIME * 2
WINDOW_SIZE = 1000
RANDOM_SEED = 42

# Apply the seed before any stochastic work so a fresh-kernel "Run All" is repeatable.
# NOTE(review): this only pins the photon generation if QuasiparticleTimeStream /
# make_dataset draw from these global RNGs. If they build their own unseeded
# generators, a seed must be passed to them explicitly -- confirm against their source.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Output containers, re-created on every run of this cell so re-execution starts
# from empty lists instead of accumulating samples.
no_pulses = []
pulses = []

# Generate qp timestream object
qp_timestream = QuasiparticleTimeStream(SAMPLING_FREQ, QP_TIME_LENGTH)
qp_timestream.gen_quasiparticle_pulse(tf = FALL_TIME)

# Make the dataset
make_dataset(qp_timestream,
             NUM_SAMPLES,
             NO_PULSE_FRACTION,
             pulses,
             no_pulses,
             True,  # positional flag; its meaning is not visible from this notebook -- TODO confirm
             edge_padding=EDGE_PAD,
             window_size=WINDOW_SIZE)


Num samples with photons: 79/800.0



Num samples with photons: 100/800.0



Num samples with photons: 139/800.0



Num samples with photons: 165/800.0



Num samples with photons: 213/800.0



Num samples with photons: 216/800.0



Num samples with photons: 271/800.0



Num samples with photons: 273/800.0



Num samples with photons: 277/800.0



Num samples with photons: 300/800.0



Num samples with photons: 353/800.0



Num samples with photons: 401/800.0



Num samples with photons: 437/800.0



Num samples with photons: 447/800.0



Num samples with photons: 455/800.0



Num samples with photons: 469/800.0



Num samples with photons: 540/800.0



Num samples with photons: 570/800.0



Num samples with photons: 600/800.0



Num samples with photons: 606/800.0



Num samples with photons: 659/800.0



Num samples with photons: 670/800.0

More than 1 photon arriving per time step. Lower the count rate?


Num samples with photons: 695/800.0

More than 1 photon arriving per time step. Lower the count rate?


Num samples with photons: 712/800.0



Num samples with photons: 717/800.0



Num samples with photons: 800/800.0
Number of samples with pulses: 800
Number of samples without pulses: 200


In [18]:
# Split every sample into the model input and its label. In this model the
# label is one scalar per sample, not a time series.
from random import shuffle  # kept so any later cell that uses the bare name still works

# Combine the pulse and no-pulse samples into one list and shuffle it.
# A dedicated RNG seeded with RANDOM_SEED is used instead of the global one, so
# re-running this cell (in any order) always yields the same sample ordering.
train_data = pulses + no_pulses
random.Random(RANDOM_SEED).shuffle(train_data)

# Rows 0-1 of each sample are the training input (I/Q data); row 2 is the
# photon-arrival row, which is summed into a single value for the label.
X = [sample[0:2, :] for sample in train_data]
y = [np.array([sample[2].sum()]) for sample in train_data]  # 1-element array so labels stack to (N, 1)

In [23]:
# With inputs and labels separated, define the split/batching settings and
# divide the dataset into train and test portions.
TEST_RATIO = 0.2
BATCH_SIZE = 32

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_RATIO, # Ratio of test data to use from full dataset; Training is the complement
    random_state=RANDOM_SEED
)

# Sanity checks: nothing was dropped by the split and inputs/labels stay paired.
assert len(X_train) + len(X_test) == len(X), "train/test split lost or duplicated samples"
assert len(X_train) == len(y_train) and len(X_test) == len(y_test), "inputs and labels are misaligned"

# Convert the lists to tensors. Each list is stacked into a single ndarray first
# (torch warns when building a tensor from a list of arrays), and the dtype is
# stated explicitly instead of relying on the legacy torch.Tensor constructor.
X_train = torch.as_tensor(np.array(X_train), dtype=torch.float32)
X_test = torch.as_tensor(np.array(X_test), dtype=torch.float32)
y_train = torch.as_tensor(np.array(y_train), dtype=torch.float32)
y_test = torch.as_tensor(np.array(y_test), dtype=torch.float32)

print(f'# of train samples: {len(X_train)}, # of test samples: {len(X_test)}')

# of train samples: 800, # of test samples: 200


In [24]:
# It's finally time to create our DataLoader objects
from torch.utils.data import TensorDataset, DataLoader

# Pair each input tensor with its label tensor (both are already torch tensors here).
train_dataset = TensorDataset(X_train,
                              y_train)
test_dataset = TensorDataset(X_test,
                             y_test)

# Training batches are reshuffled each epoch. The shuffle draws from a generator
# seeded with RANDOM_SEED, so the batch order is the same on every run of this cell.
train_dloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(RANDOM_SEED)
)
# Test data keeps its original order.
test_dloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Now let's inspect the objects by pulling a single batch from the training loader.
print(f'Type: {type(train_dloader)}')
train_batch_img, train_batch_labels = next(iter(train_dloader))
print(f'Batch Img: {train_batch_img.shape}, Batch Labels: {train_batch_labels.shape}')

Type: <class 'torch.utils.data.dataloader.DataLoader'>
Batch Img: torch.Size([32, 2, 1000]), Batch Labels: torch.Size([32, 1])
