### Training Data

In [None]:
import matplotlib.pyplot as plt

import numpy as np
import numpy
import torch
import time
from timeit import default_timer as timer
import random
from tqdm.autonotebook import tqdm

from mkidreadoutanalysis.quasiparticletimestream import QuasiparticleTimeStream
from training import train_step, test_step, make_predictions
from eval import accuracy_regression, plot_stream_data

In [None]:
# Run configuration: every tunable for the notebook lives in this cell.

NO_PULSE_FRACTION = 0.0
# Approximate: the number of photons in the last iteration of the generation
# loop is Poisson distributed, so the final count can differ slightly.
NUM_SAMPLES = 10_000
QP_TIME_LENGTH = 1e-2  # secs
SAMPLING_FREQ = 2.0e6  # Hz
FALL_TIME = 30
EDGE_PAD = 2 * FALL_TIME  # padding is twice the fall time
WINDOW_SIZE = 1000
RANDOM_SEED = 42

# Containers for the two sample classes; both start out empty.
no_pulses, pulses = [], []

In [None]:
# Load the pre-generated pulse dataset.
# np.load on an .npz file returns a lazy archive object that keeps the file
# handle open; the `with` block closes it as soon as the 'pulses' array has
# been read into memory.
with np.load('data/models/conv_reg/pulses_num10000_win1000_pad60.npz') as pulse_list:
    # list(...) splits the array along its first axis, one entry per sample.
    pulses = list(pulse_list['pulses'])
print(len(pulses))

In [None]:
# Split every sample into the model input and its label. In this model the
# label is a scalar value (photon arrival position), not a time series.
X = []
y = []

# Merge the pulse and no-pulse samples into one list and shuffle it.
# A dedicated RNG seeded with RANDOM_SEED is used so the order (and therefore
# the later seeded train/test split) is identical on every run, without
# touching the global `random` state.
train_data = pulses + no_pulses
random.Random(RANDOM_SEED).shuffle(train_data)

# Rows 0-1 of each element are the training samples (I/Q data); row 2 marks
# the photon arrival with a 1. The arrival index is divided by WINDOW_SIZE to
# turn it into a fraction of the window.
# NOTE(review): a sample with no arrival (or several) yields a label of a
# different shape than the others - confirm before enabling no_pulses.
for element in train_data:
    X.append(element[0:2, :])
    y.append(np.argwhere(element[2] == 1) / WINDOW_SIZE)

In [None]:
# Split the separated samples/labels into train and test partitions and turn
# each partition into a torch Tensor.
from sklearn.model_selection import train_test_split

TEST_RATIO = 0.2  # fraction of the dataset held out for testing; training is the complement
BATCH_SIZE = 32

# train_test_split returns [X_train, X_test, y_train, y_test] in that order.
# Each list is stacked into a single np array before being handed to torch
# (torch warns when it is given a list of arrays directly).
X_train, X_test, y_train, y_test = (
    torch.Tensor(np.array(split))
    for split in train_test_split(
        X, y, test_size=TEST_RATIO, random_state=RANDOM_SEED
    )
)

print(f'# of train samples: {len(X_train)}, # of test samples: {len(X_test)}')

In [None]:
# Wrap the train/test tensors in datasets and batch them with DataLoaders.
from torch.utils.data import TensorDataset, DataLoader

# Pair every sample with its label.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Training batches are reshuffled on every pass; test batches keep their order.
train_dloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Inspect the loader type and the shapes of one training batch.
print(f'Type: {type(train_dloader)}')
train_batch_img, train_batch_labels = next(iter(train_dloader))
print(f'Batch Img: {train_batch_img.shape}, Batch Labels: {train_batch_labels.shape}')

### Model Definition

In [None]:
from models import ConvRegv1

In [None]:
# Create the model instance, loss, and optimizer.
torch.manual_seed(95)

# Device-agnostic setup: use the GPU when one is available, else the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

# Move the model to the chosen device before building the optimizer, so the
# parameters live on the same device that is passed to the train/test steps.
# NOTE(review): assumes ConvRegv1 is a torch.nn.Module (it exposes
# .parameters()) - confirm in models.py.
conv_reg_v1 = ConvRegv1(in_channels=2).to(device)
optimizer = torch.optim.SGD(params=conv_reg_v1.parameters(), lr=0.1)
# 'mean' reduction averages the per-element losses of the batch into one value.
loss_fn = torch.nn.L1Loss(reduction='mean')

In [None]:
# Run the train/test loop and report how long it took.

def total_time(start_time, stop_time):
    """Return the elapsed time between two timer readings."""
    return stop_time - start_time


EPOCHS = 5
train_time_cnn_start = timer()
for epoch in tqdm(range(EPOCHS)):
    print(f'Epoch: {epoch}\n-----------')
    # One pass over the training batches, then one over the test batches.
    train_step(conv_reg_v1, train_dloader, loss_fn, optimizer, accuracy_regression, device)
    test_step(conv_reg_v1, test_dloader, loss_fn, accuracy_regression, device)
train_time_cnn_end = timer()
print(f'Total time to train: {total_time(train_time_cnn_start, train_time_cnn_end):.2f}s')

In [None]:
# Draw random sample/label pairs from the test data and run the model on them.
test_samples = []
test_labels = []

# random.sample picks k distinct positions (no replacement); the pair at each
# position is then fetched from the dataset and moved to the active device.
for idx in random.sample(range(len(test_dataset)), k=15):
    sample, label = test_dataset[idx]
    test_samples.append(sample.to(device))
    test_labels.append(label.to(device))

print(f'Test Sample Shape: {test_samples[0].shape}, Test Label Shape: {test_labels[0].shape}')
# Each sample gets a leading axis of size 1 before being handed to the helper.
# NOTE(review): the original comment says make_predictions returns a tensor - confirm in training.py.
batched_samples = [x.unsqueeze(dim=0) for x in test_samples]
preds = make_predictions(conv_reg_v1, batched_samples)
print(f'Preds shape {preds[0].shape}')



In [None]:
# Plot the I/Q traces of the first 10 sampled test windows together with a
# stream that marks the predicted photon arrival position.
for i in range(10):
    # All-zero stream shaped like this sample's first channel, built on the
    # CPU because it is only used for plotting.
    pred_stream = torch.zeros_like(test_samples[i][0], device='cpu')
    last_bin = pred_stream.shape[0] - 1

    # The prediction is a fraction of the window; scale it back to a bin index.
    # Clamp to the valid range: a negative index would silently wrap around to
    # the end of the stream and an index past the end would raise IndexError.
    index = (preds[i].detach().to('cpu') * WINDOW_SIZE).long().clamp(min=0, max=last_bin)
    pred_stream[index] = 1

    plot_stream_data(test_samples[i][0].to('cpu').numpy(),
                     test_samples[i][1].to('cpu').numpy(),
                     pred_stream.numpy(),
                     units='us')