# Torch Tests
This notebook is for tests of Data Loaders and Transformations of PyTorch
before we start training

In [1]:
import utils as ut
import torch_utils as tu
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
sensor1 = ut.Sensor(40, 0.0, 0.2)
sensor2 = ut.Sensor(80, 0.1, 0.1)
sensor3 = ut.Sensor(80, 0.0, 0.3)
sensor4 = ut.Sensor(20, 0.0, 0.1)
sensors = [sensor1, sensor2, sensor3, sensor4]
dataset = tu.BadSensorsDataset(sensors,200, 20, jiggle_offsets=None)

# if everything worked right we now have 4 * 20 samples a 200 timepoints in 100Hz resolution
assert len(dataset) == 4 * 20
assert len(dataset[0]) == 200

data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index


In [3]:
# let's see if iteration works correctly:

last_samle = dataset[0]
for i in range(1, 4 * 20):
    sample = dataset[i]
    # now we assume each sample to be quite different so:
    diff = np.abs(last_samle - sample).sum()
    if diff < 0.01:
        print("unrealistic small change among different samples at ", i)

In [4]:
# we have a special case for transforms for contrastive learning:
# we want to apply different transforms on the same section to make it invariant
# for it

transform_options = [
    transforms.Compose([tu.AblateBlock(5,30), tu.ToTensor()]),
    transforms.Compose([tu.AddNoise((-0.1, 0.1), (0.0, 0.3)), tu.ToTensor()]),
    transforms.Compose([tu.RandomDownsample(), tu.ToTensor()])
]
trsfm = transforms.RandomChoice(transform_options)

dataset = tu.BadSensorsDataset(sensors,200, 20, jiggle_offsets=20, transform=trsfm, return_two_transforms=True)

dataloader = DataLoader(dataset, 10, shuffle=True)
for i_batch, sample_batched in enumerate(dataloader):
    print(i_batch, len(sample_batched), len(sample_batched[0]), sample_batched[0][0].size())
    s1, s2 = sample_batched
    d1 = (s1[0]-s2[0]).abs().sum().numpy()
    d2 = (s1[0]-s1[5]).abs().sum().numpy()
    if d1 > d2:
        print('strange, two augmentations of same sample have larger difference then different samples')

data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index
data with only two columns is assumed to have no missing value field, just return the whole index
0 2 10 torch.Size([200, 3])
1 2 10 torch.Size([200, 3])
2 2 10 torch.Size([200, 3])
3 2 10 torch.Size([200, 3])
4 2 10 torch.Size([200, 3])
5 2 10 torch.Size([200, 3])
6 2 10 torch.Size([200, 3])
7 2 10 torch.Size([200, 3])


In [12]:
batch_size = 4
valid_size = 0.2
test_size = 0.1
num_workers = 2

num_train = len(dataloader)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(test_size * num_train))
train_and_valid_idx, test_idx = indices[split:], indices[:split]
num_train = len(train_and_valid_idx)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = train_and_valid_idx[split:], train_and_valid_idx[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampeler = SubsetRandomSampler(test_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = DataLoader(dataset, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = DataLoader(dataset, batch_size=batch_size,
    sampler=valid_sampler, num_workers=num_workers)
test_loader = DataLoader(dataset, batch_size=batch_size,
    sampler=test_sampeler, num_workers=num_workers)

# TODO: add train, test, val split (random indexes)
# TODO: add code from https://github.com/Spijkervet/SimCLR
# TODO: add notebooks: Problem Setup/Description

In [13]:
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is not available.  Training on CPU ...


In [20]:
n_epochs = 10

for epoch in range(1, n_epochs+1):
    for i_batch, (sk1, sk2) in enumerate(train_loader):
        print(i_batch, len(sk1), len(sk2), sk2[0].size())
        d1 = (sk1[0]-sk2[0]).abs().sum().numpy()
        d2 = (sk1[0]-sk1[2]).abs().sum().numpy()
        if d1 > d2:
            print('strange, two augmentations of same sample have larger difference then different samples')


0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
0 4 4 torch.Size([200, 3])
1 3 3 torch.Size([200, 3])
