# Data preprocessing and training

## Imports

In [26]:
from tracNet import TracNet
from data_preparation import matFiles_to_npArray, extract_fields, reshape
from training_and_evaluation import initialize_weights, fit

import matplotlib.pyplot as plt
import numpy as np
import torch

from datetime import datetime
from gc import collect
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchinfo import summary

Set seeds for reproducibility.

In [14]:
# Seed every random number generator this notebook touches so that
# "Restart Kernel -> Run All" yields the same results on every run.
random_seed = 1
# NumPy's global RNG; scikit-learn falls back to it when no `random_state` is passed.
np.random.seed(random_seed)
torch.manual_seed(random_seed)
# Seed all visible GPUs, not only the current one (silently ignored without CUDA).
torch.cuda.manual_seed_all(random_seed)
# cuDNN: turn off the auto-tuner (it may pick different kernels per run) and
# request deterministic kernels; both are needed for repeatable GPU results.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

Use CUDA if available.

In [15]:
# Pick the GPU when one is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Free unreferenced Python objects and cached CUDA memory left over from earlier runs.
collect()
torch.cuda.empty_cache()

print(f"Running on device: {device}")

Running on device: cpu


## Data loading and preprocessing

Set the paths to the training data.

In [16]:
# All four data folders live below one machine-specific root directory.
#
# Martinsried (inactive) used the sub-folders 'dspl', 'dsplRadial', 'trac', 'tracRadial' below
# '/home/alexrichard/LRZ Sync+Share/ML in Physics/Repos/DL-TFM-main/train/trainData104'
#
# Macbook (active). NOTE(review): this configuration reads the 'foo_'-prefixed
# folders, unlike the Martinsried one -- confirm that this is intended.
train_data_root = '/Users/alex/LRZ Sync+Share/ML in Physics/Repos/DL-TFM-main/train/trainData104'

dspl_path = f'{train_data_root}/foo_dspl'
dsplRadial_path = f'{train_data_root}/foo_dsplRadial'
trac_path = f'{train_data_root}/foo_trac'
tracRadial_path = f'{train_data_root}/foo_tracRadial'

Create `ndarrays` of `dicts` containing either the inputs or targets.

In [17]:
# Load each folder of .mat files into an ndarray of dicts. Keys per dict:
#   samples      -> ['brdx', 'brdy', 'dspl', 'name']
#   dspl_radials -> ['dspl', 'name']
#   targets      -> ['brdx', 'brdy', 'trac', 'name']
#   trac_radials -> ['trac', 'name']
samples, dspl_radials, targets, trac_radials = (
    matFiles_to_npArray(folder)
    for folder in (dspl_path, dsplRadial_path, trac_path, tracRadial_path)
)

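Split the training data into a training and a validation set. The radial and the regular samples are split separately (5% validation each) and then concatenated, so both sets contain the same proportion of each kind of sample.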

In [18]:
# Split the radial samples first, then the regular ones (same order of RNG use),
# holding out 5 % of each group for validation.
radial_X_train, radial_X_val, radial_y_train, radial_y_val = train_test_split(
    dspl_radials, trac_radials, test_size=0.05
)
regular_parts = train_test_split(samples, targets, test_size=0.05)
radial_parts = (radial_X_train, radial_X_val, radial_y_train, radial_y_val)

# Join each radial part with its regular counterpart, radial entries first.
X_train, X_val, y_train, y_val = (
    np.append(radial_part, regular_part)
    for radial_part, regular_part in zip(radial_parts, regular_parts)
)

Extract displacement and traction fields from the data and drop (meta-) data which is not needed for training purposes.

In [19]:
# Run `extract_fields` over every split, in the order train/val inputs, then train/val targets.
X_train, X_val, y_train, y_val = map(
    extract_fields, (X_train, X_val, y_train, y_val)
)

Current shape of the datasets is (samples, width, height, depth). 
Reshape them to (samples, channels, depth, height, width) to allow 3D-Convolutions during training.

In [20]:
# Apply the project's `reshape` helper to each of the four splits.
X_train, X_val, y_train, y_val = [
    reshape(fields) for fields in (X_train, X_val, y_train, y_val)
]

Convert datasets to Pytorch tensors.

In [21]:
# Wrap each NumPy array as a tensor and cast it to float64 (the model below is `.double()` too).
X_train, X_val, y_train, y_val = (
    torch.from_numpy(array).double()
    for array in (X_train, X_val, y_train, y_val)
)

Create Pytorch dataloaders, specify batch sizes and number of workers.

In [22]:
# Pair inputs with their targets so the loaders yield (input, target) batches.
train_set = TensorDataset(X_train, y_train)
val_set = TensorDataset(X_val, y_val)

batch_size = 4

# Pinned (page-locked) host memory only speeds up host-to-GPU copies. Requesting
# it without CUDA has no effect and makes PyTorch emit a warning, so enable it
# only when a GPU is actually available.
use_pinned_memory = torch.cuda.is_available()

dataloaders = {
    # Training batches are reshuffled every epoch.
    'train': DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=use_pinned_memory,
    ),
    # Validation uses twice the training batch size and is not shuffled.
    'val': DataLoader(
        val_set,
        batch_size=2 * batch_size,
        num_workers=8,
        pin_memory=use_pinned_memory,
    ),
}



## Training

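Define a custom loss function corresponding to the forward loss function of the Matlab regression layer for image-to-image networks:

$$\mathrm{loss} = \frac{1}{2} \sum_{p=1}^{HWC} (t_{p} - y_{p})^{2}$$

Note that the implementation below sums over all elements of a batch, i.e. the per-sample losses of a batch are added up.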

In [24]:
class Custom_Loss(torch.nn.Module):
    """Half the sum of squared errors between target and prediction.

    The sum runs over every element of the tensors (including the batch
    dimension), so the result is a scalar tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, predictions, target):
        """Return ``0.5 * sum((target - predictions) ** 2)`` as a 0-dim tensor."""
        residual = target - predictions
        return 0.5 * residual.pow(2).sum()

Instantiate the model (including logs for evaluation), the optimizer and train the model.

In [25]:
# Timestamped run name; it doubles as the TensorBoard log sub-directory.
NAME = f"TracNet104-{datetime.now():%Y-%b-%d %H:%M:%S}"
writer = SummaryWriter(log_dir=f'logs/{NAME}')

# Double-precision model on the selected device, then apply `initialize_weights` to its modules.
model = TracNet(n_channels=1).double().to(device)
model.apply(initialize_weights)

# To create a computational graph in TensorBoard, uncomment the following lines.
# NOTE(review): they rebind the name `targets`, which the data-loading cell also uses.
# inputs, targets = next(iter(dataloaders['train']))
# inputs = inputs.to(device)
# targets = targets.to(device)
# writer.add_graph(model, inputs)

loss_fn = Custom_Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0006, weight_decay=0.0005)
# Multiply the learning rate by 0.7943 every 10 scheduler steps; `verbose=True` prints each adjustment.
scheduler = StepLR(optimizer, step_size=10, gamma=0.7943, verbose=True)

# NOTE(review): training is currently disabled. As written, the call below is a
# SyntaxError once uncommented, because the positional arguments `writer, NAME`
# follow keyword arguments. Pass them by keyword or move them in front of
# `max_epochs` -- check the signature of `fit` first.
# fit(model, loss_fn, scheduler, dataloaders, optimizer, device, max_epochs=100, patience=5, writer, NAME)

Adjusting learning rate of group 0 to 6.0000e-04.
