## Load Libraries

In [1]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as utils

from functools import partial as func_partial
from functools import reduce as func_reduce
from operator import mul as op_mul
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from os import cpu_count, path
from time import strftime

In [2]:
# Show number of avlaible CPU threads
# With mulithreading this number is twice the number of physical cores
cpu_av = cpu_count()
print("Number of available CPU's: {}".format(cpu_av))

Number of available CPU's: 12


In [3]:
# Set the number CPUS that should be used per trial and dataloader
# If set to 1 number of cucurrent training networking is equal to this number
# In case of training with GPU this will be limited to number of models training simultaneously on GPU
# So number of CPU threads for each trial can be increased 
cpus_per_trial = 1
gpus_per_trial = 0

## Load data from files

In [4]:
# Needs reworking opening and closing the file too many times with multithreading
# which leads to crashing
# if clusters were saved as seperate images it would make sense to load them
class ClusterDataset_Partial(utils.Dataset):
    """Cluster dataset."""

    def __init__(self, npz_file, arrsize=20):
        """
        Args:
            npz_file (string): Path to the npz file.
        """
        self.data = np.load(npz_file, allow_pickle=True)
        self.arrsize = arrsize
        

    def __len__(self):
        return self.data["Size"]
    
    def __ReconstructCluster(self, ncell, modnum, row, col, cdata):
        _row = row.copy()
        _col = col.copy()       
        if not np.all( modnum[0] == modnum[:ncell]):
            ModNumDif = modnum - np.min(modnum[:ncell])
            mask = np.where(ModNumDif == 1)
            _col[mask] += 48
            mask = np.where(ModNumDif == 2)
            _row[mask] += 24
            mask = np.where(ModNumDif == 3)
            _row[mask] += 24
            _col[mask] += 48
        
        arr = np.zeros(( self.arrsize, self.arrsize ), dtype=np.float32)
  
        col_min = np.min(_col[:ncell])
        row_min = np.min(_row[:ncell])
        width = np.max(_col[:ncell]) - col_min
        height = np.max(_row[:ncell]) - row_min
        offset_h = int((self.arrsize-height)/2)
        offset_w = int((self.arrsize-width)/2)
        
        for i in range(ncell):
            arr[ _row[i] - row_min + offset_h, _col[i] - col_min + offset_w ] = cdata[i]

        return arr
    
    def __GetClusters(self, ncell, modnum, row, col, energy, timing):
        
        cluster_e = self.__ReconstructCluster(ncell, modnum, row, col, energy)
        cluster_t = self.__ReconstructCluster(ncell, modnum, row, col, timing)

        return np.stack([cluster_e, cluster_t], axis=1)
    
    def __ChangePID(self, PID):
        if (PID != 111) & (PID != 221):
            PID = np.int16(0)
        if PID == 111:
            PID = np.int16(1)
        if PID == 221:
            PID = np.int16(2)
        return PID

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        ClusterN = self.data['ClusterN'][idx]
        Cluster = self.data['Cluster'][idx]
        ClusterTiming = self.data['ClusterTiming'][idx]
        ClusterType = self.data['ClusterType'][idx]
        ClusterE = self.data['ClusterE'][idx]
        ClusterPt = self.data['ClusterPt'][idx]
        ClusterModuleNumber = self.data['ClusterModuleNumber'][idx]
        ClusterCol = self.data['ClusterCol'][idx]
        ClusterRow = self.data['ClusterRow'][idx]
        ClusterM02 = self.data['ClusterM02'][idx]
        ClusterM20 = self.data['ClusterM20'][idx]
        ClusterDistFromVert = self.data['ClusterDistFromVert'][idx]
        PartE = self.data['PartE'][idx]
        PartPt = self.data['PartPt'][idx]
        PartEta = self.data['PartEta'][idx]
        PartPhi = self.data['PartPhi'][idx]
        PartIsPrimary = self.data['PartIsPrimary'][idx]
        PartPID = self.data['PartPID'][idx]
       
        PartPID = self.__ChangePID(PartPID)
        
        img = self.__GetClusters(ClusterN, ClusterModuleNumber, ClusterRow, ClusterCol, Cluster, ClusterTiming)
        img = torch.from_numpy(img)
        
        features = { "ClusterType" : ClusterType, "ClusterE" : ClusterE, "ClusterPt" : ClusterPt
                    , "ClusterM02" : ClusterM02, "ClusterM20" : ClusterM20 , "ClusterDistFromVert" : ClusterDistFromVert}
        labels = { "PartE" : PartE, "PartPt" : PartPt, "PartEta" : PartEta, "PartPhi" : PartPhi
                  , "PartIsPrimary" : PartIsPrimary, "PartPID" : PartPID }
        
        return img, features, labels

In [5]:
class ClusterDataset_Full(utils.Dataset):
    """Cluster dataset."""

    def __init__(self, npz_file, arrsize=20):
        """
        Args:
            npz_file (string): Path to the npz file.
        """
        self.data = np.load(npz_file, allow_pickle=True)
        self.arrsize = arrsize
        self.ClusterN = self.data['ClusterN']
        self.Cluster = self.data['Cluster']
        self.ClusterTiming = self.data['ClusterTiming']
        self.ClusterType = self.data['ClusterType']
        self.ClusterE = self.data['ClusterE']
        self.ClusterPt = self.data['ClusterPt']
        self.ClusterModuleNumber = self.data['ClusterModuleNumber']
        self.ClusterCol = self.data['ClusterCol']
        self.ClusterRow = self.data['ClusterRow']
        self.ClusterM02 = self.data['ClusterM02']
        self.ClusterM20 = self.data['ClusterM20']
        self.ClusterDistFromVert = self.data['ClusterDistFromVert']
        self.PartE = self.data['PartE']
        self.PartPt = self.data['PartPt']
        self.PartEta = self.data['PartEta']
        self.PartPhi = self.data['PartPhi']
        self.PartIsPrimary = self.data['PartIsPrimary']
        self.PartPID = self.data['PartPID']

    def __len__(self):
        return self.data["Size"]
    
    def __ReconstructCluster(self, ncell, modnum, row, col, cdata):
        _row = row.copy()
        _col = col.copy()       
        if not np.all( modnum[0] == modnum[:ncell]):
            ModNumDif = modnum - np.min(modnum[:ncell])
            mask = np.where(ModNumDif == 1)
            _col[mask] += 48
            mask = np.where(ModNumDif == 2)
            _row[mask] += 24
            mask = np.where(ModNumDif == 3)
            _row[mask] += 24
            _col[mask] += 48
        
        arr = np.zeros(( self.arrsize, self.arrsize ), dtype=np.float32)
  
        col_min = np.min(_col[:ncell])
        row_min = np.min(_row[:ncell])
        width = np.max(_col[:ncell]) - col_min
        height = np.max(_row[:ncell]) - row_min
        offset_h = int((self.arrsize-height)/2)
        offset_w = int((self.arrsize-width)/2)
        
        for i in range(ncell):
            arr[ _row[i] - row_min + offset_h, _col[i] - col_min + offset_w ] = cdata[i]
        return arr
    
    def __GetClusters(self, ncell, modnum, row, col, energy, timing):       
        cluster_e = self.__ReconstructCluster(ncell, modnum, row, col, energy)
        cluster_t = self.__ReconstructCluster(ncell, modnum, row, col, timing)
        return np.stack([cluster_e, cluster_t], axis=0)
    
    def __ChangePID(self, PID):
        if (PID != 111) & (PID != 221):
            PID = np.int16(0)
        if PID == 111:
            PID = np.int16(1)
        if PID == 221:
            PID = np.int16(2)
        return PID

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        ClusterN = self.ClusterN[idx]
        Cluster = self.Cluster[idx]
        ClusterTiming = self.ClusterTiming[idx]
        ClusterType = self.ClusterType[idx]
        ClusterE = self.ClusterE[idx]
        ClusterPt = self.ClusterPt[idx]
        ClusterModuleNumber = self.ClusterModuleNumber[idx]
        ClusterCol = self.ClusterCol[idx]
        ClusterRow = self.ClusterRow[idx]
        ClusterM02 = self.ClusterM02[idx]
        ClusterM20 = self.ClusterM20[idx]
        ClusterDistFromVert = self.ClusterDistFromVert[idx]
        PartE = self.PartE[idx]
        PartPt = self.PartPt[idx]
        PartEta = self.PartEta[idx]
        PartPhi = self.PartPhi[idx]
        PartIsPrimary = self.PartIsPrimary[idx]
        PartPID = self.PartPID[idx]
        
        PartPID = self.__ChangePID(PartPID)
        
        img = self.__GetClusters(ClusterN, ClusterModuleNumber, ClusterRow, ClusterCol, Cluster, ClusterTiming)
        #img = torch.from_numpy(img)
        
        features = { "ClusterType" : ClusterType, "ClusterE" : ClusterE, "ClusterPt" : ClusterPt
                    , "ClusterM02" : ClusterM02, "ClusterM20" : ClusterM20 , "ClusterDist" : ClusterDistFromVert}
        labels = { "PartE" : PartE, "PartPt" : PartPt, "PartEta" : PartEta, "PartPhi" : PartPhi
                  , "PartIsPrimary" : PartIsPrimary, "PartPID" : PartPID }
        
        return (img, features, labels)

In [6]:
def load_data_train(path='/home/jhonerma/ML-Notebooks/CNN/Data/data_train.npz'):
    ds_train = ClusterDataset_Full(path)
    return ds_train

def load_data_test(path='/home/jhonerma/ML-Notebooks/CNN/Data/data_test.npz'):
    ds_test = ClusterDataset_Full(path)
    return ds_test

In [7]:
def get_dataloader(train_ds, val_ds, bs):
    dl_train = utils.DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=cpus_per_trial-1)
    dl_val = utils.DataLoader(val_ds, batch_size=bs * 2, shuffle=True, num_workers=cpus_per_trial-1)
    return  dl_train, dl_val

In [8]:
def unsqueeze_features(features):
    for key in features.keys():
        features[key] = features[key].view(-1,1)        
    return features

## Instance Noise

In [9]:
# https://arxiv.org/abs/1610.04490
INSTANCE_NOISE = False

def add_instance_noise(data, std=0.01):
    return data + torch.distributions.Normal(0, std).sample(data.shape) #.to(device)

## Define the network

In [10]:
class CNN(nn.Module):
    def __init__(self, l1=100, l2=50, l3=25, input_dim=(2,20,20), num_in_features=5):
        super(CNN, self).__init__()
        self.feature_ext = nn.Sequential(
            nn.Conv2d(2,10, kernel_size=3, padding=2),
            nn.ReLU(),
            nn.Conv2d(10,10, kernel_size=3,  padding=2),
            nn.ReLU(),
            nn.Conv2d(10,10, kernel_size=3, padding=0),
            nn.ReLU(),
            nn.Conv2d(10,5, kernel_size=1, padding=0),
            nn.ReLU()
        )
        
        self.flatten = nn.Flatten()
        
        # Gives the number of features after the conv layer
        num_features_after_conv = func_reduce(op_mul, list(self.feature_ext(torch.rand(1, *input_dim)).shape))
        
        self.dense_nn = nn.Sequential(
            nn.Linear(num_features_after_conv + num_in_features, l1),
            nn.ReLU(),
            nn.Linear(l1, l1),
            nn.ReLU(),
            nn.Linear(l1, l2),
            nn.ReLU(),
            nn.Linear(l2, l3),
            nn.ReLU(),
            nn.Linear(l3,3),
            nn.ReLU()
        )
        
    def forward(self, cluster, clusNumXYEPt):
        cluster = self.feature_ext(cluster)
        x = self.flatten(cluster)
        x = torch.cat([x, clusNumXYEPt], dim=1)
        logits = self.dense_nn(x)
        return logits

## Implement train and validation loop
Data[0] contains an image of of the cell energies and timings. <br>
Data[1] contains all features in a dict. Their shapes have to be changed from [batch_size] to [batch_size,1] for input into linear layers, implemented via function here <br>
Data[2] contains all labels

In [11]:
def train_loop(epoch, dataloader, model, loss_fn, optimizer, device="cpu"):

    size = len(dataloader.dataset)
    running_loss = 0.0
    epoch_steps = 0

    for batch, Data in enumerate(dataloader):
        Clusters = Data[0].to(device)
        Features = unsqueeze_features(Data[1])
        Labels = Data[2]
        
        ClusterProperties = torch.cat([Features["ClusterE"], Features["ClusterPt"], Features["ClusterM02"]
                                      , Features["ClusterM20"], Features["ClusterDist"]], dim=1)
        ClusterProperties.to(device)
               
        # zero parameter gradients
        optimizer.zero_grad()
        
        # prediction and loss
        pred = model(Clusters, ClusterProperties)
        loss = loss_fn(pred, Labels["PartPID"].long())
        
        # Backpropagation
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        epoch_steps += 1
        
        if batch % 10 == 9:
            print("[%d, %5d] loss: %.3f" % (epoch + 1, batch + 1,
                                            running_loss / epoch_steps))
            running_loss = 0.0        

In [12]:
def val_loop(epoch, dataloader, model, loss_fn, optimizer, device="cpu"):
    
    val_loss = 0.0
    val_steps = 0
    total = 0
    correct = 0
    size = len(dataloader.dataset)
    
    for batch, Data in enumerate(dataloader):
        with torch.no_grad():
            Clusters = Data[0].to(device)
            Features = unsqueeze_features(Data[1])
            Labels = Data[2]
            ClusterProperties = torch.cat([Features["ClusterE"], Features["ClusterPt"], Features["ClusterM02"]
                                      , Features["ClusterM20"], Features["ClusterDist"]], dim=1)           
            ClusterProperties.to(device)
            
            pred = model(Clusters, ClusterProperties)
            correct += (pred.argmax(1) == Labels["PartPID"]).type(torch.float).sum().item()

            loss = loss_fn(pred, Labels["PartPID"].long())#.item()
            val_loss += loss.cpu().numpy()
            val_steps += 1
    
    with tune.checkpoint_dir(epoch) as checkpoint_dir:
        _path = path.join(checkpoint_dir, "checkpoint")
        torch.save((model.state_dict(), optimizer.state_dict()), _path)
        
    tune.report(loss=(val_loss / val_steps), accuracy= correct / size)

## Implement method for accuracy testing on test set

In [13]:
def test_accuracy(model, device="cpu"):
    
    dataset_test = load_data_test()
    
    dataloader_test = utils.DataLoader(
        dataset_test, batch_size=4, shuffle=False, num_workers=2)
    
    correct = 0
    total = len(dataloader_test.dataset)
    
    with torch.no_grad():
        for batch, Data in enumerate(dataloader_test):
            Clusters = Data[0].to(device)
            Features = unsqueeze_features(Data[1])
            Labels = Data[2]
            ClusterProperties = torch.cat([Features["ClusterE"], Features["ClusterPt"], Features["ClusterM02"]
                                      , Features["ClusterM20"], Features["ClusterDist"]], dim=1)            
            ClusterProperties.to(device)
            
            
            pred = model(Clusters, ClusterProperties)
            correct += (pred.argmax(1) == Labels["PartPID"]).type(torch.float).sum().item()

    return correct / total

## Implement training routine

In [14]:
def train_model(config, checkpoint_dir=None):
    
    # load model
    model = CNN(config["l1"],config["l2"],config["l3"])
    
    # check for avlaible resource and initialize device
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    # send model to device
    model.to(device)
    
    # initialise loss function and opptimizer
    loss_fn = F.cross_entropy
    optimizer = torch.optim.Adam(model.parameters(),lr=config["lr"])
    
    # check whether checkpoint is available
    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
        
    # load dataset
    dataset_train = load_data_train()
    
    # split trainset in train and validation subsets
    test_abs = int(len(dataset_train) * 0.8)
    subset_train, subset_val = utils.random_split(
        dataset_train, [test_abs, len(dataset_train) - test_abs])

    # get dataloaders 
    dataloader_train, dataloader_val = get_dataloader(subset_train, subset_val, int(config["batch_size"]))
                                                      
    for epoch in range(100):
        train_loop(epoch, dataloader_train, model, loss_fn, optimizer, device=device)
        val_loop(epoch, dataloader_train, model, loss_fn, optimizer, device=device)                                              
    
    print("Finished Training")

## Setup all Ray Tune functionality and start training

In [15]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=1):
    
    # Setup hyperparameter-space to search
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 10)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 10)),
        "l3": tune.sample_from(lambda _: 2 ** np.random.randint(2, 10)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16, 32, 64])
    }

    # Init the scheduler
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    
    # Init the Reporter
    reporter = CLIReporter(
        parameter_columns=["l1", "l2", "l3", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"])
    
    #Get Current date and time
    timestr = strftime("%Y_%m_%d-%H:%M:%S")
    name = "ASHA-" + timestr
    
    # Init the run method
    result = tune.run(
        func_partial(train_model),
        name = name,
        resources_per_trial={"cpu": cpus_per_trial, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        local_dir = "./Ray_Results",
        scheduler=scheduler,
        progress_reporter=reporter)
    
    # Find best trial and use it on the testset
    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(best_trial.last_result["accuracy"]))
    
    best_trained_model = CNN(best_trial.config["l1"], best_trial.config["l2"], best_trial.config["l3"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)
    
    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)
    
    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))

In [16]:
main(num_samples=10, max_num_epochs=10, gpus_per_trial=gpus_per_trial)

2021-09-17 16:16:59,701	INFO registry.py:66 -- Detected unknown callable for trainable. Converting to class.


== Status ==
Memory usage on this node: 5.2/15.1 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 1.0/12 CPUs, 0/0 GPUs, 0.0/6.63 GiB heap, 0.0/3.32 GiB objects
Result logdir: /home/jhonerma/ML-Notebooks/CNN/Ray_Results/ASHA-2021_09_17-16:16:58
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+---------------------+----------+-------+------+------+------+-------------+--------------+
| Trial name          | status   | loc   |   l1 |   l2 |   l3 |          lr |   batch_size |
|---------------------+----------+-------+------+------+------+-------------+--------------|
| DEFAULT_ec003_00000 | RUNNING  |       |  128 |   32 |  128 | 0.0301595   |           32 |
| DEFAULT_ec003_00001 | PENDING  |       |  256 |   32 |  512 | 0.000866715 |            4 |
| DEFAULT_ec003_00002 | PENDING  |       |   32 |   64 |    4 | 0.000342879 |           64 |
| DEFAULT_ec003_00003 | PENDING  |       |   64 |  256 

[2m[36m(pid=39614)[0m [2,    10] loss: 0.928
[2m[36m(pid=39614)[0m [2,    20] loss: 0.365
[2m[36m(pid=39624)[0m [2,    10] loss: 0.926
[2m[36m(pid=39614)[0m [2,    30] loss: 0.300
[2m[36m(pid=39614)[0m [2,    40] loss: 0.267
[2m[36m(pid=39614)[0m [2,    50] loss: 0.159
[2m[36m(pid=39614)[0m [2,    60] loss: 0.125
[2m[36m(pid=39624)[0m [2,    20] loss: 0.445
[2m[36m(pid=39614)[0m [2,    70] loss: 0.128
[2m[36m(pid=39624)[0m [2,    30] loss: 0.178
[2m[36m(pid=39614)[0m [3,    10] loss: 1.120
[2m[36m(pid=39614)[0m [3,    20] loss: 0.450
[2m[36m(pid=39624)[0m [3,    10] loss: 0.918
[2m[36m(pid=39614)[0m [3,    30] loss: 0.252
[2m[36m(pid=39614)[0m [3,    40] loss: 0.211
Result for DEFAULT_ec003_00002:
  accuracy: 0.7006802721088435
  date: 2021-09-17_16-17-02
  done: true
  experiment_id: d2fe9ea29a2c41a290c658dd6506620f
  hostname: jhonerma-tuxedo
  iterations_since_restore: 10
  loss: 0.9903576970100403
  node_ip: 10.67.95.37
  pid: 39623
  sh

2021-09-17 16:17:06,658	INFO tune.py:561 -- Total run time: 6.96 seconds (6.75 seconds for the tuning loop).


Best trial config: {'l1': 64, 'l2': 256, 'l3': 16, 'lr': 0.0025922365967975597, 'batch_size': 2}
Best trial final validation loss: 0.8191089835521337
Best trial final validation accuracy: 0.6938775510204082
Best trial test set accuracy: 0.7021276595744681
