# Library loading

In [2]:
%matplotlib inline

import pandas as pd # manipulate dataframes
import matplotlib
import matplotlib.pyplot as plt # plotting
import numpy as np

import time, h5py, neuravi, torch

from sklearn.metrics import mean_squared_error

from tqdm import tqdm 

# Report whether PyTorch can see a CUDA-capable GPU
print("CUDA AVAILABLE? ",torch.cuda.is_available())

def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_kind = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_kind)

# Resolve the device once; it is passed to the data loader and to train_model below
device = get_default_device()
print(device)

# Fixing random seeds for reproducibility
torch.manual_seed(42)
# np.random.seed must be *called*: assigning 42 to it would replace the
# function with an integer and leave NumPy's global generator unseeded.
np.random.seed(42)

CUDA AVAILABLE?  True
cuda


# Training function for several models

In [3]:
def train_model(ds,nb_neurons,nb_layers,p_drop, name, device, patience=100, min_delta=0.05):
    """Build one neuravi model, pretrain it, train it, then release its memory.

    Parameters
    ----------
    ds : object returned by neuravi.data_loader
        Dataset handed to neuravi.training; its ``nb_channels_raman``
        attribute is also forwarded to the model constructor.
    nb_neurons, nb_layers : int
        Forwarded positionally to neuravi.model (presumably the width and the
        number of hidden layers -- confirm against neuravi).
    p_drop : float
        Forwarded to neuravi.model as ``p_drop``.
    name : str
        Forwarded to neuravi.training. Callers pass a ``.pth`` path, so this is
        presumably where the model is saved -- confirm against neuravi.
    device : torch.device
        Device the model and the criterion are moved to.
    patience : int, optional
        Forwarded to the main training call as ``train_patience``.
    min_delta : float, optional
        Forwarded to the main training call as ``min_delta``.

    Returns
    -------
    None
        The loss records returned by neuravi.training are discarded.
    """
    # declaring model; the literal 4 is the first positional argument of
    # neuravi.model (presumably the number of inputs -- TODO confirm)
    neuralmodel = neuravi.model(4,nb_neurons,nb_layers,ds.nb_channels_raman,p_drop=p_drop)
    neuralmodel.output_bias_init() # we initialize the output bias
    neuralmodel.to(device) # we send the neural net on device

    # The optimizer is created only once the parameters live on their final
    # device, as recommended by the torch.optim documentation.
    optimizer = torch.optim.Adam(neuralmodel.parameters(), lr = 0.001)

    # the criterion : MSE, sent on device
    criterion = torch.nn.MSELoss()
    criterion.to(device)

    # pretraining; only the model (first returned item) is kept, the two loss
    # records are dropped immediately
    neuralmodel = neuravi.training(neuralmodel,ds,criterion,optimizer,name,
                                   verbose=False, mode="pretrain")[0]

    # training
    neuralmodel = neuravi.training(neuralmodel,ds,criterion,optimizer,name,
                                   train_patience=patience,min_delta=min_delta,verbose=False)[0]

    # to avoid any problem with CUDA memory: the optimizer also references the
    # parameters (and holds its own state), so it has to be dropped as well
    # before the cache is emptied.
    del neuralmodel, criterion, optimizer
    torch.cuda.empty_cache()

# Dataset size experiment

In [2]:
# paths of data and results (one save-name prefix per dataset, same order)
path_data = ["./data/DataSet_0p10val.hdf5",
             "./data/DataSet_0p20val.hdf5",
             "./data/DataSet_0p30val.hdf5",
             "./data/DataSet_0p40val.hdf5",
             "./data/DataSet_0p50val.hdf5",
             "./data/DataSet_0p60val.hdf5",
             "./data/DataSet_0p70val.hdf5",
             "./data/DataSet_0p80val.hdf5"]
save_names = ["./model/exp_trainsize/model_l4_n200_p10_data0p10val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p20val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p30val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p40val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p50val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p60val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p70val",
              "./model/exp_trainsize/model_l4_n200_p10_data0p80val"]

# the selected architecture
# NOTE(review): the save names above are tagged "n200" while nb_neurons is 300
# -- confirm which of the two is the intended value.
nb_neurons = 300
nb_layers = 4
p_drop = 0.1

#
# Main loop for the experiment: 10 independently trained models per dataset
#
for i, (dataset_path, save_name) in enumerate(zip(path_data, save_names)):
    print('Experiment on dataset {} started...'.format(i))
    # Load only the dataset of the current experiment (the loader takes a
    # single path, not the whole list of paths).
    ds = neuravi.data_loader(dataset_path,
                             "./data/NKAS_Raman.hdf5",
                             "./data/NKAS_density.hdf5",
                             "./data/NKAS_optical.hdf5",device)

    for j in tqdm(range(10)):
        train_model(ds, nb_neurons, nb_layers, p_drop,
                    save_name+"_{}.pth".format(j), device)

Experiment on dataset 0 started...


100%|██████████| 10/10 [05:14<00:00, 31.46s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 1 started...


100%|██████████| 10/10 [06:35<00:00, 39.53s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 2 started...


100%|██████████| 10/10 [06:53<00:00, 41.31s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 3 started...


100%|██████████| 10/10 [05:19<00:00, 31.95s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 4 started...


100%|██████████| 10/10 [05:48<00:00, 34.88s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 5 started...


100%|██████████| 10/10 [06:11<00:00, 37.18s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 6 started...


100%|██████████| 10/10 [03:23<00:00, 20.34s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Experiment on dataset 7 started...


100%|██████████| 10/10 [03:48<00:00, 22.88s/it]


# Architecture experiment

In [4]:
#
# Random search over architectures
#
nb_exp = 2000

# One random draw per experiment (draw order matters for reproducibility):
# neurons in [10, 500), layers in [1, 10), dropout in [0, 0.5] rounded to 2 decimals
nb_neurons = np.random.randint(10,high=500,size=nb_exp)
nb_layers = np.random.randint(1,high=10,size=nb_exp)
p_drop = np.around(np.random.random_sample(nb_exp)*0.5,2)

# A single dataset is shared by every experiment; the device is handed to the loader
ds = neuravi.data_loader("./data/DataSet_0p20val.hdf5",
                         "./data/NKAS_Raman.hdf5",
                         "./data/NKAS_density.hdf5",
                         "./data/NKAS_optical.hdf5",
                         device)

for i in tqdm(range(nb_exp)):
    layers_i, neurons_i, drop_i = nb_layers[i], nb_neurons[i], p_drop[i]

    # File name encodes the drawn architecture.
    # NOTE(review): two experiments drawing the same (layers, neurons, dropout)
    # triple get the same name -- presumably the later one overwrites the earlier; confirm.
    name = f"./model/exp_arch/l{layers_i!s}_n{neurons_i!s}_p{drop_i!s}.pth"

    train_model(ds, neurons_i, layers_i, drop_i, name, device,
                patience=100, min_delta=0.05)

100%|██████████| 2000/2000 [26:31:37<00:00, 47.75s/it]   
