In [1]:
%matplotlib notebook  

In [2]:
!nvidia-smi

Tue Jul 13 10:18:11 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.56       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  On   | 00000000:18:00.0 Off |                  N/A |
| 29%   36C    P8    39W / 250W |   3830MiB / 11019MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  On   | 00000000:3B:00.0 Off |                  N/A |
| 29%   30C    P8    21W / 250W |   2910MiB / 11019MiB |      0%      Default |
|       

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
import pandas as pd

# Python 3 standard library
from pathlib import Path

from model.collectdata_kde_Ellipsoids import collect_t2kde_data
from model.kde_loss_E import Loss
from model.models_kde import TracksToKDE_Ellipsoids_DirtyDozen as Model
from model.training_kde import trainNet, select_gpu, Results
from model.plots import dual_train_plots, replace_in_ax

In [4]:
#The purpose of this Notebook is to compare different model's learning ability. Therefore, to even the playing field,
#all models will learn at the same rate and for the same number of epochs. 

#Number of Learning Iterations
n_epochs = 500
#Learning Rate
learning_rate = 1e-5
#Size of Batches
batch_size = 64

architectures = [[5, 7, 10, 25, 50, 75, 100, 150, 250, 350, 500]]
model_names = ['5to500_exp_inc_nodes']
# architectures = [[5]*11,
#                  [10]*11,
#                  [15]*11
#                  [20]*11
#                  [25]*11
#                  [30]*11
#                  [5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20],
#                  [5, 5, 5, 6, 7, 8, 10, 12, 14, 17, 20],
#                  [5, 6, 7, 8, 9, 11, 12, 14, 16, 17, 20]]
# model_names = ['5_nodes',
#                '10_nodes',
#                '15_nodes',
#                '20_nodes',
#                '25_nodes',
#                '30_nodes',
#                '5to20_lin_inc_nodes',
#                '5to20_quadly_inc_nodes',
#                '5to20_exp_inc_nodes']

device = select_gpu(2)
##device = "cpu"

1 available GPUs (initially using device 0):
  0 GeForce RTX 3090


In [5]:
#Empty List of file destinations
destinations = []

for i in range(len(model_names)):
    # Name is the output file name
    name = time.strftime('%d%B%Y') + '_DirtyDozen_SetVar_' + model_names[i] + '_' + str(1000) + '_epochs_' + str(learning_rate)
    
    # Make an output folder named "name" (change if you want)
    
    # Special instructions for those working on goofy at UC
    # Please be very careful to make sure that your folder
    # does not live in a subdirectory of your home directory
    # this disk has very little capacity. Instead, use 
    # a subdirectory in /share/lazy with a symbolic link to
    # it in this (the notebooks) subdirectory
    
    folderpath = 'jgo_files/' + name
    output = Path(folderpath)
    output.mkdir(exist_ok=True)
    
    #Data Frame for storing learning results
    results = pd.DataFrame([], columns=Results._fields)
    
    #Building the Model per settings written in 'architectures' array
    nOut1, nOut2, nOut3, nOut4, nOut5, nOut6, nOut7, nOut8, nOut9, nOut10, nOut11 = architectures[i]
    model = Model(nOut1,nOut2,nOut3,nOut4,nOut5,nOut6,nOut7,nOut8,nOut9,nOut10,nOut11)
    
    #Defining Loss Function
    loss = Loss(epsilon=3e-6, debug=False)
    
    ct = 0
    for child in model.children():
      print('ct, child = ',ct, "  ", child)
      if ct < 0:
        print("     About to set param.requires_grad=False for ct = ", ct, "params")
        for param in child.parameters():
            param.requires_grad = False 
      ct += 1
    
    ##   ML -> /share/lazy/sokoloff/ML
    model_dict = model.state_dict()
    ## mds 190725 for debugging
    print("for model_dict")
    index = 0
    for k,v in model_dict.items():
        print("index, k =  ",index,"  ",k)
        index = index+1
    ##    print("value = ", v)

    updated_dict = model_dict
    ##print("updated_dict = ",updated_dict)
    ## when starting "ab initio", reduce biases as the bias gets summed for each track
    ## contributing to the predicted KDE
    updated_dict["layer1.bias"] = 0.005*model_dict["layer1.bias"]
    updated_dict["layer2.bias"] = 0.005*model_dict["layer2.bias"]
    updated_dict["layer3.bias"] = 0.005*model_dict["layer3.bias"]
    updated_dict["layer4.bias"] = 0.005*model_dict["layer4.bias"]
    updated_dict["layer5.bias"] = 0.005*model_dict["layer5.bias"]
    updated_dict["layer6.bias"] = 0.005*model_dict["layer6.bias"]
    updated_dict["layer7.bias"] = 0.005*model_dict["layer7.bias"]
    updated_dict["layer8.bias"] = 0.005*model_dict["layer8.bias"]
    updated_dict["layer9.bias"] = 0.005*model_dict["layer9.bias"]
    updated_dict["layer10.bias"] = 0.005*model_dict["layer10.bias"]
    updated_dict["layer11.bias"] = 0.005*model_dict["layer11.bias"]
    model.load_state_dict(updated_dict,strict=False)

    model_dict = model.state_dict()
#     olddate = '12July2021'
#     previous = olddate + '_DirtyDozen_SetVar_' + model_names[i] + '_' + str(800) + '_epochs_' + str(learning_rate)
#     pretrained_dict = torch.load('jgo_files/'+ previous + '/' + previous +'_final.pyt')
#     print(" ")
#     print("  for pretrained_dict")
#     index = 0
#     for k,v in pretrained_dict.items():
#         print("index, k =  ",index,"  ",k)
#         index = index+1
     ##print("model_dict instantiated")
    # 1. filter out unnecessary keys
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    print("pretrained_dict iterated")
    # 2. overwrite entries in the existing state dict
    model_dict.update(pretrained_dict) 
    ##
    #   when starting from a model with a fully connected last layer rather than a convolutional layer
    # 3. load the new state dict
    #   need to use strict=False as the two models state model attributes do not agree exactly
    #   see https://pytorch.org/docs/master/_modules/torch/nn/modules/module.html#Module.load_state_dict

    model.load_state_dict(pretrained_dict,strict=False)

    
    #Transfering Model to GPU (or CPU, if chosen)
    model = model.to(device)
    
    #Defining Optimizer (Reminder to ask about that)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    
    # Data Import. If we are using a larger dataset (240K events, with the datasets above, and 11 GB  of GPU memory),
    # the dataset will overflow the GPU memory; device=device will allow the data to move back
    # and forth between the CPU and GPU memory. While this allows use of a larger dataset, it slows
    # down performance by about 10%.  So comment out when not needed.
    
    # Defining training/validation partition point
    slice_point = 18000
    
    #Training Data
    train_loader = collect_t2kde_data('dataAA/20K_POCA_kernel_evts_200926.h5', batch_size=batch_size, device=device,
                           slice = slice(None,slice_point))
                            
    # Validation dataset. You can slice to reduce the size.
    val_loader = collect_t2kde_data('dataAA/20K_POCA_kernel_evts_200926.h5', batch_size=batch_size,                                device=device,
                           slice = slice(slice_point,None))
    
    for result in trainNet(model, optimizer, loss, train_loader, val_loader, n_epochs, epoch_start=len(results), notebook=True):
        
        #Record Epoch Results
        results = results.append(pd.Series(result._asdict()), ignore_index=True)

        # Save each model state dictionary
        torch.save(model.state_dict(), output / f'{name}_{result.epoch}.pyt')
    
    #Save final point in model
    torch.save(model.state_dict(), output / f'{name}_final.pyt')
    results.to_hdf(output / f'{name}_stats.hdf5', 'results')
    destinations.append(output / f'{name}_stats.hdf5')
    #

ct, child =  0    Linear(in_features=9, out_features=5, bias=True)
ct, child =  1    Linear(in_features=5, out_features=7, bias=True)
ct, child =  2    Linear(in_features=7, out_features=10, bias=True)
ct, child =  3    Linear(in_features=10, out_features=25, bias=True)
ct, child =  4    Linear(in_features=25, out_features=50, bias=True)
ct, child =  5    Linear(in_features=50, out_features=75, bias=True)
ct, child =  6    Linear(in_features=75, out_features=100, bias=True)
ct, child =  7    Linear(in_features=100, out_features=150, bias=True)
ct, child =  8    Linear(in_features=150, out_features=250, bias=True)
ct, child =  9    Linear(in_features=250, out_features=350, bias=True)
ct, child =  10    Linear(in_features=350, out_features=500, bias=True)
ct, child =  11    Linear(in_features=500, out_features=4000, bias=True)
for model_dict
index, k =   0    layer1.weight
index, k =   1    layer1.bias
index, k =   2    layer2.weight
index, k =   3    layer2.bias
index, k =   4    layer3

NameError: name 'pretrained_dict' is not defined

In [None]:
#Print File destinations
stringer = ''
for file in destinations:
    stringer = stringer + '\'' + name + '\', '
print(stringer)

In [None]:
folderpath = 'jgo_files/' + name
output = Path(folderpath)
torch.save(model.state_dict(), output / f'{name}_final.pyt')
results.to_hdf(output / f'{name}_stats.hdf5', 'results')

In [None]:
quit()