In [1]:
# Import Dependencies

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
#%matplotlib notebook

import sys
sys.path.append("../new_notebooks/ipynb/dlp_opendata_api")
sys.path.append("../new_notebooks/ipynb")
from osf.image_api import image_reader_3d
from osf.particle_api import *
from osf.cluster_api import *

from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sparseconvnet as scn
import glob
import os.path as osp
import numpy as np

import os
# Restrict this process to the GPU with index 2.
# NOTE(review): torch is imported above this line; the variable only takes
# effect if it is set before CUDA is initialised -- confirm on your setup.
os.environ["CUDA_VISIBLE_DEVICES"]="2"

# Number of validation samples drawn by the dev-set RandomSampler further down.
TESTSET_SIZE = 100

Welcome to JupyROOT 6.14/04


In [2]:
#ls /gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10
# True when PyTorch can see a CUDA device. Later cells call .cuda()
# unconditionally, so this flag is informational only.
use_cuda = torch.cuda.is_available()

In [3]:
print(use_cuda)

True


In [4]:
class UResNet(torch.nn.Module):
    """
    Sparse U-ResNet: an scn feature extractor followed by a per-point linear head.

    Args:
        dim: spatial dimension handed to every scn layer.
        size: spatial size handed to scn.InputLayer.
        nFeatures: width of the first convolution; UNet level k uses k * nFeatures planes.
        depth: number of UNet levels.
        nClasses: output width of the final linear layer.
    """
    def __init__(self, dim=3, size=192, nFeatures=16, depth=5, nClasses=5):
        import sparseconvnet as scn
        super(UResNet, self).__init__()
        block_reps = 2  # conv block repetition factor inside the UNet
        down_kernel, down_stride = 2, 2  # downsample = [filter size, filter stride]
        planes_per_level = [level * nFeatures for level in range(1, depth + 1)]
        in_channels = 1  # one input feature per point

        # Layers are appended in a fixed order; the resulting sub-module indices
        # ("0", "1", ...) are part of the state-dict keys used by checkpoints.
        backbone = scn.Sequential()
        # NOTE(review): mode=3 is believed to sum features of duplicate coordinates -- confirm in scn docs.
        backbone.add(scn.InputLayer(dim, size, mode=3))
        # Kernel size 3, no bias.
        backbone.add(scn.SubmanifoldConvolution(dim, in_channels, nFeatures, 3, False))
        backbone.add(scn.UNet(dim, block_reps, planes_per_level,
                              residual_blocks=True,
                              downsample=[down_kernel, down_stride]))
        backbone.add(scn.BatchNormReLU(nFeatures))
        backbone.add(scn.OutputLayer(dim))

        self.sparseModel = backbone
        self.linear = torch.nn.Linear(nFeatures, nClasses)

    def forward(self, point_cloud):
        """
        Run the sparse backbone, then the linear head.

        point_cloud is forwarded unchanged to self.sparseModel, whose first
        layer is scn.InputLayer. The original note described each point as
        3 spatial coordinates + 1 batch coordinate + 1 feature and assumed a
        minibatch size of 1 -- TODO confirm the exact container/shape expected.
        """
        features = self.sparseModel(point_cloud)
        return self.linear(features)

In [5]:
def get_unet(fname, dimension=3, size=192, nFeatures=16, depth=5, nClasses=5):
    """
    Build a UResNet, load weights from `fname`, and return its sparse backbone.

    The network is wrapped in nn.DataParallel before loading (presumably so
    that parameter names match a checkpoint saved from a DataParallel model --
    confirm). The checkpoint is read onto the CPU and must contain a
    'state_dict' entry that matches the wrapped model exactly (strict=True).

    Returns:
        The `sparseModel` sub-module only; the linear head is discarded.
    """
    wrapped = nn.DataParallel(
        UResNet(dim=dimension, size=size, nFeatures=nFeatures,
                depth=depth, nClasses=nClasses)
    )
    snapshot = torch.load(fname, map_location='cpu')
    wrapped.load_state_dict(snapshot['state_dict'], strict=True)
    # Only the pre-trained feature extractor is needed downstream.
    return wrapped.module.sparseModel

In [6]:
# Pre-trained UResNet snapshot; its sparse backbone is moved to the GPU and
# switched to eval mode in one step.
fname = '/gpfs/slac/staas/fs1/g/neutrino/.scn_paper/new/sparse_is192_uns5_uf16_bs64/weights3/snapshot-29999.ckpt'
unet = get_unet(fname).cuda().eval()

In [7]:
class ClusteringAEData(Dataset):
    """
    Dataset that pairs per-point cluster labels with pre-trained UNet features.

    Each item is produced by reading one entry from the cluster and energy
    readers and pushing it through the module-level `unet` on the GPU, so the
    class requires CUDA and a global `unet` to be defined before indexing.
    """
    def __init__(self, root, numPixels=192, filenames=None):
        """
        Initialize Clustering Dataset

        Inputs:
            - root: root directory of dataset (stored on the instance; the
              files themselves are taken from `filenames`).
            - numPixels: image size tag, stored as a string in self.numPixels.
            - filenames: optional pair [energy_files, cluster_files]. Both
              sequences are copied and sorted; the caller's objects are left
              untouched.
        """
        self.cluster_filenames = []
        self.energy_filenames = []
        self.root = root
        self.numPixels = str(numPixels)

        if filenames:
            # sorted() returns new lists, so the caller's lists are never
            # reordered behind their back (list.sort() on the passed-in
            # objects would do exactly that).
            self.energy_filenames = sorted(filenames[0])
            self.cluster_filenames = sorted(filenames[1])
            print(self.energy_filenames)

        self.cluster_reader = cluster_reader(*self.cluster_filenames)
        self.energy_reader = image_reader_3d(*self.energy_filenames)
        self.len = self.energy_reader.entry_count()
        assert self.len == self.cluster_reader.entry_count(), \
            "energy and cluster files contain a different number of entries"

    def __getitem__(self, index):
        """
        Get a sample from dataset.

        Returns:
            (out, label): `out` is the output of the global `unet` for entry
            `index` (computed on the GPU without gradients); `label` is the
            cluster label as a LongTensor with a trailing singleton dimension,
            left on the CPU.
        """
        voxel, label = self.cluster_reader.get_image(index)
        # The energy reader yields three values; only the middle one is used.
        _, energy, _ = self.energy_reader.get_image(index)
        voxel, label = torch.from_numpy(voxel), torch.from_numpy(label)
        energy = torch.from_numpy(energy)
        energy = torch.unsqueeze(energy, dim=1)
        label = torch.unsqueeze(label, dim=1).type(torch.LongTensor)
        voxel = voxel.cuda()
        energy = energy.cuda()
        with torch.no_grad():
            out = unet((voxel, energy))
        return out, label

    def __len__(self):
        """
        Total number of samples in dataset.
        """
        return self.len

In [8]:
def ae_collate(batch):
    """
    Collate samples without stacking them.

    Each sample's first element goes into one list and its second element
    into another, so samples of different sizes can share a batch.

    Returns:
        [data, target]: two lists, each as long as `batch`.
    """
    data, target = [], []
    for sample in batch:
        data.append(sample[0])
        target.append(sample[1])
    return [data, target]

In [9]:
# Dataset location -- replace with your own path to the root folder.
root = '/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10'

# Files 0-7 form the training split and file 8 the validation split.
# (A test split built from file 9 was commented out in the original cell.)
trainset_cluster = []
trainset_energy = []
for file_idx in range(8):
    trainset_cluster.append(root + '/cluster/dlprod_cluster_192px_0{}.root'.format(file_idx))
    trainset_energy.append(root + '/dlprod_192px_0{}.root'.format(file_idx))

devset_cluster = [root + '/cluster/dlprod_cluster_192px_0{}.root'.format(8)]
devset_energy = [root + '/dlprod_192px_0{}.root'.format(8)]

# Show each cluster file next to the energy file it is paired with.
for cluster_path, energy_path in zip(trainset_cluster, trainset_energy):
    print(cluster_path)
    print(energy_path)

for cluster_path, energy_path in zip(devset_cluster, devset_energy):
    print(cluster_path)
    print(energy_path)

trainset = ClusteringAEData(root, 192, filenames=[trainset_energy, trainset_cluster])
devset = ClusteringAEData(root, 192, filenames=[devset_energy, devset_cluster])
print('Number of entries in training set: {}'.format(len(trainset)))
print('Number of entries in validation set: {}'.format(len(devset)))

/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_00.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/dlprod_192px_00.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_01.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/dlprod_192px_01.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_02.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/dlprod_192px_02.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_03.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/dlprod_192px_03.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_04.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/dlprod_192px_04.root
/gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10/cluster/dlprod_cluster_192px_05.root
/gpfs/slac/staas/fs1/g/n

In [10]:
# One event per batch, shuffled. ae_collate keeps samples as lists instead of
# stacking them. The dataset calls .cuda() inside __getitem__, and loading is
# kept in the main process (num_workers=0).
trainloader = DataLoader(
    trainset,
    batch_size=1,
    shuffle=True,
    collate_fn=ae_collate,
    num_workers=0,
    pin_memory=False,
)

In [11]:
entry, labels = trainset[48]

In [12]:
entry.shape

torch.Size([5180, 16])

In [13]:
labels.shape

torch.Size([5180, 1])

In [14]:
from loss import DiscriminativeLoss

In [15]:
criterion = DiscriminativeLoss()

In [16]:
criterion(entry, labels)

tensor(1.7519, device='cuda:0')

In [17]:
class ClusteringMLP(nn.Module):
    """
    Three-layer perceptron applied independently to every input row.

    Layout: Linear -> BatchNorm1d -> leaky ReLU, twice, then a final Linear
    that produces `nClasses` values per row. All linear weights use Kaiming
    normal initialisation.
    """
    def __init__(self, input_dim=16, nHidden1=32, nHidden2=16, nClasses=3):
        super(ClusteringMLP, self).__init__()
        # Layers are created and registered in this exact order so that
        # parameter ordering and random-number consumption stay fixed.
        self.fc1 = self._kaiming_linear(input_dim, nHidden1)
        self.fc2 = self._kaiming_linear(nHidden1, nHidden2)
        self.fc3 = self._kaiming_linear(nHidden2, nClasses)

        self.bn_1 = nn.BatchNorm1d(nHidden1)
        self.bn_2 = nn.BatchNorm1d(nHidden2)

    @staticmethod
    def _kaiming_linear(n_in, n_out):
        """Return an nn.Linear whose weight has been Kaiming-normal initialised."""
        dense = nn.Linear(n_in, n_out)
        nn.init.kaiming_normal_(dense.weight)
        return dense

    def forward(self, x):
        """Map rows of width `input_dim` to rows of width `nClasses`."""
        hidden = F.leaky_relu(self.bn_1(self.fc1(x)))
        hidden = self.bn_2(self.fc2(hidden))
        hidden = F.leaky_relu(hidden)
        return self.fc3(hidden)

In [64]:
# Clustering head with default sizes, placed on the GPU.
model = ClusteringMLP().cuda()

In [65]:
# Smoke test: run the untrained MLP on the sample cached from trainset[48]
# and evaluate the discriminative loss on its output.
out = model(entry)
loss = criterion(out, labels)

In [66]:
loss

tensor(5.6401, device='cuda:0', grad_fn=<AddBackward0>)

In [67]:
# Pull a single batch to inspect what ae_collate produces.
trainiter = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is the Python 2
# spelling and is not part of the Python 3 iterator protocol.
x_batch, y_batch = next(trainiter)

In [68]:
x_batch[0].shape

torch.Size([4297, 16])

In [69]:
import csv

# Three CSV logs, each opened for writing (existing files are truncated).
# NOTE(review): no close() for these handles is visible in this notebook section.

# Training loss log.
f_train_loss = open('train_loss.csv', 'w')
trainlossWriter = csv.writer(f_train_loss, delimiter=',')

# Training accuracy log.
f_acc = open('train_acc.csv', 'w')
trainaccWriter = csv.writer(f_acc, delimiter=',')

# Validation loss log.
f_dev_loss = open('dev_loss.csv', 'w')
devlossWriter = csv.writer(f_dev_loss, delimiter=',')

In [70]:
# Number of passes over the training set.
training_epochs=10
# Adam with library-default hyperparameters over the clustering MLP's parameters.
optimizer = optim.Adam(model.parameters())
n_params = sum(p.nelement() for p in model.parameters())
print('#classifer parameters', n_params)

('#classifer parameters', 1219)


In [56]:
def save_checkpoint(checkpoint_path, model, optimizer):
    """
    Write model and optimizer state to `checkpoint_path` with torch.save.

    The saved object is a dict with two entries:
        'state_dict': model.state_dict()
        'optimizer':  optimizer.state_dict()
    """
    payload = {}
    payload['state_dict'] = model.state_dict()
    payload['optimizer'] = optimizer.state_dict()
    torch.save(payload, checkpoint_path)
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer=None, map_location=None):
    """
    Restore state written by save_checkpoint.

    Args:
        checkpoint_path: file holding a dict with 'state_dict' and 'optimizer'.
        model: module whose parameters are overwritten in place.
        optimizer: optimizer to restore; pass None (default) to load only the
            model weights, e.g. for inference.
        map_location: forwarded to torch.load, so a checkpoint saved on one
            device can be opened on another (e.g. 'cpu'). None keeps
            torch.load's default behaviour.
    """
    state = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(state['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)

In [61]:
# Each pass over devloader draws TESTSET_SIZE validation entries at random,
# with replacement, in batches of 16.
sampler = torch.utils.data.sampler.RandomSampler(
    devset, replacement=True, num_samples=TESTSET_SIZE)
devloader = DataLoader(devset, batch_size=16, sampler=sampler, collate_fn=ae_collate)

In [62]:
def test(model, devloader):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for k, batch in enumerate(devloader):
            x_batch = batch[0]
            y_batch = batch[1]
            for j, data in enumerate(x_batch):
                out = model(data)
                loss = criterion(out, y_batch[j])
                test_loss += loss
    return test_loss.item() / float(TESTSET_SIZE)

In [71]:
import time

trainset_len = len(trainset)
# Evaluate on the dev sampler every this many training examples.
dev_eval_interval = 100

for epoch in range(1, training_epochs+1):
    start = time.time()
    train_loss = 0.0
    n_seen = 0
    for i, batch in enumerate(trainloader):
        # test() switches the model to eval mode, so re-enable training mode
        # on every iteration.
        model.train()
        optimizer.zero_grad()
        # batch_size is 1: take the single sample out of the collated lists.
        data = batch[0][0]
        label = batch[1][0]
        out = model(data)
        # The dataset leaves labels on the CPU, so the loss is computed there.
        out = out.cpu()
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()

        # Convert once to a Python float; logging then holds no reference to
        # the autograd graph.
        loss_value = loss.item()
        train_loss += loss_value
        n_seen += 1
        print("Examples = {}/{}, Loss = {}".format(i+1, trainset_len, loss_value))
        trainlossWriter.writerow([loss_value])
        if i % dev_eval_interval == 0:
            dev_loss = test(model, devloader)
            print(dev_loss)
            # Record the validation loss in the CSV opened for it.
            devlossWriter.writerow([dev_loss])
    # Push buffered rows to disk so an interrupted run keeps its logs.
    f_train_loss.flush()
    f_dev_loss.flush()
    print("Epoch {} done in {:.1f}s, mean train loss = {}".format(
        epoch, time.time() - start, train_loss / max(n_seen, 1)))
    save_checkpoint('checkpoint{}.ckpt'.format(epoch), model, optimizer)

Examples = 1/80000, Loss = 3.50177431107
6.8834375
Examples = 2/80000, Loss = 0.00177151232492
Examples = 3/80000, Loss = 4.65874767303
Examples = 4/80000, Loss = 4.93723869324
Examples = 5/80000, Loss = 6.28511714935
Examples = 6/80000, Loss = 5.02825737
Examples = 7/80000, Loss = 0.0210904404521
Examples = 8/80000, Loss = 5.36056137085
Examples = 9/80000, Loss = 5.4809384346
Examples = 10/80000, Loss = 2.99925899506
Examples = 11/80000, Loss = 4.93933963776
Examples = 12/80000, Loss = 2.34561610222
Examples = 13/80000, Loss = 6.68533849716
Examples = 14/80000, Loss = 5.29439067841
Examples = 15/80000, Loss = 4.82317972183
Examples = 16/80000, Loss = 0.0301759261638
Examples = 17/80000, Loss = 5.05984973907
Examples = 18/80000, Loss = 5.90257072449
Examples = 19/80000, Loss = 5.78770065308
Examples = 20/80000, Loss = 4.49145460129
Examples = 21/80000, Loss = 4.50738763809
Examples = 22/80000, Loss = 3.48338150978
Examples = 23/80000, Loss = 4.33403110504
Examples = 24/80000, Loss = 2.

KeyboardInterrupt: 

Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=1093 fEntryCurrent=2158 fNextClusterStart=2186 but fCurrentEntry should not be in between the two
Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=4324 fEntryCurrent=5370 fNextClusterStart=5405 but fCurrentEntry should not be in between the two
Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=0 fEntryCurrent=967 fNextClusterStart=988 but fCurrentEntry should not be in between the two
Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=0 fEntryCurrent=4240 fNextClusterStart=4352 but fCurrentEntry should not be in between the two
Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=0 fEntryCurrent=4292 fNextClusterStart=4406 but fCurrentEntry should not be in between the two
Error in <TTreeCache::FillBuffer>: Inconsistency: fCurrentClusterStart=5405 fEntryCurrent=7553 fNextClusterStart=7567 but fCurrentEntry should not be in between the t