In [1]:
# Import Dependencies

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
#%matplotlib notebook

import sys
sys.path.append("./dlp_opendata_api")
#sys.path.append("../new_notebooks/ipynb")
from osf.image_api import image_reader_3d
from osf.particle_api import *
from osf.cluster_api import *

from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sparseconvnet as scn
import glob
import os.path as osp
import numpy as np

import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"

Welcome to JupyROOT 6.14/04


In [2]:
# Number of validation entries drawn for each evaluation pass; passed as the
# sample count of the dev-set RandomSampler further down.
TESTSET_SIZE = 50

In [3]:
#ls /gpfs/slac/staas/fs1/g/neutrino/kterao/data/dlprod_ppn_v10
# Record whether PyTorch reports an available CUDA device.
use_cuda = torch.cuda.is_available()

In [4]:
# Show the CUDA availability flag.
print(use_cuda)

False


In [5]:
class UResNet(torch.nn.Module):
    """
    Sparse U-ResNet followed by a dense linear classification layer.

    The sparse part is stored as ``self.sparseModel`` and the final dense
    layer as ``self.linear``.
    """

    def __init__(self, dim=3, size=192, nFeatures=16, depth=5, nClasses=5):
        """
        Assemble the sparse network and the linear head.

        Inputs:
            - dim: spatial dimension handed to every sparseconvnet layer.
            - size: spatial size handed to scn.InputLayer.
            - nFeatures: feature count of the first UNet level; level k
              (1-based) uses k * nFeatures features.
            - depth: number of UNet levels.
            - nClasses: output width of the final linear layer.
        """
        import sparseconvnet as scn
        super(UResNet, self).__init__()

        block_reps = 2      # Conv block repetition factor
        down_kernel = 2     # downsampling filter size
        down_stride = 2     # downsampling filter stride
        in_channels = 1     # number of input feature channels
        # UNet number of features per level: m, 2m, ..., depth * m
        level_planes = [nFeatures * level for level in range(1, depth + 1)]

        net = scn.Sequential()
        net.add(scn.InputLayer(dim, size, mode=3))
        # Kernel size 3, no bias
        net.add(scn.SubmanifoldConvolution(dim, in_channels, nFeatures, 3, False))
        net.add(scn.UNet(dim, block_reps, level_planes, residual_blocks=True,
                         downsample=[down_kernel, down_stride]))
        net.add(scn.BatchNormReLU(nFeatures))
        net.add(scn.OutputLayer(dim))

        self.sparseModel = net
        self.linear = torch.nn.Linear(nFeatures, nClasses)

    def forward(self, point_cloud):
        """
        Run ``point_cloud`` through the sparse model, then the linear layer.

        ``point_cloud`` is handed to ``self.sparseModel`` unchanged.
        NOTE(review): elsewhere in this notebook the sparse model alone is
        called with a ``(voxel, energy)`` tuple - confirm that this is the
        input format expected here as well.
        """
        features = self.sparseModel(point_cloud)
        return self.linear(features)

In [6]:
def get_unet(fname, dimension=3, size=192, nFeatures=16, depth=5, nClasses=5):
    """
    Build a UResNet, load a checkpoint into it and return its sparse part.

    Inputs:
        - fname: path of the checkpoint; its 'state_dict' entry is loaded
          onto the CPU with strict key matching.
        - dimension, size, nFeatures, depth, nClasses: forwarded to UResNet.

    Returns:
        The ``sparseModel`` sub-network only; the linear head is dropped.
    """
    # The network is wrapped in DataParallel before loading.
    # NOTE(review): presumably because the checkpoint keys carry the
    # DataParallel 'module.' prefix - confirm.
    wrapped = nn.DataParallel(
        UResNet(dim=dimension, size=size, nFeatures=nFeatures,
                depth=depth, nClasses=nClasses))
    weights = torch.load(fname, map_location='cpu')['state_dict']
    wrapped.load_state_dict(weights, strict=True)
    # just return the pre-trained unet
    return wrapped.module.sparseModel

In [7]:
# Load the pre-trained sparse U-ResNet, keep it on the CPU and switch it to
# evaluation mode. ClusteringAEData.__getitem__ reads this module-level `unet`.
fname = './unet.ckpt'
unet = get_unet(fname).cpu().eval()

In [8]:
class ClusteringAEData(Dataset):
    """
    A customized dataset for clustering.

    Every item pairs the output of the module-level ``unet`` for one entry
    with that entry's cluster labels.
    """
    def __init__(self, root, numPixels=192, filenames=None):
        """
        Initialize Clustering Dataset

        Inputs:
            - root: root directory of dataset (stored only; the file paths
              come from ``filenames``).
            - numPixels: stored as a string in ``self.numPixels``.
            - filenames: optional pair ``[energy_files, cluster_files]``.
              Each list is copied and sorted; the caller's lists are left
              untouched.

        Raises:
            AssertionError if the energy and cluster readers report a
            different number of entries.
        """
        self.root = root
        self.numPixels = str(numPixels)

        if filenames:
            # Copy first: sorting the caller's own lists in place would
            # silently reorder the arguments they passed in.
            energy_files = list(filenames[0])
            cluster_files = list(filenames[1])
            print(energy_files)
        else:
            energy_files = []
            cluster_files = []

        self.energy_filenames = sorted(energy_files)
        self.cluster_filenames = sorted(cluster_files)
        self.cluster_reader = cluster_reader(*self.cluster_filenames)
        self.energy_reader = image_reader_3d(*self.energy_filenames)
        self.len = self.energy_reader.entry_count()
        cluster_entries = self.cluster_reader.entry_count()
        assert self.len == cluster_entries, (
            'energy files hold {} entries but cluster files hold {}'.format(
                self.len, cluster_entries))

    def __getitem__(self, index):
        """
        Get a sample from dataset.

        Returns:
            (out, label): ``out`` is the result of calling the module-level
            ``unet`` on ``(voxel, energy)`` with gradients disabled; ``label``
            is the cluster label tensor cast to LongTensor with a trailing
            axis of size 1 added.
        """
        voxel, label = self.cluster_reader.get_image(index)
        _, energy, _ = self.energy_reader.get_image(index)
        # torch.from_numpy always yields CPU tensors, so no .cpu() is needed.
        voxel = torch.from_numpy(voxel)
        energy = torch.unsqueeze(torch.from_numpy(energy), dim=1)
        label = torch.unsqueeze(torch.from_numpy(label), dim=1).type(torch.LongTensor)
        with torch.no_grad():
            out = unet((voxel, energy))
        return out, label

    def __len__(self):
        """
        Total number of samples in dataset.
        """
        return self.len

In [9]:
def ae_collate(batch):
    """
    Collate function that keeps samples as Python lists instead of stacking.

    Each element of ``batch`` is indexed at 0 (data) and 1 (target); the
    result is ``[list_of_data, list_of_targets]`` in batch order.
    """
    inputs, targets = [], []
    for sample in batch:
        inputs.append(sample[0])
        targets.append(sample[1])
    return [inputs, targets]

In [10]:
root = './'  # replace with your own path to root folder.

# Cluster-label files and the matching energy-deposition files, listed in the
# same order so that position k of one list pairs with position k of the other.
trainset_cluster = [root+'dlp_192px_cluster/dlprod_cluster_192px_00.root']
trainset_energy = [root+'dlp_192px_data/dlprod_192px_00.root']

devset_cluster = [root+'dlp_192px_cluster/dlprod_cluster_192px_08.root']
devset_energy = [root+'dlp_192px_data/dlprod_192px_08.root']

# Test split is currently disabled:
#testset_cluster = [root + '/cluster/dlprod_cluster_192px_0{}.root'.format(9)]
#testset_energy = [root + '/dlprod_192px_0{}.root'.format(9)]

# Echo each cluster file followed by its energy partner (train first, then dev).
for cluster_files, energy_files in ((trainset_cluster, trainset_energy),
                                    (devset_cluster, devset_energy)):
    for idx in range(len(cluster_files)):
        print(cluster_files[idx])
        print(energy_files[idx])

trainset = ClusteringAEData(root, 192, filenames=[trainset_energy, trainset_cluster])
devset = ClusteringAEData(root, 192, filenames=[devset_energy, devset_cluster])
#testset = ClusteringAEData(root, 192, filenames=[testset_energy, testset_cluster])

print('Number of entries in training set: {}'.format(len(trainset)))
print('Number of entries in validation set: {}'.format(len(devset)))
#print('Number of entries in test set: {}'.format(len(testset)))

./dlp_192px_cluster/dlprod_cluster_192px_00.root
./dlp_192px_data/dlprod_192px_00.root
./dlp_192px_cluster/dlprod_cluster_192px_08.root
./dlp_192px_data/dlprod_192px_08.root
['./dlp_192px_data/dlprod_192px_00.root']
['./dlp_192px_data/dlprod_192px_08.root']
Number of entries in training set: 10000
Number of entries in validation set: 10000


In [11]:
# One shuffled event per step, loaded in the main process; ae_collate keeps
# each batch as plain Python lists.
trainloader = DataLoader(
    trainset,
    batch_size=1,
    shuffle=True,
    collate_fn=ae_collate,
    num_workers=0,
    pin_memory=False,
)
#devloader = DataLoader(devset, batch_size=1, shuffle=True, collate_fn=ae_collate, num_workers=0, pin_memory=False)

In [12]:
# Pull one training example (index 48) to inspect shapes and try out the loss.
entry, labels = trainset[48]

In [13]:
# Shape of the U-ResNet output for the sample entry.
entry.shape

torch.Size([5180, 16])

In [14]:
# Shape of the label tensor for the sample entry.
labels.shape

torch.Size([5180, 1])

In [15]:
from loss import DiscriminativeLoss

In [16]:
# Loss object used for training and evaluation; DiscriminativeLoss is imported
# from the local `loss` module and built with its default arguments.
criterion = DiscriminativeLoss()

In [17]:
# Evaluate the loss directly on the U-ResNet output of the sample entry.
criterion(entry, labels)

tensor(1.7519)

In [18]:
class ClusteringMLP(nn.Module):
    """
    Three-layer perceptron: two hidden layers with batch norm and leaky ReLU,
    followed by a plain linear output layer.
    """

    @staticmethod
    def _kaiming_linear(n_in, n_out):
        """Create an nn.Linear and re-initialise its weight with Kaiming normal."""
        layer = nn.Linear(n_in, n_out)
        nn.init.kaiming_normal_(layer.weight)
        return layer

    def __init__(self, input_dim=16, nHidden1=32, nHidden2=16, nClasses=2):
        """
        Inputs:
            - input_dim: width of the input rows.
            - nHidden1, nHidden2: widths of the two hidden layers.
            - nClasses: width of the output layer.
        """
        super(ClusteringMLP, self).__init__()
        # Layers are created (and initialised) strictly in this order.
        self.fc1 = self._kaiming_linear(input_dim, nHidden1)
        self.fc2 = self._kaiming_linear(nHidden1, nHidden2)
        self.fc3 = self._kaiming_linear(nHidden2, nClasses)

        self.bn_1 = nn.BatchNorm1d(nHidden1)
        self.bn_2 = nn.BatchNorm1d(nHidden2)

    def forward(self, x):
        """Map rows of ``x`` to ``nClasses`` outputs; the last layer has no activation."""
        hidden = self.fc1(x)
        hidden = F.leaky_relu(self.bn_1(hidden))
        hidden = self.fc2(hidden)
        hidden = F.leaky_relu(self.bn_2(hidden))
        return self.fc3(hidden)

In [19]:
# Instantiate the clustering MLP with its default layer sizes; the move to
# the GPU below is left disabled.
model = ClusteringMLP()
#model = model.cuda()

In [20]:
# Forward the sample entry through the freshly initialised MLP and score the
# result with the loss.
out = model(entry)
loss = criterion(out, labels)

In [21]:
# Display the loss of the untrained MLP on the sample entry.
loss

tensor(4.7566, grad_fn=<AddBackward0>)

In [22]:
# Fetch a single mini-batch from the training loader for inspection.
trainiter = iter(trainloader)
# Use the built-in next(): it works on Python 2 and 3 alike, whereas the
# `.next()` method is Python-2-only and is no longer provided by DataLoader
# iterators in newer PyTorch releases.
x_batch, y_batch = next(trainiter)

In [23]:
# Shape of the first (and, with batch_size=1, only) item of the fetched batch.
x_batch[0].shape

torch.Size([3838, 16])

In [24]:
from sklearn.cluster import MeanShift
from sklearn.metrics import adjusted_mutual_info_score 

In [25]:
def accuracy(out, labels, model, bandwidth=0.5):
    """
    Score an embedding by clustering it and comparing with the true labels.

    Inputs:
        - out: tensor of embedded points, one row per point; may live on any
          device and may require grad.
        - labels: true cluster id per point (tensor or array-like); any
          singleton axes are flattened away.
        - model: network that produced ``out``. Side effect: it is switched
          to eval mode, so callers that keep training must call
          ``model.train()`` again.
        - bandwidth: MeanShift bandwidth (default 0.5, the value previously
          hard-coded).

    Returns:
        The adjusted mutual information between ``labels`` and the MeanShift
        cluster assignment of ``out``.
    """
    # .cpu() makes this work for CUDA tensors too; .numpy() alone raises there.
    points = out.detach().cpu().numpy()
    model.eval()
    if torch.is_tensor(labels):
        labels = labels.detach().cpu().numpy()
    # reshape(-1) instead of squeeze(): a single-point label stays 1-D rather
    # than collapsing to a 0-d array, which the metric cannot handle.
    truth = np.asarray(labels).reshape(-1)
    # No torch.no_grad() needed: everything below is NumPy / scikit-learn.
    predicted = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit(points).labels_
    return adjusted_mutual_info_score(truth, predicted)


In [26]:
import csv
# Per-step training metrics and periodic validation metrics are written to
# these CSV files, one value per row.
# NOTE(review): the four handles are opened in 'w' mode (existing files are
# truncated) and are not closed anywhere in the visible notebook - confirm
# that they get closed once training ends so no buffered rows are lost.
f_train_loss = open('train_loss.csv', 'w')
f_train_acc = open('train_acc.csv', 'w')
trainlossWriter = csv.writer(f_train_loss, delimiter=',')
trainaccWriter = csv.writer(f_train_acc, delimiter=',')

f_dev_loss = open('dev_loss.csv', 'w')
f_dev_acc = open('dev_acc.csv', 'w')
devlossWriter = csv.writer(f_dev_loss, delimiter=',')
devaccWriter = csv.writer(f_dev_acc, delimiter=',')


In [27]:
# Number of passes over the training set made by the training loop.
training_epochs = 10
# Adam with its default hyper-parameters over every parameter of the MLP.
optimizer = optim.Adam(model.parameters())
n_params = sum(p.nelement() for p in model.parameters())
print('#classifer parameters', n_params)

('#classifer parameters', 1202)


In [28]:
def save_checkpoint(checkpoint_path, model, optimizer):
    """
    Write the model and optimizer state to ``checkpoint_path``.

    The file holds a dict with two entries:
        - 'state_dict': ``model.state_dict()`` (layer name -> parameter tensor)
        - 'optimizer': ``optimizer.state_dict()`` (optimizer state and
          hyperparameters)
    A confirmation line is printed after saving.
    """
    torch.save(
        {'state_dict': model.state_dict(),
         'optimizer': optimizer.state_dict()},
        checkpoint_path)
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer, map_location=None):
    """
    Restore model and optimizer state written by ``save_checkpoint``.

    Inputs:
        - checkpoint_path: file holding a dict with 'state_dict' and
          'optimizer' entries.
        - model: module whose parameters are overwritten in place.
        - optimizer: optimizer whose state is overwritten in place.
        - map_location: forwarded to ``torch.load``. The default (None) keeps
          torch.load's own device handling; pass 'cpu' to open a checkpoint
          that was saved from a GPU on a machine without one.

    A confirmation line is printed after loading.
    """
    state = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)

In [29]:
# Each evaluation pass draws TESTSET_SIZE validation entries at random, with
# replacement, and serves them in lists of up to 16 via ae_collate.
sampler = torch.utils.data.sampler.RandomSampler(
    devset, replacement=True, num_samples=TESTSET_SIZE)
devloader = DataLoader(
    devset,
    sampler=sampler,
    batch_size=16,
    collate_fn=ae_collate,
)

In [30]:
def test(model, devloader):
    """
    Evaluate ``model`` on a validation loader.

    Inputs:
        - model: network to evaluate; it is put in eval mode and left there.
        - devloader: iterable of ``[data_list, label_list]`` batches, as
          produced with ``ae_collate``.

    Returns:
        (mean_loss, mean_accuracy) as Python floats, averaged over the
        samples that were evaluated successfully. ``(nan, nan)`` is returned
        when no sample could be evaluated.

    Samples whose forward pass, loss or metric raises are reported and
    skipped (best effort); KeyboardInterrupt is no longer swallowed.
    """
    model.eval()
    loss_sum = 0.0
    acc_sum = 0.0
    n_evaluated = 0
    with torch.no_grad():
        for batch in devloader:
            x_batch = batch[0]
            y_batch = batch[1]
            for data, label in zip(x_batch, y_batch):
                try:
                    out = model(data)
                    loss = criterion(out, label)
                    acc = accuracy(out, label, model)
                except Exception as err:
                    print("Warning: Error Encounterd!!")
                    print(repr(err))
                    continue
                # float() accepts 0-dim tensors, NumPy scalars and plain
                # Python numbers, so no .item() is required on either value.
                loss_sum += float(loss)
                acc_sum += float(acc)
                n_evaluated += 1

    if n_evaluated == 0:
        return float('nan'), float('nan')
    # Average over what was actually evaluated instead of a global constant,
    # so skipped samples or a differently sized loader do not skew the mean.
    return loss_sum / n_evaluated, acc_sum / n_evaluated

In [None]:
import time

# Train the MLP one event at a time. Every step logs loss/accuracy to the
# training CSVs; every 500th step (including the first of each epoch) the
# model is evaluated on the dev loader and a checkpoint is written.
errCount = 0                      # number of training steps that raised
trainset_len = len(trainset)

for epoch in range(1, training_epochs+1):
    start = time.time()           # wall-clock start of this epoch
    train_loss = 0                # running sum of this epoch's step losses
    for i, batch in enumerate(trainloader):
        # accuracy() and test() leave the model in eval mode, so training
        # mode has to be restored at the start of every step.
        model.train()
        optimizer.zero_grad()
        data = batch[0][0]
        label = batch[1][0]
        try:
            out = model(data)
            out = out.cpu()
            loss = criterion(out, label)
            loss_value = loss.item()
            train_loss += loss_value
            loss.backward()
            # Apply the update before computing the metric, so a failure in
            # the clustering-based accuracy cannot discard a valid gradient.
            optimizer.step()
            acc = accuracy(out, label, model)
            print("Examples = {}/{}, Loss = {}, Accuracy = {}".format(i+1, trainset_len, loss_value, acc))
            trainlossWriter.writerow([loss_value])
            trainaccWriter.writerow([acc])
            if i % 500 == 0:
                dev_loss, dev_acc = test(model, devloader)
                print('loss: ', dev_loss, ' acc: ', dev_acc)
                devlossWriter.writerow([dev_loss])
                devaccWriter.writerow([dev_acc])
                save_checkpoint('checkpoint{}.ckpt'.format(epoch), model, optimizer)
                # Push buffered CSV rows to disk alongside each checkpoint so
                # an interrupted run does not lose its logs.
                for handle in (f_train_loss, f_train_acc, f_dev_loss, f_dev_acc):
                    handle.flush()
        except Exception as err:
            # Best effort: count and report the failing step, then move on.
            # Catching Exception (not a bare except) keeps Ctrl-C / kernel
            # interrupt working.
            errCount += 1
            print("Warning: Error Encounterd!!")
            print(repr(err))
            continue

print(errCount)
#scn.checkpoint_save(unet,exp_name,'unet',epoch, use_cuda)



Examples = 1/10000, Loss = 2.8373837471, Accuracy = 0.0932963583634
('loss: ', 6.047755737304687, ' acc: ', 0.26080959224275074)
model saved to checkpoint1.ckpt
Examples = 2/10000, Loss = 4.46273851395, Accuracy = -1.44673175611e-15
Examples = 3/10000, Loss = 3.39613652229, Accuracy = 0.30541374024
Examples = 4/10000, Loss = 5.19240140915, Accuracy = 0.187999725367
Examples = 5/10000, Loss = 4.08955430984, Accuracy = 0.0309019264591
Examples = 6/10000, Loss = 5.74718475342, Accuracy = 0.0768757221121
Examples = 7/10000, Loss = 5.68146657944, Accuracy = 0.118630859432
Examples = 8/10000, Loss = 5.04599952698, Accuracy = 0.2506013661
Examples = 9/10000, Loss = 3.39339900017, Accuracy = 0.504605019051
Examples = 10/10000, Loss = 3.20865893364, Accuracy = 0.316581606188
Examples = 11/10000, Loss = 5.50170040131, Accuracy = 0.0579477741015
Examples = 12/10000, Loss = 4.90642929077, Accuracy = 0.0901817180181
Examples = 13/10000, Loss = 3.15490293503, Accuracy = 0.326199128494
Examples = 14/

Examples = 118/10000, Loss = 7.68341398239, Accuracy = 0.00127490335614
Examples = 119/10000, Loss = 2.57811379433, Accuracy = 0.121471412271
Examples = 120/10000, Loss = 1.56524610519, Accuracy = 0.450207902594
Examples = 121/10000, Loss = 2.11308526993, Accuracy = 0.505192482242
Examples = 122/10000, Loss = 3.11476135254, Accuracy = 0.280691020058
Examples = 123/10000, Loss = 1.77414953709, Accuracy = 0.48860142189
Examples = 124/10000, Loss = 4.12544775009, Accuracy = 0.143032534327
Examples = 125/10000, Loss = 0.216010123491, Accuracy = 9.01445169789e-17
Examples = 126/10000, Loss = 1.22732782364, Accuracy = 0.679071739678
Examples = 127/10000, Loss = 1.08383357525, Accuracy = 0.189577036247
Examples = 128/10000, Loss = 0.976639389992, Accuracy = 0.635121477883
Examples = 129/10000, Loss = 1.22986412048, Accuracy = 0.276225394788
Examples = 130/10000, Loss = 1.84712827206, Accuracy = 0.569877724716
Examples = 131/10000, Loss = 2.09840679169, Accuracy = 0.449565250443
Examples = 132

Examples = 235/10000, Loss = 1.37888634205, Accuracy = 0.49881917179
Examples = 236/10000, Loss = 0.80257153511, Accuracy = 0.0955639489051
Examples = 237/10000, Loss = 0.900274157524, Accuracy = 0.656371775531
Examples = 238/10000, Loss = 1.37029731274, Accuracy = 0.458974694576
Examples = 239/10000, Loss = 2.30897283554, Accuracy = 0.461597662514
Examples = 240/10000, Loss = 0.41233164072, Accuracy = 0.30078274188
Examples = 241/10000, Loss = 0.102076537907, Accuracy = 0.385728528059
Examples = 242/10000, Loss = 2.87836503983, Accuracy = 0.189242844145
Examples = 243/10000, Loss = 1.52727127075, Accuracy = 0.31771441584
Examples = 244/10000, Loss = 1.2456766367, Accuracy = 0.645297726289
Examples = 245/10000, Loss = 0.968949437141, Accuracy = 0.362732960348
Examples = 246/10000, Loss = 0.903204500675, Accuracy = 0.439874179713
Examples = 247/10000, Loss = 1.8974916935, Accuracy = 0.299268210788
Examples = 248/10000, Loss = 1.49965858459, Accuracy = 0.400631812809
Examples = 249/10000

Examples = 352/10000, Loss = 1.41038477421, Accuracy = 0.513158514645
Examples = 353/10000, Loss = 1.35276222229, Accuracy = 0.666679421763
Examples = 354/10000, Loss = 2.38941216469, Accuracy = 0.46131564321
Examples = 355/10000, Loss = 1.10178542137, Accuracy = 0.581425937315
Examples = 356/10000, Loss = 0.673630356789, Accuracy = 0.430378623388
Examples = 357/10000, Loss = 0.14852103591, Accuracy = 0.474749898656
Examples = 358/10000, Loss = 1.48384487629, Accuracy = 0.436440578925
Examples = 359/10000, Loss = 1.43885612488, Accuracy = 0.410016294134
Examples = 360/10000, Loss = 1.43970489502, Accuracy = 0.506756278078
Examples = 361/10000, Loss = 0.680622696877, Accuracy = 0.536618845676
Examples = 362/10000, Loss = 0.2520378232, Accuracy = 0.175081510568
Examples = 363/10000, Loss = 0.059295039624, Accuracy = 0.692820898396
Examples = 364/10000, Loss = 1.22262978554, Accuracy = 0.516080169536
Examples = 365/10000, Loss = 0.107141010463, Accuracy = 0.312538778273
Examples = 366/100