In [1]:
%load_ext autoreload
%matplotlib inline
%autoreload 2

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

import os
import pickle
import imp

In [2]:
from DeepJetCore.DataCollection import DataCollection
from DeepJetCore.compiled.c_trainDataGenerator import trainDataGenerator
from DeepJetCore.customObjects import get_custom_objects

from DeepJetCore.TrainData import TrainData

from evaluation_tools import find_best_matching_truth_and_format, write_output_tree, determine_event_properties, write_event_output_tree
from inference import make_particle_inference_dict,  collect_condensates

setGPU: Setting GPU to: 1


In [3]:
# define train_data as our data collection, read from djcdc file
train_data = DataCollection("../data/train_data/dataCollection.djcdc")

train_data.dataclass()

<datastructures.TrainData_PF.TrainData_PF_graph at 0x7f66e636c420>

In [4]:
import torch
import os.path as osp
import gc
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.transforms as T

from torch.utils.checkpoint import checkpoint
from torch_cluster import knn_graph

from torch_geometric.nn import EdgeConv, NNConv
# from torch_geometric.nn.conv import gravnet_conv
# from torch_geometric.nn import GravNetConv

from torch_geometric.nn.pool.edge_pool import EdgePooling

from torch_geometric.utils import normalized_cut
from torch_geometric.utils import remove_self_loops
from torch_geometric.utils.undirected import to_undirected
from torch_geometric.nn import (graclus, max_pool, max_pool_x,
                                global_mean_pool, global_max_pool,
                                global_add_pool)

# # something wrong with the package perhaps
# from torch_geometric.nn import GravNetConv

import sklearn.metrics
from sklearn.model_selection import train_test_split

# transform = T.Cartesian(cat=False)

# from torch.optim import Optimizer
# from torch.optim.lr_scheduler import _LRScheduler
import math
import sys


from Losses import particle_condensation_loss

In [5]:
cuda = torch.device('cuda')
device = torch.device("cuda" if cuda else "cpu")
print(device)

cuda


In [7]:
"""
This module contains the Estimator class implementation which provides
code for doing the training of a PyTorch model.
"""

from __future__ import print_function

from datetime import datetime
from timeit import default_timer as timer

import shutil 
import copy 

from torch_geometric.data import Batch      

from Losses import particle_condensation_loss

def logger(s):
    """Simple logger function which prints date/time"""
    print(datetime.now(), s)

class Estimator():
    """Estimator class"""

    def __init__(self, model, loss_func, opt='Adam',
                 train_losses=None, valid_losses=None,
                 cuda=False, l1=0.):

        self.model = model
        if cuda:
            print("using CUDA...")
            self.model.cuda()
        self.loss_func = loss_func
        if opt == 'Adam':
            self.optimizer = torch.optim.Adam(self.model.parameters())
        elif opt == 'SGD':
            self.optimizer = torch.optim.SGD(self.model.parameters())

        self.train_losses = train_losses if train_losses is not None else []
        self.valid_losses = valid_losses if valid_losses is not None else []
        self.l1 = l1

        logger('Model: \n%s' % model)
        logger('Parameters: %i' %
               sum(param.numel() for param in model.parameters()))

    def l1_penalty(self, arr):
        return torch.abs(arr).sum()
        
    def training_step(self, inputs, targets):
        '''
        Applies single optimization step on batch
        '''
        
        # get dimensions... not the best way to do it but meh
        batch_size = len(inputs[0]) # same as nevents
        num_nodes = len(inputs[0][0]) # should be 200?
        num_features = len(inputs[0][0][0]) # should be 6
        num_truth_vals = len(targets[0][0][0]) # should be 11
                        
        # flatten batch input and target tensors from 3D to 2D by combining B and V dimmensions
        x_tensor_flat = torch.from_numpy(np.array(inputs[0])).view(-1, num_features)
        y_tensor_flat = torch.from_numpy(np.array(targets[0])).view(-1, num_truth_vals)
        
        # tensor keeping track of which batch each entry is from, after flattening
        batch_list = [i for i in range(batch_size) for j in range(num_nodes)]
        batch_np = np.array(batch_list)
        batch_tensor = torch.from_numpy(batch_np)
        
        # make a particle inference dictionary of features and truth
        d = make_particle_inference_dict(None, inputs[0], targets[0])
        
        # cartesian coordinate tensor from x_tensor_flat
        pos_tensor = x_tensor_flat.narrow(1,1,3)
        # pos_tensor = torch.from_numpy(d['f_pos'][:,:]) #,0:3

        # put training input data into a batch structure
        data = Batch(batch=batch_tensor, x=x_tensor_flat, edge_index=None, edge_attr=None, pos=pos_tensor)
        
        # modify data attributes - zero-suppressed
        # mask based on energy - keep only sensors/cells with nonzero energy hit
#         print("mask shape: ", y_tensor_flat[:,0].size())
#         print(y_tensor_flat[:,0])
#         mask = (y_tensor_flat[:,0] > 0.).squeeze()
        t_mask_flat = np.reshape(d['t_mask'][:,:,0], [-1])
#         print("mask shape: ", t_mask_flat.shape)
        mask = (t_mask_flat > 0.).squeeze()
        data.x = data.x[mask]
        data.pos = data.pos[mask,:]
        data.batch = data.batch[mask.squeeze()]
        
        y_tensor_flat = y_tensor_flat[mask]

#         # target_particle_id_tensor here is analogous to MNIST digit class
#         # get the particle ID for the detector hit with the largest energy for each event.
#         # Note the highest energy vaue may repeat many times... we assume that after condensation,
#         # the reconstruted particle with the greatest energy has this energy. We can say that the associated ID
#         # is the identity of the particle (electron or photon) that initiated the event (electromagnetic shower)
#         nevents = d['t_E'].shape[0]
#         idxs_dominant = np.argmax(d['t_E'][:,:,0], axis=1)
#         t_particle_id_list = []
#         for i in range(0, nevents):
#             idx_dominant = idxs_dominant[i]
#             dom_ID = int(d['t_ID'][i][:,0][idx_dominant])
#             t_particle_id_list.append(dom_ID)
#         t_particle_id_np = np.array(t_particle_id_list)
# #         print(t_particle_id_np)
# #         # basically ratio of one of the classes (electron or photon)
# #         print(np.sum(t_particle_id_np) / nevents)
#         target_particle_id_tensor = torch.from_numpy(t_particle_id_np)

        n_true_particles = np.reshape(np.max(d['t_objidx'],axis=1)+1., [d['t_objidx'].shape[0],1])
        n_true_particles = np.reshape(n_true_particles, -1).astype(int)
        target_num_particles_tensor = torch.from_numpy(n_true_particles)
        
        print("Applying single optimization step on batch")
        self.model.zero_grad()
        self.optimizer.zero_grad()
        
        # call the forward step of our model to get the output tensor
        DRN_outputs, OC_outputs = self.model(data)
#         print(DRN_outputs.size())
#         print(DRN_outputs)
        
# #         outputs_np = np.expand_dims(outputs.detach().numpy(), axis=0)
#         outputs_np = outputs.detach().numpy()
#         print(outputs_np)
#         print(outputs.max(1)[1])
        
        # negative log likelihood loss... add the object condensation loss term to this somehow
#         loss = F.nll_loss(outputs, target_particle_id_tensor)

##        will prob have to expand this out using batch_tensor to full unmasked version, filling in the masked out parts with zeros?
#         object_condensation_loss = torch.from_numpy(particle_condensation_loss(y_tensor_flat, OC_outputs))

        # right now just random tensor as a proof of concept
        object_condensation_loss = torch.tensor(0.0, requires_grad=True)
        dynamic_reduction_loss = F.nll_loss(DRN_outputs, target_num_particles_tensor)
        loss = dynamic_reduction_loss + object_condensation_loss
        
        print("DRN step loss: ", dynamic_reduction_loss)
        print("OC step loss: ", object_condensation_loss)
        print("total training step loss: ", loss)
        
        # backwards step
        loss.backward()
        self.optimizer.step()
#         scheduler.batch_step()
        
        return loss

    def save_checkpoint(self, state, is_best, filename='checkpoint.pt'):
        directory = os.path.dirname(filename)
        try:
            os.stat(directory)
        except:
            os.mkdir(directory)
        torch.save(state, filename)
        if is_best:
            bestfilename = directory+'/model_best.pt'
            shutil.copyfile(filename, bestfilename)
            
    def load_checkpoint(self, filename='checkpoint.pt'):
        checkpoint = torch.load(filename)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.valid_losses = checkpoint['valid_losses']
        self.train_losses = checkpoint['train_losses']
        
    def load_weights(self, filename='checkpoint.pt'):
        checkpoint = torch.load(filename)
        old_model = copy.deepcopy(self.model)
        old_model.load_state_dict(checkpoint['state_dict'])

        def set_masked_data(new_layer, old_layer):
            if new_layer.mask_flag:
                new_layer.weight.data = old_layer.weight.data * old_layer.mask.data
            else:
                new_layer.weight.data = old_layer.weight.data
              
        set_masked_data(self.model.edge_network.network[0], old_model.edge_network.network[0])
        set_masked_data(self.model.edge_network.network[2], old_model.edge_network.network[2])
        set_masked_data(self.model.node_network.network[0], old_model.node_network.network[0])
        set_masked_data(self.model.node_network.network[2], old_model.node_network.network[2])       
    
    def fit_gen(self, train_generator, n_batches=1, n_epochs=1,
                valid_generator=None, n_valid_batches=1, verbose=0, 
                filename='checkpoint.pt'):
        """Runs batch training for a number of specified epochs."""
#         scheduler.step()
        epoch_start = len(self.train_losses)
        epoch_end = epoch_start + n_epochs
        if len(self.valid_losses) > 0:
            best_valid_loss = self.valid_losses[-1]
        else:
            best_valid_loss = 99999999
        for i in range(epoch_start, epoch_end):
            logger('Epoch %i' % i)
            start_time = timer()
            sum_loss = 0

            # Train the model
            print("training model...")
            self.model.train()
            print("computing losses for {} batches...".format(n_batches))
            for j in range(n_batches):
                batch_input, batch_target = next(train_generator)
                batch_loss = (self.training_step(batch_input, batch_target)
                              .cpu().data.item())
                print("batch {} out of {}".format(j+1, n_batches))
                print("batch_loss: ", batch_loss)
                sum_loss += batch_loss
#                 print("sum loss: ", sum_loss)
                if verbose > 0:
                    logger('  Batch %i loss %f' % (j, batch_loss))
            print("finished cycle")
            end_time = timer()
            avg_loss = sum_loss / n_batches
            print("avg_loss: ", avg_loss)
            self.train_losses.append(avg_loss)
            logger('  training loss %.3g time %gs' %
                   (avg_loss, (end_time - start_time)))

            # TODO: adapt this to new data scheme
            with torch.no_grad():
                # Evaluate the model on the validation set
                if (valid_generator is not None) and (n_valid_batches > 0):
                    self.model.eval()
                    valid_loss = 0
                    for j in range(n_valid_batches):
                        valid_input, valid_target = next(valid_generator)
                        valid_loss += (self.loss_func(self.model(valid_input), valid_target)
                                       .cpu().data.item())
                    valid_loss = valid_loss / n_valid_batches
                    self.valid_losses.append(valid_loss)
                    logger('  validate loss %.3g' % valid_loss)
                
                    #Save model checkpoint - modified
                    logger(' save checkpoint') 
                    is_best = valid_loss < best_valid_loss
                    best_valid_loss = min(valid_loss, best_valid_loss)
                    self.save_checkpoint({
                        'epoch': i + 1,
                        'state_dict': self.model.state_dict(),
                        'best_valid_loss': best_valid_loss,
                        'valid_losses': self.valid_losses,
                        'train_losses': self.train_losses,
                        'optimizer' : self.optimizer.state_dict(),
                    }, is_best, filename=filename)

    def predict(self, generator, n_batches, concat=True):
        with torch.no_grad():  
            self.model.eval()
            outputs = []
            for j in range(n_batches):
                test_input, test_target = next(generator)
                outputs.append(self.model(test_input))
            if concat:
                outputs = torch.cat(outputs)
            return outputs

In [8]:
# from GravNet import GravNetConv
from old_GravNet import GravNet

transform = T.Cartesian(cat=False)

def normalized_cut_2d(edge_index, pos):
    row, col = edge_index
    edge_attr = torch.norm(pos[row] - pos[col], p=2, dim=1)
    return normalized_cut(edge_index, edge_attr, num_nodes=pos.size(0))

class DRNOC(nn.Module):
#     This model iteratively contracts nearest neighbour graphs 
#     until there is one output node.
#     The latent space trained to group useful features at each level
#     of aggregration.
#     This allows single quantities to be regressed from complex point counts
#     in a location and orientation invariant way.
#     One encoding layer is used to abstract away the input features.
    def __init__(self, input_dim=5, hidden_dim=64, output_dim=1, k=16, aggr='add',
                 norm=torch.tensor([1.,1.,1.,1.])):
        super(DRNOC, self).__init__()

        self.datanorm = nn.Parameter(norm)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.k = k
        start_width = 2 * hidden_dim
        middle_width = 3 * hidden_dim // 2

        self.inputnet =  nn.Sequential(
            nn.Linear(input_dim, hidden_dim//2),            
            nn.ELU(),
            nn.Linear(hidden_dim//2, hidden_dim),
            nn.ELU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ELU(),
        )        
        convnn1 = nn.Sequential(nn.Linear(start_width, middle_width),
                                nn.ELU(),
                                nn.Linear(middle_width, hidden_dim),                                             
                                nn.ELU()
                                )
        convnn2 = nn.Sequential(nn.Linear(start_width, middle_width),
                                nn.ELU(),
                                nn.Linear(middle_width, hidden_dim),                                             
                                nn.ELU()
                                )                
        
        # The edge convolutional operator from the “Dynamic Graph CNN for Learning on Point Clouds” paper
        self.edgeconv1 = EdgeConv(nn=convnn1, aggr=aggr)
        self.edgeconv2 = EdgeConv(nn=convnn2, aggr=aggr)
        
        self.output = nn.Sequential(nn.Linear(hidden_dim, hidden_dim),
                                    nn.ELU(),
                                    nn.Linear(hidden_dim, hidden_dim//2),
                                    nn.ELU(),                                    
                                    nn.Linear(hidden_dim//2, output_dim))
        
        
    def forward(self, data):
        
        # object condensation step
        print("starting object condensation step...")
        # define output predictions format
        def output_block(x,ids,energy_raw):
            p_beta    = nn.Sigmoid()(nn.Linear(x.size()[-1], 1)(x))

            p_tpos    = 10. * nn.Linear(x.size()[-1], 2)(x)
            p_ID      = nn.Softmax()(nn.Linear(x.size()[-1], 2)(x))

            p_E       = (nn.Linear(x.size()[-1], 1)(x))
            p_ccoords = 10. * nn.Linear(x.size()[-1], 2)(x)

            predictions=torch.cat([p_beta ,
                                       p_E    ,
                                       p_tpos   ,
                                       p_ID     ,
                                       p_ccoords,
                                       ids,
                                       energy_raw], dim=1)

            print('predictions',predictions.size())
            return predictions

        def OC_minimodel(x):
            # select features from keras layer x from index 0 to index 3
            # feature energy and position: f_E and f_pos
            energy_raw = x[...,0:3]
            ids = x[...,5:6]
            print(energy_raw.size())
            # re-center and re-scale
            x = nn.BatchNorm1d(x.size()[-1], momentum=0.6)(x)
            feat=[x]

            for i in range(6):
                #add global exchange and another dense here
                
# #                 mean = tf.reduce_mean(x, axis=1, keepdims=True)
# #                 mean = tf.tile(mean, [1, self.num_vertices, 1])
# #                 return tf.concat([x, mean], axis=-1)
#                 x = GlobalExchange()(x)
                x = nn.ELU(alpha=0.6)(nn.Linear(x.size()[-1], 64)(x))
                x = nn.ELU(alpha=0.6)(nn.Linear(64, 64)(x))
                x = nn.BatchNorm1d(64, momentum=0.6)(x)
                x = nn.ELU(alpha=0.6)(nn.Linear(x.size()[-1], 64)(x))

#                 x = GravNetConv(in_channels=x.size()[-1], 
#                          out_channels=128, 
#                          space_dimensions=4, 
#                          propagate_dimensions=64, k=self.k)(x)

#                 x = GravNet(n_blocks=4, final_dim=128, n_clusters=2,
#                             input_dim=64, out_dim=128, spatial_dim=4, dense_dim=64, n_neighbors=10)(x)

#                 x = GravNet(input_dim=64, out_dim=128, spatial_dim=4, n_neighbors=10)(x)
    
                x = nn.BatchNorm1d(x.size()[-1], momentum=0.6)(x)
                feat.append(nn.ELU(alpha=0.6)(nn.Linear(x.size()[-1], 32)(x)))
            
#             # without GravNet enforcing 4 spatial dimensions, feat is ragged and we cannot concatenate
#             x = torch.cat(feat)
            x = nn.ELU(alpha=0.6)(nn.Linear(x.size()[-1], 64)(x))
            preds = output_block(x, ids, energy_raw)
            return preds

        preds = OC_minimodel(data.x)
        print("alright buckaroo")
        print(preds.size())
#         print(preds)
        
        
        # dynamic reduction step
        print("startin dynamic reduction step...")
#         data.x = self.datanorm * data.x

        data.x = preds
        data.x = nn.BatchNorm1d(data.x.size()[-1], momentum=0.6)(data.x)
        data.x = self.inputnet(data.x)

        data.edge_index = to_undirected(knn_graph(data.x, self.k, data.batch, loop=False, flow=self.edgeconv1.flow))
        data.x = self.edgeconv1(data.x, data.edge_index)
        
        weight = normalized_cut_2d(data.edge_index, data.x)
        
        # replace with condensation-based clustering ?
        print("clustering to dimension: ", data.x.size(0))
        cluster = graclus(data.edge_index, weight, data.x.size(0))

        data.edge_attr = None
        data = max_pool(cluster, data)
        
        data.edge_index = to_undirected(knn_graph(data.x, self.k, data.batch, loop=False, flow=self.edgeconv2.flow))
        data.x = self.edgeconv2(data.x, data.edge_index)
        
        weight = normalized_cut_2d(data.edge_index, data.x)
        
        # replace with condensation-based clustering ?
        print("clustering to dimension: ", data.x.size(0))
        cluster = graclus(data.edge_index, weight, data.x.size(0))
        
        x, batch = max_pool_x(cluster, data.x, data.batch)
#         # it seems like the batch tensor (each batch corresponds to an event) at this point associates each
#         # point in x with the event it is from. It looks almost exactly like the condensation eventparticles
#         print(batch.size())
#         print(batch[0:40])
        x = global_max_pool(x, batch)
                
        x = self.output(x).squeeze(-1)
        return x, preds

In [9]:
import warnings
warnings.simplefilter('ignore')

# train_data = DataCollection("../data/train_data/dataCollection.djcdc")
# splits off 10% of the training dataset for validation. Can be used in the same way as train_data
val_data = train_data.split(0.9) 

# Set the batch size. 
# If the data is ragged in dimension 1 (see convert options), 
# then this is the maximum number of elements per batch, which could be distributed differently
# to individual examples. E.g., if the first example has 50 elements, the second 48, and the third 30,
# and the batch size is set to 100, it would return the first two examples (in total 99 elements) in 
# the first batch etc. This is helpful to avoid out-of-memory errors during training

batch_size = 500
train_data.setBatchSize(batch_size)         
    
# prepare the generator
train_data.invokeGenerator()
train_data.generator.shuffleFilelist()
# train_data.generator.prepareNextEpoch()

# this number can differ from epoch to epoch for ragged data!
n_train_batches = train_data.generator.getNBatches()

# training data generator
gen = train_data.generator

def genfunc():
    while(not gen.isEmpty()):
        d = gen.getBatch()
        yield d.transferFeatureListToNumpy() , d.transferTruthListToNumpy()

# Model config
hidden_dim = 20
n_features = 6
n_outputs = 12
# number of particles per event... 1-9
n_classes = 9
k = 10
n_epochs = 100
# valid_frac = 0.1
# test_frac = 0
# epoch_size = len(train_data)
# print(epoch_size, batch_size)

print('features ->', n_features)
print('classes ->', n_classes)
print('hidden_dim = %d' % hidden_dim)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input_dim is n_features = 6
        # but if we feed predictions from OC into DRN instead of the features, then we use n_outputs = 12
        self.drnoc = DRNOC(input_dim=n_outputs, hidden_dim=hidden_dim,
                                           k=k,
                                           output_dim=n_classes, aggr='add',
                                           norm=torch.tensor([1./300.,1./200.,1./200.,1./100.,1./100.,1./100.]))

    def forward(self, data):
        DRN_logits, OC_logits = self.drnoc(data)
        return F.log_softmax(DRN_logits, dim=1), OC_logits

def print_model_summary(model):
    """Override as needed"""
    print(
        'Model: \n%s\nParameters: %i' %
        (model, sum(p.numel() for p in model.parameters()))
    )

device = torch.device('cpu')
model = Net().to(device)

# # we don't need a scheduler because we explicitly put in the n_train_batches
# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
# scheduler = CyclicLRWithRestarts(optimizer, batch_size, epoch_size, restart_period=400, t_mult=1.2, policy="cosine")

print_model_summary(model)

# loss function, for no_grad case
loss_func = nn.BCELoss()
# initiate the estimator
estim = Estimator(model, loss_func=loss_func, cuda=None, l1 = 0)
# fit the generator function to the estimator
estim.fit_gen(genfunc(), n_batches=n_train_batches, n_epochs=n_epochs, filename='checkpoint.pt')

features -> 6
classes -> 9
hidden_dim = 20
Model: 
Net(
  (drnoc): DRNOC(
    (inputnet): Sequential(
      (0): Linear(in_features=12, out_features=10, bias=True)
      (1): ELU(alpha=1.0)
      (2): Linear(in_features=10, out_features=20, bias=True)
      (3): ELU(alpha=1.0)
      (4): Linear(in_features=20, out_features=20, bias=True)
      (5): ELU(alpha=1.0)
    )
    (edgeconv1): EdgeConv(nn=Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ELU(alpha=1.0)
      (2): Linear(in_features=30, out_features=20, bias=True)
      (3): ELU(alpha=1.0)
    ))
    (edgeconv2): EdgeConv(nn=Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ELU(alpha=1.0)
      (2): Linear(in_features=30, out_features=20, bias=True)
      (3): ELU(alpha=1.0)
    ))
    (output): Sequential(
      (0): Linear(in_features=20, out_features=20, bias=True)
      (1): ELU(alpha=1.0)
      (2): Linear(in_features=20, out_features=10, bias=True)
    

batch 12 out of 2992
batch_loss:  2.1126961708068848
Applying single optimization step on batch
starting object condensation step...
torch.Size([7676, 3])
predictions torch.Size([7676, 12])
alright buckaroo
torch.Size([7676, 12])
startin dynamic reduction step...
clustering to dimension:  7676
clustering to dimension:  4037
DRN step loss:  tensor(2.1066, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(2.1066, grad_fn=<AddBackward0>)
batch 13 out of 2992
batch_loss:  2.106593370437622
Applying single optimization step on batch
starting object condensation step...
torch.Size([7735, 3])
predictions torch.Size([7735, 12])
alright buckaroo
torch.Size([7735, 12])
startin dynamic reduction step...
clustering to dimension:  7735
clustering to dimension:  4072
DRN step loss:  tensor(2.0728, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(2.0728, grad_fn=<AddBackward0>)
batch 14 

batch 29 out of 2992
batch_loss:  1.9064692258834839
Applying single optimization step on batch
starting object condensation step...
torch.Size([7499, 3])
predictions torch.Size([7499, 12])
alright buckaroo
torch.Size([7499, 12])
startin dynamic reduction step...
clustering to dimension:  7499
clustering to dimension:  3930
DRN step loss:  tensor(1.8867, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(1.8867, grad_fn=<AddBackward0>)
batch 30 out of 2992
batch_loss:  1.8866533041000366
Applying single optimization step on batch
starting object condensation step...
torch.Size([7573, 3])
predictions torch.Size([7573, 12])
alright buckaroo
torch.Size([7573, 12])
startin dynamic reduction step...
clustering to dimension:  7573
clustering to dimension:  3989
DRN step loss:  tensor(1.9079, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(1.9079, grad_fn=<AddBackward0>)
batch 31

clustering to dimension:  7740
clustering to dimension:  4060
DRN step loss:  tensor(1.6856, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(1.6856, grad_fn=<AddBackward0>)
batch 46 out of 2992
batch_loss:  1.6855642795562744
Applying single optimization step on batch
starting object condensation step...
torch.Size([7633, 3])
predictions torch.Size([7633, 12])
alright buckaroo
torch.Size([7633, 12])
startin dynamic reduction step...
clustering to dimension:  7633
clustering to dimension:  4006
DRN step loss:  tensor(1.7017, grad_fn=<NllLossBackward>)
OC step loss:  tensor(0., requires_grad=True)
total training step loss:  tensor(1.7017, grad_fn=<AddBackward0>)
batch 47 out of 2992
batch_loss:  1.7016711235046387
Applying single optimization step on batch
starting object condensation step...
torch.Size([7541, 3])
predictions torch.Size([7541, 12])
alright buckaroo
torch.Size([7541, 12])
startin dynamic reduction step...
clusteri

KeyboardInterrupt: 

In [None]:
print(estim.train_losses)
print(estim.valid_losses)

# Plot the loss
plt.figure()
plt.plot(estim.train_losses, label='training set')
plt.plot(estim.valid_losses, label='validation set')
plt.xlabel('Epoch')
plt.ylabel('Loss')
#plt.xlim(195,300)
#plt.ylim(.06,.08)
plt.legend(loc=0);