# CERN CMS Particle Tracking
## Introduction
In this notebook I will try to develop a **PyTorch** based model to predict particle collision from the European organisation CERN's CMS data.

## Importing Libraries
Lets import some important libraries and pytorch.

In [38]:
import numpy as np
import pandas as pd
import os
from math import ceil

import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader, TensorDataset, ConcatDataset, Sampler

import line_profiler
%load_ext line_profiler

DATASET_PATH = '../input/trackml/'

from tqdm import tqdm_notebook
print(os.listdir("../input"))
print(os.listdir("../input/trackml/"))
print(os.listdir(DATASET_PATH))
prefix='../input/trackml-particle-identification/'
import zipfile

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
['trackml', 'trackml-particle-identification']
['my_tracks_all.npy', 'my_event000001001.npy', 'my_model.h5', 'my_model_h.h5']
['my_tracks_all.npy', 'my_event000001001.npy', 'my_model.h5', 'my_model_h.h5']


Now let us define some important helper functions like a timer to measure time while training, another function which returns the type of event at CMS from data.

In [17]:
from contextlib import contextmanager
from timeit import default_timer

@contextmanager
def elapsed_timer():
    start = default_timer()
    elapser = lambda: default_timer() - start
    yield lambda: elapser()
    end = default_timer()
    elapser = lambda: end-start

In [39]:
def get_event(event, filter=None):
    zf=zipfile.ZipFile('../input/trackml-particle-identification/train_1.zip','r')
    zipfile.ZipFile.namelist(zf)
    hits = pd.read_csv(zf.open('train_1/%s-hits.csv'%event))
    cells = pd.read_csv(zf.open('train_1/%s-cells.csv'%event))
    truth = pd.read_csv(zf.open('train_1/%s-truth.csv'%event))
    particles = pd.read_csv(zf.open('train_1/%s-particles.csv'%event))
    return hits, cells, truth, particles

Now let us define our model architecture.

In [5]:
def create_model(fs = 10):
    return nn.Sequential(
        nn.Linear(fs, 800),
        nn.ReLU(),
        nn.Linear(800, 400),
        nn.ReLU(),
        nn.Linear(400, 400),
        nn.ReLU(),
        nn.Linear(400, 400),
        nn.ReLU(),
        nn.Linear(400, 200),
        nn.ReLU(),
        nn.Linear(200, 1),
        nn.Sigmoid()
    )

## Prepare data
+ We will use 10 events for training.
* input: hit pair
* output: 1 if two hits have the same particle_id, 0 otherwise.
* feature size: 10 (5 per hit)

In [59]:
USE_GPU = True

TRAIN_1 = True
TRAIN_2 = True
TRAIN = TRAIN_1 or TRAIN_2
REDUCE_ON_PLATEAU = True

LOADING_MODEL = True
LOADING_MODEL_H = True

PRE_PROCESS = True
PRE_PROCESS_H = True

SAVING = True
LOADING_PREFIX = DATASET_PATH
EVENT_SIZE_PATH = 'event_rows.csv'
EVENT_SIZE_H_PATH = 'event_rows-h.csv'

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [19]:
def get_features(event_name):
    hits, cells, truth, particles = get_event(event_name)
    
    # Filter out un-used columns early
    hits = hits[['hit_id', 'x', 'y', 'z']]
    truth = truth[['particle_id', 'hit_id']]    
    

    # as_index=False so the group by retain the column name
    cell_by_hit_id = cells.groupby(['hit_id'], as_index=False)
    cell_count = cell_by_hit_id.value.count().rename(columns={'value':'cell_count'})
    charge_value = cell_by_hit_id.value.sum().rename(columns={'value':'charge_value'})
    
    # Scaling
    hits[['x', 'y', 'z']] /= 1000
    cell_count['cell_count'] /= 10
    
    truth = pd.merge(truth, cell_count, on='hit_id')
    truth = pd.merge(truth, charge_value, on='hit_id')
    truth = pd.merge(truth, hits, on='hit_id')
    # The columns of truth are as follow
    # ['particle_id', 'hit_id', 'x', 'y', 'z', 'cell_count', 'charge_value']
    return truth

def pre_process(event_name, print_size=True):
    features = get_features(event_name)
    
    columns_needed = ['x', 'y', 'z', 'cell_count', 'charge_value']
    columns_needed_all = [c + '_x' for c in columns_needed] + [c + '_y' for c in columns_needed] + ['label']

    # Get all the hits that's identified with a particle
    true_pairs = features[features.particle_id != 0]
    # Merge to create all hit pairs that's identified with the same particle
    true_pairs = pd.merge(true_pairs, true_pairs, on='particle_id')
    # Filter all the pairs that has the same hit_id
    true_pairs = true_pairs[true_pairs.hit_id_x != true_pairs.hit_id_y]
    # Add a new column to indicate this dataset is the true dataset
    true_pairs['label'] = 1
    # Filter the only columns needed
    true_pairs = true_pairs[columns_needed_all]
    
    FALSE_PAIR_RATIO = 3
    size = len(true_pairs) * FALSE_PAIR_RATIO
    p_id = features.particle_id.values
    # Generated random hit idx pairs
    i = np.random.randint(len(features), size=size)
    j = np.random.randint(len(features), size=size)
    # Get the hit idx pair that's either assoicated with particle id 0 or different particle id
    hit_idx = (p_id[i]==0) | (p_id[i]!=p_id[j])
    i, j = i[hit_idx], j[hit_idx]
    # Filter and create features with the correct order of the columns
    features = features[columns_needed]
    false_pairs = pd.DataFrame(
        np.hstack((features.values[i], features.values[j], np.zeros((len(i),1)))),
        columns=columns_needed_all)

    processed = pd.concat([true_pairs, false_pairs], axis=0)
    processed = processed.sample(frac=1).reset_index(drop=True)
    
    if print_size:
        # Create a DataFrame just to pretty-print ;)
        print(event_name)
        print(pd
              .DataFrame(data={
                  'True': ['{:,}'.format(len(true_pairs))],
                  'False': ['{:,}'.format(len(false_pairs))],
                  'Total': ['{:,}'.format(len(processed))]
              })
              .to_string(index=False))
    return processed

In [40]:
if PRE_PROCESS:
    event_rows = []
    for i in tqdm_notebook(range(10, 20)):
        event_name = 'event0000010%02d'%i
        file_name = '%s.feather' % event_name
        processed = pre_process(event_name)
        event_rows.append((file_name, len(processed.index)))
        processed.to_feather(file_name) # Save to disk
        print('saved %s' % file_name)

    pd.DataFrame(event_rows).to_csv(EVENT_SIZE_PATH, index=False)
    print('event rows saved')
    del processed
else:
    print('load event rows')
    event_rows = list(pd.read_csv(LOADING_PREFIX + EVENT_SIZE_PATH).itertuples(index=False, name=None))
    event_rows = [(LOADING_PREFIX + r[0], r[1]) for r in event_rows]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

event000001010
True      False      Total
905,342  2,715,801  3,621,143
saved event000001010.feather
event000001011
True      False      Total
1,049,940  3,149,570  4,199,510
saved event000001011.feather
event000001012
True      False      Total
975,962  2,927,653  3,903,615
saved event000001012.feather
event000001013
True      False      Total
937,140  2,811,171  3,748,311
saved event000001013.feather
event000001014
True      False      Total
1,138,386  3,414,881  4,553,267
saved event000001014.feather
event000001015
True      False      Total
1,096,946  3,290,572  4,387,518
saved event000001015.feather
event000001016
True      False      Total
1,054,472  3,163,160  4,217,632
saved event000001016.feather
event000001017
True      False      Total
1,125,976  3,377,673  4,503,649
saved event000001017.feather
event000001018
True      False      Total
787,588  2,362,522  3,150,110
saved event000001018.feather
event000001019
True      False      Total
1,099,946  3,299,557  4,399,503
saved e

In [41]:
from datetime import datetime
from feather import read_dataframe as feather_read
from multiprocessing import current_process
from threading import current_thread
import bisect

class FeatherCache():
    @staticmethod
    def cumsum(processed_rows):
        r, s = [], 0
        for row in processed_rows:
            l = row[1]
            r.append(l + s)
            s += l
        return r
    
    def __init__(self, processed_rows, cache_size=2, print_proc=False):
        self.processed_rows = processed_rows
        self.cumulative_sizes = self.cumsum(processed_rows)
        self.cache_size = cache_size
        self.print_proc = print_proc
        
        # warm up the loading by having two processed events loaded
        self.cache = {}
        for file_name, size in processed_rows[0:cache_size]:
            self.cache[file_name] = feather_read(file_name)
        
        self.time_stamps = {}
        

    def __len__(self):
        return self.cumulative_sizes[-1]
    
    @property
    def LRU_filename(self):
        least = None
        for file_name, time_stamp in self.time_stamps.items():
            if least is None:
                least = (file_name, time_stamp)
            elif time_stamp < least[1]:
                least = (file_name, time_stamp)
        return least[0]
    
    #TODO prefetch in another process when the file is loaded
    # https://stackoverflow.com/questions/45394783/multiprocess-reading-from-file
    def get_file_dataframe(self, file_name):
        if file_name in self.cache:
            # If in the cache, just get it
            self.time_stamps[file_name] = datetime.now()
            return self.cache[file_name]
        else:
            if self.print_proc:
                process_name = current_process().name
                thread_name = current_thread().name
                print('reading %s from thread %s, and process %s' % (file_name, thread_name, process_name))
            if len(self.cache) > self.cache_size:
                key = self.LRU_filename
                if self.print_proc:
                    print('delete %s' % key)
                del self.cache[key]
                del self.time_stamps[key]

            self.cache[file_name] = feather_read(file_name)
            self.time_stamps[file_name] = datetime.now()
            return self.cache[file_name]
                
    def get_map(self, indcies):
        # Map the indices back to to file_name and its corrsponding indcies
        file_dict = {}
        # Optimize for single dataset
        if (len(indcies) >= 2):
            front_idx = bisect.bisect_right(self.cumulative_sizes, indcies[0])
            back_idx = bisect.bisect_right(self.cumulative_sizes, indcies[-1])
            if front_idx == back_idx:
                file_name = self.processed_rows[front_idx][0]
                if front_idx == 0:
                    return {file_name : indcies}
            
        #else:
        for idx in indcies:
            dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
            if dataset_idx == 0:
                sample_idx = idx
            else:
                sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
            file_name = self.processed_rows[dataset_idx][0]
            file_dict.setdefault(file_name, []).append(sample_idx)
        return file_dict
    
    def get_items(self, indcies):
        d = self.get_map(indcies)
        ds = None
        for file_name in d:
            sample_idxs = d[file_name]
            f_ds = self.get_file_dataframe(file_name).iloc[sample_idxs]
            if ds is None:
                ds = f_ds
            else:
                ds = ds.append(f_ds, ignore_index=True)
        return ds
            
class FeatherDataset(Dataset):
    def __init__(self, feather_cache, to_items_fn=None):
        self.cache = feather_cache
        self.to_items_fn = to_items_fn
        
    def __len__(self):
        return len(self.cache)

    def __getitem__(self, idx):
        return idx;
    
    def get_items(self, indcies):
        ds = self.cache.get_items(indcies)
        rows = torch.as_tensor(ds.values)
        return rows if self.to_items_fn is None else self.to_items_fn(rows)
    
    @property
    def collate_fn(self):
        return self.get_items

class DataframeDataset(Dataset):
    def __init__(self, dataframe, opti_seq=False, to_items_fn=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.dataframe = dataframe
        self.opti_seq = opti_seq
        self.to_items_fn = to_items_fn
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        return idx
    
    def get_items(self, indcies):
        if self.opti_seq and len(indcies) >= 2:
            rows = torch.as_tensor(self.dataframe.values[indcies[0]:indcies[-1]+1])
        else:
            rows = torch.as_tensor(self.dataframe.iloc[indcies].values)
        return rows if self.to_items_fn is None else self.to_items_fn(rows)
    
    @property
    def collate_fn(self):
        return self.get_items

In [42]:
class SequentialRangeSampler(Sampler):
    def __init__(self, data_source, num_samples=None):
        self.data_source = data_source
        self.num_samples = range(len(self.data_source)) if num_samples is None else num_samples

    def __iter__(self):
        return iter(self.num_samples)

    def __len__(self):
        return len(self.data_source)

In [43]:
def create_loaders(dataset, batch_size, validation_split):
    dataset_size = len(dataset)
    num_val = int(validation_split * dataset_size)
    num_train = dataset_size - num_val

    loader_train = DataLoader(dataset, batch_size=batch_size, num_workers=0, pin_memory=True,
                              sampler=SequentialRangeSampler(range(num_train)),
                              collate_fn=dataset.collate_fn)
    loader_val = DataLoader(dataset, batch_size=batch_size, num_workers=0, pin_memory=True,
                            sampler=SequentialRangeSampler(range(num_train, dataset_size)),
                            collate_fn=dataset.collate_fn)
    return loader_train, loader_val

In [44]:
if TRAIN_1:
    batch_size = 8000
    validation_split = .05 # 5%
    cache = FeatherCache(event_rows)
    dataset = FeatherDataset(cache, lambda rows : (rows[:, :-1], rows[:, -1].view(-1, 1)))
    loader_train, loader_val = create_loaders(dataset, batch_size, validation_split)

## Training the model
We have already defined model architecture, dataloader. Now let us define the accuracy and train 1 epoch function.

In [45]:
def check_accuracy(loader, model, thr=0.5):
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = y.view(-1).to(device=device, dtype=torch.uint8)
            scores = model(x)
            scores = (scores > thr).view(-1)
            num_correct += (scores == y).sum()
            num_samples += scores.size(0)
        acc = float(num_correct) / num_samples
        return (num_correct, num_samples, acc)

In [46]:
def train_model(model, optimizer, criterion, loader_train, loader_val, epochs=1, reduce_on_plateau=False, epoch_callback=None):
    with elapsed_timer() as elapser:
        model = model.to(device=device)  # move the model parameters to CPU/GPU
        total_second = 0
        if reduce_on_plateau:
            scheduler = ReduceLROnPlateau(optimizer, 'max', patience=5, threshold=1e-3, verbose=True)
        for e in tqdm_notebook(range(epochs)):
            begin_epoch = elapser()
            for t, (x, y) in enumerate(tqdm_notebook(loader_train, desc='Epoch %d' % e, leave=False)):
                model.train()  # put model to training mode
                x = x.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
                y = y.view(-1, 1).to(device=device, dtype=torch.float) # BCELoss only support float as y
                optimizer.zero_grad()
                scores = model(x)
                loss = criterion(scores, y)
                loss.backward()
                optimizer.step()

            num_correct, num_samples, acc = check_accuracy(loader_val, model)
            end_epoch = elapser()
            print('%.2fs - Epoch %d, Iteration %d, loss = %.4f, %d / %d correct (%.2f %%)' % (end_epoch - begin_epoch, e, t, loss.item(), num_correct, num_samples, acc * 100))
            if epoch_callback is not None:
                epoch_callback(num_correct, num_samples, acc, loss)
            if reduce_on_plateau:
                scheduler.step(acc)
    print('Total time: %.2fs' % elapser())

In [52]:
model_torch = create_model().to(device)

In [53]:
if TRAIN_1 and REDUCE_ON_PLATEAU:
    lr = -3
    optimizer = optim.Adam(model_torch.parameters(), lr=10**lr)
    criterion = nn.BCELoss()
    train_model(model_torch, optimizer, criterion, loader_train, loader_val, epochs=4, reduce_on_plateau=True)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Epoch 0', max=4832, style=ProgressStyle(description_width='in…

126.26s - Epoch 0, Iteration 4831, loss = 0.0479, 2002823 / 2034212 correct (98.46 %)


HBox(children=(IntProgress(value=0, description='Epoch 1', max=4832, style=ProgressStyle(description_width='in…

125.77s - Epoch 1, Iteration 4831, loss = 0.0441, 2007592 / 2034212 correct (98.69 %)


HBox(children=(IntProgress(value=0, description='Epoch 2', max=4832, style=ProgressStyle(description_width='in…

118.16s - Epoch 2, Iteration 4831, loss = 0.0318, 2008918 / 2034212 correct (98.76 %)


HBox(children=(IntProgress(value=0, description='Epoch 3', max=4832, style=ProgressStyle(description_width='in…

116.36s - Epoch 3, Iteration 4831, loss = 0.0350, 2011454 / 2034212 correct (98.88 %)
Total time: 486.61s


Now let me save our model first.

In [54]:
if TRAIN_1 and SAVING:
    print('saving model')
    torch.save(model_torch.state_dict(), 'torch_model.pt')

saving model


We saved our model, now if we want to load it again, we'll just execute\
`the_model = model_torch()`\
`the_model.load_state_dict(torch.load('torch_model.pt'))`

## Hard Negative Mining

Our model landed an accuracy of 98.88% which is quite impressive but we need to decrease our false positives even more and ultimately boost model accuracy therefore I will now utilise Hard Negative Mining, ie. we will simply create more negative samples and train our model to see the results.

In [55]:
def predict(dataframe, model, batch_size=8000, num_worker=0):
    rows = torch.as_tensor(dataframe.values)
    num_elements = len(rows)
    num_batches = -(-num_elements // batch_size) # Round up
    model.eval()  # set model to evaluation mode
    scores = torch.zeros(num_elements, dtype=torch.float)
    with torch.no_grad():
        for i in range(num_batches):
            start = i * batch_size
            end = num_elements if i == num_batches - 1 else start + batch_size
            x_batch = rows[start:end]
            x_batch = x_batch.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            scores[start:end] = model(x_batch).view(-1)

    return scores

def predict_true(dataframe, model, batch_size=8000, thr=0.5):
    scores = predict(dataframe, model, batch_size)
    indices = (scores > thr).nonzero()[:, 0]
    return dataframe.iloc[indices]

In [56]:
def negative_mine(model, event_name, smaple_size=30000000, print_size=True):
    with elapsed_timer() as elapser: 
        hits, cells, truth, particles = get_event(event_name)

        # Filter out un-used columns early
        hits = hits[['hit_id', 'x', 'y', 'z']]
        truth = truth[['particle_id', 'hit_id']]    


        # as_index=False so the group by retain the column name
        cell_by_hit_id = cells.groupby(['hit_id'], as_index=False)
        cell_count = cell_by_hit_id.value.count().rename(columns={'value':'cell_count'})
        charge_value = cell_by_hit_id.value.sum().rename(columns={'value':'charge_value'})

        # Scaling
        hits[['x', 'y', 'z']] /= 1000
        cell_count['cell_count'] /= 10

        features = pd.merge(truth, cell_count, on='hit_id')
        features = pd.merge(features, charge_value, on='hit_id')
        features = pd.merge(features, hits, on='hit_id')
        # The columns of truth are as follow
        # ['particle_id', 'hit_id', 'x', 'y', 'z', 'cell_count', 'charge_value']

        columns_needed = ['x', 'y', 'z', 'cell_count', 'charge_value']
        columns_needed_all = [c + '_x' for c in columns_needed] + [c + '_y' for c in columns_needed]

        p_id = features.particle_id.values
        # Generated random hit idx pairs
        i = np.random.randint(len(features), size=smaple_size)
        j = np.random.randint(len(features), size=smaple_size)
        # Get the hit idx pair that's either assoicated with particle id 0 or different particle id
        hit_idx = (p_id[i]==0) | (p_id[i]!=p_id[j])
        i, j = i[hit_idx], j[hit_idx]
        # Filter and create features with the correct order of the columns
        features = features[columns_needed]
        false_pairs = pd.DataFrame(
            np.hstack((features.values[i], features.values[j])),
            columns=columns_needed_all)

        before_size = len(false_pairs)
        false_pairs = predict_true(false_pairs, model_torch).reset_index(drop=True)
        false_pairs['label'] = 0
        after_size = len(false_pairs)
        if print_size:
            print(event_name)
            print('%.2fs - Before: %s, After: %s, Percent Pass: %d%%' % (elapser(), '{:,}'.format(before_size), '{:,}'.format(after_size), after_size/before_size*100))
        return false_pairs

In [58]:
def preprocess_h():
    event_rows_h = []
    for idx, i in enumerate(tqdm_notebook(range(10,20))):
        event_name = 'event0000010%02d' % i
        file_name = '%s.feather' % event_name
        processed_negative = negative_mine(model_torch, event_name)
        with elapsed_timer() as elapser:
            processed = feather_read(event_rows[idx][0]) # read the path from event_rows loaded
            processed = processed.append(processed_negative, ignore_index=True)
            processed = processed.sample(frac=1).reset_index(drop=True)
            print('Read, append and re-sample: %.2fs' % elapser())
        event_rows_h.append((file_name, len(processed.index)))
        processed.to_feather(file_name) # Save to disk
        print('saved %s' % file_name)

    pd.DataFrame(event_rows_h).to_csv(EVENT_SIZE_H_PATH, index=False)
    print('event rows h saved')
    return event_rows_h

# if you skip step2, you still need to run step1 to get training data.
if LOADING_MODEL:
    print('load model')
    model_torch.load_state_dict(torch.load('torch_model.pt'))

# Preprocess
if PRE_PROCESS_H:
    event_rows_h = preprocess_h()
else:
        print('load event rows hard')
        event_rows_h = list(pd.read_csv(LOADING_PREFIX + EVENT_SIZE_H_PATH).itertuples(index=False, name=None))
        event_rows_h = [(LOADING_PREFIX + r[0], r[1]) for r in event_rows_h]

load model


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

event000001010
21.89s - Before: 29,997,177, After: 240,187, Percent Pass: 0%
Read, append and re-sample: 2.12s
saved event000001010.feather
event000001011
21.70s - Before: 29,997,486, After: 235,823, Percent Pass: 0%
Read, append and re-sample: 2.68s
saved event000001011.feather
event000001012
22.27s - Before: 29,997,333, After: 230,943, Percent Pass: 0%
Read, append and re-sample: 2.47s
saved event000001012.feather
event000001013
22.28s - Before: 29,997,320, After: 232,271, Percent Pass: 0%
Read, append and re-sample: 2.61s
saved event000001013.feather
event000001014
22.12s - Before: 29,997,619, After: 235,950, Percent Pass: 0%
Read, append and re-sample: 2.92s
saved event000001014.feather
event000001015
21.91s - Before: 29,997,637, After: 237,265, Percent Pass: 0%
Read, append and re-sample: 2.89s
saved event000001015.feather
event000001016
21.73s - Before: 29,997,525, After: 232,235, Percent Pass: 0%
Read, append and re-sample: 2.66s
saved event000001016.feather
event000001017
22.36

In [60]:
if TRAIN_2:
    batch_size = 8000
    validation_split = .05 # 5%
    cache = FeatherCache(event_rows_h[::-1]) # invert to switch it up?
    dataset = FeatherDataset(cache, lambda rows : (rows[:, :-1], rows[:, -1].view(-1, 1)))
    loader_train, loader_val = create_loaders(dataset, batch_size, validation_split)

In [62]:
if TRAIN_2 and REDUCE_ON_PLATEAU:
    lr = -3
    optimizer = optim.Adam(model_torch.parameters(), lr=10**lr)
    criterion = nn.BCELoss()
    train_model(model_torch, optimizer, criterion, loader_train, loader_val, epochs=5, reduce_on_plateau=True)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Epoch 0', max=5111, style=ProgressStyle(description_width='in…

124.55s - Epoch 0, Iteration 5110, loss = 0.0652, 2089755 / 2151692 correct (97.12 %)


HBox(children=(IntProgress(value=0, description='Epoch 1', max=5111, style=ProgressStyle(description_width='in…

122.31s - Epoch 1, Iteration 5110, loss = 0.0607, 2093197 / 2151692 correct (97.28 %)


HBox(children=(IntProgress(value=0, description='Epoch 2', max=5111, style=ProgressStyle(description_width='in…

121.31s - Epoch 2, Iteration 5110, loss = 0.0569, 2094933 / 2151692 correct (97.36 %)


HBox(children=(IntProgress(value=0, description='Epoch 3', max=5111, style=ProgressStyle(description_width='in…

122.89s - Epoch 3, Iteration 5110, loss = 0.0539, 2095947 / 2151692 correct (97.41 %)


HBox(children=(IntProgress(value=0, description='Epoch 4', max=5111, style=ProgressStyle(description_width='in…

121.67s - Epoch 4, Iteration 5110, loss = 0.0513, 2097183 / 2151692 correct (97.47 %)
Total time: 612.77s


In [64]:
if TRAIN_2 and SAVING:
    torch.save(model_torch.state_dict(), 'torch_model_h.pt')
if TRAIN_2:
    del loader_train
    del loader_val

Wow, we are done the training!