#Data processing
Get the electron and photon dataset into colab

In [1]:
import requests
import io
url='https://cernbox.cern.ch/remote.php/dav/public-files/FbXw3V4XNyYB3oA/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5'
with requests.Session() as session:
    r = session.get(url, stream=True)
    r.raise_for_status()
    with open('SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5', 'wb') as hd5:
        for chunk in r.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
            hd5.write(chunk)

In [2]:
url='https://cernbox.cern.ch/remote.php/dav/public-files/AtBT8y4MiQYFcgc/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5'
with requests.Session() as session:
    r = session.get(url, stream=True)
    r.raise_for_status()
    with open('SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5', 'wb') as hd5:
        for chunk in r.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
            hd5.write(chunk)

Importing all the necessary libraries to process and plot the dataset. As well as necessary pytorch libraries to train the model. 

In [1]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve

In [2]:
data_electron = h5py.File('/content/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5')
data_photon = h5py.File('/content/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5')

In [3]:
datasets = [data_electron, data_photon]
X = np.concatenate([data['/X'][:75000] for data in datasets])
y = np.concatenate([data['/y'][:75000] for data in datasets])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
class particleDataset(Dataset):
  def __init__(self, X, y):
    self.X = X
    self.y = y
  def __len__(self):
    return len(self.X)
  def __getitem__(self, index):
    # Select sample
    image = self.X[index]

    X = self.transform(image)
    return X, self.y[index]

  transform = transforms.Compose([
        transforms.ToTensor()])

In [39]:
batch_size = 64
transformed_dataset = particleDataset(X_train, y_train)
trainloader = DataLoader(transformed_dataset, batch_size, shuffle=True, num_workers=3, pin_memory=True)
transformed_test_dataset = particleDataset(X_test, y_test)
testloader = DataLoader(transformed_test_dataset, batch_size, shuffle=True, num_workers=3, pin_memory=True)



In [40]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [43]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        
        self.fc1 = nn.Linear(in_features=128 * 8 * 8, out_features=256)
        self.relu4 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(in_features=256, out_features=2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        
        x = x.view(-1, 128 * 8 * 8)
        
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.fc2(x)
        return x


In [48]:
from sklearn.metrics import accuracy_score, roc_auc_score

criterion = nn.CrossEntropyLoss()
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

# training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    total = 0
    correct = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        labels = labels.type(torch.LongTensor)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(F.softmax(outputs, dim=1), 1)   # get predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[Epoch %d, Batch %5d] loss: %.3f, accuracy: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100, correct / total))
            running_loss = 0.0
            total = 0
            correct = 0
    accuracy = correct / total
    print('Epoch %d accuracy: %.3f' % (epoch + 1, accuracy))
    # evaluate on test set
    model.eval()
    test_predictions = []
    test_targets = []
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(F.softmax(outputs, dim=1), 1)
            test_predictions.extend(predicted.tolist())
            test_targets.extend(labels.tolist())
    test_accuracy = accuracy_score(test_targets, test_predictions)
    test_roc_auc = roc_auc_score(test_targets, test_predictions)
    print('Epoch %d test accuracy: %.3f, test ROC AUC score: %.3f' % (epoch + 1, test_accuracy, test_roc_auc))
filepath = "/content/modelModule.pt"
torch.save(model.state_dict(), filepath)



[Epoch 1, Batch   100] loss: 0.691, accuracy: 0.526
[Epoch 1, Batch   200] loss: 0.677, accuracy: 0.580
[Epoch 1, Batch   300] loss: 0.667, accuracy: 0.593
[Epoch 1, Batch   400] loss: 0.663, accuracy: 0.604
[Epoch 1, Batch   500] loss: 0.660, accuracy: 0.609
[Epoch 1, Batch   600] loss: 0.661, accuracy: 0.604
[Epoch 1, Batch   700] loss: 0.652, accuracy: 0.618
[Epoch 1, Batch   800] loss: 0.660, accuracy: 0.605
[Epoch 1, Batch   900] loss: 0.655, accuracy: 0.615
[Epoch 1, Batch  1000] loss: 0.650, accuracy: 0.621
[Epoch 1, Batch  1100] loss: 0.652, accuracy: 0.617
[Epoch 1, Batch  1200] loss: 0.654, accuracy: 0.618
[Epoch 1, Batch  1300] loss: 0.649, accuracy: 0.622
[Epoch 1, Batch  1400] loss: 0.645, accuracy: 0.628
[Epoch 1, Batch  1500] loss: 0.647, accuracy: 0.624
[Epoch 1, Batch  1600] loss: 0.647, accuracy: 0.626
[Epoch 1, Batch  1700] loss: 0.653, accuracy: 0.619
[Epoch 1, Batch  1800] loss: 0.645, accuracy: 0.624
Epoch 1 accuracy: 0.627
Epoch 1 test accuracy: 0.629, test ROC A



[Epoch 2, Batch   100] loss: 0.642, accuracy: 0.630
[Epoch 2, Batch   200] loss: 0.641, accuracy: 0.635
[Epoch 2, Batch   300] loss: 0.638, accuracy: 0.628
[Epoch 2, Batch   400] loss: 0.642, accuracy: 0.636
[Epoch 2, Batch   500] loss: 0.633, accuracy: 0.645
[Epoch 2, Batch   600] loss: 0.634, accuracy: 0.645
[Epoch 2, Batch   700] loss: 0.628, accuracy: 0.652
[Epoch 2, Batch   800] loss: 0.632, accuracy: 0.648
[Epoch 2, Batch   900] loss: 0.627, accuracy: 0.650
[Epoch 2, Batch  1000] loss: 0.625, accuracy: 0.650
[Epoch 2, Batch  1100] loss: 0.620, accuracy: 0.660
[Epoch 2, Batch  1200] loss: 0.628, accuracy: 0.649
[Epoch 2, Batch  1300] loss: 0.628, accuracy: 0.649
[Epoch 2, Batch  1400] loss: 0.624, accuracy: 0.655
[Epoch 2, Batch  1500] loss: 0.618, accuracy: 0.662
[Epoch 2, Batch  1600] loss: 0.621, accuracy: 0.657
[Epoch 2, Batch  1700] loss: 0.622, accuracy: 0.660
[Epoch 2, Batch  1800] loss: 0.610, accuracy: 0.670
Epoch 2 accuracy: 0.659
Epoch 2 test accuracy: 0.668, test ROC A



[Epoch 3, Batch   100] loss: 0.614, accuracy: 0.668
[Epoch 3, Batch   200] loss: 0.619, accuracy: 0.663
[Epoch 3, Batch   300] loss: 0.610, accuracy: 0.672
[Epoch 3, Batch   400] loss: 0.612, accuracy: 0.674
[Epoch 3, Batch   500] loss: 0.609, accuracy: 0.674
[Epoch 3, Batch   600] loss: 0.605, accuracy: 0.678
[Epoch 3, Batch   700] loss: 0.604, accuracy: 0.682
[Epoch 3, Batch   800] loss: 0.610, accuracy: 0.677
[Epoch 3, Batch   900] loss: 0.611, accuracy: 0.671
[Epoch 3, Batch  1000] loss: 0.607, accuracy: 0.672
[Epoch 3, Batch  1100] loss: 0.607, accuracy: 0.677
[Epoch 3, Batch  1200] loss: 0.605, accuracy: 0.682
[Epoch 3, Batch  1300] loss: 0.614, accuracy: 0.669
[Epoch 3, Batch  1400] loss: 0.608, accuracy: 0.679
[Epoch 3, Batch  1500] loss: 0.605, accuracy: 0.682
[Epoch 3, Batch  1600] loss: 0.604, accuracy: 0.681
[Epoch 3, Batch  1700] loss: 0.600, accuracy: 0.687
[Epoch 3, Batch  1800] loss: 0.599, accuracy: 0.691
Epoch 3 accuracy: 0.675
Epoch 3 test accuracy: 0.691, test ROC A



[Epoch 4, Batch   100] loss: 0.604, accuracy: 0.677
[Epoch 4, Batch   200] loss: 0.606, accuracy: 0.680
[Epoch 4, Batch   300] loss: 0.598, accuracy: 0.681
[Epoch 4, Batch   400] loss: 0.595, accuracy: 0.693
[Epoch 4, Batch   500] loss: 0.588, accuracy: 0.695
[Epoch 4, Batch   600] loss: 0.603, accuracy: 0.682
[Epoch 4, Batch   700] loss: 0.602, accuracy: 0.684
[Epoch 4, Batch   800] loss: 0.599, accuracy: 0.680
[Epoch 4, Batch   900] loss: 0.592, accuracy: 0.688
[Epoch 4, Batch  1000] loss: 0.600, accuracy: 0.683
[Epoch 4, Batch  1100] loss: 0.591, accuracy: 0.697
[Epoch 4, Batch  1200] loss: 0.600, accuracy: 0.684
[Epoch 4, Batch  1300] loss: 0.592, accuracy: 0.692
[Epoch 4, Batch  1400] loss: 0.597, accuracy: 0.691
[Epoch 4, Batch  1500] loss: 0.587, accuracy: 0.695
[Epoch 4, Batch  1600] loss: 0.601, accuracy: 0.680
[Epoch 4, Batch  1700] loss: 0.600, accuracy: 0.690
[Epoch 4, Batch  1800] loss: 0.597, accuracy: 0.690
Epoch 4 accuracy: 0.686
Epoch 4 test accuracy: 0.691, test ROC A



[Epoch 5, Batch   100] loss: 0.585, accuracy: 0.700
[Epoch 5, Batch   200] loss: 0.595, accuracy: 0.690
[Epoch 5, Batch   300] loss: 0.597, accuracy: 0.686
[Epoch 5, Batch   400] loss: 0.599, accuracy: 0.678
[Epoch 5, Batch   500] loss: 0.591, accuracy: 0.692
[Epoch 5, Batch   600] loss: 0.582, accuracy: 0.698
[Epoch 5, Batch   700] loss: 0.584, accuracy: 0.703
[Epoch 5, Batch   800] loss: 0.600, accuracy: 0.685
[Epoch 5, Batch   900] loss: 0.595, accuracy: 0.687
[Epoch 5, Batch  1000] loss: 0.600, accuracy: 0.677
[Epoch 5, Batch  1100] loss: 0.587, accuracy: 0.696
[Epoch 5, Batch  1200] loss: 0.591, accuracy: 0.691
[Epoch 5, Batch  1300] loss: 0.584, accuracy: 0.695
[Epoch 5, Batch  1400] loss: 0.591, accuracy: 0.696
[Epoch 5, Batch  1500] loss: 0.595, accuracy: 0.690
[Epoch 5, Batch  1600] loss: 0.581, accuracy: 0.703
[Epoch 5, Batch  1700] loss: 0.586, accuracy: 0.695
[Epoch 5, Batch  1800] loss: 0.586, accuracy: 0.696
Epoch 5 accuracy: 0.688
Epoch 5 test accuracy: 0.699, test ROC A



[Epoch 6, Batch   100] loss: 0.581, accuracy: 0.699
[Epoch 6, Batch   200] loss: 0.582, accuracy: 0.699
[Epoch 6, Batch   300] loss: 0.589, accuracy: 0.691
[Epoch 6, Batch   400] loss: 0.593, accuracy: 0.688
[Epoch 6, Batch   500] loss: 0.589, accuracy: 0.694
[Epoch 6, Batch   600] loss: 0.583, accuracy: 0.701
[Epoch 6, Batch   700] loss: 0.584, accuracy: 0.700
[Epoch 6, Batch   800] loss: 0.584, accuracy: 0.698
[Epoch 6, Batch   900] loss: 0.582, accuracy: 0.697
[Epoch 6, Batch  1000] loss: 0.585, accuracy: 0.698
[Epoch 6, Batch  1100] loss: 0.590, accuracy: 0.690
[Epoch 6, Batch  1200] loss: 0.586, accuracy: 0.697
[Epoch 6, Batch  1300] loss: 0.568, accuracy: 0.714
[Epoch 6, Batch  1400] loss: 0.575, accuracy: 0.708
[Epoch 6, Batch  1500] loss: 0.580, accuracy: 0.707
[Epoch 6, Batch  1600] loss: 0.587, accuracy: 0.699
[Epoch 6, Batch  1700] loss: 0.583, accuracy: 0.700
[Epoch 6, Batch  1800] loss: 0.580, accuracy: 0.698
Epoch 6 accuracy: 0.685
Epoch 6 test accuracy: 0.688, test ROC A



[Epoch 7, Batch   100] loss: 0.588, accuracy: 0.692
[Epoch 7, Batch   200] loss: 0.579, accuracy: 0.701
[Epoch 7, Batch   300] loss: 0.585, accuracy: 0.699
[Epoch 7, Batch   400] loss: 0.585, accuracy: 0.697
[Epoch 7, Batch   500] loss: 0.582, accuracy: 0.699
[Epoch 7, Batch   600] loss: 0.562, accuracy: 0.717
[Epoch 7, Batch   700] loss: 0.569, accuracy: 0.710
[Epoch 7, Batch   800] loss: 0.578, accuracy: 0.704
[Epoch 7, Batch   900] loss: 0.579, accuracy: 0.706
[Epoch 7, Batch  1000] loss: 0.585, accuracy: 0.699
[Epoch 7, Batch  1100] loss: 0.582, accuracy: 0.698
[Epoch 7, Batch  1200] loss: 0.580, accuracy: 0.700
[Epoch 7, Batch  1300] loss: 0.577, accuracy: 0.705
[Epoch 7, Batch  1400] loss: 0.572, accuracy: 0.713
[Epoch 7, Batch  1500] loss: 0.578, accuracy: 0.695
[Epoch 7, Batch  1600] loss: 0.583, accuracy: 0.700
[Epoch 7, Batch  1700] loss: 0.576, accuracy: 0.703
[Epoch 7, Batch  1800] loss: 0.589, accuracy: 0.693
Epoch 7 accuracy: 0.697
Epoch 7 test accuracy: 0.703, test ROC A



[Epoch 8, Batch   100] loss: 0.563, accuracy: 0.719
[Epoch 8, Batch   200] loss: 0.579, accuracy: 0.701
[Epoch 8, Batch   300] loss: 0.571, accuracy: 0.711
[Epoch 8, Batch   400] loss: 0.576, accuracy: 0.704
[Epoch 8, Batch   500] loss: 0.570, accuracy: 0.710
[Epoch 8, Batch   600] loss: 0.582, accuracy: 0.701
[Epoch 8, Batch   700] loss: 0.572, accuracy: 0.700
[Epoch 8, Batch   800] loss: 0.577, accuracy: 0.707
[Epoch 8, Batch   900] loss: 0.573, accuracy: 0.711
[Epoch 8, Batch  1000] loss: 0.572, accuracy: 0.707
[Epoch 8, Batch  1100] loss: 0.580, accuracy: 0.703
[Epoch 8, Batch  1200] loss: 0.580, accuracy: 0.702
[Epoch 8, Batch  1300] loss: 0.571, accuracy: 0.708
[Epoch 8, Batch  1400] loss: 0.578, accuracy: 0.706
[Epoch 8, Batch  1500] loss: 0.581, accuracy: 0.697
[Epoch 8, Batch  1600] loss: 0.581, accuracy: 0.709
[Epoch 8, Batch  1700] loss: 0.576, accuracy: 0.706
[Epoch 8, Batch  1800] loss: 0.581, accuracy: 0.703
Epoch 8 accuracy: 0.700
Epoch 8 test accuracy: 0.709, test ROC A



[Epoch 9, Batch   100] loss: 0.560, accuracy: 0.719
[Epoch 9, Batch   200] loss: 0.576, accuracy: 0.704
[Epoch 9, Batch   300] loss: 0.576, accuracy: 0.702
[Epoch 9, Batch   400] loss: 0.576, accuracy: 0.709
[Epoch 9, Batch   500] loss: 0.570, accuracy: 0.715
[Epoch 9, Batch   600] loss: 0.571, accuracy: 0.709
[Epoch 9, Batch   700] loss: 0.581, accuracy: 0.700
[Epoch 9, Batch   800] loss: 0.567, accuracy: 0.714
[Epoch 9, Batch   900] loss: 0.575, accuracy: 0.714
[Epoch 9, Batch  1000] loss: 0.575, accuracy: 0.705
[Epoch 9, Batch  1100] loss: 0.568, accuracy: 0.713
[Epoch 9, Batch  1200] loss: 0.576, accuracy: 0.701
[Epoch 9, Batch  1300] loss: 0.572, accuracy: 0.709
[Epoch 9, Batch  1400] loss: 0.568, accuracy: 0.714
[Epoch 9, Batch  1500] loss: 0.575, accuracy: 0.699
[Epoch 9, Batch  1600] loss: 0.574, accuracy: 0.705
[Epoch 9, Batch  1700] loss: 0.568, accuracy: 0.713
[Epoch 9, Batch  1800] loss: 0.567, accuracy: 0.712
Epoch 9 accuracy: 0.708
Epoch 9 test accuracy: 0.706, test ROC A



[Epoch 10, Batch   100] loss: 0.567, accuracy: 0.708
[Epoch 10, Batch   200] loss: 0.567, accuracy: 0.712
[Epoch 10, Batch   300] loss: 0.563, accuracy: 0.718
[Epoch 10, Batch   400] loss: 0.569, accuracy: 0.715
[Epoch 10, Batch   500] loss: 0.571, accuracy: 0.707
[Epoch 10, Batch   600] loss: 0.570, accuracy: 0.707
[Epoch 10, Batch   700] loss: 0.573, accuracy: 0.708
[Epoch 10, Batch   800] loss: 0.568, accuracy: 0.708
[Epoch 10, Batch   900] loss: 0.566, accuracy: 0.716
[Epoch 10, Batch  1000] loss: 0.566, accuracy: 0.706
[Epoch 10, Batch  1100] loss: 0.570, accuracy: 0.708
[Epoch 10, Batch  1200] loss: 0.569, accuracy: 0.713
[Epoch 10, Batch  1300] loss: 0.579, accuracy: 0.698
[Epoch 10, Batch  1400] loss: 0.571, accuracy: 0.709
[Epoch 10, Batch  1500] loss: 0.572, accuracy: 0.705
[Epoch 10, Batch  1600] loss: 0.573, accuracy: 0.707
[Epoch 10, Batch  1700] loss: 0.570, accuracy: 0.712
[Epoch 10, Batch  1800] loss: 0.575, accuracy: 0.709
Epoch 10 accuracy: 0.715
Epoch 10 test accurac