In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import h5py
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
file_electron = "SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5"
file_proton = "SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5"


with h5py.File(file_electron, "r") as f1:
    X_elec = np.array(f1['X'][:])
    y_elec = np.array(f1['y'][:])
with h5py.File(file_proton, "r") as f2:
    X_prot = np.array(f2['X'][:])
    y_prot = np.array(f2['y'][:])

In [3]:
print(X_elec.shape)
print(X_prot.shape)

(249000, 32, 32, 2)
(249000, 32, 32, 2)


In [4]:
X = np.append(X_elec, X_prot, axis=0)
y = np.append(y_elec, y_prot)
X.shape

(498000, 32, 32, 2)

In [5]:
X = np.swapaxes(X, 3,1)
X.shape

(498000, 2, 32, 32)

In [6]:
y = torch.as_tensor(y)

In [7]:
X = torch.from_numpy(X)

In [8]:
dataset = torch.utils.data.TensorDataset(X, y)

In [10]:
train_data,val_data = torch.utils.data.random_split(dataset,[int(len(dataset)*0.9),int(len(dataset)*0.1)])

In [11]:
trainset = torch.utils.data.DataLoader(train_data, batch_size=256, shuffle=True, num_workers=4, pin_memory=True)
valset = torch.utils.data.DataLoader(val_data, batch_size=256, shuffle=True, num_workers=4, pin_memory=True)

In [12]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=16, kernel_size=2, padding='same')
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 16 * 16, 64)
        self.out = nn.Linear(64,2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.out(x))
        return x

In [13]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()

In [14]:
from sklearn.metrics import roc_auc_score
for epoch in range(100):
    running_loss = 0.0
    val_loss = 0.0
    all_y_true = torch.zeros(0, dtype=torch.long, device='cuda')
    all_y_pred = torch.zeros(0, dtype=torch.long, device='cuda')
    all_y_true_val = torch.zeros(0, dtype=torch.long, device='cuda')
    all_y_pred_val = torch.zeros(0, dtype=torch.long, device='cuda')
    
    
    
    for i, data in enumerate(trainset, 0):
        X, y = data
        if torch.cuda.is_available():
            X = X.cuda()
            y = y.cuda()
        optimizer.zero_grad()
        outputs = model(X)
        preds = outputs
        preds = preds.detach()
        _,preds = torch.max(preds, 1)
        all_y_true=torch.cat((all_y_true,y.view(-1)))
        all_y_pred=torch.cat((all_y_pred,preds.view(-1)))
        loss = criterion(outputs, y.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i%500 == 499:
            auc_score = roc_auc_score(all_y_true.cpu().numpy(), all_y_pred.cpu().numpy())
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 500:.3f} auc_score_train: {auc_score}')
            running_loss = 0.0
    
    with torch.no_grad():   
        for j, data in enumerate(valset,0):
            X_val,y_val = data
            if torch.cuda.is_available():
                X_val = X_val.cuda()
                y_val = y_val.cuda()
            val_outputs = model(X_val)
            loss_val = criterion(val_outputs, y_val.long())
            _,val_preds = torch.max(val_outputs, 1)
            all_y_true_val=torch.cat((all_y_true_val,y_val.view(-1)))
            all_y_pred_val=torch.cat((all_y_pred_val,val_preds.view(-1)))
            val_loss += loss_val.item()
    auc_score_val = roc_auc_score(all_y_true_val.cpu().numpy(), all_y_pred_val.cpu().numpy())
    print(f'val_loss: {val_loss/j:.3f} auc_score_val: {auc_score_val}')

[1,   500] loss: 0.689 auc_score_train: 0.5446973756680915
[1,  1000] loss: 0.677 auc_score_train: 0.5639866795837672
[1,  1500] loss: 0.668 auc_score_train: 0.5753346533364443
val_loss: 0.670 auc_score_val: 0.5964844172453844
[2,   500] loss: 0.665 auc_score_train: 0.6036049964386778
[2,  1000] loss: 0.663 auc_score_train: 0.6057784551359567
[2,  1500] loss: 0.661 auc_score_train: 0.6075812348264504
val_loss: 0.666 auc_score_val: 0.6106017600960504
[3,   500] loss: 0.660 auc_score_train: 0.6112778021791709
[3,  1000] loss: 0.660 auc_score_train: 0.6123604498275471
[3,  1500] loss: 0.658 auc_score_train: 0.6136197520054314
val_loss: 0.663 auc_score_val: 0.6149604132033533
[4,   500] loss: 0.658 auc_score_train: 0.6158627827419874
[4,  1000] loss: 0.658 auc_score_train: 0.615840777345405
[4,  1500] loss: 0.656 auc_score_train: 0.6168807770981042
val_loss: 0.661 auc_score_val: 0.6186998513441502
[5,   500] loss: 0.656 auc_score_train: 0.6184601164085904
[5,  1000] loss: 0.654 auc_score_t

As seen from the epoch metrics, we got:

Train AUC Score: 0.74

Validation AUC Score: 0.71