In [2]:
import torch
import torchvision
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import time
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Directory that holds the pickled histogram files. File names are appended to it
# by plain string concatenation, so the trailing slash is required.
data_path = "../data/histograms/"

In [3]:
# Run configuration
VAL_PCT = 0.1     # fraction of N_EVENTS held out from each input file
N_EVENTS = 3000   # event count used to size the split (presumably events per file -- confirm)
EPOCHS = 100      # presumably the number of training epochs; not used in the cells shown here
CUDA = torch.device("cuda")  # handle for the CUDA device
# Unique run name: fixed prefix plus the current Unix time in whole seconds.
MODEL_NAME = f"signal_vs_bg-{int(time.time())}"
print(MODEL_NAME)

signal_vs_bg-1643290944


In [5]:
def load_pickle_file(path, file_name):
    """Load and return the object pickled in the file ``path + file_name``.

    Args:
        path: Directory prefix. It is concatenated with ``file_name`` as-is,
            so it must already end with a path separator.
        file_name: Name of the pickle file inside ``path``.

    Returns:
        The unpickled Python object.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only use
    # this on files from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(path + file_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)

class Net(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Each stage is an unpadded 5x5 convolution, a ReLU and a 2x2 max-pool.
    The flattened feature map then passes through two fully connected layers
    that produce two class scores, returned as softmax probabilities.

    Args:
        input_size: Height and width of the input images, given either as a
            single int (square images) or as a ``(height, width)`` pair.
            The default of 80 yields 4608 flattened features.

    Raises:
        ValueError: If the input is too small to survive the three
            convolution/pooling stages.
    """

    def __init__(self, input_size=80):
        super().__init__()  # initialise the inherited nn.Module machinery
        # Computed before any layer is built so an invalid size fails early.
        flat_features = self._flattened_features(input_size)
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.pool1 = nn.MaxPool2d((2, 2))
        self.pool2 = nn.MaxPool2d((2, 2))
        self.pool3 = nn.MaxPool2d((2, 2))
        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, 2)

    @staticmethod
    def _flattened_features(input_size):
        """Return the number of features left after the three conv/pool stages."""
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        for _ in range(3):
            # An unpadded 5x5 convolution removes 4 pixels per axis; the 2x2
            # max-pool then halves the size, rounding down.
            height = (height - 4) // 2
            width = (width - 4) // 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for three 5x5 conv + 2x2 pool stages"
            )
        return 128 * height * width  # 128 = output channels of conv3

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, 2)``.

        ``x`` must be laid out as ``(batch, 1, height, width)`` with the
        spatial size given at construction.  The output has already been
        through softmax, so it holds probabilities rather than raw logits.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = x.flatten(start_dim=1)  # keep the batch axis, flatten the rest
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)


class CalorimeterDataset(Dataset):
    """Map-style dataset that pairs each image with the label at the same index."""

    def __init__(self, images, labels):
        # Both containers are stored as given; nothing is copied or validated.
        self.img_labels, self.images = labels, images

    def __len__(self):
        """Return the number of samples, taken from the label container."""
        n_samples = len(self.img_labels)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.img_labels[idx]


In [6]:
def load_data(input_files, device=None):
    """Build the training and held-out test datasets from two pickle files.

    The last ``int(N_EVENTS * VAL_PCT)`` events of each file are held out for
    testing; all earlier events of each file are used for training.  Events
    from ``input_files[0]`` are labelled ``[1, 0]`` and events from
    ``input_files[1]`` are labelled ``[0, 1]``.  Label counts are derived from
    the events actually loaded, so images and labels always have equal length.

    Args:
        input_files: Sequence whose first two entries are the names of pickle
            files inside ``data_path``.
        device: Device the tensors are placed on.  Defaults to CUDA when it
            is available and to the CPU otherwise.

    Returns:
        ``(train_dataset, test_dataset)``, both ``CalorimeterDataset`` objects.

    Raises:
        ValueError: If the hold-out size is not strictly between 0 and the
            number of events in a file.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    val_size = int(N_EVENTS * VAL_PCT)

    # Read every file exactly once and split it into (training, held-out) parts.
    train_parts, test_parts = [], []
    for file_name in (input_files[0], input_files[1]):
        events = load_pickle_file(data_path, file_name)
        split = len(events) - val_size
        if not 0 < split < len(events):
            raise ValueError(
                f"cannot hold out {val_size} of the {len(events)} events in {file_name!r}"
            )
        train_parts.append(torch.tensor(list(events[:split])))
        test_parts.append(torch.tensor(list(events[split:])))

    def build_dataset(parts):
        # unsqueeze(3) inserts a size-1 axis at position 3.
        # NOTE(review): nn.Conv2d expects channels at position 1 -- confirm that
        # the training loop reshapes the batch before calling the network.
        images = torch.cat(parts).unsqueeze(3).float().to(device)
        labels = torch.tensor(
            [[1, 0]] * len(parts[0]) + [[0, 1]] * len(parts[1]),
            dtype=torch.float32,
            device=device,
        )
        return CalorimeterDataset(images, labels)

    return build_dataset(train_parts), build_dataset(test_parts)

In [None]:
# Build the datasets from the two ECAL histogram pickle files.
# load_data labels events from the first file [1, 0] and events from the second file [0, 1].
traindata, testdata = load_data(['ECAL_squark_inclusive_80x80_no_sat_level.pkl', 'ECAL_Z_vv_jj_80x80_no_sat_level.pkl'])