## First Attempt at building a deepsets architecture

In [1]:
import random

import pandas as pd
import numpy as np
import pickle
from set_dataloader import CCD
import time

# Import NN Packages
import torch
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing, metrics
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import seaborn as sns
import matplotlib.pyplot as plt

print(torch.__version__)


## Building a deepsets architecture


## Writing Dataset Utility to pass data in the right format

#### Steps:

1. Build a NN that samples an equal number of CCDs for each pixel at resolution 256 and passes them through a Deep Sets network for regression

2. Adapt NN for variable sized inputs

3. Adapt the NN to take 64 subpixel inputs at resolution 2048 and use them to predict the density at resolution 256


In [61]:
# noinspection PyAttributeOutsideInit
class SetSequence(Dataset):
    """Dataset of zero-padded CCD sets, one set per randomly sampled pixel.

    Dimensions:
        N = number of pixels that are sampled (``num_pixels``)
        M = maximum size of each individual set of CCDs (``max_ccds``)

    Every item is a tuple ``(x, y, l)``:
        x -- float tensor of shape (M, num_features); rows that are not filled
             with a CCD stay zero.
        y -- float tensor of shape (1,) with the min-max scaled target.
        l -- entry of ``self.lengths`` for the pixel (a NumPy integer).

    ``set_targets`` has to be called before items are requested.
    """

    def __init__(self, num_pixels=10, max_ccds=30, var_set_len=False):
        """Load the pixel -> CCD mapping, sample ``num_pixels`` pixels and build the inputs.

        Args:
            num_pixels: number of pixels sampled (without replacement) from the dataset.
            max_ccds: maximum number of CCDs kept per pixel.
            var_set_len: if True, ``lengths`` holds the real (capped) set size of
                each pixel; otherwise every length equals ``max_ccds``.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
        with open('../../bricks_data/pixel2ccd_256_non_inclusive.pickle', 'rb') as f:
            self.pixel2ccd_dict = pickle.load(f)

        self.ccd = CCD()
        self.num_features = self.ccd.num_features

        # Dimensions
        self.num_pixels = num_pixels
        self.max_ccds = max_ccds
        self.var_set_len = var_set_len

        df_raw = pd.read_csv('../../bricks_data/dataset_geometric.csv')
        # Randomly sampling pixel indices from the dataframe
        pixel_indices = random.sample(range(len(df_raw)), num_pixels)

        self.df = df_raw.iloc[pixel_indices]
        self.pix_ids = self.df.pixel_id.to_numpy()

        # Targets only exist once set_targets() has been called
        self.target = None

        self.initialise_inputs()

        self.initialise_lengths()

        # Random placeholder labels, only read by set_max_set_len()
        self.label = np.random.rand(self.num_pixels * self.max_ccds)

        # Mask variable-length sets
        #self.set_max_set_len()

    def set_targets(self, gal_type):
        """Select column ``gal_type`` of the sampled dataframe as target and min-max scale it.

        The fitted scaler is kept in ``self.scaler_out``; ``self.target`` ends up
        with shape (num_pixels, 1).
        """
        self.scaler_out = preprocessing.MinMaxScaler()
        raw_target = self.df[gal_type].to_numpy()
        self.target = self.scaler_out.fit_transform(raw_target.reshape(-1, 1))

    def initialise_lengths(self):
        """Fill ``self.lengths`` with the number of CCDs per pixel, capped at ``max_ccds``.

        Without ``var_set_len`` every pixel gets the length ``max_ccds``.
        """
        self.lengths = np.zeros(self.num_pixels, dtype=int)
        if self.var_set_len:
            for i, pix in enumerate(self.pix_ids):
                self.lengths[i] = min(len(self.pixel2ccd_dict[pix]), self.max_ccds)
        else:
            self.lengths.fill(self.max_ccds)

    def initialise_inputs(self):
        """Build ``self.input`` of shape (num_pixels, max_ccds, num_features).

        For every pixel a random selection of at most ``max_ccds`` of its CCDs is
        written to the leading rows; the remaining rows stay zero.
        """
        self.input = np.zeros((self.num_pixels, self.max_ccds, self.num_features))

        # Iterate through the pixels
        for i, pix in enumerate(self.pix_ids):
            # Shuffle a copy so the lists stored in pixel2ccd_dict are left untouched
            ids = list(self.pixel2ccd_dict[pix])
            random.shuffle(ids)
            ids = ids[:self.max_ccds]
            x = self.ccd.get_ccds(ids)
            # Iterate through the CCDs of this pixel
            for j in range(len(ids)):
                self.input[i, j] = x[j]

    def set_max_set_len(self):
        """Build ``self.index_matrix`` (num_pixels, max_ccds), -1 where a slot is unused.

        Every used slot receives a running index into ``self.label``; indices whose
        label equals 0 are skipped. Prints the lengths and the index matrix.
        """
        self.index_matrix = -1 * np.ones((self.num_pixels, self.max_ccds), dtype=int)

        # Getting random labels for now, in the future this will be the output densities

        m = 0
        for i in range(self.num_pixels):

            for j in range(self.lengths[i]):
                # Skipping label == 0 is not needed yet, but this masking will become
                # necessary once there are 64 subpixels per pixel and some of those
                # are not covered by CCDs.
                while self.label[m] == 0:
                    m += 1
                self.index_matrix[i, j] = m
                m += 1

        print(self.lengths)
        print(self.index_matrix)

    def __len__(self):
        """Return the number of sampled pixels."""
        return self.num_pixels

    def __getitem__(self, idx):
        """Return ``(x, y, l)`` for pixel ``idx``.

        Raises:
            RuntimeError: if ``set_targets`` has not been called yet.
        """
        if getattr(self, 'target', None) is None:
            raise RuntimeError("Targets are not set; call set_targets(gal_type) before indexing the dataset.")

        x = torch.from_numpy(self.input[idx]).float()
        # Scalar target -> tensor of shape (1,)
        y = torch.tensor(self.target[idx, 0]).float().unsqueeze(-1)
        l = self.lengths[idx]

        return x, y, l


""" Todo
1. Where to get the data from
2. Scaling --> import an already scaled dataset, this will have to be prepared but should be same for Neural Net
3. Combine larger and smaller dataset
4. Build 64 input channels instead of one, so one more dimension of tensors( NO of Pixels,no_of_subpixels,no_ccds, no_features)
"""

' Todo\n1. Where to get the data from\n2. Scaling --> import an already scaled dataset, this will have to be prepared but should be same for Neural Net\n3. Combine larger and smaller dataset\n4. Build 64 input channels instead of one, so one more dimension of tensors( NO of Pixels,no_of_subpixels,no_ccds, no_features)\n'

In [22]:
# Build a small variable-length dataset, attach the 'lrg' targets and look at one sample
traindata = SetSequence(var_set_len=True)
traindata.set_targets('lrg')
x, y, l = traindata[3]
# set length, target and target shape, one per line
print(l, y, y.shape, sep='\n')



(10,)
(10, 1)
torch.Size([])
torch.Size([1])
9
tensor([1.])
torch.Size([1])


In [88]:
# NOTE(review): `Test` is not defined in the visible part of this notebook, and `ccd`,
# `pixel2subpixel_dict` and `subpixel2ccd_dict` are only created in the cells below --
# TODO confirm where `Test` comes from and run this cell after those definitions.
test = Test(num_pixels=1, max_ccds=5, ccd=ccd, p2p_dict=pixel2subpixel_dict, subpix_dict=subpixel2ccd_dict,
            var_set_len=True)

Trying to Build a Network Capable of Processing 64 Subpixels Simultaneously

In [8]:
# CCD helper imported from set_dataloader; the subpixel cell below calls ccd.get_ccds(ids)
ccd = CCD()

In [None]:
# Toy configuration for the subpixel experiment: one pixel, up to five CCDs per
# subpixel and nine features per CCD.
num_pixels, max_ccds, num_features = 1, 5, 9

# The real data would be sampled from the geometric dataset instead:
#   df_raw = pd.read_csv('../../bricks_data/dataset_geometric.csv')
#   pixel_indices = random.sample(range(len(df_raw)), num_pixels)
#   df = df_raw.iloc[pixel_indices]
#   pix_ids = df.pixel_id.to_numpy()
pix_ids = [1]

# Hand-made hierarchy: pixel p -> subpixels p1..p4, subpixel s -> CCDs s1..s5
pixel2subpixel_dict = {
    pix: [10 * pix + sub for sub in range(1, 5)]
    for pix in (1, 2)
}
subpixel2ccd_dict = {
    subpix: [10 * subpix + ccd_no for ccd_no in range(1, 6)]
    for subpixels in pixel2subpixel_dict.values()
    for subpix in subpixels
}

In [7]:
# Zero-padded container with axes (pixel, subpixel, CCD, feature).
# NOTE: the name `input` shadows the Python builtin; it is kept because the next
# cell prints it under this name.
input = np.zeros((num_pixels, 4, max_ccds, num_features))
# Iterate through the pixels
print("Pixids", pix_ids)
for i, pix in enumerate(pix_ids):

    subpix_ids = pixel2subpixel_dict[pix]
    subpix_ids = subpix_ids[:3]

    for j, subpix in enumerate(subpix_ids):
        # Shuffle a copy so the lists stored in subpixel2ccd_dict keep their order
        ids = list(subpixel2ccd_dict[subpix])
        random.shuffle(ids)
        ids = ids[:max_ccds]
        print(ids)
        x = ccd.get_ccds(ids)
        print(len(ids))
        # Iterate through the CCDs of this subpixel. A separate index k is needed:
        # reusing j would overwrite the subpixel index and broadcast a single CCD
        # row over a whole subpixel slot.
        for k in range(len(ids)):
            input[i, j, k] = x[k]

Pixids [1]
[113, 112, 114, 111, 115]


NameError: name 'ccd' is not defined

In [None]:
# Show the shape of the padded subpixel tensor followed by its contents
print(input.shape, input, sep='\n')

## Building the Actual Network Architecture



In [66]:
from deepset_layers import InvLinear
class SetNet(nn.Module):
    """Deep Sets style regressor: per-element feature extractor followed by an InvLinear layer.

    Args:
        n_features: size of the last input dimension (features per set element).
        n_output: width of the per-element representation produced by the
            feature extractor and consumed by the InvLinear layer.
        reduction: passed through to ``InvLinear`` (the notebook uses 'sum' and 'max').
    """

    def __init__(self, n_features=5, n_output=3, reduction='sum'):
        super(SetNet, self).__init__()

        # Applies the transformations to the last dimension (the features) of the
        # input tensor, so every set element is processed independently.
        self.feature_extractor = nn.Sequential(
            nn.Linear(n_features, 7),
            nn.ReLU(inplace=True),
            nn.Linear(7, 5),
            nn.ReLU(inplace=True),
            nn.Linear(5, n_output),
            nn.ReLU(inplace=True)
        )

        # Invariant layer influenced by code from DPernes, but adapted for the current
        # regression task instead of CNN. Its input width has to follow n_output;
        # a hard-coded 3 only works for the default.
        self.adder = InvLinear(n_output, 1, reduction=reduction, bias=True)

    def forward(self, X, mask=None):
        """Run the feature extractor on ``X`` and pass the result, with ``mask``, to InvLinear.

        NOTE(review): the meaning of ``mask`` is defined by ``InvLinear`` in
        deepset_layers; the notebook passes a boolean (batch, set_size) mask -- confirm there.
        """
        y = self.feature_extractor(X)

        y = self.adder(y, mask=mask)
        return y


In [18]:
# Rebuild the small variable-length dataset with 'lrg' targets and fetch one sample
traindata = SetSequence(var_set_len=True)
traindata.set_targets('lrg')
x, y, l = traindata[3]
# set length, target and target shape, one per line
print(l, y, y.shape, sep='\n')

(10,)
(10, 1)
9
tensor([[0.5417]])
torch.Size([1, 1])


In [78]:
# Smoke test of the forward pass on the sample fetched above.
# The input width is taken from the data instead of relying on the default n_features.
net = SetNet(n_features=x.shape[-1])
# Call the module itself rather than .forward() so registered hooks are run as well
y = net(x)
print(y)


INVLAYER: torch.Size([1, 30, 3])
tensor([[-8.7350]], grad_fn=<AddBackward0>)


In [80]:
# Work out masking logic
device = 'cpu'

x = x.to(device)
y = y.to(device)
# The dataset returns the set length as a NumPy integer, which has no .to();
# convert it to a tensor first.
l = torch.as_tensor(l).to(device)


def get_mask(sizes, max_size):
    """Build a boolean padding mask for a batch of variable-length sets.

    Args:
        sizes: number of valid elements per set -- a tensor, or anything
            ``torch.as_tensor`` accepts (int, NumPy integer, list).
        max_size: padded length of every set.

    Returns:
        Bool tensor of shape (len(sizes), max_size); entry [b, k] is True
        exactly when k < sizes[b].
    """
    sizes = torch.as_tensor(sizes)
    # Create the positions directly on the device of `sizes`
    positions = torch.arange(max_size, device=sizes.device).reshape(1, -1)
    return positions < sizes.reshape(-1, 1)


# The set axis is the second-to-last one, with or without a leading batch dimension;
# for an unbatched sample x.shape[1] would be the feature axis.
mask = get_mask(l, x.shape[-2])
print(mask)

tensor([[ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
          True,  True,  True, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False]])


###  Hyperparameters and Training Loops

In [33]:
# Loss: mean squared error between predictions and targets
criterion = nn.MSELoss()

# Hyperparameters
# (the number of epochs is very low, but computational power is not sufficient for more iterations)
no_epochs, batch, learning_rate = 100, 4, 0.001

# Adam is used for stochastic optimisation; the optimiser is created per model
# in the training loop:
#   optimiser = optim.Adam(model.parameters(), lr=learning_rate)

device = 'cpu'
galaxy_types = ['lrg', 'elg', 'qso']


def get_mask(sizes, max_size):
    """Build a boolean padding mask for a batch of variable-length sets.

    Args:
        sizes: number of valid elements per set -- a tensor, or anything
            ``torch.as_tensor`` accepts (int, NumPy integer, list).
        max_size: padded length of every set.

    Returns:
        Bool tensor of shape (len(sizes), max_size); entry [b, k] is True
        exactly when k < sizes[b].
    """
    sizes = torch.as_tensor(sizes)
    # Create the positions directly on the device of `sizes`
    positions = torch.arange(max_size, device=sizes.device).reshape(1, -1)
    return positions < sizes.reshape(-1, 1)


In [34]:
# Training set: 1000 randomly sampled pixels with variable-length CCD sets.
# Targets are attached per galaxy type via set_targets() in the training loop.
traindata = SetSequence(num_pixels=1000, var_set_len=True)

In [35]:
# Train one fresh SetNet per galaxy type on the same inputs with type-specific targets.
for gal in galaxy_types:
    model = SetNet(n_features=traindata.num_features, reduction='max').to(device)
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)
    print("GALAXY TYPE: ", gal)
    print()
    traindata.set_targets(gal_type=gal)

    # One loader per galaxy type is enough: with shuffle=True the data is
    # reshuffled every time the loader is iterated, i.e. once per epoch.
    trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch, shuffle=True)

    # Put the model into train mode once; nothing in the loop switches it back
    model.train()

    time_start = time.time()

    for epoch in range(no_epochs):
        # Sum (not mean) of the batch losses of this epoch
        loss_per_epoch = 0

        for X, labels, set_sizes in trainloader:
            # Move inputs, labels and set sizes of the batch to the device
            X = X.to(device)
            labels = labels.to(device)
            set_sizes = set_sizes.to(device)

            # True for real set elements, False for zero padding
            mask = get_mask(set_sizes, X.shape[1])

            # Predict outputs (forward pass)
            predictions = model(X, mask=mask)

            # Compute loss
            loss = criterion(predictions, labels)

            # Zero-out the gradients before the backward pass (pytorch accumulates gradients)
            optimiser.zero_grad()
            # Backpropagation
            loss.backward()
            # Perform one step of gradient descent
            optimiser.step()
            # Add the batch loss to the total loss of this epoch
            loss_per_epoch += loss.item()

        if epoch % 10 == 0:
            print("Loss for Epoch", epoch, ": ", loss_per_epoch)

    time_end = time.time()
    time_passed = time_end - time_start
    print()
    print(f"{time_passed / 60:.5} minutes ({time_passed:.3} seconds) taken to train the model")
    print()

GALAXY TYPE:  lrg

Loss for Epoch 0 :  2942.1800325997174
Loss for Epoch 10 :  3.680534098879434
Loss for Epoch 20 :  2.884592419693945
Loss for Epoch 30 :  2.765893154341029
Loss for Epoch 40 :  2.5631455073598772
Loss for Epoch 50 :  2.684168670588406
Loss for Epoch 60 :  2.5554687872645445
Loss for Epoch 70 :  2.4053838056570385
Loss for Epoch 80 :  2.4015086796716787
Loss for Epoch 90 :  2.399972525483463

0.39641 minutes (23.8 seconds) taken to train the model

GALAXY TYPE:  elg

Loss for Epoch 0 :  27871.49012487009
Loss for Epoch 10 :  8.458283477695659
Loss for Epoch 20 :  4.994630630942993
Loss for Epoch 30 :  4.833354047266766
Loss for Epoch 40 :  4.84361100976821
Loss for Epoch 50 :  4.83717059326591
Loss for Epoch 60 :  4.820220751920715
Loss for Epoch 70 :  4.846069120801985
Loss for Epoch 80 :  4.8225392025779
Loss for Epoch 90 :  4.808886545091809

0.42237 minutes (25.3 seconds) taken to train the model

GALAXY TYPE:  qso

Loss for Epoch 0 :  14189.960423341021
Loss for 