### First attempt at building a Neural Network to learn a non-linear F(s)


In [57]:
import torch
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing
from torch.utils.data import DataLoader, Dataset
from torch.nn import functional as F
import torch.optim as optim
import time


print(torch.__version__)

1.9.0


In [58]:
# Load the raw survey table and print a summary of its columns and dtypes
df = pd.read_csv(filepath_or_buffer='../bricks_data/dataset_geometric.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268046 entries, 0 to 268045
Data columns (total 20 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   lrg_density        268046 non-null  float64
 1   elg_density        268046 non-null  float64
 2   qso_density        268046 non-null  float64
 3   stellar_density    268046 non-null  float64
 4   airmass_galaxy     268046 non-null  float64
 5   fwhm_galaxy        268046 non-null  float64
 6   ebv_galaxy         268046 non-null  float64
 7   ccdnphotom_galaxy  268046 non-null  float64
 8   ccdskysb_galaxy_g  268046 non-null  float64
 9   ccdskysb_galaxy_r  268046 non-null  float64
 10  ccdskysb_galaxy_z  268046 non-null  float64
 11  exptime_galaxy_g   268046 non-null  float64
 12  exptime_galaxy_r   268046 non-null  float64
 13  exptime_galaxy_z   268046 non-null  float64
 14  meansky_galaxy_g   268046 non-null  float64
 15  meansky_galaxy_r   268046 non-null  float64
 16  me

### Defining the Dataset class, inheriting from `torch.utils.data.Dataset`, so that a DataLoader can be used for training

In [76]:
class DensitySurvey(Dataset):
    """Torch ``Dataset`` pairing min-max scaled feature rows with one galaxy density target.

    The three density columns (``lrg_density``, ``elg_density``, ``qso_density``)
    are removed from the frame; every remaining column is used as an input feature.

    Args:
        df: DataFrame containing the three density columns plus the feature columns.
        galaxy_type: Which density to predict: ``"LRG"``, ``"ELG"`` or ``"QSO"``.
        max_rows: Only the first ``max_rows`` rows of ``df`` are used. Defaults to
            1000 (the previously hard-coded cap); pass ``None`` to use every row.
        input_scaler: Optional already-fitted scaler for the features. When given it
            is applied with ``transform`` instead of fitting a new one, so that e.g.
            a test split can be scaled with the statistics of the training split.
        target_scaler: Optional already-fitted scaler for the target, handled the
            same way as ``input_scaler``.

    Attributes:
        input: 2-D array of scaled features, one row per sample.
        target: 2-D array of scaled targets with a single column.
        input_scaler / target_scaler: The scalers that produced ``input`` and
            ``target``; kept so predictions can be mapped back with
            ``inverse_transform`` and so they can be reused for another split.

    Raises:
        ValueError: If ``galaxy_type`` is not one of the supported codes.
    """

    # Accepted galaxy_type codes and the target column each one selects
    TARGET_COLUMNS = {
        "LRG": "lrg_density",
        "ELG": "elg_density",
        "QSO": "qso_density",
    }

    def __init__(self, df, galaxy_type, max_rows=1000, input_scaler=None, target_scaler=None):
        # Fail early: previously an unknown code left self.target undefined
        if galaxy_type not in self.TARGET_COLUMNS:
            raise ValueError(
                f"Unknown galaxy_type {galaxy_type!r}; expected one of {sorted(self.TARGET_COLUMNS)}"
            )

        # Positional row slice; a max_rows of None keeps the whole frame
        self.data = df.iloc[:max_rows]

        # Extracting Targets and Input
        target_column = self.TARGET_COLUMNS[galaxy_type]
        raw_target = self.data[target_column].to_numpy(copy=True).reshape(-1, 1)
        raw_input = self.data.drop(columns=list(self.TARGET_COLUMNS.values())).to_numpy(copy=True)

        # Scaling: features and target each get their own scaler so neither fit
        # overwrites the other, and both remain available afterwards.
        if input_scaler is None:
            input_scaler = preprocessing.MinMaxScaler()
            self.input = input_scaler.fit_transform(raw_input)
        else:
            self.input = input_scaler.transform(raw_input)

        if target_scaler is None:
            target_scaler = preprocessing.MinMaxScaler()
            self.target = target_scaler.fit_transform(raw_target)
        else:
            self.target = target_scaler.transform(raw_target)

        self.input_scaler = input_scaler
        self.target_scaler = target_scaler

        print(self.input.shape)
        print(self.target.shape)


    def __len__(self):
        """Return the number of samples."""
        return len(self.target)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx`` as float32 tensors."""
        return torch.from_numpy(self.input[idx]).float(), torch.tensor(self.target[idx]).float()


In [77]:
# Reload the full table, then hold out 33% of the rows (fixed seed) for testing
df = pd.read_csv(filepath_or_buffer='../bricks_data/dataset_geometric.csv')
train_df, test_df = train_test_split(
    df,
    test_size=0.33,
    random_state=44,
    shuffle=True,
)

# Wrap both splits as datasets that predict the LRG density
traindata = DensitySurvey(df=train_df, galaxy_type='LRG')
testdata = DensitySurvey(df=test_df, galaxy_type='LRG')

(1000, 17)
(1000, 1)
(1000, 17)
(1000, 1)


In [78]:

# Number of samples in each split
print(len(traindata))
print(len(testdata))

# Pull a single sample and confirm both tensors come back as float32
x,y = traindata[3]

print(x.dtype, y.dtype)

1000
1000
torch.float32 torch.float32


### Define Model and Hyperparameters



In [61]:
class Net(nn.Module):
    """Fully connected regressor with a single ReLU hidden layer.

    Args:
        n_feature: Size of each input sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Size of each output sample.
    """

    def __init__(self, n_feature = 17, n_hidden = 10, n_output = 1):
        super().__init__()
        # Input -> hidden projection
        self.fc1 = nn.Linear(in_features=n_feature, out_features=n_hidden)
        # Hidden -> output projection
        self.predict = nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self,x):
        """Apply ``fc1``, a ReLU, then ``predict``; no activation on the output."""
        hidden = self.fc1(x)
        activated = F.relu(hidden)
        return self.predict(activated)

# All tensors and the model live on the CPU
device = 'cpu'

# Build the network with its default layer sizes, then move it to the chosen device
model = Net()
model = model.to(device)



In [62]:
# Hyperparameters
learning_rate = 0.001
batch = 1024
no_epochs = 500  # very low, but the available compute does not allow more iterations

# Mean-squared-error loss
criterion = nn.MSELoss()

# Adam optimiser over all of the model's parameters
optimiser = optim.Adam(params=model.parameters(), lr=learning_rate)

In [79]:
# perf_counter is a monotonic clock, so the elapsed time cannot be distorted by
# system clock adjustments during training.
time_start = time.perf_counter()

# Build the loader once: with shuffle=True the data is reshuffled every time the
# loader is iterated, so re-creating it each epoch is unnecessary.
trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch, shuffle = True)

# Put the model into train mode once; nothing inside the loop changes the mode
model.train()

for epoch in range(no_epochs):
    loss_per_epoch = 0.0

    # Each item yielded by the loader is an (inputs, labels) pair
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero-out the gradients before the backward pass (PyTorch accumulates them)
        optimiser.zero_grad()

        # Forward pass
        predictions = model(inputs)

        # Compute loss
        loss = criterion(predictions, labels)

        # Backpropagation
        loss.backward()

        # Perform one optimisation step
        optimiser.step()

        # Accumulate this batch's loss
        loss_per_epoch += loss.item()

    # Average over batches so the reported value does not scale with the number
    # of batches per epoch (identical to the plain sum when there is one batch).
    loss_per_epoch /= max(len(trainloader), 1)

    if epoch % 10 == 0:
        print("Loss for Epoch", epoch, ": ", loss_per_epoch)

time_end = time.perf_counter()
time_passed = time_end - time_start
print()
# Fixed-point specs: a bare ".3" means 3 significant digits and switches to
# scientific notation for durations of 1000 seconds or more.
print(f"{time_passed/60:.2f} minutes ({time_passed:.1f} seconds) taken to train the model")


Loss for Epoch 0 :  0.06564584374427795
Loss for Epoch 10 :  0.02809840440750122
Loss for Epoch 20 :  0.011636683717370033
Loss for Epoch 30 :  0.00951817724853754
Loss for Epoch 40 :  0.009426706470549107
Loss for Epoch 50 :  0.008790063671767712
Loss for Epoch 60 :  0.008521865122020245
Loss for Epoch 70 :  0.008186805993318558
Loss for Epoch 80 :  0.007976675406098366
Loss for Epoch 90 :  0.007786992471665144
Loss for Epoch 100 :  0.0076213921420276165
Loss for Epoch 110 :  0.007462238427251577
Loss for Epoch 120 :  0.0073154340498149395
Loss for Epoch 130 :  0.007178941275924444
Loss for Epoch 140 :  0.007050578016787767
Loss for Epoch 150 :  0.006928657181560993
Loss for Epoch 160 :  0.006811904720962048
Loss for Epoch 170 :  0.006700402125716209
Loss for Epoch 180 :  0.006594594102352858
Loss for Epoch 190 :  0.00649344502016902
Loss for Epoch 200 :  0.006397258955985308
Loss for Epoch 210 :  0.006305833347141743
Loss for Epoch 220 :  0.006218860857188702
Loss for Epoch 230 :  0.