### First attempt at building a Neural Network to learn a non-linear F(s)


In [None]:
import torch
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import sklearn.metrics
from sklearn import preprocessing
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import time


# Show which PyTorch version this kernel has loaded
print(torch.__version__)

In [58]:
# Load the dataset CSV and print a summary of its columns
# (names, non-null counts and dtypes); no preprocessing happens in this cell.
df = pd.read_csv('../bricks_data/dataset_geometric.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268046 entries, 0 to 268045
Data columns (total 20 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   lrg_density        268046 non-null  float64
 1   elg_density        268046 non-null  float64
 2   qso_density        268046 non-null  float64
 3   stellar_density    268046 non-null  float64
 4   airmass_galaxy     268046 non-null  float64
 5   fwhm_galaxy        268046 non-null  float64
 6   ebv_galaxy         268046 non-null  float64
 7   ccdnphotom_galaxy  268046 non-null  float64
 8   ccdskysb_galaxy_g  268046 non-null  float64
 9   ccdskysb_galaxy_r  268046 non-null  float64
 10  ccdskysb_galaxy_z  268046 non-null  float64
 11  exptime_galaxy_g   268046 non-null  float64
 12  exptime_galaxy_r   268046 non-null  float64
 13  exptime_galaxy_z   268046 non-null  float64
 14  meansky_galaxy_g   268046 non-null  float64
 15  meansky_galaxy_r   268046 non-null  float64
 16  me

### Defining the Dataset class, inheriting from `torch.utils.data.Dataset`, so that a DataLoader can be used for training

In [80]:
class DensitySurvey(Dataset):
    """Scaled survey features paired with one galaxy-density target.

    The three density columns (``lrg_density``, ``elg_density`` and
    ``qso_density``) are removed from the feature matrix; the one selected by
    ``galaxy_type`` becomes the regression target. The shapes of the scaled
    feature and target arrays are printed on construction.

    Args:
        df: DataFrame holding the three density columns plus the feature
            columns.
        galaxy_type: One of ``"LRG"``, ``"ELG"`` or ``"QSO"``.
        input_scaler: Optional already-fitted scaler exposing ``transform``.
            When given, it is applied to the features instead of fitting a new
            ``MinMaxScaler`` on ``df`` (e.g. to scale a test split with the
            statistics of the training split).
        target_scaler: Optional already-fitted scaler for the target column,
            handled the same way as ``input_scaler``.

    Attributes:
        data: The DataFrame passed in.
        input: 2-D array of scaled features.
        target: Scaled target as a column vector (``reshape(-1, 1)``).
        input_scaler: Scaler that produced ``input``.
        target_scaler: Scaler that produced ``target``; keep it around if
            predictions need to be mapped back with ``inverse_transform``.

    Raises:
        ValueError: If ``galaxy_type`` is not one of the supported types.
    """

    # Supported galaxy types and the column that holds their density.
    _TARGET_COLUMNS = {
        "LRG": "lrg_density",
        "ELG": "elg_density",
        "QSO": "qso_density",
    }

    def __init__(self, df, galaxy_type, input_scaler=None, target_scaler=None):
        # Fail early: an unknown type previously left self.target unset and
        # only surfaced later as an AttributeError.
        if galaxy_type not in self._TARGET_COLUMNS:
            raise ValueError(
                f"Unsupported galaxy_type {galaxy_type!r}; "
                f"expected one of {sorted(self._TARGET_COLUMNS)}"
            )

        self.data = df

        # Extracting target and input
        target_column = self._TARGET_COLUMNS[galaxy_type]
        self.target = self.data[target_column].to_numpy(copy=True)
        self.input = self.data.drop(
            columns=list(self._TARGET_COLUMNS.values())
        ).to_numpy(copy=True)

        # Scaling: features and target each get their own scaler so that
        # neither fitted state overwrites the other.
        if input_scaler is None:
            self.input_scaler = preprocessing.MinMaxScaler()
            self.input = self.input_scaler.fit_transform(self.input)
        else:
            self.input_scaler = input_scaler
            self.input = input_scaler.transform(self.input)

        target_column_vector = self.target.reshape(-1, 1)
        if target_scaler is None:
            self.target_scaler = preprocessing.MinMaxScaler()
            self.target = self.target_scaler.fit_transform(target_column_vector)
        else:
            self.target_scaler = target_scaler
            self.target = target_scaler.transform(target_column_vector)

        print(self.input.shape)
        print(self.target.shape)

    def __len__(self):
        """Return the number of samples (rows of the target array)."""
        return len(self.target)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair at ``idx`` as float32 tensors."""
        features = torch.from_numpy(self.input[idx]).float()
        target = torch.tensor(self.target[idx]).float()
        return features, target


In [81]:
# Read the survey table and hold out 33% of the rows (fixed seed) for testing.
df = pd.read_csv('../bricks_data/dataset_geometric.csv')
train_df, test_df = train_test_split(
    df,
    test_size=0.33,
    random_state=44,
    shuffle=True,
)

# Wrap each partition as an LRG-density dataset (train first, then test).
traindata, testdata = [DensitySurvey(part, 'LRG') for part in (train_df, test_df)]

(179590, 17)
(179590, 1)
(88456, 17)
(88456, 1)


In [82]:

# Dataset sizes, obtained through the built-in len() protocol
print(len(traindata))
print(len(testdata))

# Fetch one sample by index and show the dtypes of its feature and target tensors
x, y = traindata[3]
print(x.dtype, y.dtype)

179590
88456
torch.float32 torch.float32


### Define Model and Hyperparameters



In [83]:
class Net(nn.Module):
    """Fully connected regressor with a single ReLU hidden layer.

    Args:
        n_feature: Width of the input vector.
        n_hidden: Number of units in the hidden layer.
        n_output: Width of the output vector.
    """

    def __init__(self, n_feature = 17, n_hidden = 10, n_output = 1):
        super().__init__()
        # Hidden layer first, output head second (a second hidden layer is
        # currently not part of the model).
        self.fc1 = nn.Linear(in_features=n_feature, out_features=n_hidden)
        self.predict = nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self,x):
        """Run ``x`` through the hidden layer with ReLU, then the linear output layer."""
        hidden = F.relu(self.fc1(x))
        return self.predict(hidden)

# Seed PyTorch's global RNG before the network is built so the initial
# weights are the same on every run from a fresh kernel (torch-driven
# shuffling that happens afterwards draws from the same generator).
torch.manual_seed(44)

# Training and inference run on the CPU
device = 'cpu'

model = Net().to(device)



In [84]:
# Defining Loss
# Hyperparameters
learning_rate = 1e-3
batch = 1024
# The epoch count is very low, but the available computational power is not
# sufficient for more iterations.
no_epochs = 500

# Mean-squared-error loss for the regression target
criterion = nn.MSELoss()

# Adam (stochastic optimisation) over all parameters of the network
optimiser = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
time_start = time.time()

# A DataLoader built with shuffle=True draws a new permutation every time it
# is iterated, so a single loader can serve every epoch.
trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch, shuffle=True)

# Put the model into train mode once; nothing inside the loop switches it back.
model.train()

for epoch in range(no_epochs):
    # Running sum of the per-batch losses returned by `criterion` this epoch
    loss_per_epoch = 0

    for inputs, labels in trainloader:
        # Move the batch to the device the model lives on
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero-out the gradients before the backward pass (PyTorch accumulates them)
        optimiser.zero_grad()

        # Forward pass and loss
        predictions = model(inputs)
        loss = criterion(predictions, labels)

        # Backpropagation followed by one optimiser step
        loss.backward()
        optimiser.step()

        # Add this batch's loss to the epoch total
        loss_per_epoch += loss.item()

    if epoch % 10 == 0:
        print("Loss for Epoch", epoch, ": ", loss_per_epoch)

time_end = time.time()
time_passed = time_end - time_start
print()
# Fixed-point formatting: a bare precision such as ":.3" selects general
# format and prints long runs in scientific notation (e.g. "1.23e+03").
print(f"{time_passed/60:.2f} minutes ({time_passed:.1f} seconds) taken to train the model")


Loss for Epoch 0 :  0.8100595716387033
Loss for Epoch 10 :  0.07056161339278333
Loss for Epoch 20 :  0.06039096834138036
Loss for Epoch 30 :  0.05801052146125585
Loss for Epoch 40 :  0.05740677454741672
Loss for Epoch 50 :  0.057137125622830354
Loss for Epoch 60 :  0.056651875740499236
Loss for Epoch 70 :  0.0563907187897712
Loss for Epoch 80 :  0.05639348727709148
Loss for Epoch 90 :  0.05624635229469277
Loss for Epoch 100 :  0.055991054730839096
Loss for Epoch 110 :  0.055804325587814674
Loss for Epoch 120 :  0.055838042666437104
Loss for Epoch 130 :  0.05575022878474556
Loss for Epoch 140 :  0.055506179080111906
Loss for Epoch 150 :  0.055427626764867455
Loss for Epoch 160 :  0.05517887548194267
Loss for Epoch 170 :  0.05539186035457533
Loss for Epoch 180 :  0.05497705929155927
Loss for Epoch 190 :  0.05512102137436159
Loss for Epoch 200 :  0.055056203171261586
Loss for Epoch 210 :  0.05486024085257668
Loss for Epoch 220 :  0.054864193734829314
Loss for Epoch 230 :  0.05484406650066

In [None]:
# Switch the model to evaluation mode before predicting on the test split
model.eval()
testloader = torch.utils.data.DataLoader(testdata, batch_size=batch, shuffle=False)

# One flat array of predictions per batch; joined once after the loop
batch_predictions = []

# Gradients are not needed for evaluation. Without no_grad() the model outputs
# require grad and Tensor.numpy() raises a RuntimeError.
with torch.no_grad():
    for inputs, _ in testloader:
        # Forward pass through the trained network
        outputs = model(inputs.to(device))
        batch_predictions.append(outputs.cpu().numpy().ravel())

# Flat prediction array in test-set order (the loader does not shuffle)
y_pred = np.concatenate(batch_predictions) if batch_predictions else np.array([])

# Scaled targets of the test split, used as the reference values
y_gold = testdata.target

In [None]:
# `import sklearn.metrics` binds only the name `sklearn`, so the metric
# functions must be reached through the fully qualified module path.
print(sklearn.metrics.r2_score(y_gold, y_pred))
print(sklearn.metrics.mean_squared_error(y_gold, y_pred))