In [1]:
import pandas as pd
import torch
import os
import numpy as np
from torch.utils.data import DataLoader
from sklearn import metrics
import math

from datasets import DensitySurvey



### Neural Network Final Predictions


In [11]:

# Notebook-wide settings.
# `areas` and `galaxies` drive the prediction loop further down; `NSIDE` is the
# resolution tag embedded in the CSV file names.
NSIDE = 512
device = 'cpu'
max_set_len = 0
areas = ['north', 'south', 'des']
galaxies = ['lrg', 'elg', 'qso', 'glbg', 'rlbg']

# Load the per-area results tables (one CSV per area) in the order
# north, south, des and bind them to their individual names.
results_dir = '../regression/results'
df_north, df_south, df_des = (
    pd.read_csv(f'{results_dir}/{region}_complete_{NSIDE}.csv')
    for region in ('north', 'south', 'des')
)

# Show which columns the results tables start out with.
df_north.columns




Index(['pixel_id', 'lrg', 'elg', 'qso', 'glbg', 'rlbg', 'exposures', 'hinh',
       'gaia', 'gaia12', 'sagitarius', 'stellar', 'EBV', 'airmass',
       'ccdskysb_g', 'ccdskysb_r', 'ccdskysb_z', 'ccdskycounts_g',
       'ccdskycounts_r', 'ccdskycounts_z', 'seeing_g', 'seeing_r', 'seeing_z',
       'lrg_lin', 'elg_lin', 'qso_lin', 'glbg_lin', 'rlbg_lin'],
      dtype='object')

In [12]:
# Row counts of the loaded results tables (des, south, north).
print(len(df_des))
print(len(df_south))
print(len(df_north))

# Peek at the column layout of one training file before running the full loop.
area = 'south'

# Use NSIDE rather than a hard-coded 512 so this path stays in sync with the
# files read by the prediction loop.
df_train = pd.read_csv(f'data/{area}/{area}_{NSIDE}.csv')
print(df_train.columns)

338526
662847
363076
Index(['pixel_id', 'lrg', 'elg', 'qso', 'glbg', 'rlbg', 'exposures', 'hinh',
       'gaia', 'gaia12', 'sagitarius', 'stellar', 'EBV', 'airmass',
       'ccdskysb_g', 'ccdskysb_r', 'ccdskysb_z', 'ccdskycounts_g',
       'ccdskycounts_r', 'ccdskycounts_z', 'seeing_g', 'seeing_r', 'seeing_z'],
      dtype='object')


In [13]:
def _attach_predictions(df_complete, df_pred):
    """Inner-join the prediction column(s) of `df_pred` onto `df_complete` by pixel_id.

    Any column of `df_complete` that has the same name as a prediction column is
    dropped first, so re-running the loop on a table that already contains
    `<gal>_nn` replaces it instead of producing `_x`/`_y` suffixed duplicates.
    """
    pred_cols = [col for col in df_pred.columns if col != 'pixel_id']
    return (
        df_complete
        .drop(columns=pred_cols, errors='ignore')
        .merge(df_pred, how='inner', on='pixel_id')
    )


# For every area and galaxy type: load the best saved model, predict on all
# pixels (test + train files), report regression metrics and attach the
# predictions to the matching results table as `<gal>_nn`.
for area in areas:

    df_test = pd.read_csv(f'data/{area}/{area}_test_{NSIDE}.csv')
    df_train = pd.read_csv(f'data/{area}/{area}_{NSIDE}.csv')
    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row
    # order and index labels.
    df_test = pd.concat([df_test, df_train])
    pixel_id = df_test.pixel_id

    df_test = df_test.drop(columns=['pixel_id', 'exposures'])
    print(df_test.columns)

    for gal in galaxies:
        testdata = DensitySurvey(df_test, gal)

        # Checkpoints are named "<score>.pt"; pick the one with the highest score.
        # The original file name is kept for loading, because formatting the
        # parsed float back into a string does not always reproduce the name.
        model_dir = f"trained_models/{area}/{gal}"
        checkpoints = [name for name in os.listdir(model_dir) if name.endswith('.pt')]
        if not checkpoints:
            raise FileNotFoundError(f"No '.pt' checkpoints found in {model_dir}")
        best_file = max(checkpoints, key=lambda name: float(name[:-3]))
        best_val = float(best_file[:-3])
        print(f' Area: {area}. Gal: {gal}. Best Value: {best_val}.')

        # SECURITY: torch.load unpickles the file, which can execute arbitrary
        # code; only load checkpoints from a trusted source.
        # NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
        # fully pickled modules; pass weights_only=False there if this load fails.
        model = torch.load(f"{model_dir}/{best_file}",
                           map_location=torch.device(device))

        testloader = DataLoader(testdata, batch_size=128, shuffle=False)

        model.eval()
        pred_chunks, gold_chunks = [], []

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs = inputs.to(device)
                # Forward pass through the trained network
                outputs = model(inputs)

                # Collect flattened per-batch arrays; they are joined once after
                # the loop instead of re-copying a growing array every batch.
                pred_chunks.append(outputs.cpu().detach().numpy().ravel())
                gold_chunks.append(labels.cpu().detach().numpy().ravel())

        # float64 matches what np.append onto an empty float array produced before.
        y_pred = np.concatenate(pred_chunks).astype(np.float64) if pred_chunks else np.array([])
        y_gold = np.concatenate(gold_chunks).astype(np.float64) if gold_chunks else np.array([])

        print("Target", len(y_gold), np.isnan(y_gold).sum(), np.max(y_gold), np.min(y_gold), np.mean(y_gold))
        print(y_gold)
        print("Prediction", len(y_pred), np.isnan(y_pred).sum(), np.max(y_pred), np.min(y_pred), np.mean(y_pred))
        print(y_pred)

        try:
            r2 = metrics.r2_score(y_gold, y_pred)
            rmse = math.sqrt(metrics.mean_squared_error(y_gold, y_pred))
            mae = metrics.mean_absolute_error(y_gold, y_pred)

        except ValueError as err:
            # sklearn rejects NaN/inf inputs with ValueError. Report NaN metrics
            # rather than a misleading 0, and show the reason.
            r2 = rmse = mae = float('nan')
            print("++++++++++++++++++++")
            print("   NaN Predicted    ")
            print("++++++++++++++++++++")
            print(err)

        print()
        print(f" XXXXXX======== TRIAL {area} - {gal} ended")
        print()
        print("Test Set - R-squared: ", r2)
        print("Test Set - RMSE: ", rmse)
        print("Test Set - MAE: ", mae)
        print()
        print()
        print()

        # The loader does not shuffle, so predictions are in the same row order
        # as `pixel_id`.
        df_deep = pd.DataFrame({
            'pixel_id': pixel_id.to_numpy(),
            f'{gal}_nn': y_pred,
        }).astype({'pixel_id': int})

        if area == 'north':
            df_north = _attach_predictions(df_north, df_deep)
        elif area == 'south':
            df_south = _attach_predictions(df_south, df_deep)
        else:
            df_des = _attach_predictions(df_des, df_deep)

Index(['lrg', 'elg', 'qso', 'glbg', 'rlbg', 'hinh', 'gaia', 'gaia12',
       'sagitarius', 'stellar', 'EBV', 'airmass', 'ccdskysb_g', 'ccdskysb_r',
       'ccdskysb_z', 'ccdskycounts_g', 'ccdskycounts_r', 'ccdskycounts_z',
       'seeing_g', 'seeing_r', 'seeing_z'],
      dtype='object')
 Area: north. Gal: lrg. Best Value: 0.009592869264212434.
Target 363076 0 234.0 0.0 8.49097709570448
[ 8. 15.  6. ...  7.  6.  9.]
Prediction 363076 0 10.455790519714355 8.265435218811035 8.536811191710575
[8.2656517  8.68347073 8.26563263 ... 8.26563835 8.26563168 8.26559258]


Test Set - R-squared:  0.008077364551054345
Test Set - RMSE:  4.824262797424032
Test Set - MAE:  3.2939039275141604



 Area: north. Gal: elg. Best Value: 0.1261564379060902.
Target 363076 0 295.0 1.0 31.380586984543182
[46. 27. 32. ... 36. 33. 35.]
Prediction 363076 0 214.868408203125 18.566471099853516 31.660478698949763
[30.07936287 26.44309616 31.5339489  ... 33.38516617 31.39669037
 35.61845398]


Test Set - R-squared:  0.

In [14]:
# Display the column labels of df_north; after the prediction loop these should
# include one `<gal>_nn` column per galaxy type.
df_north.columns

Index(['pixel_id', 'lrg', 'elg', 'qso', 'glbg', 'rlbg', 'exposures', 'hinh',
       'gaia', 'gaia12', 'sagitarius', 'stellar', 'EBV', 'airmass',
       'ccdskysb_g', 'ccdskysb_r', 'ccdskysb_z', 'ccdskycounts_g',
       'ccdskycounts_r', 'ccdskycounts_z', 'seeing_g', 'seeing_r', 'seeing_z',
       'lrg_lin', 'elg_lin', 'qso_lin', 'glbg_lin', 'rlbg_lin', 'lrg_nn',
       'elg_nn', 'qso_nn', 'glbg_nn', 'rlbg_nn'],
      dtype='object')

In [16]:
# Print the first rows of each results table, in the order north, south, des.
for merged_frame in (df_north, df_south, df_des):
    print(merged_frame.head())


   pixel_id  lrg  elg  qso  glbg  rlbg  exposures      hinh      gaia  \
0    198670    8   46    4    29    28   0.078947  0.073242  0.188406   
1    445227   15   27    4    27    37   0.110526  0.294469  0.173913   
2    599907    6   32    1    14    28   0.178947  0.043577  0.202899   
3    500230    7   22    6    19    15   0.073684  0.056774  0.028986   
4     85102    6   28    4    33    20   0.094737  0.287197  0.289855   

     gaia12  ...   lrg_lin    elg_lin   qso_lin   glbg_lin   rlbg_lin  \
0  0.102740  ...  8.071155  31.166327  4.551402  27.702373  24.785633   
1  0.222603  ...  8.676857  29.292303  3.824231  24.939559  28.788954   
2  0.065068  ...  8.212959  31.960310  3.258793  28.324873  25.500994   
3  0.133562  ...  8.174141  31.400109  4.287567  28.372862  29.368564   
4  0.345890  ...  8.696031  28.307094  6.241617  37.031751  20.782377   

     lrg_nn     elg_nn    qso_nn    glbg_nn    rlbg_nn  
0  8.265652  30.079363  4.541422  28.518593  23.623510  
1  8.683

In [15]:
# Print the row count of each results table, in the order des, south, north.
for merged_frame in (df_des, df_south, df_north):
    print(len(merged_frame))

338526
662847
363076


In [17]:
# Write the results tables back to disk without the index column.
# The file names are built from NSIDE, matching how the tables were read, so
# these are the same files the notebook loaded and they are overwritten in place.
for region_name, merged_frame in (('north', df_north), ('south', df_south), ('des', df_des)):
    merged_frame.to_csv(f'../regression/results/{region_name}_complete_{NSIDE}.csv', index=False)

In [8]:

# NOTE(review): this cell carries execution count 8, lower than the cells above it,
# so its saved output comes from an earlier kernel state; re-run it after the
# cells above to get a current row count.
print(len(df_north))

97537
