In [None]:
import pandas as pd
import torch
import os
import numpy as np
from torch.utils.data import DataLoader
from sklearn import metrics
import math

from datasets import DensitySurvey



### Neural Network Final Predictions


In [13]:

# Keys used by the cells below to build the data / model / results paths.
areas = ['north', 'south', 'des']
galaxies = ['lrg', 'elg', 'qso']

# Device that inference inputs are moved to.
device = 'cpu'
max_set_len = 0

# Load one results table per area; `areas` is ordered north, south, des,
# which matches the unpacking order on the left-hand side.
df_north, df_south, df_des = (
    pd.read_csv(f'../regression/results/{area_name}.csv') for area_name in areas
)

# Show the columns that are already present before the *_nn predictions are merged in.
df_north.columns



Index(['pixel_id', 'lrg', 'elg', 'qso', 'exposures', 'stellar', 'EBV',
       'airmass', 'ccdskysb_g', 'ccdskysb_r', 'ccdskysb_z', 'exptime_g',
       'exptime_r', 'exptime_z', 'meansky_g', 'meansky_r', 'meansky_z',
       'seeing_g', 'seeing_r', 'seeing_z', 'lrg_lin', 'elg_lin', 'qso_lin',
       'lrg_deep', 'y_gold_x', 'elg_deep', 'y_gold_y', 'qso_deep', 'y_gold'],
      dtype='object')

In [14]:
def _best_checkpoint(model_dir):
    """Return ``(score, filename)`` for the best ``<score>.pt`` file in ``model_dir``.

    The score is parsed from the file stem. The *original filename* is returned
    so the checkpoint can be opened without re-formatting the float (which would
    turn e.g. ``0.20.pt`` into ``0.2.pt`` and miss the file).

    Entries that are not ``<float>.pt`` (stray files, directories, NaN scores)
    are skipped.

    Raises:
        FileNotFoundError: if the directory holds no usable checkpoint.
    """
    best_score, best_name = None, None
    for name in os.listdir(model_dir):
        stem, ext = os.path.splitext(name)
        if ext != '.pt':
            continue
        try:
            score = float(stem)
        except ValueError:
            continue
        if math.isnan(score):
            continue
        if best_score is None or score > best_score:
            best_score, best_name = score, name
    if best_name is None:
        raise FileNotFoundError(f"no '<score>.pt' checkpoint found in {model_dir}")
    return best_score, best_name


def _predict(model, loader, device):
    """Run ``model`` over ``loader`` and return ``(y_pred, y_gold)`` as flat float64 arrays."""
    model.eval()
    pred_chunks, gold_chunks = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(device))
            pred_chunks.append(outputs.cpu().detach().numpy().ravel())
            gold_chunks.append(labels.cpu().detach().numpy().ravel())
    # Collect per-batch chunks and concatenate once; np.append inside the loop
    # re-copies the whole array on every batch.
    if not pred_chunks:
        return np.array([]), np.array([])
    y_pred = np.concatenate(pred_chunks).astype(np.float64)
    y_gold = np.concatenate(gold_chunks).astype(np.float64)
    return y_pred, y_gold


def _attach_predictions(df_results, df_pred, pred_col):
    """Inner-join ``df_pred`` onto ``df_results`` by ``pixel_id``.

    A ``pred_col`` column left over from a previous run is dropped first, so
    re-running the cell replaces the predictions instead of producing
    ``<pred_col>_x`` / ``<pred_col>_y`` duplicates.
    """
    cleaned = df_results.drop(columns=[pred_col], errors='ignore')
    return cleaned.merge(df_pred, how='inner', on='pixel_id')


for area in areas:

    df_test = pd.read_csv(f'data/{area}/{area}_test.csv')
    pixel_id = df_test.pixel_id

    df_test = df_test.drop(columns=['pixel_id', 'exposures'])

    for gal in galaxies:
        testdata = DensitySurvey(df_test, gal)

        model_dir = f"trained_models/{area}/{gal}"
        best_val, best_file = _best_checkpoint(model_dir)
        print(best_val)

        # NOTE(review): torch.load unpickles a full model object here. Only load
        # checkpoints you trust; newer torch releases may additionally require
        # weights_only=False for whole-model checkpoints. Confirm against the
        # installed torch version.
        model = torch.load(f"{model_dir}/{best_file}",
                           map_location=torch.device(device))

        testloader = DataLoader(testdata, batch_size=128, shuffle=False)

        y_pred, y_gold = _predict(model, testloader, device)

        print("Target", len(y_gold), np.isnan(y_gold).sum(), np.max(y_gold), np.min(y_gold), np.mean(y_gold))
        print(y_gold)
        print("Prediction", len(y_pred), np.isnan(y_pred).sum(), np.max(y_pred), np.min(y_pred), np.mean(y_pred))
        print(y_pred)

        # NaN (not 0) marks "metric unavailable" so a failure cannot be mistaken
        # for a genuine score.
        r2, rmse, mae = math.nan, math.nan, math.nan

        try:
            r2 = metrics.r2_score(y_gold, y_pred)
            rmse = math.sqrt(metrics.mean_squared_error(y_gold, y_pred))
            mae = metrics.mean_absolute_error(y_gold, y_pred)

        except ValueError as err:
            # scikit-learn rejects NaN / inf inputs with ValueError; any other
            # exception is a real bug and is allowed to propagate.
            print("++++++++++++++++++++")
            print("   NaN Predicted    ")
            print("++++++++++++++++++++")
            print(err)

        print()
        print(f" XXXXXX======== TRIAL {area} - {gal} ended")
        print()
        print("Test Set - R-squared: ", r2)
        print("Test Set - RMSE: ", rmse)
        print("Test Set - MAE: ", mae)
        print()
        print()
        print()

        # Build the frame column-wise so pixel_id keeps its own dtype instead of
        # being pushed through a shared float array alongside the predictions.
        pred_col = f'{gal}_nn'
        df_deep = pd.DataFrame({'pixel_id': pixel_id.to_numpy(), pred_col: y_pred})
        df_deep.pixel_id = df_deep.pixel_id.astype(int)

        if area == 'north':
            df_north = _attach_predictions(df_north, df_deep, pred_col)
        elif area == 'south':
            df_south = _attach_predictions(df_south, df_deep, pred_col)
        else:
            df_des = _attach_predictions(df_des, df_deep, pred_col)

0.022140414501169103
Target 19508 0 131.0 1.0 35.25584375640763
[37. 27. 21. ... 27. 25. 32.]
Prediction 19508 0 42.653907775878906 24.04363441467285 35.74281108890226
[36.98944092 34.81783676 34.24811935 ... 34.66788483 34.66122437
 34.18857193]


Test Set - R-squared:  0.022140409882163592
Test Set - RMSE:  11.836388599511777
Test Set - MAE:  8.752876406398395



0.20695097948924068
Target 19508 0 285.0 1.0 125.01768505228624
[ 63. 156.  95. ... 132. 139. 111.]
Prediction 19508 0 167.42422485351562 0.0 126.70591145156226
[106.44763947 137.95901489 124.56479645 ... 127.24111938 155.86907959
 134.17895508]


Test Set - R-squared:  0.20695098179084814
Test Set - RMSE:  19.945838743882955
Test Set - MAE:  15.552962719673062



0.17315364401834488
Target 19508 0 314.0 1.0 153.39173672339552
[166. 146. 114. ... 168. 134. 150.]
Prediction 19508 0 179.1888427734375 55.79410171508789 152.84699135968646
[151.08296204 148.48658752 162.21138    ... 151.67658997 147.09042358
 140.25558472]


Test

In [15]:
# Preview the merged tables (one per area) before the helper columns are removed.
print(df_north.head())
print(df_south.head())
print(df_des.head())

# The y_gold / y_gold_x / y_gold_y columns are not wanted in the saved results.
# errors='ignore' keeps this cell re-runnable: once the cleaned tables have been
# written back to the results CSVs, a later run loads frames that no longer
# contain these columns, and a strict drop would raise KeyError.
gold_columns = ['y_gold', 'y_gold_x', 'y_gold_y']
df_north = df_north.drop(columns=gold_columns, errors='ignore')
df_south = df_south.drop(columns=gold_columns, errors='ignore')
df_des = df_des.drop(columns=gold_columns, errors='ignore')

   pixel_id  lrg  elg  qso  exposures   stellar       EBV   airmass  \
0     20930   37   63  166   0.230769  0.271967  0.578446  0.447089   
1    128915   27  156  146   0.209790  0.062762  0.060792  0.066278   
2     22899   21   95  114   0.188811  0.092050  0.049091  0.371570   
3    105934   38  100  150   0.181818  0.104603  0.051809  0.056693   
4     14825   26  113  121   0.181818  0.083682  0.077518  0.410721   

   ccdskysb_g  ccdskysb_r  ...     qso_lin   lrg_deep  y_gold_x    elg_deep  \
0    0.762753    0.492800  ...  147.112149  38.373226      37.0  102.214256   
1    0.720468    0.591275  ...  145.337297  33.768410      27.0  132.624435   
2    0.772063    0.579600  ...  162.445987  34.356449      21.0  120.651459   
3    0.862060    0.616452  ...  154.163605  34.971714      38.0  124.865211   
4    0.817044    0.601693  ...  158.984019  34.321514      26.0  129.414261   

   y_gold_y    qso_deep  y_gold     lrg_nn      elg_nn      qso_nn  
0      63.0  166.713089   166

In [16]:
# Sanity check before saving: display the final column set of the north table
# (the y_gold* columns should be gone and the *_nn prediction columns present).
df_north.columns

Index(['pixel_id', 'lrg', 'elg', 'qso', 'exposures', 'stellar', 'EBV',
       'airmass', 'ccdskysb_g', 'ccdskysb_r', 'ccdskysb_z', 'exptime_g',
       'exptime_r', 'exptime_z', 'meansky_g', 'meansky_r', 'meansky_z',
       'seeing_g', 'seeing_r', 'seeing_z', 'lrg_lin', 'elg_lin', 'qso_lin',
       'lrg_deep', 'elg_deep', 'qso_deep', 'lrg_nn', 'elg_nn', 'qso_nn'],
      dtype='object')

In [17]:
# Persist the augmented tables. Note that this overwrites the same CSV files
# the results were originally loaded from.
for frame, region in ((df_north, 'north'), (df_south, 'south'), (df_des, 'des')):
    frame.to_csv(f'../regression/results/{region}.csv', index=False)