In [29]:
import pandas as pd
import torch
import os
import numpy as np
import pickle
from datasets import MultiSetSequence
from torch.utils.data import DataLoader
from sklearn import metrics
import math

from datasets import DensitySurvey


ModuleNotFoundError: No module named 'models.neural_network'; 'models' is not a package

### DeepSets Final Predictions


In [23]:

def get_mask(sizes, max_size):
    """Return a boolean mask that is True for slot indices below each set size.

    Slot indices 0..max_size-1 (laid out as a (1, max_size) row on the same
    device as `sizes`) are compared against `sizes` with a new axis inserted
    at dim 2. For a 2-D `sizes` the result therefore broadcasts to
    ``sizes.shape + (max_size,)``.
    """
    slot_index = torch.arange(max_size).reshape(1, -1).to(sizes.device)
    set_lengths = sizes.unsqueeze(2)
    return slot_index < set_lengths

# Area names and galaxy types iterated over by the prediction loop below.
areas = ['north', 'south', 'des']
galaxies = ['lrg', 'elg', 'qso']

# All tensors are moved to this device before the forward pass.
device = 'cpu'
# Placeholder only: reassigned per area inside the prediction loop.
max_set_len = 0

# Per-area results tables; the DeepSets predictions are merged into these
# frames on `pixel_id` further down.
df_north = pd.read_csv('../regression/results/north.csv')
df_south = pd.read_csv('../regression/results/south.csv')
df_des = pd.read_csv('../regression/results/des.csv')


In [24]:
# Maximum number of CCDs per set handed to MultiSetSequence for each area;
# areas not listed here use DEFAULT_MAX_SET_LEN.
MAX_SET_LEN_BY_AREA = {'north': 30, 'south': 25}
DEFAULT_MAX_SET_LEN = 50


def _best_checkpoint(model_dir):
    """Return the path of the '<score>.pt' file in `model_dir` with the highest score.

    The score is parsed from the file name, but the file is opened under its
    original name, so names whose float repr differs from the text on disk
    (e.g. '0.10.pt', '1e-05.pt') still load. Entries that are not '<number>.pt'
    (such as '.ipynb_checkpoints') are skipped.

    Raises:
        FileNotFoundError: if the directory holds no parsable checkpoint.
    """
    best_name = None
    best_score = -math.inf
    for name in os.listdir(model_dir):
        stem, ext = os.path.splitext(name)
        if ext != '.pt':
            continue
        try:
            score = float(stem)
        except ValueError:
            continue
        if score > best_score:
            best_name, best_score = name, score
    if best_name is None:
        raise FileNotFoundError(f"no '<score>.pt' checkpoint found in {model_dir}")
    return os.path.join(model_dir, best_name)


def _predict(model, loader):
    """Run `model` over every batch of `loader` with gradients disabled.

    Returns:
        (y_pred, y_gold): flat float64 numpy arrays of the model outputs and
        the labels, concatenated in batch order.
    """
    pred_chunks, gold_chunks = [], []
    model.eval()
    with torch.no_grad():
        for X1, X2, labels, set_sizes in loader:
            X1 = X1.to(device)
            X2 = X2.to(device)
            set_sizes = set_sizes.to(device)

            mask = get_mask(set_sizes, X1.shape[2])
            outputs = model(X1, X2, mask=mask)

            # Collect per-batch chunks and concatenate once afterwards;
            # np.append per batch re-copies the whole array every iteration.
            pred_chunks.append(outputs.detach().cpu().numpy().ravel())
            gold_chunks.append(labels.detach().cpu().numpy().ravel())

    # The leading empty float64 array keeps the result float64 (as np.append
    # onto np.array([]) did) and makes an empty loader yield empty arrays.
    y_pred = np.concatenate([np.empty(0)] + pred_chunks)
    y_gold = np.concatenate([np.empty(0)] + gold_chunks)
    return y_pred, y_gold


for area in areas:
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # The `with` block closes the file, so no explicit close() is needed.
    with open(f'data/{area}/{area}_test.pickle', 'rb') as f:
        testset = pickle.load(f)

    max_set_len = MAX_SET_LEN_BY_AREA.get(area, DEFAULT_MAX_SET_LEN)
    df_test = pd.DataFrame.from_dict(testset, orient='index')

    testdata = MultiSetSequence(dict=df_test.to_dict(orient='index'), num_pixels=len(df_test),
                                max_ccds=max_set_len, num_features=6, test=True)

    pixel_id = testdata.pixel_id

    for gal in galaxies:
        testdata.set_targets(gal_type=gal)

        # Checkpoints are named '<score>.pt'; use the highest-scoring one.
        checkpoint_path = _best_checkpoint(f"trained_models/{area}/{gal}")
        # NOTE(review): this loads a fully pickled model object; recent PyTorch
        # releases may require weights_only=False for that - confirm against
        # the installed version.
        model = torch.load(checkpoint_path, map_location=torch.device(device))

        # shuffle=False keeps predictions aligned with `pixel_id` order.
        testloader = torch.utils.data.DataLoader(testdata, batch_size=128, shuffle=False)

        y_pred, y_gold = _predict(model, testloader)

        r2 = metrics.r2_score(y_gold, y_pred)
        rmse = math.sqrt(metrics.mean_squared_error(y_gold, y_pred))
        mae = metrics.mean_absolute_error(y_gold, y_pred)

        print()
        print(f" XXXXXX======== TRIAL {area} - {gal} ended")
        print()
        print("Test Set - R-squared: ", r2)
        print("Test Set - RMSE: ", rmse)
        print("Test Set - MAE: ", mae)

        # Stacking casts pixel_id to float alongside the predictions; it is
        # cast back to int below so it can serve as the merge key.
        stacked = np.stack((pixel_id, y_pred, y_gold), axis=1)
        df_deep = pd.DataFrame(stacked, columns=['pixel_id', f'{gal}_deep', 'y_gold'])

        df_deep.pixel_id = df_deep.pixel_id.astype(int)

        # NOTE(review): every galaxy type contributes a column named 'y_gold',
        # so successive merges produce pandas-suffixed duplicates
        # ('y_gold_x' / 'y_gold_y'), and re-running this cell on already-merged
        # frames duplicates the '<gal>_deep' columns as well. Behaviour is kept
        # as-is here; consider a per-galaxy label column name or dropping it.
        if area == 'north':
            df_north = df_north.merge(df_deep, how='inner', on='pixel_id')
        elif area == 'south':
            df_south = df_south.merge(df_deep, how='inner', on='pixel_id')
        else:
            df_des = df_des.merge(df_deep, how='inner', on='pixel_id')



Test Set - R-squared:  0.019696157369817224
Test Set - RMSE:  11.851315081785096
Test Set - MAE:  8.72075124230638


Test Set - R-squared:  0.2768691500296936
Test Set - RMSE:  19.04504909213889
Test Set - MAE:  14.787118993408097


Test Set - R-squared:  0.1945514680883067
Test Set - RMSE:  26.072144124678687
Test Set - MAE:  20.29797913806312


Test Set - R-squared:  0.04468225648113022
Test Set - RMSE:  11.550285110350043
Test Set - MAE:  8.420855481319704


Test Set - R-squared:  0.2469150939116309
Test Set - RMSE:  20.181628619139744
Test Set - MAE:  15.66593639433148


Test Set - R-squared:  0.20945053735450814
Test Set - RMSE:  25.0158650292469
Test Set - MAE:  19.2415477733478


Test Set - R-squared:  0.004356522559152087
Test Set - RMSE:  10.173388972312711
Test Set - MAE:  7.783041953875313


Test Set - R-squared:  0.027948262968123805
Test Set - RMSE:  18.288621875466337
Test Set - MAE:  14.40807519022653


Test Set - R-squared:  0.0053126464604502655
Test Set - RMSE:  24.

In [None]:
# Print the first rows of each per-area results frame, in north/south/des order.
for merged in (df_north, df_south, df_des):
    print(merged.head())

In [None]:
# Write each frame back to the same CSV path it was read from (overwriting
# the file), without the index column.
df_north.to_csv('../regression/results/north.csv', index=False)
df_south.to_csv('../regression/results/south.csv', index=False)
df_des.to_csv('../regression/results/des.csv', index=False)