In [1]:
import pandas as pd
import torch
import os
import numpy as np
import pickle
from datasets import MultiSetSequence
from torch.utils.data import DataLoader
from sklearn import metrics
import math

### DeepSets Final Predictions


In [17]:

def get_mask(sizes, max_size):
    return (torch.arange(max_size).reshape(1, -1).to(sizes.device) < sizes.unsqueeze(2))

areas = ['north', 'south', 'des']
galaxies = ['lrg', 'elg', 'qso']
device = 'cpu'
max_set_len = 0

df_north = pd.read_csv(f'../regression/results/north.csv')
df_south = pd.read_csv(f'../regression/results/south.csv')
df_des = pd.read_csv(f'../regression/results/des.csv')


In [18]:
for area in areas:
    with open(f'data/{area}/{area}_test.pickle', 'rb') as f:
        testset = pickle.load(f)
        f.close()
    if area == "north":
        max_set_len = 30
    elif area == "south":
        max_set_len = 25
    else:
        max_set_len = 50
    df_test = pd.DataFrame.from_dict(testset, orient='index')

    testdata = MultiSetSequence(dict=df_test.to_dict(orient='index'), num_pixels=len(df_test),
                            max_ccds=max_set_len, num_features=6, test=True)

    pixel_id = testdata.pixel_id




    for gal in galaxies:
        testdata.set_targets(gal_type=gal)

        best_val = -100
        for model in os.listdir(f"trained_models/{area}/{gal}"):
            val = float(model[:-3])
            if val > best_val:
                best_val = val


        print(Gal)
        model = torch.load(f"trained_models/{area}/{gal}/{best_val}.pt",
                   map_location=torch.device('cpu'))

        testloader = torch.utils.data.DataLoader(testdata, batch_size=128, shuffle=False)

        model.eval()
        y_pred = np.array([])
        y_gold = np.array([])

        with torch.no_grad():
            for i, (X1, X2, labels, set_sizes) in enumerate(testloader):
                # Extract inputs and associated labels from dataloader batch
                X1 = X1.to(device)

                X2 = X2.to(device)

                labels = labels.to(device)

                set_sizes = set_sizes.to(device)

                mask = get_mask(set_sizes, X1.shape[2])
                # Predict outputs (forward pass)

                outputs = model(X1, X2, mask=mask)
                # Predict outputs (forward pass)
                # Get predictions and append to label array + count number of correct and total
                y_pred = np.append(y_pred, outputs.cpu().detach().numpy())
                y_gold = np.append(y_gold, labels.cpu().detach().numpy())

            print("Target", len(y_gold), np.isnan(y_gold).sum(), np.max(y_gold), np.min(y_gold), np.mean(y_gold))
            print(y_gold)
            print("Prediction", len(y_pred), np.isnan(y_pred).sum(), np.max(y_pred), np.min(y_pred), np.mean(y_pred))
            print(y_pred)

        ax = np.stack((pixel_id, y_pred), axis=1)
        df_deep = pd.DataFrame(ax, columns=['pixel_id', f'{gal}_deep'])

        df_deep.pixel_id = df_deep.pixel_id.astype(int)

        if area == 'north':
            df_north = df_north.merge(df_deep, how='inner', on='pixel_id')
        elif area == 'south':
            df_south = df_south.merge(df_deep, how='inner', on='pixel_id')
        else:
            df_des = df_des.merge(df_deep, how='inner', on='pixel_id')

Target 19505 0 131.0 1.0 35.255216611125356
[37. 27. 21. ... 27. 25. 32.]
Prediction 19505 0 43.500221252441406 24.518848419189453 35.38854326259415
[38.37322617 33.76840973 34.35644913 ... 33.31462097 34.19351578
 34.62304688]
Target 19505 0 285.0 1.0 125.01307357087926
[ 63. 156.  95. ... 132. 139. 111.]
Prediction 19505 0 189.4333953857422 7.6724371910095215 124.49812652558066
[100.60153961 131.92521667 117.16901398 ... 123.48097992 168.7237854
 126.50543213]
Target 19505 0 314.0 1.0 153.38815688285055
[166. 146. 114. ... 168. 134. 150.]
Prediction 19505 0 182.63925170898438 36.04914855957031 152.0958230418323
[166.71308899 140.50054932 159.87458801 ... 148.9777832  142.07981873
 136.76513672]
Target 37139 0 129.0 1.0 33.86528985702361
[51. 35. 31. ... 28. 22. 36.]
Prediction 37139 0 42.77854919433594 -1.321862816810608 33.89562346916158
[33.69371414 33.75234985 35.29465485 ... 33.07236099 35.03711319
 33.90889359]
Target 37139 0 274.0 1.0 130.19173914214167
[ 97. 150. 113. ... 133.

FileNotFoundError: [Errno 2] No such file or directory: 'trained_models/des/elg/1.0.pt'

In [None]:
print((df_north.head()))
print((df_south.head()))
print((df_des.head()))