In [1]:
from astropy.table import Table
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import h5py
import matplotlib.pyplot as plt
from gaiaxpy import generate, PhotometricSystem
import pandas as pd

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

class ResBlock(nn.Module):
    def __init__(self, nodes):
        super(ResBlock, self).__init__()
        self.res_block1 = nn.Sequential(
            nn.Linear(nodes,nodes),
            nn.BatchNorm1d(nodes),
            nn.LeakyReLU(),
        )
        self.res_block2 = nn.Sequential(
            nn.Linear(nodes,nodes),
            nn.BatchNorm1d(nodes),
        )
        self.lrelu = nn.LeakyReLU()
    
    def forward(self, x):
        res = x
        x = self.res_block1(x)
        x = self.res_block2(x)
        x = x + res
        output = self.lrelu(x)
        return output
        
class ResNetMcK(nn.Module):
    def __init__(self):
        super(ResNetMcK, self).__init__()
        self.input_block = nn.Sequential(
            nn.Linear(3,16),
            nn.LeakyReLU(),
        )
        self.blocklist = nn.ModuleList([
            ResBlock(16),
            ResBlock(16),
            nn.Linear(16,32),
            ResBlock(32),
            ResBlock(32),
            nn.Linear(32,64),
            ResBlock(64),
            ResBlock(64),
            nn.Linear(64,128),
            ResBlock(128),
            ResBlock(128),
        ])
        self.output_block = nn.Sequential(
            nn.Linear(128,110),
        )
        
    def forward(self,x):
        x = self.input_block(x)
        for i, _ in enumerate(self.blocklist):
            x = self.blocklist[i](x)
        logits = self.output_block(x)
        return logits
    
# defining the Dataset class
class train_set(Dataset):
    def __init__(self,file):
        fn = h5py.File(file, 'r')
        self.f = fn
        
        # get data
        dset = fn['group_1']['data']
        self.x = torch.Tensor(dset[:].T)
        
        # get label
        ydset = self.f['group_1']['label']
        self.y = torch.Tensor(ydset[:].T)
        # torch.from_numpy(y[index]) does not work since y is doubles and not floats.
        
        # get error in label # comment out for non-error label runs
        errdset = self.f['group_1']['e_label']
        self.err = torch.Tensor(errdset[:].T)
        
    def __len__(self):
        return self.f['group_1']['data'].shape[1]
  
    def __getitem__(self, index):
        xg = self.x[index]
        yg = self.y[index]
        errg = self.err[index]
        return (xg,yg,errg)

class valid_set(Dataset):
    def __init__(self,file):
        fn = h5py.File(file, 'r')
        self.f = fn
        
        # get data
        dset = self.f['group_2']['data']
        self.x = torch.Tensor(dset[:].T)
        
        # get label
        ydset = self.f['group_2']['label']
        self.y = torch.Tensor(ydset[:].T)
        # torch.from_numpy(y[index]) does not work since y is doubles and not floats.
        
        # get error in label # comment out for non-error label runs
        errdset = self.f['group_2']['e_label']
        self.err = torch.Tensor(errdset[:].T)
        
    def __len__(self):
        return self.f['group_2']['data'].shape[1]
  
    def __getitem__(self, index):
        xg = self.x[index]
        yg = self.y[index]
        errg = self.err[index]
        return (xg,yg,errg)

cuda


In [3]:
# training_data = train_set("/arc/home/aydanmckay/mydataelabelssmallscalecuts.h5")
data = valid_set("/arc/home/aydanmckay/smallcutdataMinMaxscaled.h5")
loaded_data = DataLoader(
    data,
    batch_size=32,
    num_workers=0
    # shuffle=True
)

In [4]:
model = ResNetMcK()
model.load_state_dict(torch.load("/arc/home/aydanmckay/torchresmodel/modelL2smallminmaxscalecutsbl32lr-2wd-5SGDep100new.pth"))
model.eval()

ResNetMcK(
  (input_block): Sequential(
    (0): Linear(in_features=3, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (blocklist): ModuleList(
    (0): ResBlock(
      (res_block1): Sequential(
        (0): Linear(in_features=16, out_features=16, bias=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
      (res_block2): Sequential(
        (0): Linear(in_features=16, out_features=16, bias=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (lrelu): LeakyReLU(negative_slope=0.01)
    )
    (1): ResBlock(
      (res_block1): Sequential(
        (0): Linear(in_features=16, out_features=16, bias=True)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
      (res_block2): Sequential(
        (0): Linear(in_featur

In [5]:
model = model.to(device)

In [6]:
preds = []
covbs = []
covrs = []
with torch.no_grad():
    for X, y, z in loaded_data:
        X = X.to(device)
        y = y.to(device)
        z = z.to(device)
        pred = model(X)
        for prediction,err in zip(pred,z):
            covbp = np.zeros((55,55))
            covrp = np.zeros((55,55))
            preds.append(prediction)
            for it in range(len(err[:55])):
                covbp[it][it] += err[it].item()**2
                covrp[it][it] += err[it+55].item()**2
            covbs.append(covbp)
            covrs.append(covrp)

In [7]:
np.array(covrs).shape

(5000, 55, 55)

In [8]:
phot_system_list = [
    PhotometricSystem.Gaia_2,
    PhotometricSystem.Gaia_DR3_Vega,
    PhotometricSystem.PanSTARRS1,
    PhotometricSystem.PanSTARRS1_Std,
    PhotometricSystem.Pristine,
    PhotometricSystem.SDSS,
    PhotometricSystem.SDSS_Std
]

In [9]:
df = pd.DataFrame(
    {'source_id':range(len(preds)),
     'bp_coefficients':[pred.to('cpu').numpy()[:55] for pred in preds],
     'bp_standard_deviation':[np.std(pred.to('cpu').numpy()[:55]) for pred in preds],
     'bp_coefficient_covariances':covbs,
     'rp_coefficients':[pred.to('cpu').numpy()[55:] for pred in preds],
     'rp_coefficient_covariances':covrs,
     'rp_standard_deviation':[np.std(pred.to('cpu').numpy()[55:]) for pred in preds]
    }
)

In [10]:
df

Unnamed: 0,source_id,bp_coefficients,bp_standard_deviation,bp_coefficient_covariances,rp_coefficients,rp_coefficient_covariances,rp_standard_deviation
0,0,"[0.11549715, 0.6457986, 0.5829673, 0.5896184, ...",0.142289,"[[50.824411354017, 0.0, 0.0, 0.0, 0.0, 0.0, 0....","[0.115278445, 0.25866365, 0.40418762, 0.295571...","[[10.894251866433535, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.132995
1,1,"[0.28943467, 0.47958648, 0.68495864, 0.5950905...",0.142061,"[[57.57570784600466, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.15441775, 0.19169451, 0.3763274, 0.23017275...","[[8.049284034164032, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.137343
2,2,"[0.09411571, 0.63932323, 0.5929025, 0.57041156...",0.143110,"[[0.6034642279553424, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.07055383, 0.24059075, 0.40049303, 0.2832369...","[[0.26975468997284224, 0.0, 0.0, 0.0, 0.0, 0.0...",0.135151
3,3,"[0.09390832, 0.6432359, 0.5975714, 0.5659211, ...",0.142467,"[[3.5874154757670027, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.08537214, 0.24853323, 0.41257766, 0.2599623...","[[1.365683548072468, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.136568
4,4,"[0.13518468, 0.65541154, 0.55439645, 0.5796229...",0.136897,"[[75.07698531556889, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.1342433, 0.251002, 0.39932472, 0.30270752, ...","[[22.05784515259529, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.130991
...,...,...,...,...,...,...,...
4995,4995,"[0.16444227, 0.6039651, 0.5875473, 0.62569624,...",0.138416,"[[39.98701003183669, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.113045216, 0.2236172, 0.39736676, 0.2671815...","[[8.878970670616582, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.135343
4996,4996,"[0.1221245, 0.64475393, 0.58829796, 0.5815501,...",0.139949,"[[4.884835755780159, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.11831984, 0.24775244, 0.40371132, 0.2956777...","[[3.958926802659576, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.132444
4997,4997,"[0.17808446, 0.6091752, 0.56570894, 0.63006234...",0.134402,"[[78.68567875133886, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.12341917, 0.22041796, 0.39520437, 0.2762978...","[[20.18509114127687, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.134646
4998,4998,"[0.15799472, 0.6941131, 0.5986909, 0.5964516, ...",0.134743,"[[0.38581238603241275, 0.0, 0.0, 0.0, 0.0, 0.0...","[0.13345876, 0.2508405, 0.40015098, 0.30653876...","[[1.2997614199214382, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.138937


In [11]:
f = '/arc/home/aydanmckay/XpContinuousMeanSpectrum_407725-409897.csv'

In [12]:
# synthetic_photometry = generate(f, photometric_system=phot_system_list)
synthetic_photometry = generate(df, photometric_system=PhotometricSystem.Pristine)
synthetic_photometry

                              

Unnamed: 0,source_id,Pristine_mag_CaHK,Pristine_flux_CaHK,Pristine_flux_error_CaHK
0,0,19.707080,9.537800e-19,4.880670e-19
1,1,19.895344,8.019410e-19,5.837317e-19
2,2,19.723688,9.393012e-19,5.826567e-20
3,3,19.710632,9.506644e-19,1.201282e-19
4,4,19.699126,9.607926e-19,5.204475e-19
...,...,...,...,...
4995,4995,19.724773,9.383630e-19,4.387080e-19
4996,4996,19.694206,9.651560e-19,1.274293e-19
4997,4997,19.710657,9.506426e-19,5.469702e-19
4998,4998,19.659228,9.967559e-19,5.183099e-20


In [13]:
data = Table.read('/arc/home/aydanmckay/gaiahike/bp_rp_apogee.fits').to_pandas()

In [14]:
with pd.option_context('mode.use_inf_as_na', True):
    data.dropna(inplace=True)
data.shape

(481813, 230)

In [15]:
blabels = ["bp_1", "bp_2", "bp_3", "bp_4", "bp_5", "bp_6", "bp_7",
          "bp_8", "bp_9", "bp_10", "bp_11", "bp_12", "bp_13", "bp_14",
          "bp_15", "bp_16", "bp_17", "bp_18", "bp_19", "bp_20", "bp_21",
          "bp_22", "bp_23", "bp_24", "bp_25", "bp_26", "bp_27", "bp_28",
          "bp_29", "bp_30", "bp_31", "bp_32", "bp_33", "bp_34", "bp_35",
          "bp_36", "bp_37", "bp_38", "bp_39", "bp_40", "bp_41", "bp_42",
          "bp_43", "bp_44", "bp_45", "bp_46", "bp_47", "bp_48", "bp_49",
          "bp_50", "bp_51", "bp_52", "bp_53", "bp_54", "bp_55"]
rlabels = ["rp_1",
          "rp_2", "rp_3", "rp_4", "rp_5", "rp_6", "rp_7", "rp_8",
          "rp_9", "rp_10", "rp_11", "rp_12", "rp_13", "rp_14", "rp_15",
          "rp_16", "rp_17", "rp_18", "rp_19", "rp_20", "rp_21", "rp_22",
          "rp_23", "rp_24", "rp_25", "rp_26", "rp_27", "rp_28", "rp_29",
          "rp_30", "rp_31", "rp_32", "rp_33", "rp_34", "rp_35", "rp_36",
          "rp_37", "rp_38", "rp_39", "rp_40", "rp_41", "rp_42", "rp_43",
          "rp_44", "rp_45", "rp_46", "rp_47", "rp_48", "rp_49", "rp_50",
          "rp_51", "rp_52", "rp_53", "rp_54", "rp_55"]
eblabels = ["bpe_1", "bpe_2", "bpe_3", "bpe_4", "bpe_5", "bpe_6", "bpe_7",
          "bpe_8", "bpe_9", "bpe_10", "bpe_11", "bpe_12", "bpe_13", "bpe_14",
          "bpe_15", "bpe_16", "bpe_17", "bpe_18", "bpe_19", "bpe_20", "bpe_21",
          "bpe_22", "bpe_23", "bpe_24", "bpe_25", "bpe_26", "bpe_27", "bpe_28",
          "bpe_29", "bpe_30", "bpe_31", "bpe_32", "bpe_33", "bpe_34", "bpe_35",
          "bpe_36", "bpe_37", "bpe_38", "bpe_39", "bpe_40", "bpe_41", "bpe_42",
          "bpe_43", "bpe_44", "bpe_45", "bpe_46", "bpe_47", "bpe_48", "bpe_49",
          "bpe_50", "bpe_51", "bpe_52", "bpe_53", "bpe_54", "bpe_55"]
erlabels = ["rpe_1",
          "rpe_2", "rpe_3", "rpe_4", "rpe_5", "rpe_6", "rpe_7", "rpe_8",
          "rpe_9", "rpe_10", "rpe_11", "rpe_12", "rpe_13", "rpe_14", "rpe_15",
          "rpe_16", "rpe_17", "rpe_18", "rpe_19", "rpe_20", "rpe_21", "rpe_22",
          "rpe_23", "rpe_24", "rpe_25", "rpe_26", "rpe_27", "rpe_28", "rpe_29",
          "rpe_30", "rpe_31", "rpe_32", "rpe_33", "rpe_34", "rpe_35", "rpe_36",
          "rpe_37", "rpe_38", "rpe_39", "rpe_40", "rpe_41", "rpe_42", "rpe_43",
          "rpe_44", "rpe_45", "rpe_46", "rpe_47", "rpe_48", "rpe_49", "rpe_50",
          "rpe_51", "rpe_52", "rpe_53", "rpe_54", "rpe_55"]

In [16]:
for label in eblabels:
    data = data[(data[label] < 10) & (data[label] > -10)]
for label in erlabels:
    data = data[(data[label] < 10) & (data[label] > -10)]
data.shape

(319316, 230)

In [17]:
data = data.sample(n=50000,random_state=42)

In [18]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = MinMaxScaler()

In [55]:
bpscaler = []
rpscaler = []
for blabel,rlabel in zip(blabels,rlabels):
    scaled = scaler.fit(data[[blabel]])
    bpscaler.append(scaled)
    scaled = scaler.fit(data[[rlabel]])
    rpscaler.append(scaled)
bpscaler = np.array(bpscaler)
rpscaler = np.array(rpscaler)

In [56]:
bpscaler

array([MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler(), MinMaxScaler(),
       MinMaxScaler(), MinMaxScaler(), MinMaxScaler()], dtype=object)

In [27]:
preds = np.array([pred.to('cpu').numpy() for pred in preds]).T

In [29]:
rpnewpred = []
bpnewpred = []
for bpred,rpred,bscale,rscale in zip(preds[:55],preds[55:],bpscaler,rpscaler):
    bpnewpred.append(bscale.inverse_transform([bpred]).flatten())
    rpnewpred.append(rscale.inverse_transform([rpred]).flatten())

In [33]:
bpnews = np.array(bpnewpred).T
rpnews = np.array(rpnewpred).T
rpnews.shape

(5000, 55)

In [42]:
len(preds.T)

5000

In [50]:
dfnew = pd.DataFrame(
    {'source_id':range(len(preds.T)),
     'bp_coefficients':list(bpnews),
     'bp_standard_deviation':[np.std(bp) for bp in bpnews],
     'bp_coefficient_covariances':covbs,
     'rp_coefficients':list(rpnews),
     'rp_coefficient_covariances':covrs,
     'rp_standard_deviation':[np.std(rp) for rp in rpnews]
    }
)

In [51]:
dfnew

Unnamed: 0,source_id,bp_coefficients,bp_standard_deviation,bp_coefficient_covariances,rp_coefficients,rp_coefficient_covariances,rp_standard_deviation
0,0,"[-1.7995198262838867, 0.397804912836069, 0.137...",0.589579,"[[50.824411354017, 0.0, 0.0, 0.0, 0.0, 0.0, 0....","[-1.800426036216557, -1.2063038572284386, -0.6...","[[10.894251866433535, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.551069
1,1,"[-1.078802953730228, -0.2909015463627904, 0.56...",0.588635,"[[57.57570784600466, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.6382507943417406, -1.4837930936006165, -0....","[[8.049284034164032, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.569084
2,2,"[-1.8881146435509997, 0.3709738871835929, 0.17...",0.592982,"[[0.6034642279553424, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-1.9857442077567118, -1.2811896328814198, -0....","[[0.26975468997284224, 0.0, 0.0, 0.0, 0.0, 0.0...",0.560005
3,3,"[-1.888973990137012, 0.38718625887268376, 0.19...",0.590317,"[[3.5874154757670027, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-1.9243439621040264, -1.2482796470244717, -0....","[[1.365683548072468, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.565876
4,4,"[-1.7179437985113009, 0.4376364112406939, 0.01...",0.567237,"[[75.07698531556889, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.7218444466339713, -1.2380501625920275, -0....","[[22.05784515259529, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.542766
...,...,...,...,...,...,...,...
4995,4995,"[-1.596713815882594, 0.22446609938794854, 0.15...",0.573531,"[[39.98701003183669, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.8096795096446623, -1.3515202171470149, -0....","[[8.878970670616582, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.560798
4996,4996,"[-1.7720591400132073, 0.3934761967524255, 0.15...",0.579883,"[[4.884835755780159, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.7878239019109994, -1.251514885019433, -0.6...","[[3.958926802659576, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.548787
4997,4997,"[-1.5401868643340026, 0.24605435759456484, 0.0...",0.556899,"[[78.68567875133886, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.7666946143069928, -1.3647763698972004, -0....","[[20.18509114127687, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.557909
4998,4998,"[-1.6234295046028477, 0.5979977205334466, 0.20...",0.558313,"[[0.38581238603241275, 0.0, 0.0, 0.0, 0.0, 0.0...","[-1.7250951822574059, -1.2387193390756601, -0....","[[1.2997614199214382, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.575689


In [52]:
synthetic_photometry = generate(dfnew, photometric_system=PhotometricSystem.Pristine)
synthetic_photometry

                              

Unnamed: 0,source_id,Pristine_mag_CaHK,Pristine_flux_CaHK,Pristine_flux_error_CaHK
0,0,,-2.550025e-19,2.022325e-18
1,1,,-8.841532e-19,2.418715e-18
2,2,,-3.149958e-19,2.414261e-19
3,3,,-2.679119e-19,4.977557e-19
4,4,,-2.259454e-19,2.156494e-18
...,...,...,...,...
4995,4995,,-3.188833e-19,1.817803e-18
4996,4996,,-2.078657e-19,5.280083e-19
4997,4997,,-2.680024e-19,2.266392e-18
4998,4998,,-7.693021e-20,2.147637e-19
