# The Simplest Analytic Emulator

Authors: Phil Marshall (@drphilmarshall), Ji Won Park (@jiwoncpark)
Created: July 19, 2019
Last run: July 19, 2019
Goals: 
- Visualize an emulated Object catalog from a very simple analytic emulator, to serve as the baseline for any ML-based emulator.

In [1]:
import torch
import numpy as np
import json
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Seed every random number generator used here (NumPy, PyTorch CPU and CUDA)
# and set the cuDNN determinism flags so that re-runs are repeatable.
np.random.seed(2809)
torch.manual_seed(2809)
torch.cuda.manual_seed(2809)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Compare on `device.type`: a torch.device does not compare equal to a plain
# string, so the previous `device == 'cuda'` test was always False and the
# CUDA branch never ran, even on a GPU machine.
if device.type == 'cuda':
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')
print("device: ", device)
    
# Load the run configuration from disk. The context manager closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open("args.txt") as args_file:
    args = json.load(args_file)

############
# Data I/O #
############

from derp_data import DerpData
import itertools

# X base columns: per-band entries, truth position/redshift/star flag,
# true LSST magnitudes, bulge and disk shape parameters.
truth_cols = [
    'u', 'g', 'r', 'i', 'z', 'y',
    'ra_truth', 'dec_truth', 'redshift', 'star',
]
truth_cols.extend('mag_true_{:s}_lsst'.format(band) for band in 'ugrizy')
truth_cols.extend([
    'size_bulge_true',
    'size_minor_bulge_true',
    'ellipticity_1_bulge_true',
    'ellipticity_2_bulge_true',
    'bulge_to_total_ratio_i',
])
truth_cols.extend([
    'size_disk_true',
    'size_minor_disk_true',
    'ellipticity_1_disk_true',
    'ellipticity_2_disk_true',
])
# X base columns: observing-condition quantities.
opsim_cols = [
    'm5_flux',
    'PSF_sigma2',
    'filtSkyBrightness_flux',
    'airmass',
    'n_obs',
]

# Y base columns: band-independent measurements first.
drp_cols = [
    'extendedness',
    'ra_obs', 'dec_obs',
    'Ixx', 'Ixy', 'Iyy',
    'IxxPSF', 'IxyPSF', 'IyyPSF',
]
# Per-band flux columns are named <prefix><band>.
drp_cols_prefix = ['cModelFlux_', 'psFlux_']
# Per-band columns named <band><suffix>; none are enabled at the moment.
# Candidates that were previously listed here (disabled):
#   '_ext_photometryKron_KronFlux_instFlux', '_base_CircularApertureFlux_70_0_instFlux'
drp_cols_suffix = []
drp_cols.extend(
    prefix + band
    for prefix, band in itertools.product(drp_cols_prefix, 'ugrizy')
)
drp_cols.extend(
    band + suffix
    for suffix, band in itertools.product(drp_cols_suffix, 'ugrizy')
)


# Define dataset: X is built from the truth + observing-condition columns,
# Y from the DRP columns.
data = DerpData(
    data_path='raw_data/obj_master_tract4850.csv',
    data_path2=None,
    X_base_cols=truth_cols + opsim_cols,
    Y_base_cols=drp_cols,
    args=args,
    ignore_null_rows=True,
    save_to_disk=True,
)
# Only export metadata when the data were processed in this run.
# NOTE(review): presumably this is what writes data_meta.txt — confirm in DerpData.
if not args['data_already_processed']:
    data.export_metadata_for_eval(device_type=device.type)
# Read metadata if reading processed data from disk. The context manager closes
# the file handle instead of leaving it open until garbage collection.
with open("data_meta.txt") as meta_file:
    data_meta = json.load(meta_file)

# Unpack the metadata entries used by the rest of the notebook.
X_cols = data_meta['X_cols']
Y_cols = data_meta['Y_cols']
train_indices = data_meta['train_indices']
val_indices = data_meta['val_indices']
X_dim = data_meta['X_dim']
Y_dim = data_meta['Y_dim']

from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

# Split train vs. val: each sampler draws only from its own index list.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Define dataloader
# Compare on `device.type`: a torch.device does not compare equal to a plain
# string, so the previous `device == 'cuda'` test was always False and these
# GPU-specific loader options were never applied.
# NOTE(review): pin_memory only works for CPU tensors — confirm that the
# dataset yields CPU tensors when running on a GPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if device.type == 'cuda' else {}
train_loader = DataLoader(data, batch_size=args['batch_size'], sampler=train_sampler, **kwargs)
val_loader = DataLoader(data, batch_size=args['batch_size'], sampler=val_sampler, **kwargs)

device:  cuda


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Deleting null rows: 70355 --> 67840
Standardized X except:  ['star', 'mag_true_u_lsst', 'mag_true_g_lsst', 'mag_true_r_lsst', 'mag_true_i_lsst', 'mag_true_z_lsst', 'mag_true_y_lsst', 'u_flux', 'g_flux', 'r_flux', 'i_flux', 'z_flux', 'y_flux']
Standardized Y except:  []
X has null columns:  []
Y has null columns:  []
Overall star frac: 0.21
Training star frac: 0.21
Validation star frac: 0.21
Saving processed data to disk...


In [4]:
# Display the Y column names as recorded in the processed-data metadata.
Y_cols

['extendedness',
 'ra_offset',
 'dec_offset',
 'Ixx',
 'Ixy',
 'Iyy',
 'IxxPSF',
 'IxyPSF',
 'IyyPSF',
 'cModelFlux_u',
 'cModelFlux_g',
 'cModelFlux_r',
 'cModelFlux_i',
 'cModelFlux_z',
 'cModelFlux_y',
 'psFlux_u',
 'psFlux_g',
 'psFlux_r',
 'psFlux_i',
 'psFlux_z',
 'psFlux_y']

In [None]:
# NOTE(review): unfinished draft — this cell cannot run as written.
import models
import solver

# TODO: `DerpData()` is called here without any of the arguments that were
# passed when `data` was constructed earlier — confirm what is intended.
trainval_data = DerpData()
# TODO: the right-hand side is missing; this is a SyntaxError for the whole cell.
val_data = 

# Sketch of the intended flow, per the calls below: the analytic model returns
# a mean and a log-variance, from which a sample is drawn.
# TODO: `X_val` is not defined in any visible cell (presumably the validation
# inputs — confirm and build it before this point).
analytic = models.Analytic()
mean, logvar = analytic(X_val)
sample = solver.sample(mean, logvar)


In [None]:
# TODO: visualize the sampled catalog in 1D and 2D.
