# LRG selection correlations with systematics 

Use LASSO to do variable selection and find correlations of LRG density with systematics.

In [None]:
import pandas as pd
import numpy as np
import healpy as hp
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib.lines as lines

from astropy.table import Table, vstack, hstack, join
from astropy.coordinates import SkyCoord

from scipy.stats import binned_statistic, iqr

from sklearn_my.linear_model import SGDRegressor, LinearRegression, LassoCV, Lasso
from sklearn_my.preprocessing import StandardScaler
from sklearn_my.model_selection import GridSearchCV
from sklearn_my.metrics import mean_squared_error, median_absolute_error
from tqdm.notebook import trange, tqdm

Load the data

In [None]:
# Directory holding the stellar-density map (.npy) loaded further down.
stardens_dir = Path('/global/cfs/cdirs/desi/users/rongpu/useful/healpix_maps')

# Directory holding the target density maps (FITS tables).
target_densities_dir = Path('/global/cfs/cdirs/desi/users/rongpu/data/imaging_sys/density_maps/1.0.0/resolve')

# Directories holding the per-pixel random counts and the imaging systematics
# derived from the randoms (FITS tables).
randoms_systematics_dir = Path('/global/cfs/cdirs/desi/users/rongpu/data/imaging_sys/randoms_stats/0.49.0/resolve/systematics')
randoms_counts_dir = Path('/global/cfs/cdirs/desi/users/rongpu/data/imaging_sys/randoms_stats/0.49.0/resolve/counts')

# Location for anything written by this notebook: the current directory.
output_dir = Path("./")

Parameters

In [None]:
# What to analyse: target class, imaging region and HEALPix resolution.
target_class = 'LRG'
field = 'north'  # 'south'
nside = 256

# Only pixels whose covered area fraction is strictly above this are kept.
min_pix_frac = 0.6  # minimum fraction of pixel area to be used

# Selection tags; both values are embedded in the names of the input files.
min_nobs = 1
maskbits_dict = {
    'LRG': [1, 8, 9, 11, 12, 13],
    'ELG': [1, 11, 12, 13],
    'QSO': [1, 8, 9, 11, 12, 13],
    'BGS_ANY': [1, 13],
    'BGS_BRIGHT': [1, 13],
}

# Columns used as regressors in the fit, keyed by upper-case target class.
xnames_fit_dict = {
    'LRG': [
        'EBV',
        'psfdepth_w1mag_ebv',
        'galdepth_gmag_ebv',
        'galdepth_rmag_ebv',
        'galdepth_zmag_ebv',
        'PSFSIZE_G',
        'PSFSIZE_R',
        'PSFSIZE_Z',
        'stardens_log',
        "cos(l)",
        "cos(b)",
    ],
}
# HEALPix geometry at the chosen resolution: area of a single pixel in square
# degrees, and the total number of pixels on the sphere.
pix_area = hp.nside2pixarea(nside, degrees=True)
npix = hp.nside2npix(nside)

In [None]:
# File names use the lower-case class name; the parameter dictionaries are
# keyed by the upper-case one.
target_class = target_class.lower()

# Regressor columns for this target class.
xnames_fit = xnames_fit_dict[target_class.upper()]

# Concatenate the mask-bit numbers into one string (e.g. [1, 13] -> '113'),
# which is the form used in the input file names.
maskbits = ''.join(map(str, maskbits_dict[target_class.upper()]))
# Per-pixel random counts; only pixels that contain at least one random are kept.
maps = Table.read(randoms_counts_dir / f'counts_{field}_nside_{nside}_minobs_{min_nobs}_maskbits_{maskbits}.fits')
maps = maps[maps['n_randoms'] > 0]

# Per-pixel systematics. RA/DEC are dropped first so the join below does not
# produce clashing coordinate columns.
maps1 = Table.read(randoms_systematics_dir / f'systematics_{field}_nside_{nside}_minobs_{min_nobs}_maskbits_{maskbits}.fits')
maps1.remove_columns(['RA', 'DEC'])
maps = join(maps, maps1, keys='HPXPIXEL', join_type='inner')

# Per-pixel target counts. The outer join keeps pixels that appear in only one
# of the two tables; their missing entries are then filled with 0.
density = Table.read(target_densities_dir / f'density_map_{target_class}_{field}_nside_{nside}_minobs_{min_nobs}_maskbits_{maskbits}.fits')
maps = join(maps, density[['HPXPIXEL', 'n_targets']], keys='HPXPIXEL', join_type='outer')
maps = maps.filled(0)

# Keep only pixels whose covered area fraction exceeds the threshold.
maps = maps[maps['FRACAREA'] > min_pix_frac]

# Remove the southern part of DES.
mask = maps['DEC'] > -30
maps = maps[mask]

# Target surface density per square degree of covered pixel area.
maps['density'] = maps['n_targets'] / (maps['FRACAREA'] * pix_area)

# Stellar density map stored as a flat array indexed by HEALPix pixel number
# (file name suggests RING ordering -- TODO confirm it matches HPXPIXEL).
stardens_path = stardens_dir / f'pixweight-dr7.1-0.22.0_stardens_{nside}_ring.npy'
stardens = np.load(stardens_path)

# Look up the stellar density of every pixel in the table, plus its log10.
# NOTE(review): a pixel with zero stellar density would give -inf here.
maps['stardens'] = stardens[maps['HPXPIXEL']]
maps['stardens_log'] = np.log10(maps['stardens']).astype('>f8')

# Add the cosines of galactic longitude (l) and latitude (b) as extra columns.
coords = SkyCoord(ra=maps['RA'], dec=maps['DEC'], unit="deg")
galactic_coords = coords.galactic  # equatorial -> galactic, computed once
maps["cos(l)"] = np.cos(galactic_coords.l.radian)
maps["cos(b)"] = np.cos(galactic_coords.b.radian)



## Calculate the Predicted Surface Density with LASSO

### Analysis for BASS+MzLS Region

In [None]:
# Design matrix: one column per regressor listed in xnames_fit.
data = maps[xnames_fit].to_pandas()

# Standardise every regressor so the LASSO penalty treats them on an equal footing.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data.to_numpy())

In [None]:
# Weighted LASSO: scan the regularisation strength alpha and, for every value,
# record the fitted coefficients and the weighted R^2 on the same pixels.
num_alphas = 1000
alphas = np.logspace(-5, 10, num=num_alphas)

r2 = np.zeros(num_alphas)
coeffs = np.zeros((num_alphas, len(data.columns)))

for i, alpha in enumerate(tqdm(alphas)):
    lasso = Lasso(alpha=alpha)
    # Pixels are weighted by the fraction of their area that is covered.
    lasso.fit(data_scaled, maps["density"], sample_weight=maps["FRACAREA"])
    coeffs[i] = lasso.coef_
    r2[i] = lasso.score(data_scaled, maps["density"], sample_weight=maps["FRACAREA"])

# Fraction of regressors with a non-zero coefficient at each alpha.
frac = np.count_nonzero(coeffs, axis=1) / len(data.columns)


Select the $\alpha$ that keeps $R^2$ as high as possible while reducing the fraction of predictors with non-zero coefficients.

In [None]:
# Plot, against alpha, the fraction of non-zero coefficients (left axis) and
# the weighted R^2 (right axis).
fig, ax0 = plt.subplots(1, 1, figsize=(12, 8))
ax1 = ax0.twinx()
ax0.plot(alphas, frac, label="Fraction of Non-zero coefficients")
ax1.plot(alphas, r2, c="C1", label=r"$R^{2}$")
ax0.set_xscale("log")
ax0.set_xlabel(r"alpha", fontsize=30)
ax0.set_ylabel("Fraction of non-zero coefficients")
ax1.set_ylabel(r"$R^{2}$")

# plt.legend() only collects artists from the current axes (the twin, ax1),
# which would drop the entry for the curve drawn on ax0. Gather the handles
# from both axes and build a single legend on the top-most axes instead.
handles0, labels0 = ax0.get_legend_handles_labels()
handles1, labels1 = ax1.get_legend_handles_labels()
ax1.legend(handles0 + handles1, labels0 + labels1)

In [None]:
# Weighted LASSO with the regularisation strength chosen by cross-validation.
lasso_bm = Lasso()

# Candidate alphas: log-spaced grid from 1e-5 to 1e10.
num_alphas = 1000
alphas = np.logspace(-5, 10, num=num_alphas)

# Cross-validate over the grid using all available cores; pixels are weighted
# by the fraction of their area that is covered.
cv = LassoCV(alphas=alphas, n_jobs=-1)
cv.fit(data_scaled, maps["density"], sample_weight=maps["FRACAREA"])

In [None]:
# Mean cross-validated MSE (averaged over folds) as a function of alpha.
# The rows of cv.mse_path_ follow cv.alphas_, which LassoCV stores sorted in
# descending order, whereas the input `alphas` grid is ascending. Plotting
# against `alphas` would mirror the curve, so use cv.alphas_ for the x-axis.
# NOTE(review): assumes the sklearn_my fork keeps upstream scikit-learn's ordering.
plt.plot(cv.alphas_, np.mean(cv.mse_path_, axis=-1))
plt.xlabel("alpha")
plt.ylabel("Mean cross-validated MSE")
plt.xscale("log")