# Load and import

In [1]:
# Execute the companion notebook in this kernel so that the names it defines become available here.
%run load_data.ipynb
# List the names now present in the namespace (catalogue lists, cut constants, library aliases, ...).
print (dir())

['DataFrame', 'In', 'Math', 'Out', 'SIZE_RATIO_CUT', 'SNR_CUT', 'Series', 'T_CUT', 'Table', '_', '__', '___', '__builtin__', '__builtins__', '__doc__', '__loader__', '__name__', '__package__', '__spec__', '_dh', '_i', '_i1', '_ih', '_ii', '_iii', '_oh', 'bottleneck', 'display', 'exit', 'filter_by_snr', 'fits', 'get_df_list', 'get_ipython', 'h5py', 'i', 'index_2_3', 'index_2_3_true', 'index_2_3_true_negative', 'index_2_3_true_positive', 'index_4_5', 'index_4_5_true', 'index_4_5_true_negative', 'index_4_5_true_positive', 'load_mcal', 'load_true', 'mcal_df_list', 'mcal_df_list_negative', 'mcal_df_list_positive', 'np', 'numexpr', 'pd', 'photdf', 'plt', 'quit', 'tqdm', 'true_df_list']


### read gold dfs (gold maps and delta_g)

In [2]:
# Read the four per-catalogue pickles mcal2.pkl .. mcal5.pkl, keeping them in file-number order.
g_gold_list_positive=[]
for i in range(4):
    path='/home/cz136/project/balrog/data/gold_g/mcal{}.pkl'.format(i+2)
    g_gold_list_positive.append(pd.read_pickle(path))

### read gold maps

In [3]:
# Load the gold table from its pickle; a NaN-filtered view of it is later looked up by healpix pixel index.
gold_all=pd.read_pickle('/home/cz136/project/balrog/data/gold_g/gold.pkl')

In [4]:
import treecorr
import numpy as np
import pandas as pd
import healpy as hp
from tqdm import tqdm

# Show floats in pandas output in scientific notation with three decimals.
pd.set_option('display.float_format', lambda x: '%.3e' % x)
# Use the same '%.3e' formatting for every element numpy prints ('all' covers all element types).
np.set_printoptions(formatter={'all':lambda x: '%.3e'% x})

# Constants

In [5]:
# HEALPix resolution parameter; get_healpix uses it when assigning catalogue objects to pixels.
NSIDE=256 #or 512
# Total number of pixels on the sphere at this NSIDE.
NPIX=hp.nside2npix(NSIDE)
print ("The number of pixels is: {}.".format(NPIX))
# Array of every pixel index 0..NPIX-1 (not referenced again in the cells visible here).
G_INDEX=np.arange(NPIX)

The number of pixels is: 786432.


# Data manipulation

### get a df with NaN rows dropped

In [6]:
# Keep only gold rows whose 'ebv_planck13' value is present; this column alone decides which rows count as valid.
gold_all_not_null=gold_all[gold_all['ebv_planck13'].notnull()]
# g_gold_list_negative_not_null=[df[df['ebv_planck13'].notnull()] for df in g_gold_list_negative]

In [7]:
# Column names of the gold table with the last 8 dropped.
# NOTE(review): the reason for excluding the final 8 columns is not visible here, and this list is
# not used by the later cells shown (they iterate key_list instead) - confirm whether it is still needed.
gold_columns=list(gold_all_not_null.columns)[:-8]

In [8]:
# Attach the true shear columns from photdf to every positive catalogue, keeping only rows whose
# index is present in both frames (inner join on the index).
# The output has one entry per input catalogue: the previous fixed four-slot list raised
# IndexError for more than four catalogues and left None placeholders for fewer.
final_df_positive_list=[
    pd.concat([catalog,photdf[['true_cm_g_1','true_cm_g_2']]],axis=1,join='inner')
    for catalog in mcal_df_list_positive
]

In [9]:
# Attach the true shear columns from photdf to every negative catalogue, keeping only rows whose
# index is present in both frames (inner join on the index).
# The output has one entry per input catalogue: the previous fixed four-slot list raised
# IndexError for more than four catalogues and left None placeholders for fewer.
final_df_negative_list=[
    pd.concat([catalog,photdf[['true_cm_g_1','true_cm_g_2']]],axis=1,join='inner')
    for catalog in mcal_df_list_negative
]

### function to get delta_g columns

In [10]:
def get_delta_g(catalog):
    """Add response-corrected shear and shear-residual columns to ``catalog`` in place.

    The catalogue-wide means of ``R11`` and ``R22`` are used as scalar responses:

    * ``calculated_g_1 = e_1 / mean(R11)``
    * ``calculated_g_2 = e_2 / mean(R22)``
    * ``delta_g1 = calculated_g_1 - true_cm_g_1``
    * ``delta_g2 = calculated_g_2 - true_cm_g_2``

    Parameters
    ----------
    catalog : pandas.DataFrame
        Must contain the columns ``R11``, ``R22``, ``e_1``, ``e_2``,
        ``true_cm_g_1`` and ``true_cm_g_2``. It is modified in place.

    Returns
    -------
    None
    """
    R11 = catalog['R11'].mean()
    R22 = catalog['R22'].mean()
    # Whole-column arithmetic replaces the former row-by-row DataFrame.apply: same values,
    # no Python-level loop over rows, and it also works when the catalogue has no rows.
    catalog['calculated_g_1']=catalog['e_1']/R11
    catalog['calculated_g_2']=catalog['e_2']/R22
    catalog['delta_g1']=catalog['calculated_g_1']-catalog['true_cm_g_1']
    catalog['delta_g2']=catalog['calculated_g_2']-catalog['true_cm_g_2']

In [11]:
# Add the derived shear columns to every catalogue in place: positive list first, then negative.
for catalog in final_df_positive_list+final_df_negative_list:
    get_delta_g(catalog)

### function to get correlation functions

In [110]:
def get_gg(catalog):
    """Compute the shear-shear correlation of the shear residuals in ``catalog``.

    Prints the number of rows, builds a treecorr catalogue from the
    ``delta_g1``/``delta_g2`` columns at the ``ra``/``dec`` positions (degrees)
    and processes a 20-bin GGCorrelation between 2.5 and 250 arcmin.

    Returns a dict with the processed correlation under 'gg', the row count
    under 'length', and the mean / standard deviation of each residual
    component under 'g1avg', 'g2avg', 'g1sigma' and 'g2sigma'.
    """
    n_objects = len(catalog)
    print(n_objects)

    resid_1 = catalog['delta_g1']
    resid_2 = catalog['delta_g2']

    shear_catalog = treecorr.Catalog(
        g1=resid_1.to_numpy(),
        g2=resid_2.to_numpy(),
        ra=catalog['ra'].to_numpy(),
        dec=catalog['dec'].to_numpy(),
        ra_units='deg',
        dec_units='deg',
    )

    correlation = treecorr.GGCorrelation(
        nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin', bin_slop=0.2
    )
    correlation.process(shear_catalog)

    return {
        'gg': correlation,
        'length': n_objects,
        'g1avg': np.mean(resid_1),
        'g2avg': np.mean(resid_2),
        'g1sigma': np.std(resid_1),
        'g2sigma': np.std(resid_2),
    }

In [94]:
def get_kg(catalog,key):
    """Cross-correlate the scalar column ``key`` with the shear residuals of ``catalog``.

    Two treecorr catalogues are built at the same ``ra``/``dec`` positions
    (degrees): one carrying ``catalog[key]`` as the scalar field, one carrying
    ``delta_g1``/``delta_g2`` as the shear field. Returns the processed 20-bin
    KGCorrelation between 2.5 and 250 arcmin.
    """
    resid_1 = catalog['delta_g1'].to_numpy()
    resid_2 = catalog['delta_g2'].to_numpy()
    scalar_values = catalog[key].to_numpy()
    # Both catalogues share one set of sky positions.
    position = dict(
        ra=catalog['ra'].to_numpy(),
        dec=catalog['dec'].to_numpy(),
        ra_units='deg',
        dec_units='deg',
    )

    scalar_catalog = treecorr.Catalog(k=scalar_values, **position)
    shear_catalog = treecorr.Catalog(g1=resid_1, g2=resid_2, **position)

    correlation = treecorr.KGCorrelation(
        nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin', bin_slop=0.2
    )
    correlation.process(scalar_catalog, shear_catalog)
    return correlation

In [113]:
# One get_gg result dict per catalogue, in the same order as the input lists.
gg_positive = [get_gg(catalog) for catalog in final_df_positive_list]
gg_negative = [get_gg(catalog) for catalog in final_df_negative_list]

272818
272818
215145
215145
270876
270876
213617
213617


### function for getting healpix indexes

In [15]:
def get_healpix(df):
    """Add a NESTED-ordering healpix pixel index column ``ipix`` to ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ra`` and ``dec`` columns in degrees.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now carrying the ``ipix`` column.

    Notes
    -----
    Relies on the notebook-level constants ``NSIDE`` and ``NPIX``.
    """
    print ("The length of the df is: {}.".format(len(df)))
    print ("The ratio of the length of the catagory to the total pixel number is: {:.2%}".format(len(df)/NPIX))
    # hp.ang2pix accepts arrays, so convert all positions in one call instead of a row-wise apply.
    theta=np.pi/2-np.deg2rad(df['dec'].to_numpy())  # colatitude in radians
    phi=np.deg2rad(df['ra'].to_numpy())             # longitude in radians
    df['ipix']=hp.ang2pix(NSIDE,theta,phi,nest=True)
    # NOTE(review): this reports the fraction of objects whose pixel index is non-zero; pixel 0 is
    # itself a valid index, so confirm this is the intended "rendered" criterion.
    print("The propotion of rendered objects is: {:.2%}".format(np.sum(df['ipix']!=0)/len(df)))
    # NOTE(review): the message below is printed but no index is reset in this function.
    print ("Resetting indexes")
    return (df)

In [16]:
# get_healpix writes 'ipix' into each frame in place and returns that same frame, so these lists
# hold the same DataFrame objects as final_df_positive_list / final_df_negative_list.
df_w_ipix_positive=[get_healpix(df) for df in final_df_positive_list]
df_w_ipix_negative=[get_healpix(df) for df in final_df_negative_list]

The length of the df is: 272818.
The ratio of the length of the catagory to the total pixel number is: 34.69%
The propotion of rendered objects is: 100.00%
Resetting indexes
The length of the df is: 272818.
The ratio of the length of the catagory to the total pixel number is: 34.69%
The propotion of rendered objects is: 100.00%
Resetting indexes
The length of the df is: 215145.
The ratio of the length of the catagory to the total pixel number is: 27.36%
The propotion of rendered objects is: 100.00%
Resetting indexes
The length of the df is: 215145.
The ratio of the length of the catagory to the total pixel number is: 27.36%
The propotion of rendered objects is: 100.00%
Resetting indexes
The length of the df is: 270876.
The ratio of the length of the catagory to the total pixel number is: 34.44%
The propotion of rendered objects is: 100.00%
Resetting indexes
The length of the df is: 270876.
The ratio of the length of the catagory to the total pixel number is: 34.44%
The propotion of ren

In [17]:
#index for gold map with valid pixels
map_ipix=gold_all_not_null.index.to_numpy()

In [18]:
def fill_gold_values_to_mcal(df):
    """Return ``df`` restricted to objects with gold data, with the gold columns appended.

    Rows whose ``ipix`` is not among ``map_ipix`` are dropped. For the rows
    that remain, the matching gold rows are looked up by ``ipix``, re-labelled
    with the catalogue's own index, and concatenated column-wise.
    """
    # Keep only objects whose pixel has an entry in the gold table.
    has_gold_pixel = np.isin(df['ipix'].to_numpy(), map_ipix)
    matched = df[has_gold_pixel]

    # Fetch one gold row per remaining object, in catalogue order.
    gold_rows = gold_all_not_null.loc[matched['ipix'].to_numpy()]
    # Swap the pixel labels for the catalogue index so the two frames align row for row.
    gold_rows = gold_rows.set_index(matched.index)

    return pd.concat([matched, gold_rows], axis=1)

In [19]:
# Needs the 'ipix' column, which get_healpix added to these same frames in place.
filled_df_positive=[fill_gold_values_to_mcal(df) for df in final_df_positive_list]
filled_df_negative=[fill_gold_values_to_mcal(df) for df in final_df_negative_list]

In [20]:
# Every column of the gold table; get_kg_for_indi_df computes one correlation per entry.
key_list=gold_all.columns

In [21]:
def get_kg_for_key(key):
    """Return a one-argument function that runs ``get_kg`` on a frame with ``key`` fixed."""
    def kg_for_frame(df):
        return get_kg(df,key)
    return kg_for_frame

In [46]:
def get_kg_for_indi_df(df):
    """Compute the scalar-shear correlation for every column in ``key_list``.

    Prints the number of rows in ``df`` and returns a dict keyed by column
    name. Each value is a dict with:

    * 'corrfunc' - result of ``get_kg`` for that column,
    * 'length'   - number of rows in ``df``,
    * 'kaverage' / 'ksigma' - mean and standard deviation of the column,
    * 'g1avg' / 'g1sigma'   - mean and standard deviation of ``delta_g1``.
    """
    n_rows=len(df)
    print(n_rows)
    # These statistics do not depend on the key, so compute them once rather than per column.
    g1avg=np.mean(df['delta_g1'])
    g1sigma=np.std(df['delta_g1'])
    kg_dict={}
    for key in key_list:
        kg_dict[key]={"corrfunc":get_kg_for_key(key)(df),"length":n_rows,
                      "kaverage":np.mean(df[key]),"ksigma":np.std(df[key]),
                      'g1avg':g1avg,'g1sigma':g1sigma}
    return(kg_dict)

In [47]:
# One {column name: result dict} mapping per gold-matched catalogue, in input order.
kg_positive=[get_kg_for_indi_df(df) for df in filled_df_positive]
kg_negative=[get_kg_for_indi_df(df) for df in filled_df_negative]

215727
215727
169940
169940
214531
214531
169167
169167


# Plotting

In [114]:
def plt_gg(gg,title=None):
    """Plot xi+ and xi- of a shear-shear correlation and save one figure for each.

    Parameters
    ----------
    gg : dict
        Result dict with keys 'gg' (an object exposing ``meanr``, ``xip``,
        ``varxip``, ``xim``, ``varxim``), 'length', 'g1avg', 'g2avg',
        'g1sigma' and 'g2sigma' - the layout returned by ``get_gg``.
    title : str, optional
        Base name used for the figure titles and output file names
        (``<title>_xip`` and ``<title>_xim``). Falls back to ``'gg'`` when
        omitted; previously omitting it raised TypeError on ``title+'_xip'``.

    Returns
    -------
    None
        Figures are written under /home/cz136/project/balrog/image/corr/ and closed.
    """
    corrfunc=gg['gg']
    label='gg' if title is None else title

    # Summary statistics shown in a box on each figure.
    textstr = '\n'.join((
        r'$n=%.2e$' % (gg['length'], ),
        r'$g1_{avg}=%.2e$' % (gg['g1avg'], ),
        r'$\sigma_{g1}=%.2e$' % (gg['g1sigma'], ),
        r'$g2_{avg}=%.2e$' % (gg['g2avg'], ),
        r'$\sigma_{g2}=%.2e$' % (gg['g2sigma'], )))
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # Both components are drawn identically; only the data and the name suffix differ.
    components=(('xip',corrfunc.xip,corrfunc.varxip),
                ('xim',corrfunc.xim,corrfunc.varxim))
    for suffix,xi,varxi in components:
        fig,ax = plt.subplots()
        ax.errorbar(corrfunc.meanr,xi,yerr=np.sqrt(varxi))
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_title(label+'_'+suffix)
        # Separations are in arcmin: get_gg builds the correlation with sep_units='arcmin'
        # (the label previously said arcsec).
        ax.set_xlabel(r"$\theta$ /arcmin")
        # Statistics box anchored near the upper-right corner, in axes coordinates.
        ax.text(0.80, 0.95, textstr, transform=ax.transAxes, fontsize=5,
                verticalalignment='top', bbox=props)
        fig.savefig('/home/cz136/project/balrog/image/corr/{}_{}'.format(label,suffix),dpi=500)
        # Close this specific figure so figures do not pile up across calls.
        plt.close(fig)

In [106]:
def plt_kg(kg,key,title=None):
    """Plot one scalar-shear correlation function with a statistics box and save it.

    Parameters
    ----------
    kg : dict
        Entry with keys 'corrfunc' (an object exposing ``meanr``, ``xi`` and
        ``varxi``), 'kaverage', 'ksigma', 'length', 'g1avg' and 'g1sigma' -
        the layout produced by ``get_kg_for_indi_df``.
    key
        Accepted for call compatibility; not used in the body.
    title : str, optional
        Figure title and output file name under
        /home/cz136/project/balrog/image/corr/.

    Returns
    -------
    None
        The figure is written to disk and closed.
    """
    corrfunc = kg['corrfunc']

    # Summary statistics shown in a box on the figure.
    textstr = '\n'.join((
        r'$n=%.2e$' % (kg['length'], ),
        r'$\kappa_{avg}=%.2e$' % (kg['kaverage'], ),
        r'$\sigma_k=%.2e$' % (kg['ksigma'], ),
        r'$g1_{avg}=%.2e$' % (kg['g1avg'], ),
        r'$\sigma_{g1}=%.2e$' % (kg['g1sigma'], )))
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    fig,ax = plt.subplots()
    ax.errorbar(corrfunc.meanr,corrfunc.xi,yerr=np.sqrt(corrfunc.varxi))
    ax.set_xscale('log')
    ax.set_yscale('log')

    # Place the text box near the upper-right corner, in axes coordinates.
    ax.text(0.80, 0.95, textstr, transform=ax.transAxes, fontsize=5,
            verticalalignment='top', bbox=props)

    ax.set_title(title)
    # Separations are in arcmin: get_kg builds the correlation with sep_units='arcmin'
    # (the label previously said arcsec).
    ax.set_xlabel(r"$\theta$ /arcmin")
    fig.savefig('/home/cz136/project/balrog/image/corr/{}'.format(title),dpi=500)
    # Close this specific figure so figures do not pile up across calls.
    plt.close(fig)

In [115]:
# Save xi+ / xi- figures for every catalogue; list position i is labelled i+2 in the file name
# (e.g. gg_mcal_2_positive for the first entry).
for i,gg in enumerate(gg_positive):
    plt_gg(gg, title='gg_mcal_{}_positive'.format(i+2))
for i,gg in enumerate(gg_negative):
    plt_gg(gg, title='gg_mcal_{}_negative'.format(i+2))

In [108]:
def plot_indi_kg_dict(kg_dict,title=None):
    """Call ``plt_kg`` once per entry of ``kg_dict``, titling each plot ``<title>_<key>``."""
    for key in kg_dict:
        plot_title=title+'_'+str(key)
        plt_kg(kg_dict[key],key,title=plot_title)

In [109]:
# Save one figure per (catalogue, gold column) pair; list position i is labelled i+2 in the title prefix.
for i,kg_dict in enumerate(kg_positive):
    plot_indi_kg_dict(kg_dict,title='kg_mcal_{}_positive'.format(i+2))
for i,kg_dict in enumerate(kg_negative):
    plot_indi_kg_dict(kg_dict,title='kg_mcal_{}_negative'.format(i+2))