## Observational overdensities

Comparison to observed overdensities.

In [177]:
import pandas as pd
import numpy as np

import pickle as pcl

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)

from methods import bhattacharyya
from methods import plotit

from astropy.cosmology import Planck13

from methods import z_distort
from methods import factor_h
from methods import get_protoclusters

from overdensity_cylinder import overdensity_cylinder

h = 0.673
L = 480.279 / h # box side length

N = 100000    # number of random regions

directory = '/lustre/scratch/astro/cl478/protoclusters_data/'
out_directory = 'output/'

### Load data

In [178]:
redshifts = ['3p10','3p95']  #,'8p22','8p93','9p72'] '2p07', ,'5p03','5p92','6p97'
zees = [float(z.replace('p','.')) for z in redshifts]

# gals_sfr = [None] * len(redshifts)
gals_mstar = [None] * len(redshifts)

for i, z in enumerate(redshifts):
    print i, z
    
#     gals_sfr[i] = pd.read_csv('%shenriques2015a_z%s_sfr_r200.csv'%(directory,z), 
#                               skiprows=122, skipfooter=1, engine='python')
    
    gals_mstar[i] = pd.read_csv('%shenriques2015a_z%s_stellarMass_r200.csv'%(directory,z), 
                                skiprows=122, skipfooter=1, engine='python')

0 3p10
1 3p95


In [179]:
gals_mstar10 = [None] * len(redshifts)

for i, z in enumerate(zees):
        
    gals_mstar10[i] = z_distort(factor_h(gals_mstar[i], h), z, L)

    gals_mstar10[i] = gals_mstar10[i][gals_mstar10[i]['zn_stellarMass'] >= 1]

#         print "Filling in NaN values..."
    gals_mstar10[i].ix[np.isnan(gals_mstar10[i]['z0_haloId']), 'z0_haloId'] = -1
    gals_mstar10[i].ix[np.isnan(gals_mstar10[i]['z0_centralId']), 'z0_centralId'] = -1
    gals_mstar10[i].ix[np.isnan(gals_mstar10[i]['z0_central_mcrit200']), 'z0_central_mcrit200'] = 0
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


### Franck & McGaugh +16

Write protocluster candidate data in to a dataframe for ease of use. Redshift depth given by $z \pm \sigma_{z}$.

In [180]:
name = ['CCPC-z27-002','CCPC-z29-001','CCPC-z29-002','CCPC-z30-001','CCPC-z30-003','CCPC-z31-003',
        'CCPC-z31-004','CCPC-z31-005','CCPC-z32-002','CCPC-z33-002','CCPC-z35-001','CCPC-z36-001']

redshift = [2.772,2.918,2.919,3.035,3.096,3.133,3.146,3.152,3.234,3.372,3.597,3.644]
deltaz = [0.007,0.005,0.009,0.005,0.008,0.008,0.006,0.007,0.003,0.008,0.003,0.003]

In [181]:
dat = pd.DataFrame(data=[redshift, deltaz]).T
dat.index = name
dat.columns = ['redshift','deltaz']

Calculate $\Delta z$ in $\mathrm{cMpc}$

In [182]:
dat['deltac'] = (Planck13.comoving_distance(dat.redshift+dat.deltaz) - \
                 Planck13.comoving_distance(dat.redshift-dat.deltaz)) / 2 # * Planck13.h

Add empty columns for calculated protocluster probabilities, and nearest redshift snapshot in Millennium.

In [237]:
# dat['protocluster probability'] = pd.Series()
# dat['high mass probability'] = pd.Series()
# dat['part probability'] = pd.Series()
dat['dgal'] = [11.02,11.21,12.91,18.78,12.28,9.80,7.59,17.77,13.11,7.44,10.18,23.50]
dat['nearest snap'] = [3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.95,3.95]

dat

Unnamed: 0,redshift,deltaz,deltac,nearest snap,dgal
CCPC-z27-002,2.772,0.007,7.452322,3.1,11.02
CCPC-z29-001,2.918,0.005,5.039118,3.1,11.21
CCPC-z29-002,2.919,0.009,9.067083,3.1,12.91
CCPC-z30-001,3.035,0.005,4.828797,3.1,18.78
CCPC-z30-003,3.096,0.008,7.559564,3.1,12.28
CCPC-z31-003,3.133,0.008,7.461377,3.1,9.8
CCPC-z31-004,3.146,0.006,5.570522,3.1,7.59
CCPC-z31-005,3.152,0.007,6.485285,3.1,17.77
CCPC-z32-002,3.234,0.003,2.701361,3.1,13.11
CCPC-z33-002,3.372,0.008,6.874197,3.1,7.44


#### Calculate overdensity data

Need to calculate overdensity statistics for random regions over the whole box in order to get probabilities. 

First, find the coordinates of all protoclusters

In [8]:
pc_coods = [None] * len(gals_mstar10)
pc_coods[0] = get_protoclusters(gals_mstar10[0], L)
pc_coods[1] = get_protoclusters(gals_mstar10[1], L)

Use to find limiting completeness and purity, and calculate overdensity stats.

In [217]:
coods = pd.DataFrame(np.random.rand(N,3) * L, columns=['zn_x','zn_y','zn_z']) # generate random regions

out_stats = {str(idx): {'stats': None, 'clim': None, 'plim': None, 
                        'clim_std': None, 'plim_std': None} for idx in dat.index}

gal_idx = [0,0,0,0,0,0,0,0,0,0,1,1]

for didx, gidx in zip(dat.index, gal_idx):
    print didx
    
    print 'finding clim and plim'
    c_p = overdensity_cylinder(gals_mstar10[0], 
                        pc_coods[gidx][0], R=10, dc=dat.ix[didx]['deltac'], L=L, pc_stats=True)
    
    out_stats[didx]['clim'] = (np.percentile(c_p[:,1], 95), np.median(c_p[:,1]), np.percentile(c_p[:,1], 5))
    out_stats[didx]['plim'] = (np.percentile(c_p[:,2], 95), np.median(c_p[:,2]), np.percentile(c_p[:,2], 5))
    
    print 'calculating stats'
    out_stats[didx]['stats'] = overdensity_cylinder(gals_mstar10[gidx], coods, R=10, 
                            dc=dat.ix[didx]['deltac'], L=L, pc_stats=True, verbose=True)

CCPC-z27-002
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z29-001
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z29-002
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z30-001
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z30-003
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z31-003
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z31-004
finding clim and plim
calculating stats
Building KDtree...
0.1 %
10.1 %
20.1 %
30.1 %
40.1 %
50.1 %
60.1 %
70.1 %
80.1 %
90.1 %
CCPC-z31-005


In [220]:
# pcl.dump(out_stats, open('out_stats_obs.p','w'))
out_stats = pcl.load(open('out_stats_obs.p','r'))

In [241]:
for idx in dat.index:
    
    labs, labels = label(out_stats[idx]['stats'], 
                                 clim=out_stats[idx]['clim'][2],
                                 plim=out_stats[idx]['plim'][2])

    bins, binLimits, agg, agg_total, fracs = binit(out_stats[idx]['stats'], labs, labels)
    
    dgal = dat.ix[idx]['dgal']
    keys =  labels[:4]
    
    print idx, "\nClim:", out_stats[idx]['clim'], "\nPlim:", out_stats[idx]['plim'], "\nP(dgal): %.3f"%\
    np.sum([fracs[key] for key in keys], axis=0)[np.where(binLimits < dgal)[0].max()], "\n"

CCPC-z27-002 
Clim: (1.0, 1.0, 0.80000000000000004) 
Plim: (1.0, 0.88888888888888884, 0.54545454545454541) 
P(dgal): 0.661 

CCPC-z29-001 
Clim: (1.0, 1.0, 0.67333333333333334) 
Plim: (1.0, 1.0, 0.63636363636363635) 
P(dgal): 0.383 

CCPC-z29-002 
Clim: (1.0, 1.0, 0.82085561497326198) 
Plim: (1.0, 0.8571428571428571, 0.5) 
P(dgal): 0.918 

CCPC-z30-001 
Clim: (1.0, 1.0, 0.66666666666666663) 
Plim: (1.0, 1.0, 0.66666666666666663) 
P(dgal): 0.851 

CCPC-z30-003 
Clim: (1.0, 1.0, 0.80000000000000004) 
Plim: (1.0, 0.88888888888888884, 0.54545454545454541) 
P(dgal): 0.763 

CCPC-z31-003 
Clim: (1.0, 1.0, 0.80000000000000004) 
Plim: (1.0, 0.88888888888888884, 0.54545454545454541) 
P(dgal): 0.661 

CCPC-z31-004 
Clim: (1.0, 1.0, 0.7142857142857143) 
Plim: (1.0, 1.0, 0.61805555555555558) 
P(dgal): 0.354 

CCPC-z31-005 
Clim: (1.0, 1.0, 0.75) 
Plim: (1.0, 0.92307692307692313, 0.58333333333333337) 
P(dgal): 0.891 

CCPC-z32-002 
Clim: (1.0, 0.80000000000000004, 0.33333333333333331) 
Plim: (1.0, 