**Importing libraries**

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Defining constants**

In [2]:
omega_m = 0.272
omega_l = 1 - omega_m
H_o = 70.4                 #Hubble constant in km s^−1 Mpc^−1
f = omega_m**0.545


**Defining Functions**

In [3]:
def H(z):                           #hubble parameter (in km s^-1 Mpc^-1) using Eq. 4.33 in Peter's book
    return np.sqrt( H_o**2 * ( (1+z)**3 * omega_m + omega_l ) ) 


**Reading galaxies dataset**

In [4]:
%%time
z = 0.42
# data_address = '../input/magneticumsnap027z042-massfiltered/'
data_address = 'Data/'
df_gal = pd.read_csv(data_address + 'massive_galaxies.csv')             #massive_galaxies.csv contains galaxies with mass greater than 1.8 ×10^{11} h^{−1} M_sun as done by Tanimura et al. (2020)
print(df_gal.head())
df_gal.shape

    x[kpc/h]   y[kpc/h]   z[kpc/h]     m[Msol/h]
0  416658.59  455771.69  72710.742  9.539830e+12
1  416819.56  455645.44  72682.742  6.425460e+11
2  416375.28  455986.09  72565.555  5.747420e+11
3  417055.16  455752.81  72579.039  7.524050e+11
4  416468.00  455417.97  73096.898  8.804670e+11
CPU times: user 50.3 ms, sys: 12.6 ms, total: 62.9 ms
Wall time: 63.8 ms


(93097, 4)

**Reading clusters dataset**

In [5]:
%%time

df_clusters = pd.read_csv(data_address + 'massive_clusters.csv')  #massive_clusters.csv contains clusters with M_500c greater than 10^13.5 h^{-1} M_sun as done by Tanimura et al. (2020)

print(df_clusters.head())
df_clusters.shape

   UID   x[kpc/h]   y[kpc/h]    z[kpc/h]  x_pix  y_pix  i_sector  \
0    0  416658.59  455771.69   72710.742  10666   4716        18   
1    1  457001.00  220719.88  279137.470  11699  10733        32   
2    2  546352.06  317522.00  322111.530  13986   8255        26   
3    3  178209.08  431001.19  432031.660   4562   5350        15   
4    4  232882.92  440987.19  452662.310   5961   5094        16   

   m500c[Msol/h]  r500c[kpc/h]  gas_frac  star_frac   T[kev]  Lx[1e44erg/s]  \
0   7.438200e+14       1346.37  0.128781   0.026921  8.21176        25.9788   
1   4.723990e+14       1157.33  0.125627   0.030589  6.49616        15.9667   
2   5.932360e+14       1248.60  0.133647   0.029565  6.64855        32.3791   
3   7.220310e+14       1333.15  0.126156   0.026663  8.29691        24.3332   
4   6.558890e+14       1291.09  0.125098   0.027383  7.06222        24.6993   

        Y500c  M_sat/M_cD  cshift[kpc]  vx[km/s]  vy[km/s]  vz[km/s]  
0  125.124350    0.146322          0.0  -484.

(6080, 19)

**Specifying prefactors for Eq. 1 of Tanimura et al. (2020)**

In [6]:
# %%time
a = 1/(1+z)
H(z)
print(a)

0.7042253521126761


**Computing density contrast for the Eq. 1**

In [7]:
%%time
for clus_x, clus_y, clus_z in zip(df_clusters['x[kpc/h]'], df_clusters['y[kpc/h]'], df_clusters['z[kpc/h]']):
    df_gal_selected = df_gal[(df_gal['x[kpc/h]'] >= (clus_x - 120000)) & (df_gal['x[kpc/h]'] <= (clus_x + 120000))
                            & (df_gal['y[kpc/h]'] >= (clus_y - 120000)) & (df_gal['y[kpc/h]'] <= (clus_y + 120000)) 
                             & (df_gal['z[kpc/h]'] >= (clus_z - 120000)) & (df_gal['z[kpc/h]'] <= (clus_z + 120000))]
        
    gals_in_cell = np.empty((48, 48, 48))
    gals_in_cell[:] = np.NaN
    
    for i, cell_x_low in enumerate(np.arange(clus_x - 120000, clus_x + 115001, 5000)):
        for j, cell_y_low in enumerate(np.arange(clus_y - 120000, clus_y + 115001, 5000)):
            for k, cell_z_low in enumerate(np.arange(clus_z - 120000, clus_z + 115001, 5000)):

                x_condition = (df_gal_selected['x[kpc/h]'] >= (cell_x_low)) & (df_gal_selected['x[kpc/h]'] < (cell_x_low + 5000))
                y_condition = (df_gal_selected['y[kpc/h]'] >= (cell_y_low)) & (df_gal_selected['y[kpc/h]'] < (cell_y_low + 5000)) 
                z_condition = (df_gal_selected['z[kpc/h]'] >= (cell_z_low)) & (df_gal_selected['z[kpc/h]'] < (cell_z_low + 5000))
    
                count = len(df_gal_selected[ x_condition & y_condition & z_condition ])
                gals_in_cell[i][j][k] = count
                
#         print(i)
        
    break
    
import os
duration = 1  # seconds
freq = 440  # Hz
os.system('play -nq -t alsa synth {} sine {}'.format(duration, freq)) 

df_gal_selected

CPU times: user 3min 10s, sys: 649 ms, total: 3min 11s
Wall time: 3min 12s


Unnamed: 0,x[kpc/h],y[kpc/h],z[kpc/h],m[Msol/h]
0,416658.59,455771.69,72710.742,9.539830e+12
1,416819.56,455645.44,72682.742,6.425460e+11
2,416375.28,455986.09,72565.555,5.747420e+11
3,417055.16,455752.81,72579.039,7.524050e+11
4,416468.00,455417.97,73096.898,8.804670e+11
...,...,...,...,...
93002,472120.56,353170.09,140795.610,1.805510e+11
93009,509923.62,400768.66,15343.682,1.838470e+11
93032,493037.16,569709.44,174671.220,1.867950e+11
93037,476074.31,346173.25,68664.602,1.810640e+11


**Checking if galaxies in cells match galaxies in the first cluster box**

In [8]:
print(np.nansum(gals_in_cell))
print(np.nanmax(gals_in_cell))

4401.0
8.0


**First cluster box containing 48 x 48 x 48 cells**

In [9]:
gals_in_cell

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

**Estimating time for four nested loops**

In [10]:
import  numpy as np
import time
a = np.arange(6000)
b = np.arange(48)
c = np.arange(48)
d = np.arange(48)

t = time.time()
for _ in a:
    for _ in b:
        for _ in c:
            for _ in d:
                end = time.time() - t
print(end)

KeyboardInterrupt: 

**Trying while loop for computing density contrast**

In [11]:
%%time
for clus_x, clus_y, clus_z in zip(df_clusters['x[kpc/h]'], df_clusters['y[kpc/h]'], df_clusters['z[kpc/h]']):
    df_gal_selected = df_gal[(df_gal['x[kpc/h]'] >= (clus_x - 120000)) & (df_gal['x[kpc/h]'] <= (clus_x + 120000))
                            & (df_gal['y[kpc/h]'] >= (clus_y - 120000)) & (df_gal['y[kpc/h]'] <= (clus_y + 120000)) 
                             & (df_gal['z[kpc/h]'] >= (clus_z - 120000)) & (df_gal['z[kpc/h]'] <= (clus_z + 120000))]
    
   
    gals_in_cell = np.empty((48, 48, 48))
    gals_in_cell[:] = np.NaN
    

    cell_x_low = np.arange(clus_x - 120000, clus_x + 115001, 5000)
    cell_y_low = np.arange(clus_y - 120000, clus_y + 115001, 5000)
    cell_z_low = np.arange(clus_z - 120000, clus_z + 115001, 5000)
    k = 0
    i = 0
    j = 0
    while(k<=47):
        if (i == 47):
            i = 0
            j = j + 1
            if (j == 47):
                j = 0
                k = k + 1
                
        else:
            x_condition = (df_gal_selected['x[kpc/h]'] >= (cell_x_low[i])) & (df_gal_selected['x[kpc/h]'] < (cell_x_low[i] + 5000))
            y_condition = (df_gal_selected['y[kpc/h]'] >= (cell_y_low[j])) & (df_gal_selected['y[kpc/h]'] < (cell_y_low[j] + 5000)) 
            z_condition = (df_gal_selected['z[kpc/h]'] >= (cell_z_low[k])) & (df_gal_selected['z[kpc/h]'] < (cell_z_low[k] + 5000))

            count = len(df_gal_selected[ x_condition & y_condition & z_condition ])
            gals_in_cell[i][j][k] = count
        i = i + 1
    break
        

        
    break
    
import os
duration = 1  # seconds
freq = 440  # Hz
os.system('play -nq -t alsa synth {} sine {}'.format(duration, freq)) 

df_gal_selected

CPU times: user 3min 2s, sys: 880 ms, total: 3min 3s
Wall time: 3min 3s


Unnamed: 0,x[kpc/h],y[kpc/h],z[kpc/h],m[Msol/h]
0,416658.59,455771.69,72710.742,9.539830e+12
1,416819.56,455645.44,72682.742,6.425460e+11
2,416375.28,455986.09,72565.555,5.747420e+11
3,417055.16,455752.81,72579.039,7.524050e+11
4,416468.00,455417.97,73096.898,8.804670e+11
...,...,...,...,...
93002,472120.56,353170.09,140795.610,1.805510e+11
93009,509923.62,400768.66,15343.682,1.838470e+11
93032,493037.16,569709.44,174671.220,1.867950e+11
93037,476074.31,346173.25,68664.602,1.810640e+11


**Implementing another strategy to reduced comutation time for density contrast**

In [12]:
%%time
for clus_x, clus_y, clus_z in zip(df_clusters['x[kpc/h]'], df_clusters['y[kpc/h]'], df_clusters['z[kpc/h]']):
    
    df_gal_selected = df_gal[(df_gal['x[kpc/h]'] >= (clus_x - 120000)) & (df_gal['x[kpc/h]'] <= (clus_x + 120000))
                            & (df_gal['y[kpc/h]'] >= (clus_y - 120000)) & (df_gal['y[kpc/h]'] <= (clus_y + 120000)) 
                             & (df_gal['z[kpc/h]'] >= (clus_z - 120000)) & (df_gal['z[kpc/h]'] <= (clus_z + 120000))]
    
   
    gals_in_cell = np.empty((48, 48, 48))
    gals_in_cell[:] = np.NaN
    

    cell_x_low = np.arange(clus_x - 120000, clus_x + 115001, 5000)
    cell_y_low = np.arange(clus_y - 120000, clus_y + 115001, 5000)
    cell_z_low = np.arange(clus_z - 120000, clus_z + 115001, 5000)
    k = 0
    i = 0
    j = 0

                
    x_condition = (df_gal_selected['x[kpc/h]'] >= (cell_x_low)) & (df_gal_selected['x[kpc/h]'] < (cell_x_low + 5000))
    y_condition = (df_gal_selected['y[kpc/h]'] >= (cell_y_low)) & (df_gal_selected['y[kpc/h]'] < (cell_y_low + 5000)) 
    z_condition = (df_gal_selected['z[kpc/h]'] >= (cell_z_low)) & (df_gal_selected['z[kpc/h]'] < (cell_z_low + 5000))

    count = len(df_gal_selected[ x_condition & y_condition & z_condition ])
    gals_in_cell[i][j][k] = count

        

        
    
import os
duration = 1  # seconds
freq = 440  # Hz
os.system('play -nq -t alsa synth {} sine {}'.format(duration, freq)) 

df_gal_selected

ValueError: Lengths must match to compare