In [8]:
import pandas as pd
import math
import numpy as np
from astropy.coordinates import SkyCoord
from astropy.io import fits
import healpy as hp
import matplotlib.pyplot as plt
import time
import sys
from pixell import enmap, enplot, reproject, utils, curvedsky, wcsutils
from matplotlib import cm
from scipy.optimize import curve_fit
from scipy.linalg import sqrtm
from tqdm import tqdm

In [None]:
# Load the AMF catalogue. The CSV has no header row, so names are assigned here.
amf = pd.read_csv('data/dr9_main_complete.csv', header=None)
amf.columns = [
    'amf_no', 'amf_ra', 'amf_dec', 'amf_z', 'amf_lk', 'amf_rh', 'amf_r200',
    'amf_rc', 'amf_c', 'amf_str_rh', 'bax_id', 'mcxc_id', 'red_id', 'whl_id',
]

# Convert the ICRS RA/Dec columns (degrees) to galactic latitude/longitude.
c = SkyCoord(amf['amf_ra'], amf['amf_dec'], frame='icrs', unit='deg')
amf = amf.assign(
    glat=c.galactic.b.degree,
    glon=c.galactic.l.degree,
    cat=0,  # catalogue identifier for the AMF rows
)

# Rows whose whl_id differs from the string '-9999'.
amf_whl = amf[amf['whl_id'].ne('-9999')]

In [None]:
# Display the AMF table, including the glat/glon/cat columns added in the cell above.
amf

In [None]:
# Read the cluster table stored in extension 1 of galaxy_clusters_desdr2.fits.
# The file is opened in a context manager so its handle is released after the
# DataFrame has been built (the original never closed it).
with fits.open("data/galaxy_clusters_desdr2.fits") as hdul:
    data = hdul[1].data
    zou1 = pd.DataFrame({'ra':data['RA_PEAK'],
                        'dec':data['DEC_PEAK'],
                        'z':data['PHOTO_Z_PEAK'],
                        'rh':data['RICHNESS']})

# Add galactic latitude/longitude derived from the ICRS RA/Dec (degrees).
c = SkyCoord(zou1['ra'],zou1['dec'],frame='icrs',unit='deg')
zou1['glat'] = c.galactic.b.degree
zou1['glon'] = c.galactic.l.degree
zou1['cat'] = 1  # catalogue identifier for rows from this file

In [None]:
# Read the cluster table stored in extension 1 of galaxy_clusters_desidr9.fits.
# The file is opened in a context manager so its handle is released after the
# DataFrame has been built (the original never closed it).
with fits.open("data/galaxy_clusters_desidr9.fits") as hdul:
    data = hdul[1].data
    zou2 = pd.DataFrame({'ra':data['RA_PEAK'],
                        'dec':data['DEC_PEAK'],
                        'z':data['PHOTO_Z_PEAK'],
                        'rh':data['RICHNESS']})

# Add galactic latitude/longitude derived from the ICRS RA/Dec (degrees).
c = SkyCoord(zou2['ra'],zou2['dec'],frame='icrs',unit='deg')
zou2['glat'] = c.galactic.b.degree
zou2['glon'] = c.galactic.l.degree
zou2['cat'] = 2  # catalogue identifier for rows from this file

In [None]:
# Read the cluster table stored in extension 1 of galaxy_clusters_hscpdr3_wide.fits.
# The file is opened in a context manager so its handle is released after the
# DataFrame has been built (the original never closed it).
with fits.open("data/galaxy_clusters_hscpdr3_wide.fits") as hdul:
    data = hdul[1].data
    zou3 = pd.DataFrame({'ra':data['RA_PEAK'],
                        'dec':data['DEC_PEAK'],
                        'z':data['PHOTO_Z_PEAK'],
                        'rh':data['RICHNESS']})

# Add galactic latitude/longitude derived from the ICRS RA/Dec (degrees).
c = SkyCoord(zou3['ra'],zou3['dec'],frame='icrs',unit='deg')
zou3['glat'] = c.galactic.b.degree
zou3['glon'] = c.galactic.l.degree
zou3['cat'] = 3  # catalogue identifier for rows from this file

In [None]:
# Stack the three FITS-derived tables row-wise. Each frame keeps its own index
# labels, so labels repeat across the three parts.
zou = pd.concat([zou1, zou2, zou3], axis=0)

In [None]:
# Mass in 10^14
# Whitespace-delimited file with no header; only five of its columns are read.
# The separator is a raw string: '\s' in a plain literal is an invalid escape
# sequence and produces a SyntaxWarning on recent Python versions.
wh22 = pd.read_csv('data/cluster_DESunWISE.dat', sep=r'\s+', header=None, usecols=[3,4,5,10,11])
wh22.columns = ['ra','dec','z','rh','m']

# Add galactic latitude/longitude derived from the ICRS RA/Dec (degrees).
c = SkyCoord(wh22['ra'],wh22['dec'],frame='icrs',unit='deg')
wh22['glat'] = c.galactic.b.degree
wh22['glon'] = c.galactic.l.degree
wh22['cat'] = 4  # catalogue identifier for rows from this file

In [None]:
# dcl.z = np.array(dcl.z).byteswap().newbyteorder()

In [None]:
# Bring the three catalogues onto one shared column layout before stacking them.
shared_cols = ['amf_ra', 'amf_dec', 'z', 'rh', 'cat']
position_rename = {'ra': 'amf_ra', 'dec': 'amf_dec'}

# AMF already uses amf_ra/amf_dec; only its redshift and richness columns are renamed.
amf0 = amf.rename(columns={'amf_z': 'z', 'amf_rh': 'rh'})[shared_cols]
zou0 = zou.rename(columns=position_rename)[shared_cols]
wh220 = wh22.rename(columns=position_rename)[shared_cols]

union = pd.concat([amf0, zou0, wh220])#.to_csv('../../data/union.csv')

In [None]:
# Display the stacked catalogue (columns amf_ra, amf_dec, z, rh, cat).
union

## Nearest Neighbors ##

### DES, WH22 Intersection ###

In [28]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Load the combined catalogue and split out the cat == 1 (des) and cat == 4 (wh22) rows.
cut = pd.read_csv("data/union3.csv", skiprows = 0)
des = cut[cut.cat == 1]
wh22 = cut[cut.cat == 4]
# des_wh22 = cut[cut.cat.isin([1,4])].reset_index(drop = True)

# Keep 0.6 < z < 1. The explicit .copy() makes each result an independent frame,
# so adding a column below does not raise SettingWithCopyWarning.
des = des[(des.z > 0.6) & (des.z < 1)].copy()
wh22 = wh22[(wh22.z > 0.6) & (wh22.z < 1)].copy()

# Keep each row's label from `cut` as a regular column so it survives later index resets.
des["INDEX"] = des.index
wh22["INDEX"] = wh22.index

In [29]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Cross-match `des` against `wh22`: for every des row, find the nearest wh22 row
# in (amf_ra, amf_dec, z) space and keep the pair if it lies within match_radius.
# NOTE(review): the distance is plain Euclidean distance on the raw column values,
# so there is no cos(dec) scaling or RA wrap-around handling - confirm this is intended.
match_radius = 0.0833

coords1 = des[['amf_ra', 'amf_dec', "z"]].values
coords2 = wh22[['amf_ra', 'amf_dec', "z"]].values

# Only the wh22 tree is queried, so no tree is built for des (the original built
# an unused one).
tree2 = KDTree(coords2)

# k=1: nearest wh22 neighbour per des row. Rows with nothing inside the radius
# come back with an infinite distance, so the <= test below rejects them.
distances, indices = tree2.query(coords1, k=1, distance_upper_bound=match_radius)
has_match = distances <= match_radius

# assign() returns a new frame instead of writing into a filtered slice,
# which avoids SettingWithCopyWarning.
des = des.assign(Match=has_match)

# Positions (within wh22) of the partner rows, in des order.
matched_indices = indices[has_match]
matched_catalogs = wh22.iloc[matched_indices]

# Put each matched des row next to its wh22 partner; partner columns get the
# '_matched' suffix. Both sides are re-indexed 0..n-1 so the join is positional.
result = (
    des[des['Match']]
    .reset_index(drop=True)
    .join(matched_catalogs.reset_index(drop=True), rsuffix='_matched')
)

In [30]:
# Display the matched pairs: des columns first, partner columns suffixed with '_matched'.
result

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,INDEX,Match,amf_ra_matched,amf_dec_matched,z_matched,rh_matched,cat_matched,INDEX_matched
0,2.728994,4.852877,0.833884,118.069723,1,46489,True,2.71738,4.84389,0.8476,43.74,4,705670
1,4.327177,5.021294,0.831561,87.429972,1,46492,True,4.32032,5.02819,0.7620,26.98,4,707510
2,9.944415,4.902296,0.762599,234.021091,1,46512,True,9.93727,4.90089,0.7074,49.01,4,714693
3,15.425087,4.823281,0.842940,70.928463,1,46535,True,15.41820,4.81137,0.8289,17.54,4,722243
4,20.700431,5.257738,0.751069,146.615125,1,46550,True,20.68896,5.26780,0.7554,31.02,4,730234
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16184,62.918055,-65.790095,0.675214,166.303226,1,133413,True,62.92653,-65.78198,0.6096,43.31,4,793229
16185,327.510240,-65.802663,0.612675,174.555625,1,133417,True,327.51297,-65.80315,0.6168,50.94,4,834401
16186,50.486371,-66.608892,0.689917,114.049078,1,133419,True,50.50767,-66.59171,0.7080,49.96,4,778092
16187,54.450779,-67.175674,0.755606,177.119563,1,133424,True,54.41912,-67.16213,0.7683,37.11,4,783128


In [31]:
# Left-hand (cat == 1) half of every matched pair.
des_side_cols = ["amf_ra", "amf_dec", "z", "rh", "cat", "INDEX"]
des_int = result.loc[:, des_side_cols]
des_int

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,INDEX
0,2.728994,4.852877,0.833884,118.069723,1,46489
1,4.327177,5.021294,0.831561,87.429972,1,46492
2,9.944415,4.902296,0.762599,234.021091,1,46512
3,15.425087,4.823281,0.842940,70.928463,1,46535
4,20.700431,5.257738,0.751069,146.615125,1,46550
...,...,...,...,...,...,...
16184,62.918055,-65.790095,0.675214,166.303226,1,133413
16185,327.510240,-65.802663,0.612675,174.555625,1,133417
16186,50.486371,-66.608892,0.689917,114.049078,1,133419
16187,54.450779,-67.175674,0.755606,177.119563,1,133424


In [32]:
# Right-hand (partner) half of every matched pair, with the '_matched' suffix removed
# so both halves share the same column names.
partner_cols = ["amf_ra_matched", "amf_dec_matched", "z_matched", "rh_matched", "cat_matched", "INDEX_matched"]
strip_suffix = {name: name.replace("_matched", "") for name in partner_cols}
wh22_int = result.loc[:, partner_cols].rename(columns=strip_suffix)
wh22_int

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,INDEX
0,2.71738,4.84389,0.8476,43.74,4,705670
1,4.32032,5.02819,0.7620,26.98,4,707510
2,9.93727,4.90089,0.7074,49.01,4,714693
3,15.41820,4.81137,0.8289,17.54,4,722243
4,20.68896,5.26780,0.7554,31.02,4,730234
...,...,...,...,...,...,...
16184,62.92653,-65.78198,0.6096,43.31,4,793229
16185,327.51297,-65.80315,0.6168,50.94,4,834401
16186,50.50767,-66.59171,0.7080,49.96,4,778092
16187,54.41912,-67.16213,0.7683,37.11,4,783128


In [33]:
# Collapse fully identical rows, keeping the first occurrence. Surviving rows keep
# their original index labels.
des_int = des_int.drop_duplicates(keep="first")
wh22_int = wh22_int.drop_duplicates(keep="first")

In [34]:
# Keep only the des rows whose partner survived de-duplication in wh22_int.
# wh22_int.index holds index *labels*, so the lookup must be label-based (.loc).
# .iloc would read those labels as positions, which picks the wrong rows (or
# raises IndexError) as soon as des_int is no longer indexed 0..n-1, for example
# when this cell is run a second time.
des_int = des_int.loc[wh22_int.index, :]
des_int

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,INDEX
0,2.728994,4.852877,0.833884,118.069723,1,46489
1,4.327177,5.021294,0.831561,87.429972,1,46492
2,9.944415,4.902296,0.762599,234.021091,1,46512
3,15.425087,4.823281,0.842940,70.928463,1,46535
4,20.700431,5.257738,0.751069,146.615125,1,46550
...,...,...,...,...,...,...
16184,62.918055,-65.790095,0.675214,166.303226,1,133413
16185,327.510240,-65.802663,0.612675,174.555625,1,133417
16186,50.486371,-66.608892,0.689917,114.049078,1,133419
16187,54.450779,-67.175674,0.755606,177.119563,1,133424


In [35]:
# Rename the saved row label column from INDEX to u3index in both halves.
index_rename = {"INDEX": "u3index"}
des_int = des_int.rename(columns=index_rename)
wh22_int = wh22_int.rename(columns=index_rename)
des_int

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,u3index
0,2.728994,4.852877,0.833884,118.069723,1,46489
1,4.327177,5.021294,0.831561,87.429972,1,46492
2,9.944415,4.902296,0.762599,234.021091,1,46512
3,15.425087,4.823281,0.842940,70.928463,1,46535
4,20.700431,5.257738,0.751069,146.615125,1,46550
...,...,...,...,...,...,...
16184,62.918055,-65.790095,0.675214,166.303226,1,133413
16185,327.510240,-65.802663,0.612675,174.555625,1,133417
16186,50.486371,-66.608892,0.689917,114.049078,1,133419
16187,54.450779,-67.175674,0.755606,177.119563,1,133424


In [36]:
# des_int.to_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_1.csv", index = False)
# wh22_int.to_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_4.csv", index = False)

### Old Method ###

In [90]:
# Test

# Reload the saved intersection halves and the combined catalogue.
new1 = pd.read_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_1.csv", skiprows = 0)
new2 = pd.read_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_4.csv", skiprows = 0)
un = pd.read_csv("data/union3.csv", skiprows = 0)

# Filter the frame loaded in this cell (`un`) rather than `cut`, which only exists
# if another cell has been run first. Both are read from data/union3.csv.
des = un[un.cat == 1]
des = des[(des.z > 0.6) & (des.z < 1)]
wh22 = un[un.cat == 4]
wh22 = wh22[(wh22.z > 0.6) & (wh22.z < 1)]


# This method is faulty (assumes no duplicate)

# cond_ra = des['amf_ra'].isin(new1['amf_ra'])
# cond_dec = des['amf_dec'].isin(new1['amf_dec'])
# cond_z = des['z'].isin(new1['z'])


# intersect1 = des[cond_ra & cond_dec & cond_z]
# des = des.drop(des[cond_ra & cond_dec & cond_z].index)

In [104]:
# De-duplicate the partner-side file, then keep the same rows of the des-side file
# so the two stay aligned row for row.
dedup_labels = new2.drop_duplicates().index
wh22_rows = new2.iloc[dedup_labels, :]
des_rows = new1.iloc[wh22_rows.index, :]

# Renumber both halves 0..n-1.
save1 = des_rows.reset_index(drop=True)
save2 = wh22_rows.reset_index(drop=True)

In [107]:
# save1.to_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_1.csv", index = False)
# save2.to_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_4.csv", index = False)

In [None]:
# This method is faulty (assumes no duplicate)

# cond_ra = wh22['amf_ra'].isin(new2['amf_ra'])
# cond_dec = wh22['amf_dec'].isin(new2['amf_dec'])
# cond_z = wh22['z'].isin(new2['z'])

# intersect2 = wh22[cond_ra & cond_dec & cond_z]
# wh22 = wh22.drop(wh22[cond_ra & cond_dec & cond_z].index)

In [None]:
# NOTE(review): `intersect1` is only assigned in commented-out code earlier in this
# section, so on a fresh kernel this cell raises NameError.
intersect1

In [None]:
# Stack des and wh22 and renumber the rows 0..n-1.
# NOTE(review): the steps that would remove the intersecting rows from des/wh22 are
# commented out earlier in this section, so as written this is a plain concatenation
# of the two frames - confirm that is what "xor" should contain.
xor = pd.concat([des, wh22], ignore_index=True)
xor

In [None]:
# xor.to_csv("output/Locations/Intersections/DES_WH22/des_wh22_xor.csv", index = False)

In [None]:
# NOTE(review): `des_wh22` is not assigned by any active code above this cell (its
# only earlier assignment is commented out), so this relies on leftover kernel state.
coords = des_wh22[['amf_ra', 'amf_dec', 'z']].to_numpy()

# Self-query of the k-d tree: k=2 because each point normally finds itself first
# (distance 0), so the second column describes its closest *other* point.
# Neighbours farther than 1e-2 are not reported.
search_radius = 1e-2
tree = KDTree(coords)
distances, indices = tree.query(coords, k=2, distance_upper_bound=search_radius)
indices

In [None]:
# Flag each row that has another row within 1e-2, excluding the point itself (hence k=2):
# the second-smallest distance is tested against the same 1e-2 bound used in the query.
# (Earlier wording mentioned 0.5; the code uses 1e-2.)

# To get intersection, not XOR
des_wh22['Match'] = distances[:,1] <= 1e-2
des_wh22 = des_wh22.reset_index(drop = True)

# Second-neighbour position for every row. KDTree.query reports "no neighbour found"
# with an index equal to the number of points, which the first filter removes.
dupes = pd.Series(indices[:,1])
dupes = dupes[dupes != len(des_wh22)]
# Keep entries whose neighbour position is below len(des).
# NOTE(review): presumably the des rows come first in des_wh22 - verify against
# how des_wh22 was built.
dupes = dupes[dupes < len(des)]
print(dupes)

print(des_wh22)
# changes dupes to dupes.index to go back and forth
# (dupes.index = rows that found such a neighbour; dupes values = the neighbours themselves)
des_wh22 = des_wh22.drop(dupes.index, axis = "index")
des_wh22 = des_wh22[des_wh22["Match"]]

des_wh22

In [None]:
# Renumber the surviving rows and drop the helper flag column.
des_wh22 = des_wh22.reset_index(drop=True).drop(columns="Match")
# des_wh22.to_csv("output/Locations/des_wh22_AND_1.csv", index = False)
des_wh22

In [None]:
# Only check to make sure nothings in the intersection anymore

coords = des_wh22[['amf_ra', 'amf_dec', 'z']].to_numpy()

# Self-query with k=2: the first hit is normally the point itself, the second is
# its closest other point within 1e-2 (if any).
tree = KDTree(coords)
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# Rows that still have a neighbour inside the radius. A "missing" neighbour is
# reported with an index equal to the number of points.
second_neighbour = pd.Series(indices[:, 1])
check = second_neighbour[second_neighbour != len(des_wh22)]
# NOTE(review): `check` is computed but the cell displays des_wh22.
des_wh22

### Finding DES, WH22 and DESI Intersection ###

In [9]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Load the combined catalogue and the two saved halves of the DES/WH22 intersection.
cut = pd.read_csv("data/union3.csv", skiprows=0)
des_wh22_1 = pd.read_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_1.csv", skiprows=0)
des_wh22_4 = pd.read_csv("output/Locations/Intersections/DES_WH22/des_wh22_AND_4.csv", skiprows=0)

# cat == 2 rows with 0.6 < z < 1, renumbered 0..n-1.
is_desi = cut.cat == 2
in_z_window = (cut.z > 0.6) & (cut.z < 1)
desi = cut[is_desi & in_z_window].reset_index(drop=True)
desi

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
0,135.576658,84.282097,0.672224,71.125599,2
1,159.441977,84.470371,0.774722,131.296879,2
2,169.336282,84.745055,0.739466,186.789902,2
3,170.173343,84.497271,0.739765,123.796251,2
4,209.629132,84.575575,0.714163,61.195763,2
...,...,...,...,...,...
208343,65.460255,-66.866127,0.772272,90.605660,2
208344,65.332630,-66.820162,0.742248,75.869274,2
208345,66.479394,-67.250996,0.704453,94.599666,2
208346,67.524140,-66.458386,0.674120,69.780400,2


In [10]:
# First with des_wh22_1

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Cross-match des_wh22_1 against desi: nearest desi row in (amf_ra, amf_dec, z)
# space for every des_wh22_1 row, kept only if it lies within match_radius.
# NOTE(review): the distance is plain Euclidean distance on the raw column values,
# so there is no cos(dec) scaling or RA wrap-around handling - confirm this is intended.
match_radius = 0.0833

coords1 = des_wh22_1[['amf_ra', 'amf_dec', "z"]].values
coords2 = desi[['amf_ra', 'amf_dec', "z"]].values

# Only the desi tree is queried, so no tree is built for des_wh22_1 (the original
# built an unused one).
tree2 = KDTree(coords2)

# k=1: nearest desi neighbour per row. Rows with nothing inside the radius come
# back with an infinite distance, so the <= test below rejects them.
distances1, indices1 = tree2.query(coords1, k=1, distance_upper_bound=match_radius)
has_match1 = distances1 <= match_radius
des_wh22_1['Match'] = has_match1

# Positions (within desi) of the partner rows, in des_wh22_1 order.
matched_indices1 = indices1[has_match1]
matched_catalogs1 = desi.iloc[matched_indices1]

# Put each matched row next to its desi partner; partner columns get the '_matched'
# suffix. Both sides are re-indexed 0..n-1 so the join is positional.
result1 = (
    des_wh22_1[des_wh22_1['Match']]
    .reset_index(drop=True)
    .join(matched_catalogs1.reset_index(drop=True), rsuffix='_matched')
)

In [11]:
# Display des_wh22_1 rows joined with their desi partners ('_matched' columns).
result1

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,Match,amf_ra_matched,amf_dec_matched,z_matched,rh_matched,cat_matched
0,2.728994,4.852877,0.833884,118.069723,1,True,2.721607,4.852760,0.805465,143.960152,2
1,4.327177,5.021294,0.831561,87.429972,1,True,4.326650,5.020821,0.754092,119.686829,2
2,20.700431,5.257738,0.751069,146.615125,1,True,20.699110,5.257552,0.741717,171.301707,2
3,26.671792,4.966540,0.682545,116.932637,1,True,26.671772,4.970312,0.644349,132.362643,2
4,26.601518,4.943122,0.651660,134.416565,1,True,26.601518,4.943105,0.669574,150.861958,2
...,...,...,...,...,...,...,...,...,...,...,...
11978,59.616275,-66.277765,0.735681,187.867872,1,True,59.628661,-66.275150,0.715404,164.499426,2
11979,59.273860,-66.415007,0.755883,243.069950,1,True,59.281612,-66.409949,0.677330,272.032345,2
11980,62.918055,-65.790095,0.675214,166.303226,1,True,62.948170,-65.784144,0.639626,171.185616,2
11981,327.510240,-65.802663,0.612675,174.555625,1,True,327.522346,-65.807406,0.609801,191.053604,2


In [12]:
# Display the desi partner rows; the index shows their row labels within `desi`.
matched_catalogs1

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
107112,2.721607,4.852760,0.805465,143.960152,2
105809,4.326650,5.020821,0.754092,119.686829,2
105929,20.699110,5.257552,0.741717,171.301707,2
105956,26.671772,4.970312,0.644349,132.362643,2
105955,26.601518,4.943105,0.669574,150.861958,2
...,...,...,...,...,...
208302,59.628661,-66.275150,0.715404,164.499426,2
208300,59.281612,-66.409949,0.677330,272.032345,2
208312,62.948170,-65.784144,0.639626,171.185616,2
208325,327.522346,-65.807406,0.609801,191.053604,2


In [13]:
# Next with des_wh22_4

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Cross-match des_wh22_4 against desi: nearest desi row in (amf_ra, amf_dec, z)
# space for every des_wh22_4 row, kept only if it lies within match_radius.
# NOTE(review): the distance is plain Euclidean distance on the raw column values,
# so there is no cos(dec) scaling or RA wrap-around handling - confirm this is intended.
match_radius = 0.0833

coords1 = des_wh22_4[['amf_ra', 'amf_dec', "z"]].values
coords2 = desi[['amf_ra', 'amf_dec', "z"]].values

# Only the desi tree is queried, so no tree is built for des_wh22_4 (the original
# built an unused one).
tree2 = KDTree(coords2)

# k=1: nearest desi neighbour per row. Rows with nothing inside the radius come
# back with an infinite distance, so the <= test below rejects them.
distances4, indices4 = tree2.query(coords1, k=1, distance_upper_bound=match_radius)
has_match4 = distances4 <= match_radius
des_wh22_4['Match'] = has_match4

# Positions (within desi) of the partner rows, in des_wh22_4 order.
matched_indices4 = indices4[has_match4]
matched_catalogs4 = desi.iloc[matched_indices4]

# Put each matched row next to its desi partner; partner columns get the '_matched'
# suffix. Both sides are re-indexed 0..n-1 so the join is positional.
result4 = (
    des_wh22_4[des_wh22_4['Match']]
    .reset_index(drop=True)
    .join(matched_catalogs4.reset_index(drop=True), rsuffix='_matched')
)

In [14]:
# Display des_wh22_4 rows joined with their desi partners ('_matched' columns).
result4

Unnamed: 0,amf_ra,amf_dec,z,rh,cat,Match,amf_ra_matched,amf_dec_matched,z_matched,rh_matched,cat_matched
0,2.71738,4.84389,0.8476,43.74,4,True,2.721607,4.852760,0.805465,143.960152,2
1,4.32032,5.02819,0.7620,26.98,4,True,4.326650,5.020821,0.754092,119.686829,2
2,9.93727,4.90089,0.7074,49.01,4,True,9.900494,4.915872,0.652483,148.372279,2
3,20.68896,5.26780,0.7554,31.02,4,True,20.699110,5.257552,0.741717,171.301707,2
4,26.60727,4.93842,0.6555,34.35,4,True,26.601518,4.943105,0.669574,150.861958,2
...,...,...,...,...,...,...,...,...,...,...,...
12239,59.62865,-66.27515,0.7234,39.46,4,True,59.628661,-66.275150,0.715404,164.499426,2
12240,59.26735,-66.41239,0.7050,66.26,4,True,59.281612,-66.409949,0.677330,272.032345,2
12241,62.92653,-65.78198,0.6096,43.31,4,True,62.948170,-65.784144,0.639626,171.185616,2
12242,327.51297,-65.80315,0.6168,50.94,4,True,327.522346,-65.807406,0.609801,191.053604,2


In [80]:
# Columns contributed by the desi side of the two cross-match joins
# (they were added with rsuffix='_matched').
matched_cols = ["amf_ra_matched", "amf_dec_matched", "z_matched", "rh_matched", "cat_matched"]

# Select by name rather than by position: fixed positions [6..10] silently pick the
# wrong columns if the input CSVs carry an extra column (e.g. u3index).
# drop_duplicates() already returns a new frame, so no explicit copy is needed.
desi_4 = result4.loc[:, matched_cols].drop_duplicates()
desi_1 = result1.loc[:, matched_cols].drop_duplicates()
desi_4

Unnamed: 0,amf_ra_matched,amf_dec_matched,z_matched,rh_matched,cat_matched
0,2.721607,4.852760,0.805465,143.960152,2
1,4.326650,5.020821,0.754092,119.686829,2
2,9.900494,4.915872,0.652483,148.372279,2
3,20.699110,5.257552,0.741717,171.301707,2
4,26.601518,4.943105,0.669574,150.861958,2
...,...,...,...,...,...
12238,59.674976,-66.459469,0.771601,168.480632,2
12239,59.628661,-66.275150,0.715404,164.499426,2
12240,59.281612,-66.409949,0.677330,272.032345,2
12241,62.948170,-65.784144,0.639626,171.185616,2


In [81]:
# desi_4["copyIndex"] = desi_4.index
# Remember each desi_1 row's label so its partner row in result1 can be looked up
# after the merge.
desi_1["copyIndex"] = desi_1.index

# Inner merge on the columns the two frames share: keeps the desi rows that were
# matched from both halves of the DES/WH22 intersection.
all_matched = pd.merge(desi_1, desi_4, how="inner").drop_duplicates()
# all_matched
# wh22_all_matched = result4.iloc[all_matched["copyIndex"], [0, 1, 2, 3, 4]]
# wh22_all_matched

# First five columns of the result1 rows that took part in the merge.
partner_rows = all_matched["copyIndex"]
leading_cols = list(range(5))
des_all_matched = result1.iloc[partner_rows, leading_cols]
des_all_matched

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
0,2.728994,4.852877,0.833884,118.069723,1
1,4.327177,5.021294,0.831561,87.429972,1
2,20.700431,5.257738,0.751069,146.615125,1
4,26.601518,4.943122,0.651660,134.416565,1
6,27.913383,5.134172,0.715573,116.080368,1
...,...,...,...,...,...
11978,59.616275,-66.277765,0.735681,187.867872,1
11979,59.273860,-66.415007,0.755883,243.069950,1
11980,62.918055,-65.790095,0.675214,166.303226,1
11981,327.510240,-65.802663,0.612675,174.555625,1


In [83]:
# Remove the helper column that carried the result1 row labels.
# `columns=` replaces the positional axis argument, which emits a FutureWarning on
# pandas 1.3+ and is rejected by pandas 2.x. errors="ignore" keeps the cell
# re-runnable once the column is already gone.
all_matched = all_matched.drop(columns="copyIndex", errors="ignore")
all_matched

  all_matched = all_matched.drop("copyIndex", 1)


Unnamed: 0,amf_ra_matched,amf_dec_matched,z_matched,rh_matched,cat_matched
0,2.721607,4.852760,0.805465,143.960152,2
1,4.326650,5.020821,0.754092,119.686829,2
2,20.699110,5.257552,0.741717,171.301707,2
3,26.601518,4.943105,0.669574,150.861958,2
4,27.915005,5.130558,0.673122,152.592179,2
...,...,...,...,...,...
10810,59.628661,-66.275150,0.715404,164.499426,2
10811,59.281612,-66.409949,0.677330,272.032345,2
10812,62.948170,-65.784144,0.639626,171.185616,2
10813,327.522346,-65.807406,0.609801,191.053604,2


In [84]:
# Strip the '_matched' suffix so the desi rows use the common column names.
suffixed_cols = ["amf_ra_matched", "amf_dec_matched", "z_matched", "rh_matched", "cat_matched"]
plain_names = {name: name.replace("_matched", "") for name in suffixed_cols}
all_matched = all_matched.rename(columns=plain_names)
all_matched

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
0,2.721607,4.852760,0.805465,143.960152,2
1,4.326650,5.020821,0.754092,119.686829,2
2,20.699110,5.257552,0.741717,171.301707,2
3,26.601518,4.943105,0.669574,150.861958,2
4,27.915005,5.130558,0.673122,152.592179,2
...,...,...,...,...,...
10810,59.628661,-66.275150,0.715404,164.499426,2
10811,59.281612,-66.409949,0.677330,272.032345,2
10812,62.948170,-65.784144,0.639626,171.185616,2
10813,327.522346,-65.807406,0.609801,191.053604,2


In [88]:
# Display the desi rows common to both cross-matches (columns amf_ra, amf_dec, z, rh, cat).
all_matched

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
0,2.721607,4.852760,0.805465,143.960152,2
1,4.326650,5.020821,0.754092,119.686829,2
2,20.699110,5.257552,0.741717,171.301707,2
3,26.601518,4.943105,0.669574,150.861958,2
4,27.915005,5.130558,0.673122,152.592179,2
...,...,...,...,...,...
10810,59.628661,-66.275150,0.715404,164.499426,2
10811,59.281612,-66.409949,0.677330,272.032345,2
10812,62.948170,-65.784144,0.639626,171.185616,2
10813,327.522346,-65.807406,0.609801,191.053604,2


In [89]:
# des_all_matched.to_csv("output/Locations/Intersections/DES_WH22_DESI/des_wh22_desi_AND_1.csv", index = False)
# wh22_all_matched.to_csv("output/Locations/Intersections/DES_WH22_DESI/des_wh22_desi_AND_4.csv", index = False)
# all_matched.to_csv("output/Locations/Intersections/DES_WH22_DESI/des_wh22_desi_AND_2.csv", index = False)

### Old Method ###

In [31]:
# ra_match = matched_catalogs4["amf_ra"].isin(matched_catalogs1["amf_ra"])
# dec_match = matched_catalogs4["amf_dec"].isin(matched_catalogs1["amf_dec"])
# z_match = matched_catalogs4["z"].isin(matched_catalogs1["z"])
# rh_match = matched_catalogs4["rh"].isin(matched_catalogs1["rh"])

# all_matched = matched_catalogs4[ra_match & dec_match & z_match & rh_match]
# all_matched

Unnamed: 0,amf_ra,amf_dec,z,rh,cat
107112,2.721607,4.852760,0.805465,143.960152,2
105809,4.326650,5.020821,0.754092,119.686829,2
105929,20.699110,5.257552,0.741717,171.301707,2
105955,26.601518,4.943105,0.669574,150.861958,2
105955,26.601518,4.943105,0.669574,150.861958,2
...,...,...,...,...,...
208302,59.628661,-66.275150,0.715404,164.499426,2
208300,59.281612,-66.409949,0.677330,272.032345,2
208312,62.948170,-65.784144,0.639626,171.185616,2
208325,327.522346,-65.807406,0.609801,191.053604,2


In [None]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Check for intersection of above, AND DESI as well.

cut = pd.read_csv("data/union3.csv", skiprows = 0)

# `des` is assigned here but not used again in this cell.
des = cut[cut.cat == 1]
desi = cut[cut.cat == 2]
des_wh22 = pd.read_csv("output/Locations/des_wh22_AND_1.csv", skiprows = 0)

# If in the redshift range needed (I'm making it generalized for now, so running without it):

# desi = desi[(desi.z > 0.6) & (desi.z < 1)].reset_index(drop = True)
# des_wh22 = des_wh22[(des_wh22.z > 0.6) & (des_wh22.z < 1)].reset_index(drop = True)


# Remove duplicates in DESI (also saved below)

# Self-query with k=2: column 1 is each row's closest other point within 1e-2, if any.
desi_coords = desi[['amf_ra', 'amf_dec','z']].values
desi_tree = KDTree(desi_coords)
desi_distances, desi_indices = desi_tree.query(desi_coords, k=2, distance_upper_bound=1e-2)

# Indexed by desi's own labels. KDTree.query reports a missing neighbour with an index
# equal to the number of points, so the filter keeps only rows that found a neighbour.
desi_dupes = pd.Series(desi_indices[:,1], index = desi.index)
desi_dupes = desi_dupes[desi_dupes != len(desi)]

# Drops every row that has a neighbour within 1e-2, i.e. all members of a close group.
# NOTE(review): confirm that removing all members (rather than keeping one) is intended.
desi = desi.drop(desi_dupes.index, axis = "index")

# The concatenation keeps each frame's own index labels.
des_wh22_desi = pd.concat([des_wh22, desi])
des_wh22_desi

In [None]:
# Now find intersection of DES, WH22, and DESI

coords = des_wh22_desi[['amf_ra', 'amf_dec', 'z']].to_numpy()

# Self-query of the k-d tree: k=2 because each point normally finds itself first
# (distance 0), so the second column describes its closest *other* point.
# Neighbours farther than 1e-2 are not reported.
search_radius = 1e-2
tree = KDTree(coords)
distances, indices = tree.query(coords, k=2, distance_upper_bound=search_radius)
indices

In [None]:
# Flag each row that has another row within 1e-2, excluding the point itself (hence k=2):
# the second-smallest distance is tested against the same 1e-2 bound used in the query.
# (Earlier wording mentioned 0.5; the code uses 1e-2.)

# To get intersection, not XOR
des_wh22_desi['Match'] = distances[:,1] <= 1e-2

# Second-neighbour position for every row, indexed by des_wh22_desi's labels.
# KDTree.query reports a missing neighbour with an index equal to the number of points,
# which the first filter removes; the second keeps neighbours positioned below len(des_wh22).
dupes = pd.Series(indices[:,1], index = des_wh22_desi.index)
dupes = dupes[dupes != len(des_wh22_desi)]
dupes = dupes[dupes < len(des_wh22)]
print(dupes)

print(des_wh22_desi)
# NOTE(review): this drops by the *values* of dupes (neighbour positions returned by the
# tree) interpreted as index labels. des_wh22_desi is built with pd.concat without
# resetting the index, so positions and labels need not agree - verify.
des_wh22_desi = des_wh22_desi.drop(dupes, axis = "index")
des_wh22_desi = des_wh22_desi[des_wh22_desi["Match"]]

test = des_wh22_desi
des_wh22_desi

In [None]:
# Compare the rows kept above with the previously saved file.
# reset_index() moves the old row labels into a column named "index".
modified = test.drop(columns="Match").reset_index()
total = pd.read_csv("output/Locations/des_wh22_desi_AND_2.csv", skiprows=0)
# modified[modified.cat != 2]
print(total)
print(modified)

# Inner merge on the columns the two frames have in common.
result = pd.merge(modified, total, how="inner")

# Original row labels of the rows present in both frames.
indic = result["index"]
indic

In [None]:
# Look up the dupes entries whose labels are listed in `indic`.
jam = dupes.loc[indic]
jam

In [None]:
# Index-on-index inner merge of test1 and test2.
# NOTE(review): neither `test1` nor `test2` is assigned anywhere in the visible notebook,
# so on a fresh kernel this cell raises NameError.
result = test1.merge(test2, how = "inner", left_index=True, right_index=True)
result

In [None]:
# Renumber the remaining rows 0..n-1; the Match column is kept (its removal is commented out).
des_wh22_desi = des_wh22_desi.reset_index(drop = True)
# des_wh22_desi = des_wh22_desi.drop("Match", axis = "columns")
# des_wh22_desi.to_csv("output/Locations/des_wh22_desi_AND_2.csv", index = False)
des_wh22_desi

#### Checking DES, WH22, DESI for duplicates ####

In [None]:
# Remove duplicates in DES

# Self-query with k=2: the first hit is normally the point itself, so column 1
# describes each row's closest other point within 1e-2 (if any).
des_coords = des[['amf_ra', 'amf_dec','z']].values
des_tree = KDTree(des_coords)
des_distances, des_indices = des_tree.query(des_coords, k=2, distance_upper_bound=1e-2)

# assign() builds a new frame, so no SettingWithCopyWarning is raised when `des`
# is a filtered slice of a larger catalogue.
des = des.assign(Match=des_distances[:,1] <= 1e-2).reset_index(drop = True)

# KDTree.query reports a missing neighbour with an index equal to the number of
# points, so this keeps only the rows that actually found a neighbour.
des_dupes = pd.Series(des_indices[:,1])
des_dupes = des_dupes[des_dupes != len(des)]

# Drops every row that has a neighbour within 1e-2 (all members of a close group).
des = des.drop(des_dupes.index, axis = "index")
des

# END RESULT --> there are no duplicates in DES.

In [None]:
# Remove duplicates in WH22

# Self-query with k=2: the first hit is normally the point itself, so column 1
# describes each row's closest other point within 1e-2 (if any).
wh22_coords = wh22[['amf_ra', 'amf_dec','z']].values
wh22_tree = KDTree(wh22_coords)
wh22_distances, wh22_indices = wh22_tree.query(wh22_coords, k=2, distance_upper_bound=1e-2)

# assign() builds a new frame, so no SettingWithCopyWarning is raised when `wh22`
# is a filtered slice of a larger catalogue.
wh22 = wh22.assign(Match=wh22_distances[:,1] <= 1e-2).reset_index(drop = True)

# KDTree.query reports a missing neighbour with an index equal to the number of
# points, so this keeps only the rows that actually found a neighbour.
wh22_dupes = pd.Series(wh22_indices[:,1])
wh22_dupes = wh22_dupes[wh22_dupes != len(wh22)]

# Drops every row that has a neighbour within 1e-2 (all members of a close group).
wh22 = wh22.drop(wh22_dupes.index, axis = "index")
wh22

# END RESULT --> there are no duplicates in WH22.

In [None]:
# Remove duplicates in DESI

# Self-query with k=2: the first hit is normally the point itself, so column 1
# describes each row's closest other point within the radius (if any).
dup_radius = 1e-2
desi_coords = desi[['amf_ra', 'amf_dec', 'z']].to_numpy()
desi_tree = KDTree(desi_coords)
desi_distances, desi_indices = desi_tree.query(desi_coords, k=2, distance_upper_bound=dup_radius)

desi['Match'] = desi_distances[:, 1] <= dup_radius
desi = desi.reset_index(drop=True)

# A missing neighbour is reported with an index equal to the number of points;
# keep only the rows that actually found one.
second_neighbour = pd.Series(desi_indices[:, 1])
desi_dupes = second_neighbour[second_neighbour != len(desi)]

# Drops every row that has a neighbour within the radius.
desi = desi.drop(index=desi_dupes.index)
desi

# END RESULT --> there are SOME duplicates in DESI.

## Universal Reference ##

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# Work on a copy of the stacked catalogue so `union` itself is left untouched.
df = union.copy()
coords = df[['amf_ra', 'amf_dec', 'z']].to_numpy()

# Self-query of the k-d tree: k=2 because each point normally finds itself first
# (distance 0), so the second column describes its closest *other* point.
# Neighbours farther than 1e-2 are reported with an infinite distance.
tree = KDTree(coords)
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# True where some other row lies within 1e-2 of this one.
second_nearest = distances[:, 1]
df['Match'] = second_nearest <= 1e-2

In [None]:
# Show the flagged rows, rounded to one decimal and de-duplicated on the rounded position/redshift.
df[df['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])

In [None]:
# Rows of `union` with cat 1 or 4. The explicit .copy() makes this an independent
# frame, so adding the Match column below does not raise SettingWithCopyWarning.
deswh = union[(union.cat.isin([1,4]))].copy()

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

coords = deswh[['amf_ra', 'amf_dec','z']].values

# Create a k-d tree from the coordinates
tree = KDTree(coords)

# Self-query with k=2: each point normally finds itself first (distance 0), so the
# second column describes its closest *other* point. Neighbours farther than 1e-2
# are reported with an infinite distance.
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# True where some other row lies within 1e-2 of this one.
deswh['Match'] = distances[:, 1] <= 1e-2

In [None]:
# Show the flagged rows, rounded to one decimal and de-duplicated on the rounded position/redshift.
deswh[deswh['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])

In [None]:
# Rows of `union` with cat 3 or 4. The explicit .copy() makes this an independent
# frame, so adding the Match column below does not raise SettingWithCopyWarning.
hscwh = union[(union.cat.isin([3,4]))].copy()

import pandas as pd
import numpy as np
from scipy.spatial import KDTree

coords = hscwh[['amf_ra', 'amf_dec','z']].values

# Create a k-d tree from the coordinates
tree = KDTree(coords)

# Self-query with k=2: each point normally finds itself first (distance 0), so the
# second column describes its closest *other* point. Neighbours farther than 1e-2
# are reported with an infinite distance.
distances, indices = tree.query(coords, k=2, distance_upper_bound=1e-2)

# True where some other row lies within 1e-2 of this one.
hscwh['Match'] = distances[:, 1] <= 1e-2

In [None]:
# Show the flagged rows, rounded to one decimal and de-duplicated on the rounded position/redshift.
hscwh[hscwh['Match']].round(1).drop_duplicates(subset=['amf_ra','amf_dec','z'])