In [1]:
import os
from tqdm import tqdm_notebook

In [2]:
def _decode_bytes(frame):
    """Return a new frame in which every ``bytes`` cell is decoded to ``str``.

    Cells that are not ``bytes`` are passed through unchanged.
    """
    def _to_text(value):
        return value.decode() if isinstance(value, bytes) else value

    # ``DataFrame.applymap`` is deprecated since pandas 2.1 in favour of
    # ``DataFrame.map``; use whichever one the installed pandas provides.
    if hasattr(type(frame), 'map'):
        return frame.map(_to_text)
    return frame.applymap(_to_text)


def load_PSZcatalog():
    """Load the PSZ1 and PSZ2 catalogs and combine them into one DataFrame.

    Both FITS tables are read from ``./../planckClusters/catalogs/``,
    converted to pandas, and outer-merged by matching the PSZ1 ``INDEX``
    column against the PSZ2 ``PSZ`` column.  Columns present in both
    catalogs carry the suffixes ``_PSZ1`` / ``_PSZ2``.

    Returns
    -------
    pandas.DataFrame
        The twelve positionally selected columns of the merged catalog
        followed by three combined columns ``NAME``, ``RA`` and ``DEC``.
        Each combined column holds the PSZ2 value where one exists and
        falls back to the PSZ1 value otherwise.
    """
    from astropy.table import Table

    datapath = './../planckClusters/catalogs/'

    ps1 = Table.read(f'{datapath}/PSZ1v2.1.fits')
    ps2 = Table.read(f'{datapath}/PSZ2v1.fits')

    # convert to pandas and clean up byte strings (FITS text columns may
    # arrive as ``bytes``)
    df1 = _decode_bytes(ps1.to_pandas())
    df2 = _decode_bytes(ps2.to_pandas())

    # merge the catalogs together
    df_m = df1.merge(df2, how='outer', left_on='INDEX', right_on='PSZ',
                     suffixes=('_PSZ1', '_PSZ2'))

    # get the columns that we want
    # NOTE(review): this is a positional selection.  Judging by the names
    # used further down, these are presumably INDEX/NAME/RA/DEC/SNR/REDSHIFT
    # of each catalog -- confirm against the FITS schemas, since any change
    # in column order silently selects different columns.
    cols = df_m.columns[[0, 1, 4, 5, 8, 29, 33, 34, 37, 38, 40, 51]]
    df_final = df_m[cols]

    # remerge to find bits that were missing
    df_final_bigger = df_final.merge(df2, how='left', left_on='INDEX_PSZ1',
                                     right_on='PSZ')

    # fill in nans: PSZ2 columns left empty by the first merge take the
    # value found by the re-merge.  (The original ran this loop twice; the
    # fill is idempotent, so a single pass gives the same result.)
    for col in ['NAME', 'RA', 'DEC', 'SNR', 'REDSHIFT', 'INDEX']:
        df_final_bigger[col + '_PSZ2'] = (
            df_final_bigger[col + '_PSZ2'].fillna(df_final_bigger[col]))

    # combined columns: prefer the PSZ2 value, fall back to the PSZ1 value
    for col in ['NAME', 'RA', 'DEC']:
        df_final_bigger[col] = (
            df_final_bigger[col + '_PSZ2'].fillna(df_final_bigger[col + '_PSZ1']))

    # keep the twelve selected catalog columns plus the three combined ones
    keep = list(df_final_bigger.columns[:12]) + ['NAME', 'RA', 'DEC']
    df_final_bigger = df_final_bigger[keep]

    return df_final_bigger

In [3]:
def link_survey_stacks(cluster_names, survey_dir, out_root, suffixes):
    """Symlink one survey's stacked images into the per-cluster output tree.

    For every cluster name, spaces are replaced with underscores.  If
    ``{survey_dir}/{name}`` is an existing directory, ``{out_root}/{name}``
    is created when missing and, for each suffix, a symlink
    ``{out_root}/{name}/{name}{suffix}`` pointing at
    ``{survey_dir}/{name}/{name}{suffix}`` is made.

    Parameters
    ----------
    cluster_names : sized iterable
        Cluster names; entries that are not strings (e.g. NaN) are skipped.
    survey_dir : str
        Directory holding one sub-directory per cluster for this survey.
    out_root : str
        Directory under which the per-cluster link directories are created.
    suffixes : iterable of str
        File-name suffixes appended to the cluster name.
    """
    for raw_name in tqdm_notebook(cluster_names, total=len(cluster_names)):
        # A missing catalog name cannot be turned into a directory name.
        if not isinstance(raw_name, str):
            continue
        name = raw_name.replace(' ', '_')

        source_dir = os.path.join(survey_dir, name)
        if not os.path.isdir(source_dir):
            continue

        target_dir = os.path.join(out_root, name)
        os.makedirs(target_dir, exist_ok=True)

        for suffix in suffixes:
            filename = f'{name}{suffix}'
            try:
                # The link is created even when the source file is absent
                # (it is then dangling), exactly as before.
                os.symlink(os.path.join(source_dir, filename),
                           os.path.join(target_dir, filename))
            except FileExistsError:
                # Deliberate: re-running the cell must not fail on links
                # that were created by an earlier run.
                pass


data = load_PSZcatalog()

# expanduser('~') honours $HOME when it is set but does not raise KeyError
# when it is not.
extern_dir = os.path.join(os.path.expanduser('~'),
                          'Projects/planckClusters/data/extern')
PS1_dir = f'{extern_dir}/PS1'
SDSS_dir = f'{extern_dir}/SDSS'
outpath = './data_full'

PS1_SUFFIXES = ['_PS1stack_g.fits', '_PS1stack_r.fits', '_PS1stack_i.fits',
                '_PS1stack_z.fits', '_PS1stack_y.fits', '_PS1stack_irg.tiff']
SDSS_SUFFIXES = ['_SDSSstack_g.fits', '_SDSSstack_r.fits', '_SDSSstack_i.fits',
                 '_SDSSstack_z.fits', '_SDSSstack_irg.tiff']

link_survey_stacks(data['NAME'], PS1_dir, outpath, PS1_SUFFIXES)
link_survey_stacks(data['NAME'], SDSS_dir, outpath, SDSS_SUFFIXES)



HBox(children=(IntProgress(value=0, max=1943), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1943), HTML(value='')))


