### File to evaluate difference in utilising isLRG directly and the full DESI pipeline

##### Existing Pipeline

In [2]:
from astropy.io import fits
import os
import wget
import numpy as np
import time
import pandas as pd
from brick import Brick
import telegram_send


In [51]:

""" File to download, process, classify and delete galaxies from DR9 all in one"""

""" Defining area to download, how many bricks to download in one session and which storage to use (Astrodisk is the name of a hardrive)"""
area = 'south'
device = 'Astrostick'
bricks_to_classify = 30000
south_survey_is_south = True

## ToDo: Create special folder in astrodisk with dedicated bricks
hdulistBricksSouthSummary = fits.open('../../bricks_data/survey-bricks-dr9-south.fits')
data_south = hdulistBricksSouthSummary[1].data
brickname_south = data_south.field('brickname')
brickid_south = data_south.field('brickid')
south_survey_is_south = data_south.field('survey_primary')

In [55]:
# Everything is times in order to measure when the pipeline slows down
start = time.time()

print()
print(f"=============================== Process {area} ..... ==================================")
print()

bricks_name = []
bricks_path = []

# Getting already downloaded files from the Harddrive:


for filename in os.listdir(f'/Volumes/{device}/bricks_data/{area}/'):
    brickn = filename.replace("tractor-", "")
    brickn = brickn.replace(".fits", "")
    bricks_path.append(filename)
    bricks_name.append(brickn)

bricks_name.pop()
bricks_path.pop()
"""for filename in os.listdir(f'../../bricks_data/tractor/'):
    if '.fits' not in filename:
        continue
    brickn = filename.replace("tractor-", "")
    brickn = brickn.replace(".fits", "")
    bricks_path.append(filename)
    bricks_name.append(brickn)"""

# Define empty Dataframes that will hold the information on stars and galaxies
df_galaxy = pd.DataFrame(columns=['BrickID', 'RA', 'DEC', 'LRG', 'ELG', 'QSO'])
df_stars = pd.DataFrame(columns=['RA', 'DEC', 'GMAG', 'RMAG', 'ZMAG'])

# Prints information on the  current session e.g. how many bricks are left --> all the code until here takes a few minutes to complete
print(f"No of bricks to classify in {area}: {len(bricks_name)} ")
print("Time taken for bricks left extraction: ", round(((time.time() - start) / 60), 2))

# There have been problems with very few bricks that were not found on the servers, this code is only to avoid the script from crashing here
c = 0
problem_bricks = []
inter = time.time()

# This is the actual loop doing the classification for the bricks that are missing from the catalogue:

for i, brickname in enumerate(bricks_name):

    # Download Brick

    brickid = brickid_south[np.where(brickname_south == brickname)]

    # North Bricks
    # brickid = brickid_north[np.where(brickname_north == brickname)]

    if len(brickid > 0):
        brickid = brickid[0]
    else:
        brickid = 0

    # Open Brick

    #hdu = fits.open(f'../../bricks_data/tractor/tractor-{brickname}.fits')
    hdu = fits.open(f'/Volumes/{device}/bricks_data/{area}/tractor-{brickname}.fits')
    data = hdu[1].data

    # Define the Brick Object  --> in brick.py
    brick = Brick(data)

    # south = north_survey_is_south[np.where(brickid_north == brickid)]

    south = south_survey_is_south[np.where(brickid_south == brickid)]
    if len(south) > 0:
        south = south[0]
    else:
        south = True

    ## Enable this is classifying North Objects
    # south = north_survey_is_south[np.where(brickid_north == brickid)][0]

    # Initialise Brick Object
    brick.initialise_brick_for_galaxy_classification(south)

    # Classify Brick objects into categories --> takes under 1 second after optimisation
    target_objects = brick.classify_galaxies()

    # Appending one empty line per brick to be sure that all bricks are extracted
    df_galaxy = df_galaxy.append({'BrickID': brickid, 'RA': np.nan, 'DEC': np.nan, 'LRG': 0, 'ELG': 0, 'QSO': 0},
                                 ignore_index=True)

    support_df = pd.DataFrame(target_objects,
                              columns=['BrickID', 'RA', 'DEC', 'LRG', 'ELG', 'QSO'])

    df_galaxy = df_galaxy.append(support_df)

    # Repeat steps for stellar objects

    brick.initialise_brick_for_stellar_density()

    #stars = brick.get_stellar_objects()

    #support_df = pd.DataFrame(stars, columns=['RA', 'DEC', 'GMAG', 'RMAG', 'ZMAG'])
    #df_stars = df_stars.append(support_df)

    # Every 100 objects, the newly classified objects are added to the existing catalogue to avoid massive reruns when the script crashes

    """if i % 100 == 0:
        print()
        print(i / (bricks_to_classify / 100), '%')
        df_galaxy = df_galaxy.astype(
            {'BrickID': 'int32', 'LRG': 'int8', 'ELG': 'int8', 'QSO': 'int8'})
        df_galaxy.to_csv(f'../../bricks_data/galaxy_catalogue_{area}.csv', mode='a', index=False, header=False)
        df_stars.to_csv(f'../../bricks_data/stellar_catalogue_{area}.csv', mode='a', index=False, header=False)
        # df_galaxy.to_csv('../../bricks_data/galaxy_catalogue_sample_profiling.csv', index=False, header=False)
        # df_stars.to_csv('../../bricks_data/stellar_catalogue_sample_profiling.csv', index=False, header=False)
        df_galaxy = df_galaxy[0:0]
        df_stars = df_stars[0:0]"""

    # This script used to send me updates to my phone using a Telegram Bot, so i knew when it crashed or it was completed

    # Remove Downloaded Brick
    # os.remove(f'/Volumes/{device}/bricks_data/{area}/tractor-{brickname}.fits')

    # Stop the loop when the defined number of bricks was classified, if this number is greater than remaining bricks, script will run till all bricks are finished
    if i > bricks_to_classify:
        break

    if i % 100 == 0:
        print(f" Brick {area} processed: ", brickname, ", Brick ", i, " of ", bricks_to_classify)

df_galaxy = df_galaxy.astype(
    {'BrickID': 'int32', 'LRG': 'int8', 'ELG': 'int8', 'QSO': 'int8'})
#df_galaxy.to_csv(f'../../bricks_data/galaxy_catalogue_{area}.csv', mode='a', index=False, header=False)
#df_stars.to_csv(f'../../bricks_data/stellar_catalogue_{area}.csv', mode='a', index=False, header=False)
#df_galaxy = df_galaxy[0:0]
#df_stars = df_stars[0:0]
print()
print(f"=============================== Download {area} completed ==================================")
print()

# Prints session statistics upon completion
print("Minutes taken for: ", i, " bricks: ", round(((time.time() - start) / 60), 2))
print("Hours taken for: ", i, " bricks: ", round(((time.time() - start) / 3600), 2))
# message = f'++++++ Finished {bricks_to_classify} bricks. Avg. Bandwidths: {round(((time.time() - start) / bricks_to_classify), 2)} seconds per brick ++++++'





No of bricks to classify in south: 1051 
Time taken for bricks left extraction:  0.0
 Brick south processed:  2443p257 , Brick  0  of  30000
 Brick south processed:  0531m550 , Brick  100  of  30000
 Brick south processed:  2133m022 , Brick  200  of  30000
 Brick south processed:  0813m195 , Brick  300  of  30000
 Brick south processed:  0254p060 , Brick  400  of  30000
 Brick south processed:  3142p100 , Brick  500  of  30000
 Brick south processed:  0171p265 , Brick  600  of  30000
 Brick south processed:  1295p062 , Brick  700  of  30000
 Brick south processed:  0405p090 , Brick  800  of  30000
 Brick south processed:  0346m290 , Brick  900  of  30000
 Brick south processed:  3500p065 , Brick  1000  of  30000


Minutes taken for:  1050  bricks:  15.77
Hours taken for:  1050  bricks:  0.26


0046m020


In [57]:

df_galaxy.dropna(inplace=True)

LRG = df_galaxy[df_galaxy.LRG == 1]
print("LRG:", len(LRG))

ELG = df_galaxy[df_galaxy.ELG == 1]
print("ELG:", len(ELG))

QSO = df_galaxy[df_galaxy.QSO == 1]
print("QSO:", len(QSO))

print(len(df_galaxy))
print(len(LRG) + len(ELG) + len(QSO))

print(len(df_galaxy) - (len(LRG) + len(ELG) + len(QSO)))

LRG: 41081
ELG: 154533
QSO: 195119
372459
390733
-18274


### Utilising DesiHub Pipeline

In [6]:
from desitarget.cuts import select_targets

filenames = []

#path = f'/Volumes/{device}/bricks_data/{area}/'
path = '../../bricks_data/tractor/'

for filename in os.listdir(path):
    if '.fits' not in filename:
        continue
    filenames.append(f'{path}/{filename}')
print(len(filenames))


2


In [8]:
res = select_targets(
    infiles=filenames, numproc=1, qso_selection='colorcuts', nside=None, gaiasub=False,
    tcnames=['LRG', 'ELG', 'QSO'], backup=False)


INFO:cuts.py:2942:select_targets: Running on the main survey
111


In [15]:

cols = [('RELEASE', '>i2'), ('BRICKID', '>i4'), ('BRICKNAME', 'S8'), ('BRICK_OBJID', '>i4'), ('MORPHTYPE', 'S4'), ('RA', '>f8'), ('RA_IVAR', '>f4'), ('DEC', '>f8'), ('DEC_IVAR', '>f4'), ('DCHISQ', '>f4', (5,)), ('EBV', '>f4'), ('FLUX_G', '>f4'), ('FLUX_R', '>f4'), ('FLUX_Z', '>f4'), ('FLUX_IVAR_G', '>f4'), ('FLUX_IVAR_R', '>f4'), ('FLUX_IVAR_Z', '>f4'), ('MW_TRANSMISSION_G', '>f4'), ('MW_TRANSMISSION_R', '>f4'), ('MW_TRANSMISSION_Z', '>f4'), ('FRACFLUX_G', '>f4'), ('FRACFLUX_R', '>f4'), ('FRACFLUX_Z', '>f4'), ('FRACMASKED_G', '>f4'), ('FRACMASKED_R', '>f4'), ('FRACMASKED_Z', '>f4'), ('FRACIN_G', '>f4'), ('FRACIN_R', '>f4'), ('FRACIN_Z', '>f4'), ('NOBS_G', '>i2'), ('NOBS_R', '>i2'), ('NOBS_Z', '>i2'), ('PSFDEPTH_G', '>f4'), ('PSFDEPTH_R', '>f4'), ('PSFDEPTH_Z', '>f4'), ('GALDEPTH_G', '>f4'), ('GALDEPTH_R', '>f4'), ('GALDEPTH_Z', '>f4'), ('FLUX_W1', '>f4'), ('FLUX_W2', '>f4'), ('FLUX_W3', '>f4'), ('FLUX_W4', '>f4'), ('FLUX_IVAR_W1', '>f4'), ('FLUX_IVAR_W2', '>f4'), ('FLUX_IVAR_W3', '>f4'), ('FLUX_IVAR_W4', '>f4'), ('MW_TRANSMISSION_W1', '>f4'), ('MW_TRANSMISSION_W2', '>f4'), ('MW_TRANSMISSION_W3', '>f4'), ('MW_TRANSMISSION_W4', '>f4'), ('ALLMASK_G', '>i2'), ('ALLMASK_R', '>i2'), ('ALLMASK_Z', '>i2'), ('FIBERFLUX_G', '>f4'), ('FIBERFLUX_R', '>f4'), ('FIBERFLUX_Z', '>f4'), ('FIBERTOTFLUX_G', '>f4'), ('FIBERTOTFLUX_R', '>f4'), ('FIBERTOTFLUX_Z', '>f4'), ('REF_EPOCH', '>f4'), ('WISEMASK_W1', 'u1'), ('WISEMASK_W2', 'u1'), ('MASKBITS', '>i2'), ('LC_FLUX_W1', '>f4', (15,)), ('LC_FLUX_W2', '>f4', (15,)), ('LC_FLUX_IVAR_W1', '>f4', (15,)), ('LC_FLUX_IVAR_W2', '>f4', (15,)), ('LC_NOBS_W1', '>i2', (15,)), ('LC_NOBS_W2', '>i2', (15,)), ('LC_MJD_W1', '>f8', (15,)), ('LC_MJD_W2', '>f8', (15,)), ('SHAPE_R', '>f4'), ('SHAPE_E1', '>f4'), ('SHAPE_E2', '>f4'), ('SHAPE_R_IVAR', '>f4'), ('SHAPE_E1_IVAR', '>f4'), ('SHAPE_E2_IVAR', '>f4'), ('SERSIC', '>f4'), ('SERSIC_IVAR', '>f4'), ('REF_ID', '>i8'), ('REF_CAT', 'S2'), ('GAIA_PHOT_G_MEAN_MAG', '>f4'), ('GAIA_PHOT_G_MEAN_FLUX_OVER_ERROR', '>f4'), ('GAIA_PHOT_BP_MEAN_MAG', '>f4'), ('GAIA_PHOT_BP_MEAN_FLUX_OVER_ERROR', '>f4'), ('GAIA_PHOT_RP_MEAN_MAG', '>f4'), ('GAIA_PHOT_RP_MEAN_FLUX_OVER_ERROR', '>f4'), ('GAIA_PHOT_BP_RP_EXCESS_FACTOR', '>f4'), ('GAIA_ASTROMETRIC_EXCESS_NOISE', '>f4'), ('GAIA_DUPLICATED_SOURCE', '?'), ('GAIA_ASTROMETRIC_SIGMA5D_MAX', '>f4'), ('GAIA_ASTROMETRIC_PARAMS_SOLVED', 'i1'), ('PARALLAX', '>f4'), ('PARALLAX_IVAR', '>f4'), ('PMRA', '>f4'), ('PMRA_IVAR', '>f4'), ('PMDEC', '>f4'), ('PMDEC_IVAR', '>f4'), ('PHOTSYS', '<U1'), ('TARGETID', '>i8'), ('DESI_TARGET', '>i8'), ('BGS_TARGET', '>i8'), ('MWS_TARGET', '>i8'), ('SUBPRIORITY', '>f8'), ('OBSCONDITIONS', '>i8'), ('PRIORITY_INIT_DARK', '>i8'), ('NUMOBS_INIT_DARK', '>i8'), ('PRIORITY_INIT_BRIGHT', '>i8'), ('NUMOBS_INIT_BRIGHT', '>i8'), ('PRIORITY_INIT_BACKUP', '>i8'), ('NUMOBS_INIT_BACKUP', '>i8')]

print(len(cols))


111


In [29]:
from desiutil.bitmask import BitMask
from desitarget.targetmask import load_mask_bits
_bitdefs = load_mask_bits()

desi_mask = BitMask('desi_mask', _bitdefs)

targets = set()
for j in range(len(res)):
    for i in range(len(res[0])):
        #print(f'{cols[i][0]}: {res[0][i]}')


        if cols[i][0] == 'DESI_TARGET':
            print(f'{cols[i][0]}: {res[j][i]}')

            targets.add(res[j][i])


print(targets)

DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TARGET: 1179778
DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TARGET: 65537
DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 65537
DESI_TARGET: 262148
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 1179778
DESI_TARGET: 65537
DESI_TARGET: 65537
DESI_TARGET: 655458
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 262148
DESI_TARGET: 655394
DESI_TARGET: 655458
DESI_TARGET: 1179778
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TARGET: 1179778
DESI_TARGET: 65537
DESI_TARGET: 1179778
DESI_TARGET: 655394
DESI_TARGET: 65537
DESI_TARGET: 1179778
DESI_TARGET: 655394
DESI_TARGET: 65537
DESI_TARGET: 65537
DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TARGET: 262148
DESI_TARGET: 655458
DESI_TARGET: 655394
DESI_TARGET: 655394
DESI_TARGET: 655458
DESI_TARGET: 65537
DESI_TARGET: 655394
DESI_TAR

#### Adapt to extract:

1. Different columns needed for redshift analysis
2. Separate LRG, ELG, QSO and Laymen Break Dropouts
3. Parallelise Runs


In [None]:
start = time.time()

elg = select_targets(
    infiles=filenames, numproc=1, qso_selection='colorcuts', nside=None, gaiasub=False,
    tcnames=['ELG'], backup=False
)

print("Minutes taken for: ", i, " bricks: ", round(((time.time() - start) / 60), 2))
print("Hours taken for: ", i, " bricks: ", round(((time.time() - start) / 3600), 2))



In [64]:
start = time.time()

qso = select_targets(
    infiles=filenames, numproc=1, qso_selection='colorcuts', nside=None, gaiasub=False,
    tcnames=['QSO'], backup=False)

print("Minutes taken for: ", i, " bricks: ", round(((time.time() - start) / 60), 2))
print("Hours taken for: ", i, " bricks: ", round(((time.time() - start) / 3600), 2))



INFO:cuts.py:2942:select_targets: Running on the main survey
INFO:cuts.py:3059:_update_status: 20/1051 files; 0.9 secs/file; 0.3 total mins elapsed
INFO:cuts.py:3059:_update_status: 40/1051 files; 0.9 secs/file; 0.6 total mins elapsed
INFO:cuts.py:3059:_update_status: 60/1051 files; 0.9 secs/file; 0.9 total mins elapsed
INFO:cuts.py:3059:_update_status: 80/1051 files; 0.9 secs/file; 1.2 total mins elapsed
INFO:cuts.py:3059:_update_status: 100/1051 files; 0.9 secs/file; 1.5 total mins elapsed
INFO:cuts.py:3059:_update_status: 120/1051 files; 0.9 secs/file; 1.8 total mins elapsed
INFO:cuts.py:3059:_update_status: 140/1051 files; 0.9 secs/file; 2.1 total mins elapsed
INFO:cuts.py:3059:_update_status: 160/1051 files; 0.9 secs/file; 2.4 total mins elapsed
INFO:cuts.py:3059:_update_status: 180/1051 files; 0.9 secs/file; 2.7 total mins elapsed
INFO:cuts.py:3059:_update_status: 200/1051 files; 0.9 secs/file; 3.0 total mins elapsed
INFO:cuts.py:3059:_update_status: 220/1051 files; 0.9 secs/file

In [66]:
start = time.time()

qso_rf = select_targets(
    infiles=filenames, numproc=1, qso_selection='randomforest', nside=None, gaiasub=False,
    tcnames=['QSO'], backup=False)

print("Minutes taken for: ", i, " bricks: ", round(((time.time() - start) / 60), 2))
print("Hours taken for: ", i, " bricks: ", round(((time.time() - start) / 3600), 2))


INFO:cuts.py:2942:select_targets: Running on the main survey
INFO:cuts.py:3059:_update_status: 20/1051 files; 5.8 secs/file; 1.9 total mins elapsed
INFO:cuts.py:3059:_update_status: 40/1051 files; 5.8 secs/file; 3.8 total mins elapsed
INFO:cuts.py:3059:_update_status: 60/1051 files; 5.6 secs/file; 5.6 total mins elapsed
INFO:cuts.py:3059:_update_status: 80/1051 files; 5.6 secs/file; 7.5 total mins elapsed
INFO:cuts.py:3059:_update_status: 100/1051 files; 5.6 secs/file; 9.4 total mins elapsed
INFO:cuts.py:3059:_update_status: 120/1051 files; 5.6 secs/file; 11.2 total mins elapsed
INFO:cuts.py:3059:_update_status: 140/1051 files; 5.6 secs/file; 13.0 total mins elapsed
INFO:cuts.py:3059:_update_status: 160/1051 files; 5.6 secs/file; 14.8 total mins elapsed
INFO:cuts.py:3059:_update_status: 180/1051 files; 5.6 secs/file; 16.7 total mins elapsed
INFO:cuts.py:3059:_update_status: 200/1051 files; 5.5 secs/file; 18.4 total mins elapsed
INFO:cuts.py:3059:_update_status: 220/1051 files; 5.5 secs

In [67]:
start = time.time()

res = select_targets(
    infiles=filenames, numproc=1, qso_selection='colorcuts', nside=None, gaiasub=False,
    tcnames=['LRG', 'ELG', 'QSO'], backup=False)

print("Minutes taken for: ", i, " bricks: ", round(((time.time() - start) / 60), 2))
print("Hours taken for: ", i, " bricks: ", round(((time.time() - start) / 3600), 2))


INFO:cuts.py:2942:select_targets: Running on the main survey
INFO:cuts.py:3059:_update_status: 20/1051 files; 0.9 secs/file; 0.3 total mins elapsed
INFO:cuts.py:3059:_update_status: 40/1051 files; 0.9 secs/file; 0.6 total mins elapsed
INFO:cuts.py:3059:_update_status: 60/1051 files; 0.9 secs/file; 0.9 total mins elapsed
INFO:cuts.py:3059:_update_status: 80/1051 files; 0.9 secs/file; 1.2 total mins elapsed
INFO:cuts.py:3059:_update_status: 100/1051 files; 0.9 secs/file; 1.5 total mins elapsed
INFO:cuts.py:3059:_update_status: 120/1051 files; 0.9 secs/file; 1.8 total mins elapsed
INFO:cuts.py:3059:_update_status: 140/1051 files; 0.9 secs/file; 2.1 total mins elapsed
INFO:cuts.py:3059:_update_status: 160/1051 files; 0.9 secs/file; 2.4 total mins elapsed
INFO:cuts.py:3059:_update_status: 180/1051 files; 0.9 secs/file; 2.7 total mins elapsed
INFO:cuts.py:3059:_update_status: 200/1051 files; 0.9 secs/file; 3.0 total mins elapsed
INFO:cuts.py:3059:_update_status: 220/1051 files; 0.9 secs/file

In [68]:
print("LRG:", len(lrg))
print("ELG:", len(elg))

LRG: 40569
ELG: 152287


In [72]:
print(len(lrg) + len(elg) + len(qso))

print("LRG:", len(lrg))
print("ELG:", len(elg))
print("QSO:", len(qso))
print("QSO_RF:", len(qso_rf))
print("All:", len(res))

211869
LRG: 40569
ELG: 152287
QSO: 19013
QSO_RF: 19907
All: 206874


In [None]:


filenames = []

for filename in os.listdir(f'../../bricks_data/tractor/'):
if '.fits' not in filename:
    continue
filenames.append(f'/Volumes/{device}/bricks_data/{area}/{filename}')





Work on Adapting the DesiTarget Pipeline to return different types of objects: --> not creating fork for lack of wifi:
Logic of Select_Targets:
1. Check whether bounded box is passed and sanitise inputs
2. for x in infiles:
    A. targets.append(_update_status(_select_targets_file(x)))
    2. targets = np.concatenate(targets)
    3. For file, process targets in file, print results, concat all target arrays

3. Remove gaiacuts


Changes:
- Process returned array to extract relevant information
- Distinguish between different types of galaxies
- Introduce Lyman Beak Galaxies
-