## Classification of Objects in selected bricks into ELG, LRG, QSO

In [35]:
import sys
import numpy as np
from astropy import constants as const
from astropy import units as u
from astropy.io import fits
from astropy.io import ascii
import random
import matplotlib.pyplot as plt
import wget
import seaborn as sns
import random
import os
import warnings
import time
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import pickle

warnings.filterwarnings("ignore")
random.seed(666)
#import healpy

In [36]:
from platform import python_version
print(python_version())

3.8.8


### Defining colour cuts

In [37]:
def is_LRG_target(g,r,z,W1):
    if not (18.01 < z < 20.41):
        return False
    if not (0.75 < (r - z) < 2.45):
        return False
    if not (-0.6 < (z - W1) - 0.8*(r - z)):
        return False
    if not ((z - 17.18)/2 < (r - z) < (z - 15.11)/2):
        return False
    if not (((r - z) > 1.15) or ((g-r) > 1.65)):
        return False
    return True
    
def is_ELG_target(g,r,z):
    if not (21.0 < g < 23.45):
        return False
    if not (0.3 < (r - z) < 1.6):
        return False
    if not ((g-r) < 1.15*(r-z) - 0.15):
        return False
    if not ((g-r) < 1.6 - 1.2*(r-z)):
        return False
    return True

def is_QSO_target(g,r,z,maskbit):
    if r > 22.7:
        return False
    if r < 17.5:
        return False
    if not g - r < 1.3:
        return False
    if not (-0.4 < r - z < 1.1):
        return False
    return True
        

### Getting bricks data

In [38]:
hdulistBricksSummary = fits.open('../bricks_data/survey-bricks-dr9-south.fits')
dataSummary = hdulistBricksSummary[1].data
bricknameSummary = dataSummary.field('brickname')
brickidSummary = dataSummary.field('brickid')
brick_galaxy_info = np.array([[220271,34,10,177]])

## Classification Loop
#### Steps:
##### 1. Download a randomly sampled brick
##### 2. Calculate extinction corrected magnitudes ( m=22.5−2.5log10(flux))
##### 3. Apply colour cuts
##### 4. Delete brick data

In [39]:
# Asserting that numerical brickId is also unique and can be used as more storage efficient index to a brick
print(len(bricknameSummary))
print(len(brickidSummary))
print(len(np.unique(brickidSummary)))

253658
253658
253658


In [41]:
start = time.time()

for brick in range(1):
    
    #Sampling a random brick and preparing the data
    randomint = random.randint(0, len(bricknameSummary))
    brickname = bricknameSummary[randomint] 
    brickid = brickidSummary[randomint] 
    
    folder = brickname[:3]
    url = f'https://portal.nersc.gov/cfs/cosmo/data/legacysurvey/dr9/south/tractor/{folder}/tractor-{brickname}.fits'
    #wget.download(url, '../bricks_data/tractor/')
    wget.download(url, '/Volumes/Astrostick/bricks_data/')

    #hdulistSingleBrick = fits.open(f'../bricks_data/tractor/tractor-{brickname}.fits')

    hdulistSingleBrick = fits.open(f'/Volumes/Astrostick/bricks_data/tractor-{brickname}.fits')
    data = hdulistSingleBrick[1].data
    
    #fluxes and magnitude
    flux_g = data.field('flux_g')
    flux_r = data.field('flux_r')
    flux_z = data.field('flux_z')
    flux_w1 = data.field('flux_w1')
    flux_w2 = data.field('flux_w2')
    flux_w3 = data.field('flux_w3')
    flux_w4 = data.field('flux_w4')

    mw_transmission_g = data.field('mw_transmission_g')
    mw_transmission_r = data.field('mw_transmission_r')
    mw_transmission_z = data.field('mw_transmission_z')
    mw_transmission_w1 = data.field('mw_transmission_w1')
    mw_transmission_w2 = data.field('mw_transmission_w2')
    mw_transmission_w3 = data.field('mw_transmission_w3')
    mw_transmission_w4  = data.field('mw_transmission_w4')

    #correcting for extinction ---> divide by the transmission
    flux_g_corrected = flux_g - mw_transmission_g
    flux_r_corrected = flux_r - mw_transmission_r
    flux_z_corrected = flux_z - mw_transmission_z
    flux_w1_corrected = flux_w1 - mw_transmission_w1
    flux_w2_corrected = flux_w2 - mw_transmission_w2
    flux_w3_corrected = flux_w3 - mw_transmission_w3
    flux_w4_corrected = flux_w4 - mw_transmission_w4



    mag_g = 22.5-2.5*np.log10(flux_g_corrected)
    mag_r = 22.5-2.5*np.log10(flux_r_corrected)
    mag_z = 22.5-2.5*np.log10(flux_z_corrected)
    mag_w1 = 22.5-2.5*np.log10(flux_w1_corrected)
    mag_w2 = 22.5-2.5*np.log10(flux_w2_corrected)
    mag_w3 = 22.5-2.5*np.log10(flux_w3_corrected)
    mag_w4 = 22.5-2.5*np.log10(flux_w4_corrected)

    #Retrieving the maskbits for quasar detection

    maskbits = data.field('maskbits')

    target_label_array = np.zeros(len(mag_g))
    for i in range(len(mag_g)):
        if is_LRG_target(mag_g[i], mag_r[i], mag_z[i], mag_w1[i]):
            target_label_array[i] = 1
            continue
        if is_ELG_target(mag_g[i], mag_r[i], mag_z[i]):
            target_label_array[i] = 2
            continue
        if is_QSO_target(mag_g[i], mag_r[i], mag_z[i], maskbits[i]):
            target_label_array[i] = 3
            continue
            
    lrg = (target_label_array == 1).sum()
    elg = (target_label_array == 2).sum()
    qso = (target_label_array == 3).sum()
    
    summary_stats = np.array([[brickid,lrg,elg,qso]])

    brick_galaxy_info = np.append(brick_galaxy_info, summary_stats, axis=0)
    
    #os.remove(f'../bricks_data/tractor/tractor-{brickname}.fits')
    os.remove(f'/Volumes/Astrostick/bricks_data/tractor-{brickname}.fits')

    if (bricks % 50) == 0:
        print("Bricks finished:", brick, " of 1000")

print(brick_galaxy_info)

print("Time taken: ", time.time()- start)

Bricks finished: 0  of 1000
[[220271     34     10    177]
 [382618     51      3    316]
 [324544     27      6    261]]
Time taken:  6.011116981506348


In [None]:
print(len(brick_galaxy_info))


### Beginning preliminary analysis

In [None]:
sample = brick_galaxy_info
nexp_gSummary = dataSummary.field('nexp_g')
nexp_rSummary = dataSummary.field('nexp_r')
nexp_zSummary = dataSummary.field('nexp_z')



In [None]:
print(sample.shape)

In [None]:
sample = np.c_[sample, np.zeros(len(sample))]
print(sample.shape)
print(sample)

In [None]:
print(sample[:,0])
ind = 0
for j in range(len(brickidSummary)):
    if sample[0,0] == brickidSummary[j]:
        ind = j
print(ind)
print(nexp_gSummary[ind])
print(nexp_rSummary[ind])
print(nexp_zSummary[ind])


In [None]:
#Exporting the data
with open('sample_brick_catalogue.pickle', 'wb') as f:
    pickle.dump(sample, f)

In [None]:

sampled = sample
for i in range(len(sampled)):
    index = 0
    for j in range(len(brickidSummary)):
        if sampled[i,0] == brickidSummary[j]:
            index = j
    sampled[i, 4] = nexp_gSummary[index]
    sampled[i, 5] = nexp_rSummary[index]
    sampled[i, 6] = nexp_zSummary[index]

In [None]:
elg = sampled[:, 1].reshape(-1, 1)
lrg = sampled[:, 2].reshape(-1, 1)
olg = sampled[:, 3].reshape(-1, 1)
exposure_g = sampled[:, 4].reshape(-1, 1)
exposure_r = sampled[:, 5].reshape(-1, 1)
exposure_z = sampled[:, 6].reshape(-1, 1)

In [None]:
reg_g_elg = LinearRegression().fit(exposure_g, elg)
print(reg_g_elg.score(exposure_g, elg))

In [None]:
elg = sampled[:, 1]
exposure_g = sampled[:, 4].reshape(-1,1)
print(elg.shape)
print(exposure_g.shape)
ols_g_elg = sm.OLS(elg,exposure_g).fit()
print(ols_g_elg.summary())