In [2]:
from astroquery.sdss import SDSS
from astropy import coordinates as coords
from astropy.io import fits
import numpy as np
from PIL import Image, ImageDraw
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from astropy.table import Table,vstack,Column,unique, hstack
import copy
import os.path
from __future__ import print_function

In [183]:
# Retrieve star and galaxy objects and combine them in one table.

# Retrieved parameters for each object:
# objID: unique SDSS identifier composed from [skyVersion,rerun,run,camcol,field,obj]
# run: run number
# rerun: rerun number
# camcol: camera column
# field: field number
# rowc: row center position (r-band coordinates)
# colc: column center position (r-band coordinates)
# petroRad_r: petrosian radius (r-band)
# rowc_g: row center position (g-band coordinates)
# colc_g: column center position (g-band coordinates)
# rowc_i: row center position (i-band coordinates)
# colc_i: column center position (i-band coordinates)
# class: spectroscopic class taken from SpecObj ('STAR' or 'GALAXY' here)

# Data release
DR = 15

star_data = SDSS.query_sql('''SELECT TOP 20000 p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p 
                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                     WHERE s.class = 'STAR' ''', data_release=DR)

galaxy_data = SDSS.query_sql('''SELECT TOP 20000 p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p 
                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                     WHERE s.class = 'GALAXY' ''', data_release=DR)

# query_sql hands back None instead of an empty table when nothing matches
# (the morphology cells of this notebook check for that), and vstack cannot
# stack None, so fail here with a readable message.
if star_data is None or galaxy_data is None:
    raise RuntimeError("SDSS returned no rows for the STAR and/or GALAXY query")

# Combine the two tables in a vertical stack
star_galaxy_data = vstack([star_data, galaxy_data])

# Add the associated image name ("run_rerun_camcol_field") for each object.
# The column is built from the finished strings so that its width is derived
# from the longest name; a fixed-width blank placeholder filled row by row
# would silently truncate any name longer than the placeholder.
num_of_objects = len(star_galaxy_data)
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(star_galaxy_data['run'],
                                                    star_galaxy_data['rerun'],
                                                    star_galaxy_data['camcol'],
                                                    star_galaxy_data['field'])]
image_name_column = Column(name='imageName', data=image_names)
star_galaxy_data.add_column(image_name_column)

# Keep a single row per image name, i.e. one row per detected field
detected_fields = unique(star_galaxy_data, keys=['imageName'])

# NOTE(review): the next cell reads 'tables/detected_fields_table_dr%d.data';
# confirm whether this file is expected to be renamed by hand before it runs.
# overwrite=False makes this call fail if the file already exists.
detected_fields.write('tables/detected_fields_table.data', format='ascii', overwrite = False)

In [5]:
# Retrieve the objects that are not in the table for each field:
# for every detected field, download all STAR/GALAXY objects it contains.

DR = 15
detected_fields = Table.read('tables/detected_fields_table_dr%d.data'%(DR),format='ascii')
num_of_detected_fields = len(detected_fields)

# Per-field results are collected in a list and stacked once after the loop;
# stacking inside the loop re-copies every previously downloaded row on each
# iteration.
field_tables = []
for i in range(num_of_detected_fields):
    run = detected_fields['run'][i]
    rerun = detected_fields['rerun'][i]
    camcol = detected_fields['camcol'][i]
    field = detected_fields['field'][i]
    
    data = SDSS.query_sql('''SELECT p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p
                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                     WHERE (s.class = 'STAR' OR s.class = 'GALAXY')
                     AND p.run=%d AND p.rerun=%d AND p.camcol=%d AND p.field=%d'''%(run,rerun,camcol,field), data_release=DR, timeout = 120)

    # query_sql returns None when a field has no matching objects; such a
    # field contributes nothing and must not be passed to vstack.
    if data is None:
        continue
    field_tables.append(data)

# Without any rows there is no 'class' column to sort on and nothing to save.
if not field_tables:
    raise RuntimeError("no STAR/GALAXY objects were returned for any detected field")

star_galaxy_data = vstack(field_tables)

# Group the objects by class before saving
star_galaxy_data.sort(['class'])
# overwrite=False makes this call fail if the file already exists.
star_galaxy_data.write('tables/star_galaxy_table_dr%d_extra.data'%(DR),format='ascii', overwrite = False)

print("download done")


download done


In [None]:
# Add the image name of every object to the extra star/galaxy table and save
# the result under a new file name.

# Defined here so the cell does not depend on DR left over from another cell;
# 15 matches the 'dr15' input file this cell has always read.
DR = 15

star_galaxy_data = Table.read('tables/star_galaxy_table_dr%d_extra.data'%(DR),format='ascii')

num_of_objects = len(star_galaxy_data)

# Image name is "run_rerun_camcol_field". The column is built from the
# finished strings so that its width is derived from the longest name; a
# fixed-width blank placeholder filled row by row would silently truncate any
# name longer than the placeholder.
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(star_galaxy_data['run'],
                                                    star_galaxy_data['rerun'],
                                                    star_galaxy_data['camcol'],
                                                    star_galaxy_data['field'])]
image_name_column = Column(name='imageName', data=image_names)
star_galaxy_data.add_column(image_name_column)

# overwrite=False makes this call fail if the file already exists.
star_galaxy_data.write('tables/star_galaxy_table_dr%d_extra_with_imgname.data'%(DR),format='ascii', overwrite = False)

In [10]:
# GALAXY MORPHOLOGY
# Label every galaxy of the star/galaxy table with a morphology taken from the
# zooSpec table. Stars, galaxies without a zooSpec row and galaxies flagged as
# uncertain keep the "UnknownMorphology" placeholder.

DR = 15

star_galaxy_data = Table.read('tables/star_galaxy_table_dr15.data',format='ascii')

num_of_objects = len(star_galaxy_data)
# The placeholder also fixes the width of the string column; every label
# assigned below is shorter than it.
morphology_column = Column(name='morphology', data=["UnknownMorphology" for obj in range(num_of_objects)])
star_galaxy_data.add_column(morphology_column)

# from 0 to (num_of_objects - 1)
for i in range(num_of_objects):
    # Only galaxies have a morphology
    if star_galaxy_data['class'][i] == "STAR":
        continue
    objID = star_galaxy_data['objID'][i]
    
    galaxy_morph_data = SDSS.query_sql('''SELECT TOP 1 g.p_el, g.p_cw, g.p_acw, g.p_edge, g.p_mg, g.p_cs, g.spiral, g.elliptical, g.uncertain
                                        FROM zooSpec AS g
                                        WHERE (g.specobjid = %d OR g.objid = %d OR g.dr7objid = %d)'''%(objID, objID, objID), data_release=DR)
    
    # No zooSpec row for this object. This must be an identity check:
    # `== None` on a Table is a comparison against the table contents.
    if galaxy_morph_data is None:
        continue

    # The query asks for TOP 1, so the first row is the only row.
    morph = galaxy_morph_data[0]

    if morph['uncertain'] == 1:
        continue
    elif morph['elliptical'] == 1:
        star_galaxy_data['morphology'][i] = 'elliptical'
    elif morph['spiral'] == 1:
        star_galaxy_data['morphology'][i] = 'spiral'
    elif morph['p_edge'] >= 0.8:
        star_galaxy_data['morphology'][i] = 'edgeon'
    elif morph['p_mg'] >= 0.8:
        star_galaxy_data['morphology'][i] = 'merge'
    else: # clockwise and anticlockwise spirals
        # NOTE(review): this branch does not look at p_cw / p_acw; every
        # remaining galaxy is labelled 'spiral' - confirm that is intended.
        star_galaxy_data['morphology'][i] = 'spiral'

# overwrite=False makes this call fail if the file already exists.
star_galaxy_data.write('tables/star_galaxy_with_morphology_dr%d.data'%(DR),format='ascii', overwrite = False)

In [None]:
# GALAXY MORPHOLOGY - SECOND MORPHOLOGY SELECTION APPROACH
# Label every galaxy with the morphology whose zooSpec vote value is the
# largest. Stars, galaxies whose query fails or returns nothing, and galaxies
# whose largest vote is not positive keep the "UnknownMorphology" placeholder.

DR = 15

star_galaxy_data = Table.read('tables/star_galaxy_table_dr15_extra.data',format='ascii')

num_of_objects = len(star_galaxy_data)
# The placeholder also fixes the width of the string column; every label
# assigned below is shorter than it.
morphology_column = Column(name='morphology', data=["UnknownMorphology" for obj in range(num_of_objects)])
star_galaxy_data.add_column(morphology_column)

# (zooSpec vote column, morphology label). The order matters for ties: when
# several columns share the largest vote, the one listed last wins.
vote_labels = [('p_el', "elliptical"),
               ('p_cw', "spiral"),
               ('p_acw', "spiral"),
               ('p_edge', "edgeon"),
               ('p_mg', "merge"),
               ('p_cs', "combined_spiral")]

# from 0 to (num_of_objects - 1)
for i in range(num_of_objects):
    # Only galaxies have a morphology
    if star_galaxy_data['class'][i] == "STAR":
        continue
    objID = star_galaxy_data['objID'][i]
    
    try:
        galaxy_morph_data = SDSS.query_sql('''SELECT TOP 1 g.p_el, g.p_cw, g.p_acw, g.p_edge, g.p_mg, g.p_cs, g.spiral, g.elliptical, g.uncertain
                                            FROM zooSpec AS g
                                            WHERE (g.specobjid = %d OR g.objid = %d OR g.dr7objid = %d)'''%(objID, objID, objID), data_release=DR)
    except Exception:
        # Best effort: a failing query leaves this object unlabelled.
        # `Exception` (not a bare except) so that interrupting the kernel
        # still stops the loop.
        continue
        
    # No zooSpec row for this object. This must be an identity check:
    # `== None` on a Table is a comparison against the table contents.
    if galaxy_morph_data is None:
        continue
    
    # Find the largest vote and its label; `>=` makes the last listed column
    # win a tie.
    maxVote = None
    maxLabel = None
    for vote_column, vote_label in vote_labels:
        vote = galaxy_morph_data[vote_column][0]
        if maxVote is None or vote >= maxVote:
            maxVote = vote
            maxLabel = vote_label
    
    if maxVote <= 0:
        continue
    else:
        star_galaxy_data['morphology'][i] = maxLabel
        print("%d out of %d"%(i, num_of_objects))

# overwrite=False makes this call fail if the file already exists.
star_galaxy_data.write('tables/star_galaxy_with_morphology_dr%d_extra.data'%(DR),format='ascii', overwrite = False)

In [15]:
# INCLUDE IMAGE NAMES AS A COLUMN IN THE GALAXY MORPHOLOGY TABLE
# The table is read from and written back to the same file.

star_galaxy_morphology_data = Table.read('tables/star_galaxy_with_morphology_dr15_extra.data',format='ascii')

num_of_objects = len(star_galaxy_morphology_data)

# Image name is "run_rerun_camcol_field". The column is built from the
# finished strings so that its width is derived from the longest name; a
# fixed-width blank placeholder filled row by row would silently truncate any
# name longer than the placeholder.
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(star_galaxy_morphology_data['run'],
                                                    star_galaxy_morphology_data['rerun'],
                                                    star_galaxy_morphology_data['camcol'],
                                                    star_galaxy_morphology_data['field'])]
image_name_column = Column(name='imageName', data=image_names)

# Because the file is overwritten in place, a second run of this cell reads a
# table that already has 'imageName'; drop it first so add_column does not
# fail on a duplicate column name.
if 'imageName' in star_galaxy_morphology_data.colnames:
    star_galaxy_morphology_data.remove_column('imageName')
star_galaxy_morphology_data.add_column(image_name_column)

star_galaxy_morphology_data.write('tables/star_galaxy_with_morphology_dr15_extra.data',format='ascii', overwrite = True)

In [15]:
# EDGE-ON GALAXIES
# Download up to 300 galaxies whose zooSpec p_edge value is at least 0.8,
# tag them with their image name and the 'edgeon' morphology, and save both
# the galaxy table and the list of fields they were found in.

DR = 15

galaxy_data = SDSS.query_sql('''SELECT TOP 300 p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p 
                     JOIN SpecObj AS s ON s.bestobjid = p.objid
                     JOIN zooSpec AS g ON (g.specobjid = p.objid OR g.objid = p.objid OR g.dr7objid = p.objid)
                     WHERE s.class = 'GALAXY' AND g.p_edge >= 0.8''', data_release=DR)

# query_sql hands back None instead of an empty table when nothing matches
# (the morphology cells of this notebook check for that); fail with a readable
# message rather than on len(None).
if galaxy_data is None:
    raise RuntimeError("SDSS returned no edge-on galaxies")

# Add the associated image name ("run_rerun_camcol_field") for each object.
# The column is built from the finished strings so that its width is derived
# from the longest name; a fixed-width blank placeholder filled row by row
# would silently truncate any name longer than the placeholder.
num_of_objects = len(galaxy_data)
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(galaxy_data['run'],
                                                    galaxy_data['rerun'],
                                                    galaxy_data['camcol'],
                                                    galaxy_data['field'])]
image_name_column = Column(name='imageName', data=image_names)
galaxy_data.add_column(image_name_column)

# Every object returned by this query gets the same morphology label
morphology_column = Column(name='morphology', data=["edgeon" for obj in range(num_of_objects)])
galaxy_data.add_column(morphology_column)

# Keep a single row per image name, i.e. one row per detected field
detected_fields = unique(galaxy_data, keys=['imageName'])

detected_fields.write('tables/detected_fields_edgeon_table_dr%d.data'%(DR), format='ascii', overwrite = True)

galaxy_data.write('tables/edgeon_galaxies_dr%d.data'%(DR), format='ascii', overwrite = True)

In [None]:
# SPIRAL GALAXIES
# Download up to 300 galaxies whose zooSpec p_cw or p_acw value is at least
# 0.8, tag them with their image name and the 'spiral' morphology, and save
# both the galaxy table and the list of fields they were found in.

DR = 15

galaxy_data = SDSS.query_sql('''SELECT TOP 300 p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p 
                     JOIN SpecObj AS s ON s.bestobjid = p.objid
                     JOIN zooSpec AS g ON (g.specobjid = p.objid OR g.objid = p.objid OR g.dr7objid = p.objid)
                     WHERE s.class = 'GALAXY' AND (g.p_cw >= 0.8 OR g.p_acw >= 0.8)''', data_release=DR)

# query_sql hands back None instead of an empty table when nothing matches
# (the morphology cells of this notebook check for that); fail with a readable
# message rather than on len(None).
if galaxy_data is None:
    raise RuntimeError("SDSS returned no spiral galaxies")

# Add the associated image name ("run_rerun_camcol_field") for each object.
# The column is built from the finished strings so that its width is derived
# from the longest name; a fixed-width blank placeholder filled row by row
# would silently truncate any name longer than the placeholder.
num_of_objects = len(galaxy_data)
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(galaxy_data['run'],
                                                    galaxy_data['rerun'],
                                                    galaxy_data['camcol'],
                                                    galaxy_data['field'])]
image_name_column = Column(name='imageName', data=image_names)
galaxy_data.add_column(image_name_column)

# Every object returned by this query gets the same morphology label
morphology_column = Column(name='morphology', data=["spiral" for obj in range(num_of_objects)])
galaxy_data.add_column(morphology_column)

# Keep a single row per image name, i.e. one row per detected field
detected_fields = unique(galaxy_data, keys=['imageName'])

detected_fields.write('tables/detected_fields_spiral_table_dr%d.data'%(DR), format='ascii', overwrite = True)

galaxy_data.write('tables/spiral_galaxies_dr%d.data'%(DR), format='ascii', overwrite = True)

In [None]:
# ELLIPTICAL GALAXIES
# Download up to 300 galaxies whose zooSpec p_el value is at least 0.8, tag
# them with their image name and the 'elliptical' morphology, and save both
# the galaxy table and the list of fields they were found in.

DR = 15

galaxy_data = SDSS.query_sql('''SELECT TOP 300 p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p 
                     JOIN SpecObj AS s ON s.bestobjid = p.objid
                     JOIN zooSpec AS g ON (g.specobjid = p.objid OR g.objid = p.objid OR g.dr7objid = p.objid)
                     WHERE s.class = 'GALAXY' AND g.p_el >= 0.8''', data_release=DR)

# query_sql hands back None instead of an empty table when nothing matches
# (the morphology cells of this notebook check for that); fail with a readable
# message rather than on len(None).
if galaxy_data is None:
    raise RuntimeError("SDSS returned no elliptical galaxies")

# Add the associated image name ("run_rerun_camcol_field") for each object.
# The column is built from the finished strings so that its width is derived
# from the longest name; a fixed-width blank placeholder filled row by row
# would silently truncate any name longer than the placeholder.
num_of_objects = len(galaxy_data)
image_names = ['%d_%d_%d_%d'%(run,rerun,camcol,field)
               for run, rerun, camcol, field in zip(galaxy_data['run'],
                                                    galaxy_data['rerun'],
                                                    galaxy_data['camcol'],
                                                    galaxy_data['field'])]
image_name_column = Column(name='imageName', data=image_names)
galaxy_data.add_column(image_name_column)

# Every object returned by this query gets the same morphology label
morphology_column = Column(name='morphology', data=["elliptical" for obj in range(num_of_objects)])
galaxy_data.add_column(morphology_column)

# Keep a single row per image name, i.e. one row per detected field
detected_fields = unique(galaxy_data, keys=['imageName'])

detected_fields.write('tables/detected_fields_elliptical_table_dr%d.data'%(DR), format='ascii', overwrite = True)

galaxy_data.write('tables/elliptical_galaxies_dr%d.data'%(DR), format='ascii', overwrite = True)

In [None]:
# SYMMETRIC DATASET EXPERIMENT

# Stated goal: retrieve 30000 star objects and 30000 galaxy objects and
# combine them in one table. As written, the cell only downloads the objects
# of the first field listed in the saved field table.

# Columns retrieved for each object:
#   objID       unique SDSS identifier composed from [skyVersion,rerun,run,camcol,field,obj]
#   run         run number
#   rerun       rerun number
#   camcol      camera column
#   field       field number
#   rowc, colc          row / column center position (r-band coordinates)
#   petroRad_r          petrosian radius (r-band)
#   rowc_g, colc_g      row / column center position (g-band coordinates)
#   rowc_i, colc_i      row / column center position (i-band coordinates)

# The string below is an earlier version of the field query, kept as an inert
# string literal; it is never executed.
"""
field_data = SDSS.query_sql('''SELECT DISTINCT TOP 6000 f.fieldID, f.skyVersion, f.run, f.rerun, f.camcol, f.field, f.nStars, f.nGalaxy
                             FROM Field As f
                             JOIN PhotoObj AS p ON (p.skyVersion = f.skyVersion AND p.run = f.run AND p.rerun = f.rerun AND p.camcol = f.camcol AND p.field = f.field)
                             JOIN SpecObj AS s ON s.bestobjid = p.objid 
                             WHERE f.nStars > 0 AND f.nGalaxy > 0
                             AND (SELECT Count(*)
                                     FROM PhotoObj AS p
                                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                                     WHERE s.class = 'STAR'
                                     AND p.skyVersion = f.skyVersion AND p.run = f.run AND p.rerun = f.rerun AND p.camcol = f.camcol AND p.field = f.field) >= f.nStars
                            AND (SELECT Count(*)
                                     FROM PhotoObj AS p
                                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                                     WHERE s.class = 'GALAXY'
                                     AND p.skyVersion = f.skyVersion AND p.run = f.run AND p.rerun = f.rerun AND p.camcol = f.camcol AND p.field = f.field) >= f.nGalaxy''', data_release=15)
"""

# Fields saved earlier on disk; these drive the per-field download below.
fields = Table.read('tables/detected_fields_table_2.data',format='ascii')

# Field rows straight from SDSS; in this cell they are only counted.
field_data = SDSS.query_sql('''SELECT DISTINCT TOP 6000 f.fieldID, f.skyVersion, f.run, f.rerun, f.camcol, f.field, f.nStars, f.nGalaxy
                             FROM Field As f''', data_release=15)

num_of_fields = len(field_data)
print(num_of_fields)

# Template of the per-field object query; the four placeholders are filled
# with run, rerun, camcol and field, in that order.
object_query_template = '''SELECT p.objID, p.run, p.rerun, p.camcol, p.field, p.rowc, p.colc, p.petroRad_r, 
                     p.rowc_g, p.rowc_i, p.colc_g, p.colc_i, s.class
                     FROM PhotoObj AS p
                     JOIN SpecObj AS s ON s.bestobjid = p.objid 
                     WHERE (s.class = 'STAR' OR s.class = 'GALAXY')
                     AND p.run=%d AND p.rerun=%d AND p.camcol=%d AND p.field=%d '''

star_galaxy_data = Table()
# Only the first row of `fields` is processed.
for i in range(1):
    run, rerun, camcol, field = (fields[key][i] for key in ('run', 'rerun', 'camcol', 'field'))

    data = SDSS.query_sql(object_query_template%(run,rerun,camcol,field), data_release=15)
    star_galaxy_data = vstack([star_galaxy_data, data])

    # Running total of downloaded objects
    print(len(star_galaxy_data))


print(fields[0])
    