In [1]:
from astroquery.sdss import SDSS
from astropy import coordinates as coords
from astropy.io import fits
import numpy as np
from PIL import Image
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from astropy.table import Table,vstack,Column,unique
import copy
import os.path

In [3]:
# Prepare the training dataset for yolo darknet (FIRST EXPERIMENT - THE ACTUAL MODELS WERE TRAINED USING TURICREATE)
#
# For every processed field this cell writes:
#   <outdir>/labels/<sample>/<imageName>.txt  one "class x y w h" line per kept object
#   <outdir>/images/<sample>/<imageName>.jpg  the field image re-saved as JPEG
# and appends the image path to <outdir>/<sample>.txt.

outdir = 'yolo'
sample = 'train' # choose 'validate' for validation data set

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_BOX_SIDE_PX = 5      # objects smaller than this are discarded
YOLO_MAX_FIELDS = 2      # this experiment only processes the first few fields

star_galaxy_data = Table.read('tables/star_galaxy_table.data',format='ascii')
star_galaxy_data = star_galaxy_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_table.data',format='ascii')

num_of_objects = len(star_galaxy_data)
num_of_fields = len(detected_fields)

# Make sure the output tree exists so the open()/save() calls below cannot
# fail on a missing directory.
os.makedirs('%s/images/%s'%(outdir,sample), exist_ok=True)
os.makedirs('%s/labels/%s'%(outdir,sample), exist_ok=True)

# The group keys do not change inside the loop, so look them up once.
field_keys = star_galaxy_data.groups.keys['imageName']

# 'with' guarantees the list file is closed even if a field fails midway.
with open('%s/%s.txt'%(outdir,sample),'w') as sample_file:

    # Never index past the end of the fields table.
    for i in range(min(YOLO_MAX_FIELDS, num_of_fields)):
        imageName = detected_fields['imageName'][i]

        image_source = 'jpg_images/%s.jpg'%(imageName)
        output_image_path = '%s/images/%s/%s.jpg'%(outdir,sample,imageName)
        output_labels_path = '%s/labels/%s/%s.txt'%(outdir,sample,imageName)

        # Get all objects in this field
        mask = field_keys == imageName
        objects_in_field = star_galaxy_data.groups[mask]
        num_of_obj_in_field = len(objects_in_field)

        # The context manager releases the image's file handle after each field.
        with Image.open(image_source) as image:
            image_width, image_height = image.size

            # Fields without any catalogued object produce no output at all.
            if num_of_obj_in_field < 1:
                continue

            with open(output_labels_path,'w') as output_labels_file:
                for j in range(num_of_obj_in_field):
                    petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

                    # Discard objects smaller than a 5 pixels box side
                    if petrosian_radius < MIN_BOX_SIDE_PX:
                        continue

                    column_center_r_band = objects_in_field['colc'][j]
                    row_center_r_band = objects_in_field['rowc'][j]
                    obj_class = objects_in_field['class'][j]

                    if obj_class == 'STAR':
                        obj_class_number = 0
                    else: # GALAXY
                        obj_class_number = 1

                    # All four box values are written as fractions of the image size.
                    # NOTE(review): the later cropping cells flip rowc
                    # (image_height - rowc) before treating it as a top-based
                    # coordinate, while here it is used unflipped - confirm
                    # which orientation these JPEGs have.
                    # NOTE(review): the scaled radius is used as the full box
                    # side here but as a half-side in the cropping cells - confirm.
                    x_ratio = column_center_r_band / image_width
                    y_ratio = row_center_r_band / image_height
                    width_ratio = petrosian_radius / image_width
                    height_ratio = petrosian_radius / image_height

                    output_labels_file.write('%d %f %f %f %f\n'%(obj_class_number, x_ratio, y_ratio, width_ratio, height_ratio))

            image.save(output_image_path, 'jpeg', quality=97)

            sample_file.write('%s/images/%s/%s.jpg\n'%(outdir,sample,imageName))

In [None]:
# STAR GALAXY DATASET
#
# Crops every star/galaxy whose scaled radius is within the accepted range out
# of its field JPEG into stars/ or galaxies/, and records one line per object
# in annotations.txt ("class x invertedY radius imageName").

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded
MAX_FIELDS = 7000        # upper bound on the number of fields processed

star_galaxy_data = Table.read('tables/star_galaxy_table_dr15.data',format='ascii')
star_galaxy_data = star_galaxy_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_table_dr15.data',format='ascii')

num_of_objects = len(star_galaxy_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directories exist before the first crop is saved.
os.makedirs('stars', exist_ok=True)
os.makedirs('galaxies', exist_ok=True)

numOfExtractedStars = 0
numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = star_galaxy_data.groups.keys['imageName']

# 'with' guarantees the annotations file is flushed and closed even if a
# field fails midway.
with open('annotations.txt','w') as annotations_file:

    # Never index past the end of the fields table.
    for i in range(min(MAX_FIELDS, num_of_fields)):

        imageName = detected_fields['imageName'][i]

        image_source = 'jpg_images_dr15/%s.jpg'%(imageName)

        # Get all objects in this field
        mask = field_keys == imageName
        objects_in_field = star_galaxy_data.groups[mask]
        num_of_obj_in_field = len(objects_in_field)

        # The context manager releases the image's file handle after each field.
        with Image.open(image_source) as image:
            image_width, image_height = image.size

            # Per-field counter used to build unique crop file names.
            objCount = 0

            for j in range(num_of_obj_in_field):

                petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

                # Keep only objects whose scaled radius is within [20, 300] pixels
                if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                    continue

                column_center_r_band = objects_in_field['colc'][j] # x
                row_center_r_band = objects_in_field['rowc'][j] # y
                obj_class = objects_in_field['class'][j]

                if obj_class == 'STAR':
                    output_image_path = 'stars/%s_obj%d.jpg'%(imageName,objCount)
                    numOfExtractedStars += 1
                else: # GALAXY
                    output_image_path = 'galaxies/%s_obj%d.jpg'%(imageName,objCount)
                    numOfExtractedGalaxies += 1
                objCount += 1

                x = column_center_r_band
                invertedY = image_height - row_center_r_band # Starts from top

                annotations_file.write('%s %f %f %f %s\n'%(obj_class, x, invertedY, petrosian_radius, imageName))

                # Square box of half-side petrosian_radius around the object,
                # clamped to the image.
                # NOTE(review): Pillow's crop box is exclusive on the right/lower
                # edge, so clamping to size - 1 drops the last column/row for
                # objects touching the border; kept as-is so regenerated crops
                # match the existing dataset.
                cropLeft = max(0, x - petrosian_radius)
                cropTop = max(0, invertedY - petrosian_radius)
                cropRight = min(image_width - 1, x + petrosian_radius)
                cropBottom = min(image_height - 1, invertedY + petrosian_radius)

                croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
                croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of stars: ", numOfExtractedStars)
print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY 2 DATASET
#
# Crops every galaxy with a known morphology, and a scaled radius within the
# accepted range, out of its field JPEG into star_galaxy_dataset/galaxies2/.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded
MAX_FIELDS = 7000        # upper bound on the number of fields processed

star_galaxy_morphology_data = Table.read('tables/star_galaxy_with_morphology_dr15_extra.data',format='ascii')
star_galaxy_morphology_data = star_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_table_dr15.data',format='ascii')

num_of_objects = len(star_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directory exists before the first crop is saved.
os.makedirs('star_galaxy_dataset/galaxies2', exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = star_galaxy_morphology_data.groups.keys['imageName']

# Never index past the end of the fields table.
for i in range(min(MAX_FIELDS, num_of_fields)):

    imageName = detected_fields['imageName'][i]

    image_source = 'jpg_images_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = star_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            # Only galaxies with a known morphology belong in this dataset.
            if obj_class != 'GALAXY' or objects_in_field['morphology'][j] == 'UnknownMorphology':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'star_galaxy_dataset/galaxies2/%s_obj%d.jpg'%(imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY 2 DATASET FOR EDGEON
#
# Crops every edge-on galaxy with a scaled radius within the accepted range
# out of its field JPEG into star_galaxy_dataset/galaxies2/.
# NOTE(review): file names are "<imageName>_obj<N>.jpg" with N restarting per
# field, and the GALAXY 2 DATASET cell writes to the same directory - if an
# imageName occurs in both field tables, crops may overwrite each other.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded

edgeon_galaxy_morphology_data = Table.read('tables/edgeon_galaxies_dr15.data',format='ascii')
edgeon_galaxy_morphology_data = edgeon_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_edgeon_table_dr15.data',format='ascii')

num_of_objects = len(edgeon_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directory exists before the first crop is saved.
os.makedirs('star_galaxy_dataset/galaxies2', exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = edgeon_galaxy_morphology_data.groups.keys['imageName']

for i in range(num_of_fields):

    imageName = detected_fields['imageName'][i]

    image_source = 'edgeon_galaxy_fields_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = edgeon_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            if obj_class != 'GALAXY':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            # Only edge-on galaxies are extracted; this also rejects
            # 'UnknownMorphology', so no separate test for it is needed.
            is_edgeon = objects_in_field['morphology'][j] == 'edgeon'
            if not is_edgeon:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'star_galaxy_dataset/galaxies2/%s_obj%d.jpg'%(imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY MORPHOLOGY DATASET
#
# Crops every galaxy with a recognised morphology, and a scaled radius within
# the accepted range, out of its field JPEG into
# galaxy_morphology_dataset/<morphology>/.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded
MAX_FIELDS = 7000        # upper bound on the number of fields processed

# Catalog morphology label -> output sub-directory. Labels that are not
# listed here are skipped.
MORPHOLOGY_DIRS = {
    'combined_spiral': 'spiral',
    'spiral': 'spiral',
    'elliptical': 'elliptical',
    'merge': 'merge',
    'edgeon': 'edgeon',
}

star_galaxy_morphology_data = Table.read('tables/star_galaxy_with_morphology_dr15_extra.data',format='ascii')
star_galaxy_morphology_data = star_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_table_dr15.data',format='ascii')

num_of_objects = len(star_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure every output directory exists before the first crop is saved.
for morphology_dir in set(MORPHOLOGY_DIRS.values()):
    os.makedirs('galaxy_morphology_dataset/%s'%(morphology_dir), exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = star_galaxy_morphology_data.groups.keys['imageName']

# Never index past the end of the fields table.
for i in range(min(MAX_FIELDS, num_of_fields)):

    imageName = detected_fields['imageName'][i]

    image_source = 'jpg_images_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = star_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            if obj_class != 'GALAXY':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            # str() keeps the lookup hashable whatever scalar type the column yields.
            morphology_dir = MORPHOLOGY_DIRS.get(str(objects_in_field['morphology'][j]))
            if morphology_dir is None:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'galaxy_morphology_dataset/%s/%s_obj%d.jpg'%(morphology_dir,imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY MORPHOLOGY EDGEON DATASET
#
# Crops every edge-on galaxy with a scaled radius within the accepted range
# out of its field JPEG into galaxy_morphology_dataset/edgeon/.
# NOTE(review): file names are "<imageName>_obj<N>.jpg" with N restarting per
# field, and the GALAXY MORPHOLOGY DATASET cell also writes edge-on crops to
# this directory - if an imageName occurs in both field tables, crops may
# overwrite each other.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded

edgeon_galaxy_morphology_data = Table.read('tables/edgeon_galaxies_dr15.data',format='ascii')
edgeon_galaxy_morphology_data = edgeon_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_edgeon_table_dr15.data',format='ascii')

num_of_objects = len(edgeon_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directory exists before the first crop is saved.
os.makedirs('galaxy_morphology_dataset/edgeon', exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = edgeon_galaxy_morphology_data.groups.keys['imageName']

for i in range(num_of_fields):

    imageName = detected_fields['imageName'][i]

    image_source = 'edgeon_galaxy_fields_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = edgeon_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            if obj_class != 'GALAXY':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            # Only edge-on galaxies are extracted by this cell.
            is_edgeon = objects_in_field['morphology'][j] == 'edgeon'
            if not is_edgeon:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'galaxy_morphology_dataset/edgeon/%s_obj%d.jpg'%(imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY MORPHOLOGY SPIRAL DATASET
#
# Crops every spiral galaxy with a scaled radius within the accepted range
# out of its field JPEG into galaxy_morphology_dataset/spiral/.
# NOTE(review): file names are "<imageName>_obj<N>.jpg" with N restarting per
# field, and the GALAXY MORPHOLOGY DATASET cell also writes spiral crops to
# this directory - if an imageName occurs in both field tables, crops may
# overwrite each other.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded

spiral_galaxy_morphology_data = Table.read('tables/spiral_galaxies_dr15.data',format='ascii')
spiral_galaxy_morphology_data = spiral_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_spiral_table_dr15.data',format='ascii')

num_of_objects = len(spiral_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directory exists before the first crop is saved.
os.makedirs('galaxy_morphology_dataset/spiral', exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = spiral_galaxy_morphology_data.groups.keys['imageName']

for i in range(num_of_fields):

    imageName = detected_fields['imageName'][i]

    image_source = 'spiral_galaxy_fields_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = spiral_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            if obj_class != 'GALAXY':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            # Only spiral galaxies are extracted by this cell.
            is_spiral = objects_in_field['morphology'][j] == 'spiral'
            if not is_spiral:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'galaxy_morphology_dataset/spiral/%s_obj%d.jpg'%(imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)

In [None]:
# GALAXY MORPHOLOGY ELLIPTICAL DATASET
#
# Crops every elliptical galaxy with a scaled radius within the accepted range
# out of its field JPEG into galaxy_morphology_dataset2/elliptical/.

PIXEL_SCALE = 0.396127   # avg. pixel scale, which is in arcsec
RADIUS_FACTOR = 2.1      # multiplier applied to petroRad_r before converting to pixels
MIN_RADIUS_PX = 20       # objects with a smaller scaled radius are discarded
MAX_RADIUS_PX = 300      # objects with a larger scaled radius are discarded
MAX_FIELDS = 7000        # upper bound on the number of fields processed

elliptical_galaxy_morphology_data = Table.read('tables/star_galaxy_with_morphology_dr15.data',format='ascii')
elliptical_galaxy_morphology_data = elliptical_galaxy_morphology_data.group_by('imageName')
detected_fields = Table.read('tables/detected_fields_table_dr15.data',format='ascii')

num_of_objects = len(elliptical_galaxy_morphology_data)
num_of_fields = len(detected_fields)

print(num_of_objects)
print(num_of_fields)

# Make sure the output directory exists before the first crop is saved.
os.makedirs('galaxy_morphology_dataset2/elliptical', exist_ok=True)

numOfExtractedGalaxies = 0

# The group keys do not change inside the loop, so look them up once.
field_keys = elliptical_galaxy_morphology_data.groups.keys['imageName']

# Never index past the end of the fields table.
for i in range(min(MAX_FIELDS, num_of_fields)):

    imageName = detected_fields['imageName'][i]

    image_source = 'jpg_images_dr15/%s.jpg'%(imageName)

    # Get all objects in this field
    mask = field_keys == imageName
    objects_in_field = elliptical_galaxy_morphology_data.groups[mask]
    num_of_obj_in_field = len(objects_in_field)

    # The context manager releases the image's file handle after each field.
    with Image.open(image_source) as image:
        image_width, image_height = image.size

        # Per-field counter used to build unique crop file names.
        objCount = 0

        for j in range(num_of_obj_in_field):

            obj_class = objects_in_field['class'][j]

            if obj_class != 'GALAXY':
                continue

            petrosian_radius = RADIUS_FACTOR * objects_in_field['petroRad_r'][j] / PIXEL_SCALE

            # Keep only objects whose scaled radius is within [20, 300] pixels
            if petrosian_radius < MIN_RADIUS_PX or petrosian_radius > MAX_RADIUS_PX:
                continue

            # Only elliptical galaxies are extracted by this cell.
            is_elliptical = objects_in_field['morphology'][j] == 'elliptical'
            if not is_elliptical:
                continue

            column_center_r_band = objects_in_field['colc'][j] # x
            row_center_r_band = objects_in_field['rowc'][j] # y

            output_image_path = 'galaxy_morphology_dataset2/elliptical/%s_obj%d.jpg'%(imageName,objCount)

            numOfExtractedGalaxies += 1
            objCount += 1

            x = column_center_r_band
            invertedY = image_height - row_center_r_band # Starts from top

            # Square box of half-side petrosian_radius around the object,
            # clamped to the image.
            # NOTE(review): Pillow's crop box is exclusive on the right/lower
            # edge, so clamping to size - 1 drops the last column/row for
            # objects touching the border; kept as-is so regenerated crops
            # match the existing dataset.
            cropLeft = max(0, x - petrosian_radius)
            cropTop = max(0, invertedY - petrosian_radius)
            cropRight = min(image_width - 1, x + petrosian_radius)
            cropBottom = min(image_height - 1, invertedY + petrosian_radius)

            croppedObjImg = image.crop((cropLeft, cropTop, cropRight, cropBottom)) # left, top, right, bottom
            croppedObjImg.save(output_image_path, 'jpeg', quality=97)

print("num of galaxies: ", numOfExtractedGalaxies)