In [1]:
import sys
sys.path.insert(0, '../gofher')

import os
import scipy
import numpy as np
import itertools
from skimage import measure
import matplotlib.pyplot as plt
from scipy.stats import expon, norm

from galaxy import galaxy
#from run_sersic import run_custom_sersic
from sep_helper import run_sep
from gofher import extract_data_for_histogram
from spin_parity import classify_spin_parity
from visualize import visualize_hist, get_key
from spin_parity import read_spin_parity_galaxies_label_from_csv
from run_fit import calculate_dist
from run_sersic import fit_sersic, evaluate_sersic_model
from mask import create_ellipse_mask
from fits import view_fits
from file_helper import prepare_csv_row, get_csv_cols, write_csv, check_if_folder_exists_and_create

def find_mask_spot_closest_to_center(the_mask,approx_center):
    """Pick the connected region of a mask whose centroid lies nearest to a point.

        Args:
            the_mask: 2D array; non-zero pixels are grouped into connected
                regions with skimage's measure.label
            approx_center: (x, y) reference point, i.e. (column, row) -- this is
                how run_gofher_on_sparcfire builds the value it passes in
        Returns:
            boolean array shaped like the_mask that is True only on the region
                closest to approx_center; all False when the_mask contains no
                non-zero pixel"""
    # Imported locally so the submodule is guaranteed to be loaded; the notebook
    # itself only does `import scipy`.
    from scipy import ndimage

    # A single labelling pass is enough: 0-valued pixels are already treated as
    # background (label 0), so re-labelling the label image changes nothing.
    blobs_labels = measure.label(the_mask) #https://scipy-lectures.org/packages/scikit-image/auto_examples/plot_labels.html

    # Drop the background by label VALUE (not by list position) so a mask with
    # no background pixels is still handled correctly.
    region_ids = np.unique(blobs_labels) #https://stackoverflow.com/a/28663910/13544635
    region_ids = region_ids[region_ids != 0]
    if region_ids.size == 0:
        # Nothing was masked, so there is no region to select.
        return np.zeros(the_mask.shape, dtype=bool)

    # Unweighted centroid of every region (weights are all ones).
    centroids = ndimage.center_of_mass(np.ones(the_mask.shape), blobs_labels, index=region_ids)

    # center_of_mass reports array-index order (row, col); flip to (x, y) so the
    # centroid is compared with approx_center in the same coordinate order.
    dist_to_center = [calculate_dist((each[1], each[0]), approx_center) for each in centroids]

    return blobs_labels == region_ids[np.argmin(dist_to_center)]

In [2]:
# Column names looked up in the galaxy CSV rows.

# identification columns
NAME_KEY = 'name'
BAND_KEY = 'band'

# disk ellipse columns
DISK_MAJ_ANGLE_KEY = "diskMajAxsAngleRadians"
DISK_MIN_AXS_LEN_KEY = "diskMinAxsLen"
DISK_MAJ_AXS_LEN_KEY = "diskMajAxsLen"
INPUT_CENTER_C_KEY = "inputCenterC"
INPUT_CENTER_R_KEY = "inputCenterR"

# bulge columns
BULGE_MIN_AXS_LEN_KEY = 'bulgeMinAxsLen'
BULGE_MAJ_AXS_LEN_KEY = 'bulgeMajAxsLen'
BULGE_AXS_RATIO_KEY = 'bulgeAxisRatio'
BULGE_MAJ_AXS_ANGLE_KEY = 'bulgeMajAxsAngle'

# Every key a band dict must contain to be accepted.
# (BULGE_MIN_AXS_LEN_KEY is defined above but is not part of this list.)
BAND_DICT_KEYS = [
    DISK_MAJ_ANGLE_KEY,
    DISK_MIN_AXS_LEN_KEY,
    DISK_MAJ_AXS_LEN_KEY,
    INPUT_CENTER_C_KEY,
    INPUT_CENTER_R_KEY,
    BULGE_AXS_RATIO_KEY,
    BULGE_MAJ_AXS_LEN_KEY,
    BULGE_MAJ_AXS_ANGLE_KEY,
]

In [3]:
from math import pi

def can_float(to_check):
    """Tell whether float(to_check) succeeds.

        Args:
            to_check: any value
        Returns:
            True when float(to_check) does not raise, False when it raises any
                Exception"""
    try:
        float(to_check)
    except Exception:
        return False
    else:
        return True

def convert_matlab_to_cartessian(inputCenterC,inputCenterR):
    """Shift a (column, row) center by -1.5 along each axis.

        Args:
            inputCenterC: column coordinate of the center
            inputCenterR: row coordinate of the center
        Returns:
            (cx, cy) tuple: (inputCenterC - 1.5, inputCenterR - 1.5)"""
    # NOTE(review): the 1.5 offset presumably combines MATLAB's 1-based indexing
    # with a half-pixel shift -- confirm against the producer of these values.
    offset = 1.5
    return (inputCenterC - offset, inputCenterR - offset)

def convert_disk_angle_to_bisection_angle(diskMajAxsAngle):
    """Flip the sign of the disk major-axis angle.

        Args:
            diskMajAxsAngle: disk major-axis angle
        Returns:
            the angle multiplied by -1.0"""
    flipped_angle = -1.0 * diskMajAxsAngle
    return flipped_angle

def normalize_angle(angle_in_rads: float) -> float:
    """Given an angle measured in radians, convert angle so that angle ∈ [0.0,2.0 * pi]
            (NOTE: measured counterclockwise from right-hand side of x-axis)
        Args:
            angle_in_rads: an angle in radians
        Returns:
            normalized angle (normalized angle ∈ [0.0,2.0 * pi]),
                (NOTE: measured counterclockwise from right-hand side of x-axis)
                Exact multiples of 2.0 * pi of either sign map to 0.0; the upper
                bound 2.0 * pi can only appear through floating-point rounding
                of a tiny negative input."""

    # Python's % takes the sign of the (positive) divisor, so a single modulo
    # handles negative angles too and keeps coterminal angles consistent
    # (e.g. both 2*pi and -2*pi normalize to 0.0).
    return angle_in_rads % (2.0 * pi)

In [4]:
def calculate_ellipse_params(inputCenterC,inputCenterR,diskMajAxsLen,diskMinAxsLen,diskMajAxsAngleRadians):
    """Turn the disk columns of a band into ellipse parameters.

        Args:
            inputCenterC: center column, passed to convert_matlab_to_cartessian
            inputCenterR: center row, passed to convert_matlab_to_cartessian
            diskMajAxsLen: disk major-axis length (halved here)
            diskMinAxsLen: disk minor-axis length (halved here)
            diskMajAxsAngleRadians: disk major-axis angle, passed to
                convert_disk_angle_to_bisection_angle
        Returns:
            (h, k, a, b, theta): converted center, half major-axis length,
                half minor-axis length and converted angle"""
    center_x, center_y = convert_matlab_to_cartessian(inputCenterC, inputCenterR)
    half_major = 0.5 * diskMajAxsLen
    half_minor = 0.5 * diskMinAxsLen
    bisection_angle = convert_disk_angle_to_bisection_angle(diskMajAxsAngleRadians)
    return center_x, center_y, half_major, half_minor, bisection_angle
        
def load_band_dict(band_dict):
    """Read the disk ellipse of one band out of its key/value mapping.

        Args:
            band_dict: mapping from CSV column key to value for a single band
        Returns:
            (h, k, a, b, theta) as computed by calculate_ellipse_params; any
                disk value that is absent or not float-convertible stays at 0.
                Bulge entries in band_dict do not influence the result."""
    # Only these five entries feed the ellipse; each starts at 0.
    disk_values = {
        INPUT_CENTER_C_KEY: 0,
        INPUT_CENTER_R_KEY: 0,
        DISK_MAJ_AXS_LEN_KEY: 0,
        DISK_MIN_AXS_LEN_KEY: 0,
        DISK_MAJ_ANGLE_KEY: 0,
    }

    for each_key in band_dict:
        raw_value = band_dict[each_key]
        if each_key in disk_values and can_float(raw_value):
            disk_values[each_key] = float(raw_value)

    return calculate_ellipse_params(
        disk_values[INPUT_CENTER_C_KEY],
        disk_values[INPUT_CENTER_R_KEY],
        disk_values[DISK_MAJ_AXS_LEN_KEY],
        disk_values[DISK_MIN_AXS_LEN_KEY],
        disk_values[DISK_MAJ_ANGLE_KEY],
    )

In [5]:
import os
import pandas as pd
import numpy as np

def normalize_row_keys(the_row):
    """Map each whitespace-stripped key of a row back to the key as it appears in the row.

        Args:
            the_row: object exposing .keys() that yields string keys
        Returns:
            dict of {key.strip(): key}; when two keys strip to the same text the
                one iterated last wins"""
    return {original_key.strip(): original_key for original_key in the_row.keys()}

def row_to_band_dict(the_row):
    """Extract galaxy name, band and the BAND_DICT_KEYS values from one CSV row.

        Column headers are matched exactly first, then by their
        whitespace-stripped form (see normalize_row_keys).

        Args:
            the_row: one row of the galaxy CSV (supports `in`, [] and .keys())
        Returns:
            (gal_name, gal_band, gal_dict):
                gal_name / gal_band: the 'name' cell split on its LAST "_";
                    both stay '' when the cell is missing, is not a string
                    (e.g. NaN) or holds no "_" -- read_galaxy_csv skips such rows
                gal_dict: {key: value} for every key of BAND_DICT_KEYS that has
                    a non-missing value in the row"""
    gal_name = ''
    gal_band = ''
    gal_dict = {}
    normalized_rows = normalize_row_keys(the_row)

    def columns_for(key):
        # Exact header first, then the header that only matches once stripped.
        found = []
        if key in the_row:
            found.append(key)
        stripped_match = normalized_rows.get(key)
        if stripped_match is not None and stripped_match != key:
            found.append(stripped_match)
        return found

    for column in columns_for('name'):
        raw_name = the_row[column]
        # A missing cell comes through as NaN (a float), which has no .strip().
        if not isinstance(raw_name, str):
            continue
        pieces = raw_name.strip().rsplit("_",1)
        if len(pieces) == 2:
            gal_name, gal_band = pieces
            break

    for key in BAND_DICT_KEYS:
        for column in columns_for(key):
            value = the_row[column]
            # pd.isna copes with non-numeric cells (np.isnan raises on them) and
            # is applied to both the exact and the stripped column.
            if not pd.isna(value):
                gal_dict[key] = value
                break

    return gal_name, gal_band, gal_dict


#functions to read files:
def read_galaxy_csv(csv_path):
    '''Read per-band galaxy data from a csv.

        Args:
            csv_path: path of the csv file (read with ISO-8859-1 encoding)
        Returns:
            dict of {galaxy name: {band: band dict}}; a row is kept only when
                row_to_band_dict yields a non-empty name, a non-empty band and
                a value for every key of BAND_DICT_KEYS'''
    galaxies = dict()
    table = pd.read_csv(csv_path,encoding = 'ISO-8859-1')

    for _, row in table.iterrows():
        gal_name, gal_band, gal_dict = row_to_band_dict(row)
        row_is_complete = len(gal_dict) == len(BAND_DICT_KEYS)
        if gal_name == "" or gal_band == "" or not row_is_complete:
            continue
        galaxies.setdefault(gal_name, dict())[gal_band] = gal_dict
    return galaxies

#folder helper
def check_if_folder_exists_and_create(path):
    '''Check if folder exists and if not, create it (including missing parents).

        NOTE: this definition shadows the function of the same name imported
        from file_helper at the top of the notebook.

        Args:
            path: folder path to ensure; nothing is done when anything already
                exists at that path'''
    if not os.path.exists(path):
        # exist_ok closes the window between the check above and the creation:
        # if the folder appears in between, makedirs no longer raises.
        os.makedirs(path, exist_ok=True)

In [6]:
def run_gofher_on_sparcfire(name,fits_path, bands_in_order, ref_bands_in_order, dark_side_label, save_path, ref_band, h, k, a, b, theta, to_fit_sersic=False, r=0.5):
    """Run the spin-parity classification for one galaxy, starting from a caller-supplied ellipse.

        Args:
            name: galaxy name, passed to galaxy() and to fits_path
            fits_path: callable (name, band) -> path given to the_gal.load_data
            bands_in_order: bands to load; every 2-combination of them is
                considered as a band pair
            ref_bands_in_order: not used inside this function
            dark_side_label: forwarded to classify_spin_parity, visualize_hist
                and prepare_csv_row
            save_path: forwarded to visualize_hist as save_path
            ref_band: band whose image is used for the masks and the optional
                sersic fit; also stored on the_gal.ref_band
            h, k, a, b, theta: ellipse parameters handed to create_ellipse_mask
                and (optionally) fit_sersic; in this notebook they come from
                load_band_dict
            to_fit_sersic: when True the galaxy center and angle are taken from
                the model returned by fit_sersic, otherwise h, k and theta are
                used directly
            r: passed to the_gal.create_ellipse(r=r)
        Returns:
            the value returned by prepare_csv_row for this galaxy

        NOTE: reads the module-level name color_image_path (it is not a
        parameter), so that name must be defined before this function is called."""
    the_gal = galaxy(name)

    for band in bands_in_order:
        the_gal.load_data(band,fits_path(name,band))
    the_gal.ref_band = ref_band
        
    data = the_gal.data[ref_band]
    shape = the_gal.data[ref_band].shape
    # image center as (x, y) = (half the column count, half the row count)
    (cm_x, cm_y) = (shape[1]*0.5, shape[0]*0.5)
    # NOTE(review): neither returned value is used below; confirm whether the call
    # is still needed (e.g. for an exception it may raise).
    the_el_sep, mu_bkg = run_sep(data, cm_x, cm_y)
    
    # mask built from the supplied ellipse with r=1.0
    el_mask = create_ellipse_mask(h,k,a,b,theta,r=1.0,shape=shape)
    #view_fits(el_mask)

    # exponential distribution fitted to the valid pixels inside the ellipse,
    # then its pdf evaluated on every pixel of the image
    inside_ellipse = data[np.logical_and(el_mask,the_gal.valid_pixel_mask[the_gal.ref_band])].flatten()
    loc, scale = expon.fit(inside_ellipse) #https://stackoverflow.com/questions/25085200/scipy-stats-expon-fit-with-no-location-parameter
    pdf_in = expon.pdf(data, loc=loc, scale=scale)
    #view_fits(pdf_in)

    # same for the valid pixels outside the ellipse (loc and scale are reused)
    outside_ellipse = data[np.logical_and(np.logical_not(el_mask),the_gal.valid_pixel_mask[the_gal.ref_band])].flatten()
    loc, scale = expon.fit(outside_ellipse) #https://stackoverflow.com/questions/25085200/scipy-stats-expon-fit-with-no-location-parameter
    pdf_out = expon.pdf(data, loc=loc, scale=scale)
    #view_fits(pdf_out)

    # pixels whose value is more probable under the "inside" fit than the "outside" fit
    the_mask = pdf_out < pdf_in
    # connected part of that mask selected relative to the image center
    center_mask = find_mask_spot_closest_to_center(the_mask,(cm_x, cm_y))
    # remaining masked pixels; only referenced by the commented-out code below
    bright_spot_mask = np.logical_and(the_mask,np.logical_not(center_mask))

    if to_fit_sersic:
        # restrict the central region to valid pixels before fitting
        center_mask = np.logical_and(center_mask,the_gal.valid_pixel_mask[the_gal.ref_band])

        sersic_model = fit_sersic(data, b*0.5, h,k,a,b,theta, center_mask,center_buffer=8,theta_buffer=np.pi/16)
        #eval_fit = data-evaluate_sersic_model(sersic_model,shape)
        #eval_fit[bright_spot_mask] = 0
        #view_fits(eval_fit,std_range=3)


        #set galaxy parameters:
        # center and angle come from the fitted model; a and b stay as supplied
        the_gal.x = getattr(sersic_model,'x_0').value
        the_gal.y = getattr(sersic_model,'y_0').value
        the_gal.theta = getattr(sersic_model,'theta').value
        the_gal.a = a
        the_gal.b = b
    else:
        #set galaxy parameters:
        the_gal.x = h
        the_gal.y = k
        the_gal.theta = theta
        the_gal.a = a
        the_gal.b = b
    
    # el_mask is rebound here: from now on it is the galaxy's own ellipse for the given r
    el_mask = the_gal.create_ellipse(r=r)
    pos_mask, neg_mask = the_gal.create_bisection()
    
    # per band pair: the difference values gathered on each side of the bisection
    pos_side_diff_dict = dict()
    neg_side_diff_dict = dict()
    the_band_pairs = []
    for (first_band,base_band) in itertools.combinations(bands_in_order, 2):
        if the_gal.is_band_pair_valid(first_band,base_band):
            band_pair_key = get_key(first_band,base_band)
            the_band_pairs.append(band_pair_key)

            # the returned mask is not used below
            diff_image, mask = the_gal.create_diff_image(first_band,base_band,el_mask)

            #view_fits(diff_image)
            #view_fits(pos_mask)
            #view_fits(neg_mask)
            #view_fits(el_mask)
            pos_side_diffs, neg_side_diffs = extract_data_for_histogram(diff_image,pos_mask,neg_mask,el_mask)
            pos_side_diff_dict[band_pair_key] = pos_side_diffs
            neg_side_diff_dict[band_pair_key] = neg_side_diffs

    mean_diff_dict, the_label_dict, the_score_dict, pl, nl = classify_spin_parity(the_gal,dark_side_label,pos_side_diff_dict,neg_side_diff_dict)
    #view_fits(el_mask)
    # color_image_path is the module-level helper, not a parameter of this function
    visualize_hist(the_gal, el_mask, pos_mask, neg_mask, pl, nl,
                   pos_side_diff_dict, neg_side_diff_dict, 
                   mean_diff_dict, the_label_dict, the_score_dict, 
                   bands_in_order, dark_side_label, color_image_path, save_path=save_path)
    
    return prepare_csv_row(the_gal,dark_side_label,the_band_pairs, mean_diff_dict, the_score_dict, pl, nl, the_label_dict)

In [7]:
# Run configuration (for testing).

# Which table to process and where its results go.
folder_name = 'table5'
output_folder_name = 'gofher_sparcfire_half_sersic'

# Bands to load, and the order in which reference bands are tried.
bands_in_order = ["g", "r", "i", "z", "y"]
ref_bands_in_order = ["i", "z", "y", "r", "g"]

# Input galaxies (one sub-folder per table).
#path_to_input = "/Users/cora-at-work/Desktop/github/spin-parity-catalog/original/galaxies/"
path_to_input = (
    "C:\\Users\\school\\Desktop\\github\\spin-parity-catalog"
    "\\original\\galaxies\\"
)

# Root folder for everything this notebook writes.
path_to_output = "C:\\Users\\school\\Desktop\\gofher_output"

# Folder holding the table_<N>.csv label files.
#csv_path = "/Users/cora-at-work/Desktop/github/spin-parity-catalog/table_info/csv_format_of_table/"
csv_path = (
    "C:\\Users\\school\\Desktop\\github\\spin-parity-catalog"
    "\\table_info\\csv_format_of_table\\"
)

def fits_path(name,band):
    """Build <path_to_input>/<folder_name>/<name>/<name>_<band>.fits for a galaxy and band."""
    file_name = "{}_{}.fits".format(name,band)
    return os.path.join(path_to_input,folder_name,name,file_name)

def get_galaxy_list():
    """List the directory entries found under <path_to_input>/<folder_name>."""
    galaxies_folder = os.path.join(path_to_input,folder_name)
    return os.listdir(galaxies_folder)

def color_image_path(name):
    """Build <path_to_input>/<folder_name>/<name>/<name>_color.jfif for a galaxy."""
    file_name = "{}_color.jfif".format(name)
    return os.path.join(path_to_input,folder_name,name,file_name)

def get_csv_path():
    """Build <csv_path>/table_<N>.csv, where <N> is the table number at the end of folder_name.

        <N> is the whole run of trailing digits of the stripped folder_name, so
        "table5" gives table_5.csv and a multi-digit name such as "table10"
        gives table_10.csv. When folder_name does not end in a digit its last
        character is used."""
    stripped = folder_name.strip()
    # everything that rstrip would remove is exactly the trailing digit run
    trailing_digits = stripped[len(stripped.rstrip("0123456789")):]
    table_id = trailing_digits if trailing_digits else stripped[-1]
    return os.path.join(csv_path,"table_{}.csv".format(table_id))

def get_save_hist_path(name):
    """Build <path_to_output>/<output_folder_name>/<folder_name>/<name>.png for a galaxy."""
    file_name = "{}.png".format(name)
    return os.path.join(path_to_output,output_folder_name,folder_name,file_name)

def get_csv_out_path():
    """Build <path_to_output>/<output_folder_name>/<folder_name>.csv, the results CSV of this run."""
    file_name = "{}.csv".format(folder_name)
    return os.path.join(path_to_output,output_folder_name,file_name)

In [8]:
#path_to_sparcfire_csv = "/Users/cora-at-work/Desktop/github/spin-parity-catalog/original/output_from_running/SpArcFiRe/{}/galaxy.csv".format(folder_name)
path_to_sparcfire_csv = "C:\\Users\\school\\Desktop\\github\\spin-parity-catalog\\original\\output_from_running\\SpArcFiRe\\{}\\galaxy.csv".format(folder_name)

galaxy_csv = read_galaxy_csv(path_to_sparcfire_csv)
dark_side_labels = read_spin_parity_galaxies_label_from_csv(get_csv_path())
#dark_side_labels['IC 2101'] = dark_side_labels['IC2101']
the_band_pairs, the_csv_cols = get_csv_cols(bands_in_order)
the_csv_rows = []
# i is printed next to each galaxy name and only advances after a successful
# run, so it reads as "number of rows collected so far + 1".
i = 1

#create folders
check_if_folder_exists_and_create(os.path.join(path_to_output,output_folder_name))
check_if_folder_exists_and_create(os.path.join(path_to_output,output_folder_name,folder_name))

#modify these for run:
to_fit_sersic=True
r=0.5

for name in galaxy_csv:
    print(i, name)
    if name not in dark_side_labels: continue

    # Try the reference bands in order of preference; the first one that runs
    # to completion provides the row for this galaxy.
    for band in ref_bands_in_order:
        if band not in galaxy_csv[name]: continue

        try:
            h,k,a,b,theta = load_band_dict(galaxy_csv[name][band])
        except Exception as err:
            # Still best-effort, but the reason is reported and KeyboardInterrupt
            # is no longer swallowed (a bare except would catch it).
            print("  skipped band {} of {} (could not load ellipse): {!r}".format(band, name, err))
            continue

        # a zero-length axis means no usable ellipse for this band
        if a == 0 or b == 0: continue

        try:
            csv_row = run_gofher_on_sparcfire(name, fits_path, bands_in_order, ref_bands_in_order, dark_side_labels[name], get_save_hist_path(name), band, h,k,a,b,theta,to_fit_sersic,r)
        except Exception as err:
            print("  skipped band {} of {} (run failed): {!r}".format(band, name, err))
            continue

        the_csv_rows.append(csv_row)
        i += 1
        break
    #to stop early while debugging:
    #if i > 3:
    #    break

write_csv(get_csv_out_path(),the_csv_cols,the_csv_rows)

1 IC2247
2 IC540
3 IC944
4 MCG-02-02-040
5 MCG-02-03-015
6 NGC1542
7 NGC3067
8 NGC3169
9 NGC3495
10 NGC3626
11 NGC4605
12 NGC4772
13 NGC6314
14 NGC681
15 UGC10205
16 UGC10297
17 UGC3107
18 UGC5111
19 UGC5498
19 UGC6036
20 UGC8267
21 UGC8778
22 UGC9665
