In [1]:
%matplotlib inline
import sys
from astropy import table
import numpy as np
from pathlib import Path
from matplotlib import colors, cm
from astropy import convolution
import cmocean
import betterplotlib as bpl

bpl.set_style()

In [2]:
# Project root sits one level above this notebook; the data live in <root>/data
home_dir = Path("../").resolve()
data_dir = home_dir.joinpath("data")

In [3]:
# Make the project's `pipeline` directory importable so that `utils` can be found
sys.path.append(str(home_dir / "pipeline"))
import utils

In [4]:
# Read the final size catalog of every galaxy and stack them into one table.
# `iterdir()` yields entries in arbitrary order and also returns stray files
# (e.g. hidden OS files), so sort for a reproducible row order and only treat
# directories as galaxies.
catalogs = []
for galaxy_dir in sorted(data_dir.iterdir()):
    if not galaxy_dir.is_dir():
        continue

    cat_name = galaxy_dir / "size" / "final_catalog_final_30_pixels_psf_my_stars_15_pixels_2x_oversampled.txt"
    final_cat = table.Table.read(str(cat_name), format="ascii.ecsv")
    catalogs.append(final_cat)

# join_type="inner" keeps only the columns that every catalog has in common
big_catalog = table.vstack(catalogs, join_type="inner")

# Flag clusters with weird error properties

In [5]:
# For mass and age, compare the best-fit value against its lower/upper limit:
#   *_equal_check -> the limit coincides with the best-fit value
#   *_bad_check   -> the best-fit value lies outside the limit
mass = big_catalog["mass_msun"]
mass_lo = big_catalog["mass_msun_min"]
mass_hi = big_catalog["mass_msun_max"]
big_catalog["mass_min_equal_check"] = mass == mass_lo
big_catalog["mass_max_equal_check"] = mass == mass_hi
big_catalog["mass_min_bad_check"] = mass < mass_lo
big_catalog["mass_max_bad_check"] = mass > mass_hi

age = big_catalog["age_yr"]
age_lo = big_catalog["age_yr_min"]
age_hi = big_catalog["age_yr_max"]
big_catalog["age_min_equal_check"] = age == age_lo
big_catalog["age_max_equal_check"] = age == age_hi
big_catalog["age_min_bad_check"] = age < age_lo
big_catalog["age_max_bad_check"] = age > age_hi

# The radius columns store the errors themselves rather than the limits, so
# "equal" means an error of exactly zero and "bad" means a negative error.
r_err_lo = big_catalog["r_eff_pc_rmax_15pix_e-"]
r_err_hi = big_catalog["r_eff_pc_rmax_15pix_e+"]
big_catalog["radius_min_equal_check"] = r_err_lo == 0
big_catalog["radius_max_equal_check"] = r_err_hi == 0
big_catalog["radius_min_bad_check"] = r_err_lo < 0
big_catalog["radius_max_bad_check"] = r_err_hi < 0

In [6]:
# Collect the names of all flag columns, in the order they appear in the table
colnames = [name for name in big_catalog.colnames if "_check" in name]

In [7]:
# Display the names of the flag columns that were selected
colnames

['mass_min_equal_check',
 'mass_max_equal_check',
 'mass_min_bad_check',
 'mass_max_bad_check',
 'age_min_equal_check',
 'age_max_equal_check',
 'age_min_bad_check',
 'age_max_bad_check',
 'radius_min_equal_check',
 'radius_max_equal_check',
 'radius_min_bad_check',
 'radius_max_bad_check']

# Then print the statistics for each galaxy

In [8]:
# Unique galaxy names in the combined catalog (np.unique returns them sorted)
galaxies = np.unique(big_catalog["galaxy"])

In [9]:
# Short column labels for the summary tables. "N" is the number of clusters; the
# remaining labels line up one-to-one with the flag columns in `colnames`:
#   M = mass, T = age, R = radius
#   "-" / "+" = lower / upper error
#   "=" = the *_equal_check flag, "b" = the *_bad_check flag
shorts = ["N", 'M-=', "M+=", 'M-b', "M+b", 'T-=', "T+=", 'T-b', "T+b", 'R-=', "R+=", 'R-b', "R+b"]

In [10]:
# Header: right-aligned galaxy label, then one 6-character column per short name
header = f"{'galaxy':>10s} - " + "".join(f"{s:>6} " for s in shorts)
print(header)

# One row per galaxy: the total number of clusters, followed by how many of
# them raise each flag (summing a boolean column counts its True entries)
for galaxy in galaxies:
    subset = big_catalog[big_catalog["galaxy"] == galaxy]
    n_total = len(subset)

    counts = [np.sum(subset[col]) for col in colnames]
    out_str = f"{galaxy:>10s} - {n_total:>6} "
    out_str += "".join(f"{count:>6} " for count in counts)
    print(out_str)

    galaxy -      N    M-=    M+=    M-b    M+b    T-=    T+=    T-b    T+b    R-=    R+=    R-b    R+b 
    ic4247 -      5      0      0      0      0      0      1      0      0      0      0      0      0 
     ic559 -     21      0      0      0      0      0      4      0      0      0      0      0      0 
 ngc1313-e -    137      9      0      0      2     63     78      0      2      0      0      0      0 
 ngc1313-w -    276     14      6      0      1    143    188      0      1      0      0      0      0 
   ngc1433 -    112      8      1      0     12     68     61      0     12      0      0      0      0 
   ngc1566 -    881     22      1      0     19    388    391      0     19      0      0      0      0 
   ngc1705 -     29      6      0      0      0      6     12      0      0      0      0      0      0 
   ngc3344 -    237     24      1      0      3    128    114      0      3      0      0      0      0 
   ngc3351 -     19      1      0      0      0      6 

Same thing, but showing the percentage of each galaxy's clusters with bad errors (the N column is still the raw cluster count)

In [48]:
# Same layout as the count table, but each flag is shown as a percentage of the
# galaxy's clusters. The first column is still the raw cluster count, so it
# keeps the plain "N" label while every flag column gets a "%" suffix.
header = f"{'galaxy':>10s} - "
for s in shorts:
    label = s if s == "N" else s + "%"
    header += f"{label:>6} "
print(header)

for galaxy in galaxies:
    mask = big_catalog["galaxy"] == galaxy
    subset = big_catalog[mask]

    n_total = len(subset)

    out_str = f"{galaxy:>10s} - "
    out_str += f"{n_total:>6} "
    for col in colnames:
        # summing a boolean flag column counts the flagged clusters
        out_str += f"{100 * np.sum(subset[col]) / n_total:>6.1f} "

    print(out_str)

    galaxy -     N%   M-=%   M+=%   M-b%   M+b%   T-=%   T+=%   T-b%   T+b%   R-=%   R+=%   R-b%   R+b% 
    ic4247 -      5    0.0    0.0    0.0    0.0    0.0   20.0    0.0    0.0    0.0    0.0    0.0    0.0 
     ic559 -     21    0.0    0.0    0.0    0.0    0.0   19.0    0.0    0.0    0.0    0.0    0.0    0.0 
 ngc1313-e -    137    6.6    0.0    0.0    1.5   46.0   56.9    0.0    1.5    0.0    0.0    0.0    0.0 
 ngc1313-w -    276    5.1    2.2    0.0    0.4   51.8   68.1    0.0    0.4    0.0    0.0    0.0    0.0 
   ngc1433 -    112    7.1    0.9    0.0   10.7   60.7   54.5    0.0   10.7    0.0    0.0    0.0    0.0 
   ngc1566 -    881    2.5    0.1    0.0    2.2   44.0   44.4    0.0    2.2    0.0    0.0    0.0    0.0 
   ngc1705 -     29   20.7    0.0    0.0    0.0   20.7   41.4    0.0    0.0    0.0    0.0    0.0    0.0 
   ngc3344 -    237   10.1    0.4    0.0    1.3   54.0   48.1    0.0    1.3    0.0    0.0    0.0    0.0 
   ngc3351 -     19    5.3    0.0    0.0    0.0   31.6 

# The mass errors are more worrying to me. Check the ones that are bad

In [16]:
def plot_cluster_samples(x, x_min, x_max, ids, galaxy, value):
    """
    Plot one horizontal error bar per cluster, with each cluster on its own row.

    :param x: Best-fit values, one per cluster. Must support elementwise
              subtraction with `x_min` and `x_max` (e.g. numpy arrays).
    :param x_min: Lower limits of `x`. The lower error bar is `x - x_min`.
    :param x_max: Upper limits of `x`. The upper error bar is `x_max - x`.
    :param ids: Cluster IDs, indexable by position, used in the y tick labels.
    :param galaxy: Galaxy name of each cluster, indexable by position, used in
                   the y tick labels.
    :param value: Which quantity is being plotted: "mass_msun", "radius", or
                  "age_yr". This selects the axis limits and the x label. Any
                  other value leaves the limits and labels untouched.
    :return: None. The figure is created but not returned.
    """
    # first plot the measured values. Here each cluster will have its own row
    # with a dummy y value. The figure height grows with the number of clusters.
    fig, ax = bpl.subplots(figsize=[7, 2 + 0.4 * len(x)])
    ax.make_ax_dark()
    dummy_y = np.arange(0, len(x))

    ax.errorbar(
        x,
        dummy_y,
        xerr=[x - x_min, x_max - x],
        c=bpl.color_cycle[0],
    )

    # then format the axes. set the y labels to be the cluster IDs
    ax.set_yticks(dummy_y)
    labels = [f"{galaxy[i]} - {ids[i]}" for i in range(len(x))]
    ax.set_yticklabels(labels)
    if value == "mass_msun":
        ax.set_limits(100, 1e6, min(dummy_y) - 1, max(dummy_y) + 1)
        # raw string: "\o" is not a valid escape sequence in a normal string
        ax.add_labels(r"Mass [$M_\odot$]", "ID")
    elif value == "radius":
        ax.set_limits(0.1, 30, min(dummy_y) - 1, max(dummy_y) + 1)
        ax.add_labels("Radius [pc]", "ID")
    elif value == "age_yr":
        ax.set_limits(1e6, 1e11, min(dummy_y) - 1, max(dummy_y) + 1)
        ax.add_labels("Age [yr]", "ID")
    ax.set_xscale("log")

In [45]:
# Column titles are the same for every flag, so build them once
column_titles = f"{'Galaxy':<10} {'ID':<8} {'success':<8} {'M-':<10} {'M':<10} {'M+':<10} {'T-':<10} {'T':<10} {'T+':<10}"
# Quantities shown (as log10) for each flagged cluster, in display order
log_keys = ("mass_msun_min", "mass_msun", "mass_msun_max", "age_yr_min", "age_yr", "age_yr_max")

# For each mass-related flag, list every cluster that raises it
for col in colnames:
    if "mass" in col:
        print('='*100)
        print(col)
        subset = big_catalog[big_catalog[col]]
        print(column_titles)
        for row in subset:
            log_values = [np.log10(row[key]) for key in log_keys]
            out_str = f"{row['galaxy']:<10} {row['ID']:<8} {row['good']:<8} "
            out_str += " ".join(f"{log_value:<10.2f}" for log_value in log_values)
            print(out_str)

mass_min_equal_check
Galaxy     ID       success  M-         M          M+         T-         T          T+        
ngc6503    670      0        3.49       3.49       3.51       7.18       7.18       7.18      
ngc6503    784      1        3.29       3.29       3.36       7.70       7.70       7.78      
ngc6503    1021     1        3.99       3.99       4.02       8.30       8.30       8.30      
ngc6503    1050     0        3.90       3.90       3.92       8.30       8.30       8.30      
ngc6503    1248     1        2.78       2.78       2.84       6.70       6.70       6.70      
ngc6503    1249     1        2.80       2.80       2.86       6.70       6.70       6.70      
ngc6503    1906     1        4.18       4.18       4.22       8.48       8.48       8.48      
ngc6503    2265     0        2.46       2.46       2.58       6.30       6.48       6.48      
ngc6503    2421     1        2.66       2.66       2.92       6.70       6.78       6.78      
ngc6503    2632     0        

  out_str += f"{np.log10(row['mass_msun_min']):<10.2f} {np.log10(row['mass_msun']):<10.2f} {np.log10(row['mass_msun_max']):<10.2f} "
  out_str += f"{np.log10(row['age_yr_min']):<10.2f} {np.log10(row['age_yr']):<10.2f} {np.log10(row['age_yr_max']):<10.2f}"


# See how many clusters have zero total mass error
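These are the clusters where both the lower and upper mass limits equal the best-fit mass, so their error would stay zero after I symmetrize them.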

In [40]:
# A cluster has zero total mass error when both mass limits equal the best-fit mass
zero_mass_error = np.logical_and(
    big_catalog["mass_min_equal_check"],
    big_catalog["mass_max_equal_check"],
)
print(np.sum(zero_mass_error))

1


This is the cluster with zero mass and zero error. This is fine; we don't need to throw clusters out because of their mass errors.

# 5983