# How cleanly are different spectral categories separated in the WHAV* diagram? Can we come up with clean dividing lines that can be used for reliable spectral classification based only on the H$\alpha$ EW and $\sigma_{\rm gas} - \sigma_*$?
---
As Lisa suggested: given a set of boundaries in the WHAV* diagram, what % of BPT-classified star-forming spaxels lie within those boundaries, etc.? 

In here:
1. Recreate Fig. 15 from the paper draft, but with each spectral classification shown separately (classification based on *total* emission line fluxes)
2. Repeat the above, but with the emission line fluxes from *individual components*. Idea: put all 3 components in 1 diagram to make larger samples. 

In [1]:
# Select the interactive "widget" matplotlib backend for the figures in this notebook
%matplotlib widget

In [2]:
# Imports
import sys
import os 
import numpy as np
import pandas as pd
from astropy.visualization import hist

from spaxelsleuth.loaddata.lzifu import load_lzifu_galaxies
from spaxelsleuth.loaddata.sami import load_sami_galaxies
from spaxelsleuth.plotting.plottools import plot_empty_BPT_diagram
from spaxelsleuth.plotting.plottools import vmin_fn, vmax_fn, label_fn, cmap_fn
from spaxelsleuth.plotting.plottools import bpt_colours, bpt_labels, bpt_ticks
from spaxelsleuth.plotting.plottools import morph_labels, morph_ticks
from spaxelsleuth.plotting.plottools import ncomponents_labels, ncomponents_colours
from spaxelsleuth.plotting.plottools import component_labels, component_colours
from spaxelsleuth.plotting.plotgalaxies import plot2dhistcontours, plot2dscatter, plot2dcontours

import matplotlib
from matplotlib import rc, rcParams
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Global matplotlib style: no LaTeX text rendering, 14 pt serif font, and
# tightly-cropped PDF output from savefig
rcParams.update({
    "text.usetex": False,
    "font.family": "serif",
    "font.size": 14,
    "savefig.bbox": "tight",
    "savefig.format": "pdf",
})

# Switch interactive mode on and close every figure that is currently open
plt.ion()
plt.close("all")


In [3]:
# Options: everything a reader might want to tune before re-running the notebook
# NOTE(review): fig_path is an absolute, machine-specific path and savefigs is
# not read by any cell visible here — confirm both are still needed
fig_path = "/priv/meggs3/u5708159/SAMI/figs/full_sample/"
savefigs = False
bin_type = "default"    # Options: "default" or "adaptive" for Voronoi binning
ncomponents = "recom"   # Options: "1" or "recom"
eline_SNR_min = 3       # Minimum S/N of emission lines to accept


In [4]:
# Load the sample
# The SAMI DataFrame is loaded with the options set above. Going by the keyword
# names, the velocity-gradient cut and the extinction correction are switched
# off and the sigma_gas S/N cut is switched on (presumed meaning — confirm
# against the spaxelsleuth documentation).
df = load_sami_galaxies(ncomponents=ncomponents,
                        bin_type=bin_type,
                        eline_SNR_min=eline_SNR_min, 
                        vgrad_cut=False,
                        correct_extinction=False,
                        sigma_gas_SNR_cut=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  result = getattr(ufunc, method)(*inputs, **kwargs)




In [5]:
# Load the LZIFU galaxies
# Same options and keyword values as the SAMI load, so the two DataFrames are
# loaded with matching settings.
df_lzifu = load_lzifu_galaxies(ncomponents=ncomponents,
                              bin_type=bin_type,
                              eline_SNR_min=eline_SNR_min, 
                              vgrad_cut=False,
                              correct_extinction=False,
                              sigma_gas_SNR_cut=True)        


Loading LZIFU DataFrame for all galaxies in the LZIFU subsample...


  result = getattr(ufunc, method)(*inputs, **kwargs)
  return -0.943 * ratio_y_vals**4 - 0.450 * ratio_y_vals**3 + 0.408 * ratio_y_vals**2 - 0.610 * ratio_y_vals - 0.025




In [6]:
# Plot settings
# col_x / col_y are column-name stems: the plotting cells append
# " (component N)" to them to select the columns for each component.
col_x = "sigma_gas - sigma_*"
col_y = "log HALPHA EW"
nbins = 100     # Passed as nbins to plot2dhistcontours

# Spectral categories
# These strings are compared against the "BPT (...)" columns to select subsets.
cats = ["SF", "Composite", "Ambiguous", "Seyfert", "LINER", "Not classified"]


## Classification based on *total* fluxes: WHAV* diagrams
---

In [7]:
###############################################################################
# WHAV diagram (hist) - spectral classification (all together)
###############################################################################
col_z = "BPT (numeric) (total)"

# One panel per component, with a shared colourbar axis placed immediately to
# the right of the last panel
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5 * 3, 5))
fig.subplots_adjust(wspace=0)
bbox = axs[-1].get_position()
cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, 0.03, bbox.height])

# recom components
for ii, ax in enumerate(axs):
    # Colour by the per-component column when df has one; otherwise fall back
    # to the classification column named by col_z
    col_z_component = f"{col_z} (component {ii})"
    if col_z_component not in df:
        col_z_component = col_z

    # The colourbar is only drawn once, together with the last panel
    is_last_panel = ii == len(axs) - 1
    plot2dhistcontours(df,
                       col_x=f"{col_x} (component {ii})",
                       col_y=f"{col_y} (component {ii})",
                       col_z=col_z_component,
                       alpha=1.0, ax=ax, cax=cax, nbins=nbins,
                       linewidths=0.5,
                       contours=True, hist=True, colors="white",
                       plot_colorbar=is_last_panel)

    # Decorations: the panels touch (wspace=0), so only the left-most one keeps
    # its y-axis label and tick labels
    ax.grid()
    if ii > 0:
        ax.set_ylabel("")
        ax.set_yticklabels([])
    ax.set_title(component_labels[ii])
        

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
###############################################################################
# WHAV diagram (hist) - spectral classification (individually)
###############################################################################
# One figure per spectral category (based on the *total* fluxes), with one
# panel per component.
for cat in cats:
    df_subset = df.loc[df["BPT (total)"] == cat]
    has_data = df_subset.shape[0] > 0

    # Create figure
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5 * 3, 5))
    fig.subplots_adjust(wspace=0)
    bbox = axs[-1].get_position()
    cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, 0.03, bbox.height])
    # Set once per figure rather than once per panel
    fig.suptitle(cat)
    if not has_data:
        # Nothing will be drawn into the colourbar axes, so hide them
        cax.set_visible(False)

    # recom components
    for ii in range(3):
        # Skip the plot call for categories with no rows, as is done in the
        # LZIFU version of this cell; the panel is still decorated below
        if has_data:
            plot2dhistcontours(df_subset, 
                               col_x=f"{col_x} (component {ii})",
                               col_y=f"{col_y} (component {ii})",
                               # col_z="log SFR surface density (component 0)", log_z=False,
                               # col_z="log N2 (total)", log_z=False,
                               # vmin=-0.4 - 0.6, vmax=-0.4 + 0.6, cmap="coolwarm",
                               col_z="count", log_z=True,
                               alpha=1.0, ax=axs[ii], cax=cax, nbins=nbins,
                               linewidths=0.5,
                               contours=True, hist=True, colors="white",
                               plot_colorbar=(ii == 3 - 1))
        # Decorations: only the left-most panel keeps its y-axis labels
        axs[ii].grid()
        if ii > 0:
            axs[ii].set_ylabel("")
            axs[ii].set_yticklabels([])
        axs[ii].set_title(component_labels[ii])


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  linewidths=linewidths)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Classification based on *individual* fluxes: WHAV* diagrams
---

In [None]:
###############################################################################
# WHAV diagram (hist) - LZIFU - spectral classification
###############################################################################
col_z = "BPT (numeric)"

# One panel per component, with a shared colourbar axis placed immediately to
# the right of the last panel
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5 * 3, 5))
fig.subplots_adjust(wspace=0)
bbox = axs[-1].get_position()
cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, 0.03, bbox.height])

# recom components
for ii, ax in enumerate(axs):
    # Colour by the per-component column when df_lzifu has one; otherwise fall
    # back to the column named by col_z
    col_z_component = f"{col_z} (component {ii})"
    if col_z_component not in df_lzifu:
        col_z_component = col_z

    # The colourbar is only drawn once, together with the last panel
    is_last_panel = ii == len(axs) - 1
    plot2dhistcontours(df_lzifu,
                       col_x=f"{col_x} (component {ii})",
                       col_y=f"{col_y} (component {ii})",
                       col_z=col_z_component,
                       alpha=1.0, ax=ax, cax=cax, nbins=nbins,
                       linewidths=0.5,
                       contours=True, hist=True, colors="white",
                       plot_colorbar=is_last_panel)

    # Decorations: the panels touch (wspace=0), so only the left-most one keeps
    # its y-axis label and tick labels
    ax.grid()
    if ii > 0:
        ax.set_ylabel("")
        ax.set_yticklabels([])
    ax.set_title(component_labels[ii])
        

In [None]:
###############################################################################
# WHAV diagram (hist) - LZIFU - spectral classification (individually)
###############################################################################
# One figure per spectral category, with one panel per component. Here the
# category is taken from each component's own BPT classification, so the
# subset differs from panel to panel.
for cat in cats:
    
    # Create figure
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5 * 3, 5))
    fig.subplots_adjust(wspace=0)
    bbox = axs[-1].get_position()
    cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, 0.03, bbox.height])
    # Label every figure, including those in which all panels are empty
    fig.suptitle(cat)

    # recom components
    for ii in range(3):
        df_subset = df_lzifu.loc[df_lzifu[f"BPT (component {ii})"] == cat]
        if df_subset.shape[0] > 0:
            plot2dhistcontours(df_subset, 
                               col_x=f"{col_x} (component {ii})",
                               col_y=f"{col_y} (component {ii})",
                               col_z="count", log_z=True,
                               alpha=1.0, ax=axs[ii], cax=cax, nbins=nbins,
                               linewidths=0.5,
                               contours=True, hist=True, colors="white",
                               plot_colorbar=(ii == 3 - 1))
        elif ii == 3 - 1:
            # The colourbar is only drawn with the last panel; if that panel is
            # empty nothing is drawn into cax, so hide the unused axes
            cax.set_visible(False)

        # Decorations: applied to empty panels as well so that they can still
        # be identified; only the left-most panel keeps its y-axis labels
        axs[ii].grid()
        if ii > 0:
            axs[ii].set_ylabel("")
            axs[ii].set_yticklabels([])
        axs[ii].set_title(component_labels[ii])


## Making a monster DataFrame containing the *individual* fluxes from our LZIFU fits plus the single-component spaxels from SAMI 
---

In [None]:
#//////////////////////////////////////////////////////////////////////////////
# 1-component spaxels from SAMI
# The "total" BPT classification and log N2 columns are relabelled as
# "component 0" so that they are consistent with the labelling in the LZIFU
# DataFrame
total_to_comp0 = {
    "BPT (total)": "BPT (component 0)",
    "BPT (numeric) (total)": "BPT (numeric) (component 0)",
    "log N2 (total)": "log N2 (component 0)",
}
df_1comp = df[df["Number of components"] == 1].rename(columns=total_to_comp0)

# Drop bad rows: a row is "good" when both WHAV* coordinates of component 0
# are present
has_x = df_1comp[f"{col_x} (component 0)"].notna()
has_y = df_1comp[f"{col_y} (component 0)"].notna()
df_1comp.loc[:, "Good?"] = has_x & has_y
cond = df_1comp["Good?"] == 1.0
df_1comp = df_1comp[cond]


In [None]:
#//////////////////////////////////////////////////////////////////////////////
# Make a copy of the LZIFU data frame where the fluxes/classifications for 
# all components are stored in "component 0", i.e. every fitted component
# becomes its own row.
components = [0, 1, 2]
dfs_per_component = []
for ii in components:
    # Rows in which component ii has a measured sigma_gas - sigma_*
    df_this_component = df_lzifu[df_lzifu[f"sigma_gas - sigma_* (component {ii})"].notna()]

    # Drop all columns that are NOT this component: the "(total)" columns and
    # the columns belonging to the other components
    other_tags = ["(total)"] + [f"(component {cc})" for cc in components if cc != ii]
    cols_to_drop = [c for c in df_this_component.columns if any(tag in c for tag in other_tags)]
    df_this_component = df_this_component.drop(columns=cols_to_drop)

    # Rename columns to have suffix "component 0"
    if ii != 0:
        suffix = f"(component {ii})"
        rename_dict = {c: c.split(suffix)[0] + "(component 0)"
                       for c in df_this_component.columns if c.endswith(suffix)}
        df_this_component = df_this_component.rename(columns=rename_dict)

    # One-line summary instead of dumping the whole column
    print(f"Component {ii}: {df_this_component.shape[0]} rows")
    dfs_per_component.append(df_this_component)

# Merge 
# A single pd.concat replaces the repeated DataFrame.append calls:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
df_lzifu_comp0 = pd.concat(dfs_per_component)
print(f"All components: {df_lzifu_comp0.shape[0]} rows")

# Drop bad rows
# The concatenated frame repeats index labels (one spaxel can contribute up to
# three rows), so the flag is assigned positionally rather than by label.
is_good = df_lzifu_comp0[f"{col_x} (component 0)"].notna() & df_lzifu_comp0[f"{col_y} (component 0)"].notna()
df_lzifu_comp0["Good?"] = is_good.to_numpy()
cond = df_lzifu_comp0["Good?"] == 1.0
# .copy() so that later column assignments do not act on a slice
df_lzifu_comp0 = df_lzifu_comp0[cond].copy()

In [None]:
#//////////////////////////////////////////////////////////////////////////////
# Merge 
# Need to avoid double-counting spaxels... 
# Find common galaxies and remove these from the SAMI 1-comp data frame 
lzifu_galaxies = df_lzifu_comp0.catid.unique()
sami_galaxies = df_1comp.catid.unique()
# Set lookup keeps the membership tests linear in the number of galaxies
sami_galaxy_set = set(sami_galaxies)
common_galaxies = [g for g in lzifu_galaxies if g in sami_galaxy_set]
lzifu_only_galaxies = [g for g in lzifu_galaxies if g not in sami_galaxy_set]

# Remove the common galaxies from the SAMI data frame
# A boolean mask is used instead of dropping by index label, so the result does
# not depend on the index labels being unique.
rows_to_drop = df_1comp.catid.isin(common_galaxies)
df_1comp_dropped = df_1comp[~rows_to_drop].copy()

# Check
assert not df_1comp_dropped.catid.isin(lzifu_galaxies).any(),\
    "ERROR: there are galaxies in the 1-component SAMI data set that are also in the LZIFU data set!"
assert np.all(df_lzifu_comp0["Good?"].values),\
    "ERROR: there are 'bad' rows in df_lzifu_comp0!"
assert np.all(df_1comp_dropped["Good?"].values),\
    "ERROR: there are 'bad' rows in df_1comp_dropped!"

# Now, merge 
# "Source" records which data set each row came from. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0.
df_1comp_dropped["Source"] = "SAMI data set (1-comp only)"
df_lzifu_comp0 = df_lzifu_comp0.assign(Source="LZIFU fits (multi-comp)")
df_merged = pd.concat([df_lzifu_comp0, df_1comp_dropped])


In [None]:
# Check! 
# Look up sigma_gas of the merged rows for one spaxel of galaxy 491956
cond = (
    (df_merged["catid"] == 491956)
    & (df_merged["x (projected, arcsec)"] == 12)
    & (df_merged["y (projected, arcsec)"] == 12)
)
df_merged.loc[cond, "sigma_gas (component 0)"]

In [None]:
# The same spaxel in the original LZIFU frame, one sigma_gas column per
# component, for comparison with the merged rows
cond = (
    (df_lzifu["catid"] == 491956)
    & (df_lzifu["x (projected, arcsec)"] == 12)
    & (df_lzifu["y (projected, arcsec)"] == 12)
)
sigma_gas_cols = [f"sigma_gas (component {nn})" for nn in range(3)]
df_lzifu.loc[cond, sigma_gas_cols]

In [None]:
# Display the "log N2 (component 0)" column of the SAMI 1-component frame
# (renamed from "log N2 (total)" when df_1comp was built)
df_1comp_dropped["log N2 (component 0)"]

In [None]:
###############################################################################
# WHAV diagram (hist) - spectral classification (individually), all shown on 
# the SAME diagram
###############################################################################
# Grid of panels: one row per sample, one column per spectral category. Each
# panel is coloured by log N2 of component 0.
nbins = 100
samples = [
    (df_1comp_dropped, "SAMI 1-component spaxels only"),
    (df_lzifu_comp0, "LZIFU individual components"),
    (df_merged, "SAMI 1-comp. spaxels + LZIFU ind. components"),
]
fig, axs = plt.subplots(nrows=len(samples), ncols=len(cats),
                        figsize=(3 * len(cats), 3 * len(samples)))
plt.tight_layout()
fig.subplots_adjust(wspace=0.3)

for rr, (df_this, name) in enumerate(samples):
    for cc, cat in enumerate(cats):
        ax = axs[rr][cc]

        # Plot: 
        df_subset = df_this.loc[df_this["BPT (component 0)"] == cat]
        N = df_subset.shape[0]
        if N > 0:
            # The colourbar axes are only created for panels that are plotted,
            # so empty panels are not left with a blank set of axes beside them
            bbox = ax.get_position()
            cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, bbox.width / 10, bbox.height])
            plot2dhistcontours(df_subset, 
                               col_x=f"{col_x} (component 0)",
                               col_y=f"{col_y} (component 0)",
                               # col_z="r/R_e", log_z=False,
                               col_z="log N2 (component 0)", log_z=False,
                               vmin=-0.4 - 0.6, vmax=-0.4 + 0.6, cmap="coolwarm",
                               alpha=1.0, ax=ax, cax=cax, nbins=nbins,
                               linewidths=0.5, 
                               contours=True, hist=True, colors="white",
                               plot_colorbar=True)

        # Decorations
        ax.grid()
        ax.text(s=f"{cat} (N = {N:d})", x=0.1, y=0.9, verticalalignment="top", transform=ax.transAxes)

    # Name the sample once per row, on its left-most panel, after that panel
    # has been plotted
    axs[rr][0].set_title(name)

# plt.tight_layout()
        

### Picking out the weirdos
--- 
* What are the galaxies with the highest EWs in the LINER subset? **There are a couple that are bona fide LLAGN with only minimal evidence for outflows (9388000001 and 9008500002), one with bona fide merger-induced shocks (517278) and one that appears to be SF-bulge mixing but, for some reason, has LINER-like line ratios.**
* What are the Seyferts? 

In [None]:
# LINER components in the LZIFU sample with moderate-high EWs (EW > 3) and
# strongly negative delta_sigma (sigma_gas - sigma_* < -100)
has_high_ew = df_lzifu_comp0["HALPHA EW (component 0)"] > 3
is_liner = df_lzifu_comp0["BPT (component 0)"] == "LINER"
has_negative_delta_sigma = df_lzifu_comp0["sigma_gas - sigma_* (component 0)"] < -100
df_liners = df_lzifu_comp0[has_high_ew & is_liner & has_negative_delta_sigma]
df_liners["catid"].unique()

In [None]:
# LINER spaxels in the SAMI 1-component sample with high EWs (EW > 14) and
# positive delta_sigma (sigma_gas - sigma_* > 0)
has_high_ew = df_1comp_dropped["HALPHA EW (component 0)"] > 14
is_liner = df_1comp_dropped["BPT (component 0)"] == "LINER"
has_positive_delta_sigma = df_1comp_dropped["sigma_gas - sigma_* (component 0)"] > 0
df_liners = df_1comp_dropped[has_high_ew & is_liner & has_positive_delta_sigma]
df_liners["catid"].unique()

In [None]:
# Seyferts: pick the galaxies with more than 10 Seyfert-like components
df_seyferts = df_lzifu_comp0[df_lzifu_comp0["BPT (component 0)"] == "Seyfert"]
seyfert_gals = df_seyferts.catid.unique()

# Every row of df_seyferts is already a Seyfert component, so the number of
# rows per catid is the number of Seyfert-like components in that galaxy
n_seyfert_per_gal = df_seyferts["catid"].value_counts()
good_gals = [gal for gal in seyfert_gals if n_seyfert_per_gal.get(gal, 0) > 10]
good_gals

## Try creating classification rules
---

In [None]:
# Dividing lines in the WHAV* plane, used by the boundary cells that follow
delta_sigma_bound = 0           # Threshold in sigma_gas - sigma_* (printed in km s^-1)
halpha_ew_upper_bound = 14      # Upper threshold in EW(Ha) (printed in Å)
halpha_ew_lower_bound = 3       # Lower threshold in EW(Ha) (printed in Å)

# Only include classified spaxels for now
# NOTE(review): df_classified is not read by any cell visible here, and
# df_to_use below is not filtered on classification — confirm which is intended
df_classified = df_merged[df_merged["BPT (component 0)"] != "Not classified"]

# Which DataFrame to use for these calculations
df_to_use = df_lzifu_comp0

In [None]:
# The printed bounds use the same (inclusive) inequalities as the masks below
print("-------------------------------------------------------")
print("STAR-FORMING BOUNDARY")
print(f"sigma_gas - sigma_* <= {delta_sigma_bound:.0f} km s^-1")
print(f"EW(Ha) >= {halpha_ew_upper_bound:.0f} Å")
print("-------------------------------------------------------")

# What % of components in different categories are within these bounds?
# The region does not depend on the category, so it is evaluated once.
cond_in_bounds = df_to_use["sigma_gas - sigma_* (component 0)"] <= delta_sigma_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] >= halpha_ew_upper_bound
for cat in cats:
    cond_cat = df_to_use["BPT (component 0)"] == cat
    n_cat = int(cond_cat.sum())
    if n_cat == 0:
        # Avoid dividing by zero for categories that are absent from df_to_use
        print(f"There are no {cat} components in this sample")
        continue
    cond_cat_in_bounds = cond_cat & cond_in_bounds
    frac_in_bounds = int(cond_cat_in_bounds.sum()) / n_cat * 100
    print(f"{frac_in_bounds:.2f}% of all {cat} components are within these bounds")
print("-------------------------------------------------------")


In [None]:
# The printed bounds use the same inequalities as the masks below
print("-------------------------------------------------------")
print("COMPOSITE BOUNDARY")
print(f"sigma_gas - sigma_* <= {delta_sigma_bound:.0f} km s^-1")
print(f"{halpha_ew_lower_bound:.0f} Å <= EW(Ha) < {halpha_ew_upper_bound:.0f} Å")
print("-------------------------------------------------------")

# What % of components in different categories are within these bounds?
# The region does not depend on the category, so it is evaluated once.
cond_in_bounds = df_to_use["sigma_gas - sigma_* (component 0)"] <= delta_sigma_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] < halpha_ew_upper_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] >= halpha_ew_lower_bound
for cat in cats:
    cond_cat = df_to_use["BPT (component 0)"] == cat
    n_cat = int(cond_cat.sum())
    if n_cat == 0:
        # Avoid dividing by zero for categories that are absent from df_to_use
        print(f"There are no {cat} components in this sample")
        continue
    cond_cat_in_bounds = cond_cat & cond_in_bounds
    frac_in_bounds = int(cond_cat_in_bounds.sum()) / n_cat * 100
    print(f"{frac_in_bounds:.2f}% of all {cat} components are within these bounds")
print("-------------------------------------------------------")


In [None]:
# The printed bounds use the same inequalities as the masks below
print("-------------------------------------------------------")
print("LINER BOUNDARY")
print(f"sigma_gas - sigma_* <= {delta_sigma_bound:.0f} km s^-1")
print(f"EW(Ha) < {halpha_ew_lower_bound:.0f} Å")
print("-------------------------------------------------------")

# What % of components in different categories are within these bounds?
# The region does not depend on the category, so it is evaluated once.
cond_in_bounds = df_to_use["sigma_gas - sigma_* (component 0)"] <= delta_sigma_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] < halpha_ew_lower_bound
for cat in cats:
    cond_cat = df_to_use["BPT (component 0)"] == cat
    n_cat = int(cond_cat.sum())
    if n_cat == 0:
        # Avoid dividing by zero for categories that are absent from df_to_use
        print(f"There are no {cat} components in this sample")
        continue
    cond_cat_in_bounds = cond_cat & cond_in_bounds
    frac_in_bounds = int(cond_cat_in_bounds.sum()) / n_cat * 100
    print(f"{frac_in_bounds:.2f}% of all {cat} components are within these bounds")
print("-------------------------------------------------------")


In [None]:
# The EW bound is inclusive here: the region at lower EW uses a strict
# "EW(Ha) < lower bound", so a strict ">" on this side would leave components
# with EW exactly equal to the lower bound in neither region.
print("-------------------------------------------------------")
print("SEYFERT/SHOCK BOUNDARY")
print(f"sigma_gas - sigma_* > {delta_sigma_bound:.0f} km s^-1")
print(f"EW(Ha) >= {halpha_ew_lower_bound:.0f} Å")
print("-------------------------------------------------------")

# What % of components in different categories are within these bounds?
# The region does not depend on the category, so it is evaluated once.
cond_in_bounds = df_to_use["sigma_gas - sigma_* (component 0)"] > delta_sigma_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] >= halpha_ew_lower_bound
for cat in cats:
    cond_cat = df_to_use["BPT (component 0)"] == cat
    n_cat = int(cond_cat.sum())
    if n_cat == 0:
        # Avoid dividing by zero for categories that are absent from df_to_use
        print(f"There are no {cat} components in this sample")
        continue
    cond_cat_in_bounds = cond_cat & cond_in_bounds
    frac_in_bounds = int(cond_cat_in_bounds.sum()) / n_cat * 100
    print(f"{frac_in_bounds:.2f}% of all {cat} components are within these bounds")
print("-------------------------------------------------------")


In [None]:
# Remaining region of the WHAV* plane: broad gas component but low EW
print("-------------------------------------------------------")
print("??? BOUNDARY")
print(f"sigma_gas - sigma_* > {delta_sigma_bound:.0f} km s^-1")
print(f"EW(Ha) < {halpha_ew_lower_bound:.0f} Å")
print("-------------------------------------------------------")

# What % of components in different categories are within these bounds?
# The region does not depend on the category, so it is evaluated once.
cond_in_bounds = df_to_use["sigma_gas - sigma_* (component 0)"] > delta_sigma_bound
cond_in_bounds &= df_to_use["HALPHA EW (component 0)"] < halpha_ew_lower_bound
for cat in cats:
    cond_cat = df_to_use["BPT (component 0)"] == cat
    n_cat = int(cond_cat.sum())
    if n_cat == 0:
        # Avoid dividing by zero for categories that are absent from df_to_use
        print(f"There are no {cat} components in this sample")
        continue
    cond_cat_in_bounds = cond_cat & cond_in_bounds
    frac_in_bounds = int(cond_cat_in_bounds.sum()) / n_cat * 100
    print(f"{frac_in_bounds:.2f}% of all {cat} components are within these bounds")
print("-------------------------------------------------------")
