In [1]:
# Select the interactive widget (ipympl) backend so figures render as widgets.
%matplotlib widget

In [2]:
# Imports
import sys
import os 
import numpy as np
import pandas as pd
from astropy.visualization import hist
from tqdm import tqdm
from scipy import constants
from scipy.stats import ks_2samp, anderson_ksamp, spearmanr

from spaxelsleuth.loaddata.lzifu import load_lzifu_galaxies
from spaxelsleuth.loaddata.sami import load_sami_galaxies
from spaxelsleuth.plotting.plottools import plot_empty_BPT_diagram
from spaxelsleuth.plotting.plottools import vmin_fn, vmax_fn, label_fn, cmap_fn, fname_fn
from spaxelsleuth.plotting.plottools import bpt_colours, bpt_labels, whav_colors, whav_labels
from spaxelsleuth.plotting.plottools import morph_labels, morph_ticks
from spaxelsleuth.plotting.plottools import ncomponents_labels, ncomponents_colours
from spaxelsleuth.plotting.plottools import component_labels, component_colours
from spaxelsleuth.plotting.plotgalaxies import plot2dhistcontours, plot2dscatter, plot2dcontours
from spaxelsleuth.plotting.plot2dmap import plot2dmap
from spaxelsleuth.plotting.sdssimg import plot_sdss_image

import matplotlib
from matplotlib import rc, rcParams
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

from IPython.core.debugger import Tracer

# Global matplotlib configuration: no LaTeX text rendering, 11 pt serif font,
# and tightly-cropped PDF output for saved figures.
rc("text", usetex=False)
rc("font", family="serif", size=11)
rcParams.update({"savefig.bbox": "tight", "savefig.format": "pdf"})

# Interactive mode on; start from a clean slate with no open figures.
plt.ion()
plt.close("all")


In [3]:
# Options
# NOTE(review): fig_path is an absolute, machine-specific directory; with
# savefigs = True the plotting cells below write PDFs underneath it.
fig_path = "/priv/meggs3/u5708159/SAMI/figs/paper/"
savefigs = True
bin_type = "default"    # Options: "default" or "adaptive" for Voronoi binning
ncomponents = "recom"   # Options: "1" or "recom"
eline_SNR_min = 5       # Minimum S/N of emission lines to accept
# Close any figures that are still open before (re-)running the analysis.
plt.close("all")


In [4]:
# Load the sample using the options chosen in the "Options" cell.
# The sample-definition arguments come first, then the quality-cut switches.
df = load_sami_galaxies(
    ncomponents=ncomponents,
    bin_type=bin_type,
    eline_SNR_min=eline_SNR_min,
    vgrad_cut=False,
    line_amplitude_SNR_cut=True,
    correct_extinction=False,
    sigma_gas_SNR_cut=True,
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,c



# Using the KS and AD 2-sample tests to investigate the drivers of the number of components measured in each spaxel
---

See [here](https://stats.stackexchange.com/questions/465196/kolmogorov-smirnov-test-statistic-interpretation-with-large-samples) for a good discussion.

Interpreting the statistic values: in both the KS and AD tests, larger values imply greater deviation between the underlying distributions of the two samples.

### Select subsample

In [70]:
# Full sample, partitioned by the value of "Number of components" (1, 2 or 3)
n_comp = df["Number of components"]
df_1comp, df_2comp, df_3comp = (df[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = "All spaxels"
fname = "hist_statistics_whole_sample"

In [61]:
# AGN-only: rows classified "Seyfert", plus rows classified "LINER" whose
# "HALPHA EW (total)" exceeds 3
is_seyfert = df["BPT (total)"] == "Seyfert"
is_high_ew_liner = (df["BPT (total)"] == "LINER") & (df["HALPHA EW (total)"] > 3)
df_AGN = df.copy()[is_seyfert | is_high_ew_liner]

# Partition by the value of "Number of components"
n_comp = df_AGN["Number of components"]
df_1comp, df_2comp, df_3comp = (df_AGN[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = "AGN-like spaxels only"
fname = "hist_statistics_AGN-like_only"

In [6]:
# SF-only: keep the rows whose total BPT classification is "SF"
is_sf = df["BPT (total)"] == "SF"
df_SF = df.copy()[is_sf]

# Partition by the value of "Number of components"
n_comp = df_SF["Number of components"]
df_1comp, df_2comp, df_3comp = (df_SF[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = "SF spaxels only"
fname = "hist_statistics_SF_only"

In [22]:
# SF-only, with beam smearing cut (requires df_SF from the SF-only cell)
df_SF_beam_smearing_cut = df_SF.copy()
for nn in range(3):
    # Rows in which this component carries the beam smearing flag
    flagged = df_SF_beam_smearing_cut[f"Beam smearing flag (component {nn})"] == True

    # Blank the kinematic measurements (and their errors) of the flagged cells
    kinematic_cols = [f"{qty} (component {nn})"
                      for qty in ("v_gas", "sigma_gas", "v_gas error", "sigma_gas error")]
    df_SF_beam_smearing_cut.loc[flagged, kinematic_cols] = np.nan

# Partition by the value of "Number of components"
n_comp = df_SF_beam_smearing_cut["Number of components"]
df_1comp, df_2comp, df_3comp = (df_SF_beam_smearing_cut[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = "SF spaxels only, beam smearing cut"
fname = "hist_statistics_SF_only_BScut"

In [24]:
# SF-only, with beam smearing cut, keeping only rows with r/R_e > 1
# (requires df_SF_beam_smearing_cut from the beam-smearing cell)
beyond_Re = df_SF_beam_smearing_cut["r/R_e"] > 1.0
df_SF_large_radii = df_SF_beam_smearing_cut.copy()[beyond_Re]

# Partition by the value of "Number of components"
n_comp = df_SF_large_radii["Number of components"]
df_1comp, df_2comp, df_3comp = (df_SF_large_radii[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = r"SF spaxels only, beam smearing cut, $r/R_e > 1$"
fname = "hist_statistics_SF_only_BScut_large_radii"

In [26]:
# SF-only, with beam smearing cut, r/R_e > 1, and inclination below 30 degrees
# (requires df_SF_beam_smearing_cut from the beam-smearing cell)
beyond_Re = df_SF_beam_smearing_cut["r/R_e"] > 1.0
low_inclination = df_SF_beam_smearing_cut["Inclination i (degrees)"] < 30
df_SF_large_radii = df_SF_beam_smearing_cut.copy()[beyond_Re & low_inclination]
# Optional redshift cut, currently disabled:
# df_SF_large_radii = df_SF_large_radii[df_SF_large_radii["z_spec"] < 0.04]

# Partition by the value of "Number of components"
n_comp = df_SF_large_radii["Number of components"]
df_1comp, df_2comp, df_3comp = (df_SF_large_radii[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = r"SF spaxels only, beam smearing cut, $r/R_e > 1$, $i < 30^\circ$"
fname = "hist_statistics_SF_only_BScut_large_radii_low_inclination"

In [22]:
# SF only, low inclination only, with low-flux secondary components cut
df_cut = df.copy()
for nn in (1, 2):
    # Flag rows where this component's "HALPHA A" is below 5% of component 0's
    flag_col = f"Low flux component (component {nn})"
    df_cut[flag_col] = (
        df_cut[f"HALPHA A (component {nn})"] < 0.05 * df_cut["HALPHA A (component 0)"]
    )

    # Blank every column of the flagged rows (not just this component's columns)
    df_cut.loc[df_cut[flag_col]] = np.nan

# SF rows, then the subset with inclination below 30 degrees
is_sf = df_cut["BPT (total)"] == "SF"
df_SF_cut = df_cut.copy()[is_sf]
df_SF_low_inc = df_SF_cut[df_SF_cut["Inclination i (degrees)"] < 30]

# Partition by the value of "Number of components"
n_comp = df_SF_low_inc["Number of components"]
df_1comp, df_2comp, df_3comp = (df_SF_low_inc[n_comp == nn] for nn in (1, 2, 3))

# Figure title and output file stem for this subsample
fig_title = r"SF spaxels only, $i < 30^\circ$"
fname = "hist_statistics_SF_only_low_inclination"

## Run KS, AD tests

In [23]:
# Report the size of each subsample and its share of the three-way total
subsample_sizes = [len(df_comp) for df_comp in (df_1comp, df_2comp, df_3comp)]
n_tot = sum(subsample_sizes)
print(f"Number of spaxels with emission lines: {n_tot}")
for n_components, n_spaxels in enumerate(subsample_sizes, start=1):
    print(f"Number of {n_components}-component spaxels: {n_spaxels} ({n_spaxels / n_tot * 100}%)")


Number of spaxels with emission lines: 13836
Number of 1-component spaxels: 12645 (91.39202081526453%)
Number of 2-component spaxels: 1023 (7.393755420641804%)
Number of 3-component spaxels: 168 (1.2142237640936688%)


In [12]:
# Switches read by the histogram cells below:
#   print_test_scores: annotate each panel with the KS and AD test results
#   individual_figs:   draw one figure per quantity instead of a single grid
print_test_scores = False
individual_figs = False

In [16]:
# Quick look at the "delta v_gas (2/1)" column of the full sample
df["delta v_gas (2/1)"]

0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
           ..
2804639   NaN
2805539   NaN
2806730   NaN
2810176   NaN
2815127   NaN
Name: delta v_gas (2/1), Length: 2826785, dtype: float64

In [24]:
# Histogram every quantity in `cols` for the current df_1comp / df_2comp /
# df_3comp subsamples (set by whichever "Select subsample" cell was run last).
# If print_test_scores is set, each panel is annotated with the two-sample KS
# and AD test results comparing the first two subsamples.
alpha = 0.01  # p-value below which we reject the null hypothesis
cols = ["log HALPHA EW (total)", "log HALPHA EW (component 0)", "log SFR (component 0)", "log SFR surface density (component 0)", 
        "sigma_gas (component 0)", "v_gas (component 0)", "sigma_gas - sigma_* (component 0)", "delta v_gas (1/0)", 
        "sigma_gas (component 1)", "v_gas (component 1)", "sigma_gas - sigma_* (component 1)", "delta v_gas (2/1)", 
        "r/R_e", "HALPHA extinction correction",  "v_*", "sigma_*",
        "D4000", "log M_*", "R_e (kpc)", "log(M/R_e)", 
        "Inclination i (degrees)", "Bin size (square kpc)", "z_spec", "v_grad (component 0)",
        "log O3 (total)", "log N2 (total)", "log S2 (total)", "log O1 (total)"]

n_panel_cols = 4
if not individual_figs:
    # Round the number of rows UP so that every quantity gets a panel even when
    # len(cols) is not a multiple of the number of panel columns.
    n_panel_rows = -(-len(cols) // n_panel_cols)
    fig, axs = plt.subplots(nrows=n_panel_rows, ncols=n_panel_cols, figsize=(20, 20), squeeze=False)
    fig.subplots_adjust(wspace=0.35, hspace=0.4)
    # Hide any spare panels at the end of the grid
    for ax_spare in axs.flat[len(cols):]:
        ax_spare.set_visible(False)

for cc, col_x in enumerate(cols):
    if not individual_figs:
        ax = axs.flat[cc]
    else:
        fig, ax = plt.subplots(1, 1, figsize=(4, 4))

    # Extract the non-NaN values of this quantity from each subsample
    d1, d2, d3 = (df_comp[col_x].dropna().to_numpy(dtype=float)
                  for df_comp in (df_1comp, df_2comp, df_3comp))

    # Both tests raise on an empty sample, so only run them when there is data
    if print_test_scores and len(d1) > 0 and len(d2) > 0:
        text_kwargs = dict(x=0.05, verticalalignment="top", fontsize="small", transform=ax.transAxes)

        # Run the 2-sample KS test; the annotation is red when the null
        # hypothesis is rejected at level alpha
        r_KS = ks_2samp(d1, d2)
        ks_kwargs = dict(text_kwargs, y=0.95)
        if r_KS.pvalue < alpha:
            ks_kwargs["color"] = "red"
        ax.text(s=r"KS: $%.2f$ ($p = %.3f$)" % (r_KS.statistic, r_KS.pvalue), **ks_kwargs)

        # Run the 2-sample AD test; same colour convention
        r_AD = anderson_ksamp([d1, d2])
        ad_kwargs = dict(text_kwargs, y=0.88)
        if r_AD.significance_level < alpha:
            ad_kwargs["color"] = "red"
            ad_label = r"AD: $%.2f$ ($p \leq %.3f$)"
        else:
            ad_label = r"AD: $%.2f$ ($p = %.3f$)"
        ax.text(s=ad_label % (r_AD.statistic, r_AD.significance_level), **ad_kwargs)

    # Plot the distributions of the quantity in 1, 2 and 3-component spaxels
    for ii, d in enumerate([d1, d2, d3]):
        if len(d) > 0:
            hist(d, density=True, histtype="step",
                 ax=ax, 
                 range=(vmin_fn(col_x), vmax_fn(col_x)),
                 bins="scott",
                 label=f"{ncomponents_labels[ii + 1]} comp.",
                 color=ncomponents_colours[ii + 1])

    # Decorations
    # NOTE(review): label_fn() appears to drop the "(component N)" suffix — confirm.
    # Re-append it, 1-indexed, for whichever component the column refers to
    # (previously only component 0 was handled, leaving the other components'
    # panels without a component label).
    xlabel = label_fn(col_x)
    for nn in range(3):
        if f"(component {nn})" in col_x:
            xlabel += f" (component {nn + 1})"
            break
    ax.set_xlabel(xlabel)
    ax.autoscale(enable=True, axis="x", tight=True)
    ax.set_ylabel(r"$N$ (normalised)")
    if individual_figs:
        ax.legend(loc="upper right", fontsize="x-small")
        if savefigs:
            # Make sure the output directory exists before writing into it
            ind_dir = os.path.join(fig_path, "individual_hist_plots")
            os.makedirs(ind_dir, exist_ok=True)
            fname_ind = os.path.join(ind_dir, fname) + "_" + fname_fn(col_x) + ".pdf"
            print(f"Saving file to {fname_ind}")
            fig.savefig(fname_ind, bbox_inches="tight", format="pdf")

if not individual_figs:       
    axs[0][0].legend(loc="lower center", bbox_to_anchor=[0.5, 1.1], fontsize="small")
    fig.suptitle(fig_title, y=0.9)

    # Save
    if savefigs:
        print(f"Saving file to {os.path.join(fig_path, fname)}.pdf")
        fig.savefig(os.path.join(fig_path, fname) + ".pdf", bbox_inches="tight", format="pdf")

    

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saving file to /priv/meggs3/u5708159/SAMI/figs/paper/hist_statistics_SF_only_low_inclination.pdf


# Using the KS and AD 2-sample tests to investigate the drivers of the number of *kinematically disturbed* components measured in each spaxel
---

In [168]:
# Close all open figures before starting this section
plt.close("all")

### Select subsample

In [33]:
# Test: among 2-component spaxels, compare those with NO KD components against
# those with at least 1 (and at least 2) KD components
n_kd = df["Number of kinematically disturbed components"]
is_2comp = df["Number of components"] == 2
df_1comp = df[(n_kd == 0) & is_2comp]
df_2comp = df[(n_kd >= 1) & is_2comp]
df_3comp = df[(n_kd >= 2) & is_2comp]

# Figure title and full output path for this subsample
fig_title = "2-component spaxels only"
fname = os.path.join(fig_path, "hist_2comp_KDvsNoKD_statistics_whole_sample.pdf")

In [35]:
# Same comparison (no KD vs. >= 1 vs. >= 2 KD components among 2-component
# spaxels), restricted to rows whose total BPT classification is "SF"
is_sf = df["BPT (total)"] == "SF"
df_SF = df.copy()[is_sf]

n_kd = df_SF["Number of kinematically disturbed components"]
is_2comp = df_SF["Number of components"] == 2
df_1comp = df_SF[(n_kd == 0) & is_2comp]
df_2comp = df_SF[(n_kd >= 1) & is_2comp]
df_3comp = df_SF[(n_kd >= 2) & is_2comp]

# Figure title and full output path for this subsample
fig_title = "SF, 2-component spaxels only"
fname = os.path.join(fig_path, "hist_2comp_KDvsNoKD_statistics_SF_only.pdf")

In [37]:
# Full sample: rows with at least one component, split by the number of
# kinematically disturbed components (0, 1 or 2)
n_kd = df["Number of kinematically disturbed components"]
has_component = df["Number of components"] >= 1
df_1comp, df_2comp, df_3comp = (df[(n_kd == kk) & has_component] for kk in (0, 1, 2))

# Figure title and full output path for this subsample
fig_title = "All spaxels"
fname = os.path.join(fig_path, "hist_KDcomponents_statistics_whole_sample.pdf")

In [50]:
# SF-only: keep the rows whose total BPT classification is "SF"
is_sf = df["BPT (total)"] == "SF"
df_SF = df.copy()[is_sf]

# Rows with at least one component, split by the number of kinematically
# disturbed components (0, 1 or 2)
n_kd = df_SF["Number of kinematically disturbed components"]
has_component = df_SF["Number of components"] >= 1
df_1comp, df_2comp, df_3comp = (df_SF[(n_kd == kk) & has_component] for kk in (0, 1, 2))

# Figure title and full output path for this subsample
fig_title = "SF spaxels only"
fname = os.path.join(fig_path, "hist_KDcomponents_statistics_SF_only.pdf")

In [53]:
# SF-only, with beam smearing cut (requires df_SF from the SF-only cell)
df_SF_beam_smearing_cut = df_SF.copy()
for nn in range(3):
    # Rows in which this component carries the beam smearing flag
    flagged = df_SF_beam_smearing_cut[f"Beam smearing flag (component {nn})"] == True

    # Blank the kinematic measurements (and their errors) of the flagged cells
    kinematic_cols = [f"{qty} (component {nn})"
                      for qty in ("v_gas", "sigma_gas", "v_gas error", "sigma_gas error")]
    df_SF_beam_smearing_cut.loc[flagged, kinematic_cols] = np.nan

# Rows with at least one component, split by the number of kinematically
# disturbed components (0, 1 or 2)
n_kd = df_SF_beam_smearing_cut["Number of kinematically disturbed components"]
has_component = df_SF_beam_smearing_cut["Number of components"] >= 1
df_1comp, df_2comp, df_3comp = (
    df_SF_beam_smearing_cut[(n_kd == kk) & has_component] for kk in (0, 1, 2)
)

# Figure title and full output path for this subsample
fig_title = "SF spaxels only, beam smearing cut"
fname = os.path.join(fig_path, "hist_KDcomponents_statistics_SF_only_BScut.pdf")

In [55]:
# SF-only, with beam smearing cut, keeping only rows with r/R_e > 1
# (requires df_SF_beam_smearing_cut from the beam-smearing cell)
beyond_Re = df_SF_beam_smearing_cut["r/R_e"] > 1.0
df_SF_large_radii = df_SF_beam_smearing_cut.copy()[beyond_Re]

# Rows with at least one component, split by the number of kinematically
# disturbed components (0, 1 or 2)
n_kd = df_SF_large_radii["Number of kinematically disturbed components"]
has_component = df_SF_large_radii["Number of components"] >= 1
df_1comp, df_2comp, df_3comp = (
    df_SF_large_radii[(n_kd == kk) & has_component] for kk in (0, 1, 2)
)

# Figure title and full output path for this subsample
fig_title = r"SF spaxels only, beam smearing cut, $r/R_e > 1$"
fname = os.path.join(fig_path, "hist_KDcomponents_statistics_SF_only_BScut_large_radii.pdf")

In [57]:
# SF-only, with beam smearing cut, r/R_e > 1, and inclination below 30 degrees
# (requires df_SF_beam_smearing_cut from the beam-smearing cell)
beyond_Re = df_SF_beam_smearing_cut["r/R_e"] > 1.0
low_inclination = df_SF_beam_smearing_cut["Inclination i (degrees)"] < 30
df_SF_large_radii = df_SF_beam_smearing_cut.copy()[beyond_Re & low_inclination]
# Optional redshift cut, currently disabled:
# df_SF_large_radii = df_SF_large_radii[df_SF_large_radii["z_spec"] < 0.04]

# Rows with at least one component, split by the number of kinematically
# disturbed components (0, 1 or 2)
n_kd = df_SF_large_radii["Number of kinematically disturbed components"]
has_component = df_SF_large_radii["Number of components"] >= 1
df_1comp, df_2comp, df_3comp = (
    df_SF_large_radii[(n_kd == kk) & has_component] for kk in (0, 1, 2)
)

# Figure title and full output path for this subsample
fig_title = r"SF spaxels only, beam smearing cut, $r/R_e > 1$, $i < 30^\circ$"
fname = os.path.join(fig_path, "hist_KDcomponents_statistics_SF_only_BScut_large_radii_low_inclination.pdf")

## Run KS, AD tests

In [31]:
# Print statistics
# In this section df_1comp / df_2comp / df_3comp are split by the number of
# kinematically disturbed (KD) components, so report them as KD counts, matching
# the "{n} KD component" legend labels used by the histogram cell below.
# NOTE(review): the "Test" selection cells use ">= 1" and ">= 2" for the last two
# groups, in which case these counts are lower bounds on the number of KD components.
for n_kd, df_comp in enumerate([df_1comp, df_2comp, df_3comp]):
    print(f"Number of spaxels with {n_kd} KD component{'' if n_kd == 1 else 's'}: {df_comp.shape[0]}")


Number of 1-component spaxels: 643111
Number of 2-component spaxels: 15217
Number of 3-component spaxels: 348


In [58]:
# Histogram every quantity in `cols` for the current df_1comp / df_2comp /
# df_3comp subsamples, which in this section hold spaxels with 0, 1 and 2
# kinematically disturbed (KD) components. If print_test_scores is set, each
# panel is annotated with the two-sample KS and AD test results comparing the
# first two subsamples.
alpha = 0.01  # p-value below which we reject the null hypothesis
cols = ["log HALPHA EW (total)", "log HALPHA EW (component 0)", "log SFR (component 0)", "log SFR surface density (component 0)", 
        "sigma_gas (component 0)", "v_gas (component 0)", "sigma_gas - sigma_* (component 0)", "v_gas - v_* (component 0)", 
        "r/R_e", "HALPHA extinction correction",  "v_*", "sigma_*",
        "D4000", "log M_*", "R_e (kpc)", "log(M/R_e)", 
        "Inclination i (degrees)", "Bin size (square kpc)", "z_spec", "v_grad (component 0)",
        "log O3 (total)", "log N2 (total)", "log S2 (total)", "log O1 (total)"]

# Round the number of rows UP: with floor division, zip() below would silently
# drop the trailing quantities whenever len(cols) is not a multiple of 4.
n_panel_cols = 4
n_panel_rows = -(-len(cols) // n_panel_cols)
fig, axs = plt.subplots(nrows=n_panel_rows, ncols=n_panel_cols, figsize=(16, 20), squeeze=False)
fig.subplots_adjust(wspace=0.35, hspace=0.4)
# Hide any spare panels at the end of the grid
for ax_spare in axs.flat[len(cols):]:
    ax_spare.set_visible(False)

for col_x, ax in zip(cols, axs.flat):
    # Extract the non-NaN values of this quantity from each subsample
    d1, d2, d3 = (df_comp[col_x].dropna().to_numpy(dtype=float)
                  for df_comp in (df_1comp, df_2comp, df_3comp))

    # Both tests raise on an empty sample, so only run them when there is data
    if print_test_scores and len(d1) > 0 and len(d2) > 0:
        text_kwargs = dict(x=0.05, verticalalignment="top", fontsize="small", transform=ax.transAxes)

        # Run the 2-sample KS test; the annotation is red when the null
        # hypothesis is rejected at level alpha
        r_KS = ks_2samp(d1, d2)
        ks_kwargs = dict(text_kwargs, y=0.95)
        if r_KS.pvalue < alpha:
            ks_kwargs["color"] = "red"
        ax.text(s=r"KS: $%.2f$ ($p = %.3f$)" % (r_KS.statistic, r_KS.pvalue), **ks_kwargs)

        # Run the 2-sample AD test; same colour convention
        r_AD = anderson_ksamp([d1, d2])
        ad_kwargs = dict(text_kwargs, y=0.88)
        if r_AD.significance_level < alpha:
            ad_kwargs["color"] = "red"
            ad_label = r"AD: $%.2f$ ($p \leq %.3f$)"
        else:
            ad_label = r"AD: $%.2f$ ($p = %.3f$)"
        ax.text(s=ad_label % (r_AD.statistic, r_AD.significance_level), **ad_kwargs)

    # Plot the distributions of the quantity in spaxels with 0, 1 and 2 KD components
    for ii, d in enumerate([d1, d2, d3]):
        if len(d) > 0:
            hist(d, density=True, histtype="step",
                 ax=ax, 
                 range=(vmin_fn(col_x), vmax_fn(col_x)),
                 bins="scott",
                 # Singular only for exactly one component ("0 KD components", "1 KD component")
                 label=f"{ii} KD component{'' if ii == 1 else 's'}" + r" ($N = %d$)" % len(d),
                 color=ncomponents_colours[ii + 1])

    # Decorations
    # NOTE(review): label_fn() appears to drop the "(component N)" suffix — confirm.
    # Re-append it, 1-indexed, for whichever component the column refers to.
    xlabel = label_fn(col_x)
    for nn in range(3):
        if f"(component {nn})" in col_x:
            xlabel += f" (component {nn + 1})"
            break
    ax.set_xlabel(xlabel)
    ax.autoscale(enable=True, axis="x", tight=True)
    ax.set_ylabel(r"$N$ (normalised)")
axs[0][0].legend(loc="lower center", bbox_to_anchor=[0.5, 1.1], fontsize="small")
fig.suptitle(fig_title, y=0.9)

# Save (in this section `fname` already holds the full output path)
if savefigs:
    print(f"Saving file to {fname}")
    fig.savefig(fname, bbox_inches="tight", format="pdf")
    
    

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  Nbins = np.ceil((data.max() - data.min()) / dx)
  Nbins = np.ceil((data.max() - data.min()) / dx)
  Nbins = np.ceil((data.max() - data.min()) / dx)
  Nbins = np.ceil((data.max() - data.min()) / dx)
  Nbins = np.ceil((data.max() - data.min()) / dx)
  Nbins = np.ceil((data.max() - data.min()) / dx)


Saving file to /priv/meggs3/u5708159/SAMI/figs/paper/hist_KDcomponents_statistics_SF_only_BScut_large_radii_low_inclination.pdf


## Searching for correlations between emission line variables and local/global galaxy properties using Spearman's rank correlation coefficient
---

In [19]:
# Sanity check of Spearman's rank coefficient on data with no built-in
# correlation: y is drawn independently of x.
# A seeded generator makes the scatter plot and the printed result reproducible.
rng = np.random.default_rng(seed=0)
x = rng.uniform(low=0, high=100, size=100)
y = 10 + rng.normal(loc=0, scale=500, size=100)

fig, ax = plt.subplots()
ax.scatter(x, y)

spearmanr(x, y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

SpearmanrResult(correlation=0.09716171617161716, pvalue=0.3362107398754076)

In [18]:
# Histogram of "v_out (component 1)" over [-200, 200] in 20 bins.
# NOTE: this column is created by the "Compute outflow velocity" cell further
# down, so that cell has to be run first.
fig, ax = plt.subplots()
ax.hist(df["v_out (component 1)"].values, bins=20, range=(-200, 200))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(array([   0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0., 2274., 2417., 2569., 3871., 4918., 5502., 5452., 5202.,
        4400., 3140.]),
 array([-200., -180., -160., -140., -120., -100.,  -80.,  -60.,  -40.,
         -20.,    0.,   20.,   40.,   60.,   80.,  100.,  120.,  140.,
         160.,  180.,  200.]),
 <BarContainer object of 20 artists>)

In [5]:
# Compute outflow velocity for components 1 and 2 and store it in df:
#   v_out = | |v_gas(ii) - v_gas(ii - 1)| - 2 * sigma_gas(ii) |
# NOTE(review): confirm the "- 2 sigma" sign convention is the intended definition.
for ii in (1, 2):
    dv = df[f"v_gas (component {ii})"] - df[f"v_gas (component {ii - 1})"]
    df[f"v_out (component {ii})"] = (dv.abs() - 2 * df[f"sigma_gas (component {ii})"]).abs()

In [9]:
# Column used to colour the points in the scatter-plot grid below
col_z = "r/R_e"

In [46]:
# x: velocity dispersion of components 1 and 2; y: that of component 0
col_x_list = [f"sigma_gas (component {nn})" for nn in (1, 2)]
col_y_list = [
    "sigma_gas (component 0)",
]

In [13]:
# x: SFR measures of component 0; y: H-alpha EW columns and EW ratios
col_x_list = [
    "log SFR surface density (component 0)",
    "log SFR (component 0)",
]
col_y_list = [
    "HALPHA EW (component 0)",
    "HALPHA EW (component 1)",
    "HALPHA EW ratio (1/0)",
    "HALPHA EW ratio (2/1)",
]

In [108]:
# Alternative x columns, currently disabled:
# col_x_list = ["log SFR surface density (component 0)", "log SFR (component 0)"]
col_x_list = [
    "log M_*",
    "log(M/R_e)",
    "z_spec",
    "Bin size (square kpc)",
    "Inclination i (degrees)",
]


In [6]:
# x columns for the scatter-plot grid
col_x_list = [
    "log SFR surface density (component 0)",
    "log SFR (component 0)",
    "r/R_e",
    "HALPHA extinction correction",
    "D4000",
    "sigma_*",
]

In [7]:
# Measured properties, built group by group:
#   per-component dispersions, dispersion excesses over sigma_*, outflow
#   velocities, then the component-to-component differences and ratios
col_y_list = (
    [f"sigma_gas (component {nn})" for nn in range(3)]
    + [f"sigma_gas - sigma_* (component {nn})" for nn in range(3)]
    + [f"v_out (component {nn})" for nn in (1, 2)]
    + [f"{qty} ({pair})"
       for qty in ("delta v_gas", "delta sigma_gas", "HALPHA EW ratio")
       for pair in ("1/0", "2/1")]
)

In [15]:
# Scatter every col_y against every col_x for the SF spaxels, coloured by col_z,
# and annotate each panel with Spearman's rank coefficient (green when the
# p-value is below alpha, red otherwise).
alpha = 0.05
fig, axs = plt.subplots(nrows=len(col_y_list), ncols=len(col_x_list), figsize=(5 * len(col_x_list), 5 * len(col_y_list)), squeeze=False)
fig.subplots_adjust(wspace=0.6, hspace=0.4)

# Select the SF rows once, rather than three times per panel
df_sf_rows = df[df["BPT (total)"] == "SF"]
z_all = df_sf_rows[col_z].values

for xx, col_x in enumerate(col_x_list):
    x_all = df_sf_rows[col_x].values
    for yy, col_y in enumerate(col_y_list):
        y_all = df_sf_rows[col_y].values

        # Keep only the rows where both quantities are defined (vectorised mask)
        good = ~(pd.isna(x_all) | pd.isna(y_all))
        x = x_all[good]
        y = y_all[good]

        # Plot 
        ax = axs[yy][xx]
        bbox = ax.get_position()
        cax = fig.add_axes([bbox.x0 + bbox.width, bbox.y0, 0.05 * bbox.width, bbox.height])
        m = ax.scatter(x, y, s=0.5, c=z_all[good], vmin=vmin_fn(col_z), vmax=vmax_fn(col_z)) 
        fig.colorbar(m, cax=cax)
        ax.set_xlabel(label_fn(col_x) + "(component " + col_x.split("(component ")[1] if "(component " in col_x else label_fn(col_x))
        ax.set_ylabel(label_fn(col_y) + "(component " + col_y.split("(component ")[1] if "(component " in col_y else label_fn(col_y))
        cax.set_ylabel(label_fn(col_z))
        # A log axis cannot show zero or negative values, so only use it when
        # every plotted value is positive; signed quantities (e.g. velocity
        # differences) stay on a linear axis so that the panel shows the same
        # points that enter the correlation coefficient.
        if len(y) > 0 and np.all(y > 0):
            ax.set_yscale("log")

        # Compute the Spearman's rank coefficient
        r_s = spearmanr(x, y)
        ax.text(s=r"SR: %.3f ($p = %.5f$)" % (r_s.correlation, r_s.pvalue),
                color="green" if r_s.pvalue < alpha else "red",
                x=0.05, y=0.95, verticalalignment="top", transform=ax.transAxes)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
# Close all open figures
plt.close("all")

In [33]:
# Build df_cut: a copy of df in which every row containing a low-flux secondary
# component is blanked out.
df_cut = df.copy()

for ii in [1, 2]:
    # Flag rows where this component's "HALPHA A" is below 5% of component 0's
    flag_col = f"Low flux component (component {ii})"
    df_cut[flag_col] = df_cut[f"HALPHA A (component {ii})"] < 0.05 * df_cut["HALPHA A (component 0)"]

    # NaN out the flagged rows in their entirety (every column, not only the
    # columns belonging to this component).
    # NOTE(review): if only this component's flux/EW/kinematic columns were meant
    # to be blanked, replace ":" with that column list.
    df_cut.loc[df_cut[flag_col], :] = np.nan


In [40]:
# Quick look at the distinct values in the "delta v_gas (1/0)" column
df["delta v_gas (1/0)"].unique()

array([         nan, -17.96395302,  43.17044449, ..., -52.6231575 ,
        76.2046814 ,  94.96213531])

In [35]:
# Split the SF, 2-component spaxels of df_cut into 2 categories by the velocity
# offset between components 1 and 0: |dv| <= 50 ("no outflow") and |dv| > 50 ("outflow")
is_sf = df_cut["BPT (total)"] == "SF"
df_SF = df_cut.copy()[is_sf]
df_SF = df_SF[df_SF["Number of components"] == 2]

abs_dv = (df_SF["v_gas (component 1)"] - df_SF["v_gas (component 0)"]).abs()
df_SF_no_outflow = df_SF[abs_dv <= 50]
df_SF_outflow = df_SF[abs_dv > 50]


In [41]:
# x: SFR measures of component 0; y: velocity offset, H-alpha EWs and EW ratios
col_x_list = [
    "log SFR surface density (component 0)",
    "log SFR (component 0)",
]
col_y_list = [
    "delta v_gas (1/0)",
    "HALPHA EW (component 0)",
    "HALPHA EW (component 1)",
    "HALPHA EW ratio (1/0)",
    "HALPHA EW ratio (2/1)",
]

In [44]:
# Scatter every col_y against every col_x, overplotting the "no outflow" (red)
# and "outflow" (blue) subsamples in each panel.
alpha = 0.05
fig, axs = plt.subplots(nrows=len(col_y_list), ncols=len(col_x_list), figsize=(5 * len(col_x_list), 5 * len(col_y_list)), squeeze=False)
fig.subplots_adjust(wspace=0.6, hspace=0.4)
for xx, col_x in enumerate(col_x_list):
    for yy, col_y in enumerate(col_y_list):
        ax = axs[yy][xx]
        for df_this, colour, label in zip([df_SF_no_outflow, df_SF_outflow], ["red", "blue"], ["No outflow", "Outflow"]):
            # Select the COLUMNS by name: df_this.loc[col] would look the name
            # up in the row index instead.
            x_all = df_this[col_x].values
            y_all = df_this[col_y].values

            # Keep only the rows where both quantities are defined
            good = ~(pd.isna(x_all) | pd.isna(y_all))
            x = x_all[good]
            y = y_all[good]

            # Plot (single literal colour per subsample, so no vmin/vmax)
            ax.scatter(x, y, s=0.5, c=colour, label=label) 
#             ax.set_yscale("log")

#             # Compute the Spearman's rank coefficient
#             r_s = spearmanr(x, y)
#             if r_s.pvalue < alpha:
#                 ax.text(s=r"SR: %.3f ($p = %.5f$)" % (r_s.correlation, r_s.pvalue), color="green", x=0.05, y=0.95, verticalalignment="top", transform=ax.transAxes)
#             else:
#                 ax.text(s=r"SR: %.3f ($p = %.5f$)" % (r_s.correlation, r_s.pvalue), color="red", x=0.05, y=0.95, verticalalignment="top", transform=ax.transAxes)

        # Decorate each panel once, after both subsamples have been drawn
        ax.set_xlabel(label_fn(col_x) + "(component " + col_x.split("(component ")[1] if "(component " in col_x else label_fn(col_x))
        ax.set_ylabel(label_fn(col_y) + "(component " + col_y.split("(component ")[1] if "(component " in col_y else label_fn(col_y))
        ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …