# Quick plotting for SFR79 from Observations and Simulations

#### <span style='color:blue'> Adapted to also include xCG individual data points in addition to binned-SDSS based grid of values! </span>

### Imports

In [None]:
import numpy as np
import pandas as pd
from scipy import stats

import yaml
from pathlib import Path
from datetime import datetime
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm, trange

import SiGMo as sgm

### Compute SFR79 from SFR

Generalised helper function that can be used both for SDSS grid-style data and for individual data points (stored as a 1-d grid). It computes the log SFR79 from the "actual" simulation SFR averages:

$$\mathrm{SFR79} = \frac{\mathrm{SFR}_\mathrm{5 Myr}}{\mathrm{SFR}_\mathrm{800 Myr}}$$

### Define directories

### Read (and make if necessary) directories according to analysis config file

In [None]:
# Locate the analysis config file. A machine-specific "local" config takes
# precedence over the general one; both are looked up in the current working directory.
project_dir = Path.cwd().parent

local_config = Path.cwd() / "SiGMo_analysis_config_local.yml"
general_config = Path.cwd() / "SiGMo_analysis_config.yml"
if local_config.is_file():
    config_file = local_config
elif general_config.is_file():
    config_file = general_config
else:
    # same exception type as before, but now saying what is actually missing
    raise ValueError(
        f"No analysis config file found: neither '{local_config}' nor '{general_config}' exists"
    )

# read the YAML inside a context manager so the file handle is closed again
with open(config_file) as _config_stream:
    config = yaml.safe_load(_config_stream)

# directories taken from the 'paths' section of the config
sfr79_dir = Path(config['paths']['sfr79_dir'])
snp_dir_GMSsim = Path(config['paths']['snp_dir_GMSsim'])
plot_dir = Path(config['paths']['plot_dir'])
plot_dir.mkdir(exist_ok=True, parents=True)   # create plot dir only if necessary

### try exponential formatting

In [None]:
# Round-trip check: write the time resolution in exponent notation with no decimals,
# parse the string back to a float and make sure the value survives (to within 1 %).
time_res = 3.e-4

time_res_str = format(time_res, '.0e')
print(time_res_str)

time_res_float_from_str = float(time_res_str)
print(time_res_float_from_str)

assert np.isclose(time_res, time_res_float_from_str, atol=time_res/100)

### Read-in: Observational Data (SDSS and xCG)

In [None]:
# Observational inputs, all read from the SFR79 directory given in the config.

# --- SDSS: binned 2-d histogram products stored as plain-text arrays ---
sfr79_medians = np.loadtxt(str(sfr79_dir / "SFR79_2dhist_medians.txt"))
mstar_mesh = np.loadtxt(str(sfr79_dir / "SFR79_2dhist_binedges_mstar_mesh.txt"))
sfr_mesh = np.loadtxt(str(sfr79_dir / "SFR79_2dhist_binedges_sfr_mesh.txt"))
n_binned = np.loadtxt(str(sfr79_dir / "SFR79_2dhist_binnumbers.txt"))

# Blank out sparsely populated bins (as was done when setting up the simulation).
# To keep all bins, comment out the next two statements.
n_binned_min = 40
sfr79_medians = np.where(n_binned >= n_binned_min, sfr79_medians, np.nan)

# --- xCOLD GASS: table of individual galaxies ---
xCG_df = pd.read_csv(sfr79_dir / "xCOLD_GASS_with_SDSS_SFR79_df.csv")
# keep rows whose stellar mass, SFR and H2 mass are all above the -900 cut
# (presumably a missing-value sentinel -- TODO confirm against the catalogue description)
xCG_minimal_selector = (
    xCG_df["LOGMSTAR"].gt(-900)
    & xCG_df["LOGSFR_BEST"].gt(-900)
    & xCG_df["LOGMH2"].gt(-900)
)
# keep rows with FLAG_CO equal to 1
xCG_gasdetect_selector = xCG_df["FLAG_CO"].eq(1)
xCG_df_selected = xCG_df.loc[xCG_minimal_selector & xCG_gasdetect_selector]
xCG_sfr79 = np.squeeze(xCG_df_selected["SFR79values"].to_numpy())

### Read in SDSS-based grid-like simulation data (that was created using binned SDSS data as ICs)

### SDSS-based sims: calculate SFR79 from SFRs

Compute the log SFR79 from the "actual" simulation SFR averages:

$$\mathrm{SFR79} = \frac{\mathrm{SFR}_\mathrm{5 Myr}}{\mathrm{SFR}_\mathrm{800 Myr}}$$

### Read in GMS-based simulation data (that was created using individual galaxies along or off-set from the GMS as desired as ICs)

In [None]:
# Snapshot layout flag handed to the snapshot reader further down:
#   False -> data come from one big Environment snapshot
#   True  -> data come from many individual snapshots
single_snapshots = False

In [None]:
# how many objects per timestep?
n_envs = 1
# halo/galaxy counts are "unknown" unless looked up someplace else; per the original note,
# passing None lets them be determined from some Environment snapshot
n_halos = None
n_gals = None

# reading the simulation results of the GMS-based run (snapshot directory: snp_dir_GMSsim)
env_grid_GMSsim, halo_grid_GMSsim, gal_grid_GMSsim = sgm.read_all_snapshots_from_dir(snp_dir=snp_dir_GMSsim,
                                                                                     n_envs=n_envs,
                                                                                     n_halos=n_halos,
                                                                                     n_gals=n_gals,
                                                                                     single_snapshots=single_snapshots)

In [None]:
# Re-create/populate halo_grid and galaxy_grid from the Environment snapshots.
# Only needed when everything was read from one big Environment snapshot.
if not single_snapshots:
    # NOTE: only the FIRST Environment (index 0) is unpacked here
    for t, env_snp in enumerate(tqdm(env_grid_GMSsim[0])):  # HARDCODED to 1st Environment only!
        for i_halo, halo in enumerate(env_snp.data['halos']):
            halo_grid_GMSsim[i_halo, t] = sgm.Snapshot(halo)
            # every galaxy of a halo is written to the same [i_halo, t] slot (i_gal is not used
            # in the index), so with more than one galaxy per halo only the last one is kept
            for i_gal, gal in enumerate(halo['galaxies']):
                gal_grid_GMSsim[i_halo, t] = sgm.Snapshot(gal)  # HARDCODED to 1 Galaxy per Halo!!

### GMS-based sims: calculate SFR79 from SFRs

Compute the log SFR79 from the "actual" simulation SFR averages:

$$\mathrm{SFR79} = \frac{\mathrm{SFR}_\mathrm{5 Myr}}{\mathrm{SFR}_\mathrm{800 Myr}}$$

In [None]:
# SFR79 for every entry of the galaxy grid, computed by the SiGMo helper
# (presumably log10 values, as they are later colour-coded as 'log SFR79' -- TODO confirm)
SFR79_grid_GMSsim = sgm.compute_SFR79_from_SFR(gal_grid_GMSsim)

# NOTE(review): this prints the full array, which can get long for many objects/snapshots
print("SFR79_grid_GMSsim:\n", SFR79_grid_GMSsim)

### **SFR79 from observations vs from simulations**

### SDSS binned obs SFR79 with individual GMS sims

In [None]:
# SDSS bin values: each grid value is the mean of two diagonally opposite corner
# values of the corresponding mesh cell
mstar_grid = (mstar_mesh[:-1, :-1] + mstar_mesh[1:, 1:]) / 2.
sfr_grid = (sfr_mesh[:-1, :-1] + sfr_mesh[1:, 1:]) / 2.
# difference of the (log) SFR and (log) stellar-mass grids
ssfr_grid = sfr_grid - mstar_grid
# calculate the mgas values (from Saintonge+22 relation) for SDSS bins;
# the helper's result is added to the stellar-mass grid
# (presumably it returns log(Mgas/Mstar) -- TODO confirm against SiGMo)
mgas_grid = sgm.calculate_mgas_mstar_from_sSFR_Saintonge2022(sSFR=ssfr_grid, log_values=True) + mstar_grid
# remove values for mgas where there's no SDSS bins (in sfr79_medians)
mgas_grid[np.isnan(sfr79_medians)] = np.nan


# colormap/norm etc. for SFR79 plot (fixed colour range of -2 ... 2)
sfr79_range = (-2, 2)
cmap_sfr79 = mpl.cm.RdBu
norm_sfr79 = mpl.colors.Normalize(vmin=sfr79_range[0], vmax=sfr79_range[1])
mapper_sfr79 = mpl.cm.ScalarMappable(norm=norm_sfr79, cmap=cmap_sfr79)


# colormap/norm etc. for MGAS plot (colour range spans the finite values of the SDSS-based mgas grid)
cmap_mgas = mpl.cm.Greens
norm_mgas = mpl.colors.Normalize(vmin=np.nanmin(mgas_grid), vmax=np.nanmax(mgas_grid))
mapper_mgas = mpl.cm.ScalarMappable(norm=norm_mgas, cmap=cmap_mgas)



# set up figure and axes: 2 rows (top: SDSS grid, bottom: xCOLD GASS points) times
# 4 columns (SFR79 panel, its colour bar, Mgas panel, its colour bar)
fig, ((ax_sfr79, ax_sfr79_cbar, ax_mgas, ax_mgas_cbar),
      (ax_sfr79_xCG, ax_sfr79_cbar_xCG, ax_mgas_xCG, ax_mgas_cbar_xCG)) = plt.subplots(
    # earlier layouts were 1x2 and 1x4
    2, 4,
    gridspec_kw={
       # narrow columns hold the colour bars
       'width_ratios': (18, 1.2, 18, 1.2),
        'height_ratios': (1, 1),
       'hspace': 0.05
    },
    # base panel size (7.2, 5.5), scaled by 1.9 in both directions for the 2x2 panel layout
    figsize=(7.2*1.9, 5.5*1.9),
    # constrained_layout is used instead of tight_layout
    constrained_layout=True
)


# ==========================

# collecting the simulation data to plot

# define which data to plot
gal_grid = gal_grid_GMSsim
halo_grid = halo_grid_GMSsim
env_grid = env_grid_GMSsim

# which snapshot provides the mstar-SFR values (aka the position on the plot)?
#   0 -> initial mstar, SFR;  -1 -> final mstar, SFR
which_snp = -1

which_objects = range(0, len(gal_grid), 1)
x_data = []
y_data = []
sfr79_data_sim = []
sfr79_colours_sim = []
mgas_data_sim = []
mgas_colours_sim = []
for i in tqdm(which_objects):
    _snp_data = gal_grid[i, which_snp].data

    # x and y coords of GMS sims
    x_data.append(np.log10(_snp_data['mstar']))
    y_data.append(np.log10(_snp_data['SFR']) - 9.)  # -9. is for conversion from /Gyr to /yr

    # SFR79 is always taken from the LAST snapshot, independent of which_snp
    # NOTE(review): confirm this is intended whenever which_snp is changed from -1
    _sfr79 = SFR79_grid_GMSsim[i, -1]
    sfr79_data_sim.append(_sfr79)
    sfr79_colours_sim.append(mapper_sfr79.to_rgba(_sfr79))

    # log gas mass, computed once and reused for value and colour
    _log_mgas = np.log10(_snp_data['mgas'])
    mgas_data_sim.append(_log_mgas)
    mgas_colours_sim.append(mapper_mgas.to_rgba(_log_mgas))

# convert lists to arrays
x_data = np.array(x_data)
y_data = np.array(y_data)
sfr79_data_sim = np.array(sfr79_data_sim)
sfr79_colours_sim = np.array(sfr79_colours_sim)
mgas_data_sim = np.array(mgas_data_sim)
# fixed: this used to be built from mgas_data_sim, which threw away the RGBA colours
mgas_colours_sim = np.array(mgas_colours_sim)

# for xCOLD GASS detections: positions, values and colours of the selected galaxies.
# Whole columns are taken at once instead of looping over the rows with .iloc.
which_objects = range(0, len(xCG_df_selected), 1)

# x and y coords of the xCOLD GASS galaxies
x_data_xCG = xCG_df_selected["LOGMSTAR"].to_numpy(copy=True)
y_data_xCG = xCG_df_selected["LOGSFR_BEST"].to_numpy(copy=True)  # already in units per yr

# xCG_sfr79 was squeezed on read-in; atleast_1d keeps a single detection usable as a 1-d array
sfr79_data_obs = np.atleast_1d(xCG_sfr79)
sfr79_colours_obs = np.asarray(mapper_sfr79.to_rgba(sfr79_data_obs))

# LOGMH2 is colour-coded with the mapper of the Mgas panels
mgas_data_obs = xCG_df_selected["LOGMH2"].to_numpy(copy=True)
mgas_colours_obs = np.asarray(mapper_mgas.to_rgba(mgas_data_obs))



# ========================

# SFR79 from SDSS (upper LHS)

# plot observational SFR79 data (binned medians)
im_sfr79_obs = ax_sfr79.pcolormesh(
    mstar_mesh, sfr_mesh,
    sfr79_medians,
    cmap=cmap_sfr79,
    norm=norm_sfr79,
    label="observational SFR79: SDSS",
)

# plot colorbar
fig.colorbar(mpl.cm.ScalarMappable(norm=norm_sfr79, cmap=cmap_sfr79),
             cax=ax_sfr79_cbar,
             fraction=0.8,
             extend='both',
             label='log SFR79')


# ================================

# MGAS from SDSS bins (upper RHS)

# plot the gas-mass grid computed for the SDSS bins
# NOTE(review): the label below still says "SFR79" although this mesh shows Mgas;
# it is not used by the legend in this cell and is therefore left unchanged
im_mgas_obs = ax_mgas.pcolormesh(
    mstar_mesh, sfr_mesh,
    mgas_grid,
    cmap=cmap_mgas,
    norm=norm_mgas,
    label="observational SFR79: SDSS",
)

# plot colorbar
fig.colorbar(mpl.cm.ScalarMappable(norm=norm_mgas, cmap=cmap_mgas),
             cax=ax_mgas_cbar,
             fraction=0.8,
             extend='both',
             label=r'log $M_\mathrm{gas}$ [$M_\odot$]')


# =======================

# SFR79 xCOLD GASS (lower LHS)

# plot observational SFR79 data (individual galaxies).
# The colours are already RGBA values, so no cmap/norm is passed: matplotlib
# ignores them (with a warning) whenever `c` holds explicit colours.
im_sfr79_obs_xCG = ax_sfr79_xCG.scatter(
    x=x_data_xCG,
    y=y_data_xCG,
    c=sfr79_colours_obs,
    s=(mpl.rcParams['lines.markersize']*1.5)**2,
)

# plot colorbar
fig.colorbar(mpl.cm.ScalarMappable(norm=norm_sfr79, cmap=cmap_sfr79),
             cax=ax_sfr79_cbar_xCG,
             fraction=0.8,
             extend='both',
             label='log SFR79')


# =======================

# MGAS xCOLD GASS (lower RHS)

# plot observational gas-mass data (individual galaxies, colours are RGBA as above)
im_mgas_obs_xCG = ax_mgas_xCG.scatter(
    x=x_data_xCG,
    y=y_data_xCG,
    c=mgas_colours_obs,
    s=(mpl.rcParams['lines.markersize']*1.5)**2,
)

# plot colorbar
fig.colorbar(mpl.cm.ScalarMappable(norm=norm_mgas, cmap=cmap_mgas),
             cax=ax_mgas_cbar_xCG,
             fraction=0.8,
             extend='both',
             label=r'log $M_\mathrm{gas}$ [$M_\odot$]')



# ========================

# Galaxy Main Sequence (Saintonge+2022 version) drawn over the stellar-mass range
# of the SDSS mesh, plus the shared axis cosmetics of the four data panels
GMS_x = np.linspace(np.min(mstar_mesh), np.max(mstar_mesh), num=1000, endpoint=True)
GMS_y = sgm.GMS_Saintonge2022(GMS_x, log=True)

for ax in (ax_sfr79, ax_mgas, ax_sfr79_xCG, ax_mgas_xCG):
    # main-sequence line; the handle of the last panel is reused for the legend
    handle_GMS = ax.plot(GMS_x, GMS_y,
                         color='xkcd:magenta', ls='--', label="GMS: Saintonge & Catinella (2022)")

    # figure labelling etc
    ax.set_xlabel(r'log $M_\star$ [$M_\odot$]')
    ax.set_ylabel(r'log SFR [$M_\odot \, yr^{-1}$]')
    ax.set(xlim=(8.45, 11.6), ylim=(-2.3, 1.8))
    ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)

# (an optional text box with min/max log SFR79 of an SDSS-based simulation grid used to be
#  added to ax_sfr79 here; that grid is not available in this notebook)




# =========

# add the GMS simulation data points on top of all four data panels:
# SFR79 colours on the SFR79 panels, Mgas colours on the Mgas panels

# optional value annotations next to each simulated point
annotate_flag = False
x_offset = 0.1
y_offset = -0.1

for _ax, _cmap, _norm, _data in zip([ax_sfr79, ax_mgas, ax_sfr79_xCG, ax_mgas_xCG],
                                    [cmap_sfr79, cmap_mgas]*2,
                                    [norm_sfr79, norm_mgas]*2,
                                    [sfr79_data_sim, mgas_data_sim]*2):
    # Colours are derived here from the data values with the panel's own cmap/norm and
    # handed over as explicit RGBA values. cmap/norm are therefore NOT passed to scatter:
    # matplotlib ignores them (with a warning) when `c` already holds colours.
    _c = mpl.cm.ScalarMappable(norm=_norm, cmap=_cmap).to_rgba(_data)

    handle_scatter = _ax.scatter(
        x=x_data,
        y=y_data,
        s=(mpl.rcParams['lines.markersize']*2)**2,
        c=_c,
        edgecolors='xkcd:dark grey',
        label="simulated SFR79: based on GMS",
        zorder=10,
    )

    # add annotations?
    if annotate_flag:
        for i, _txt in enumerate(_data):
            _txt = _txt[0] if isinstance(_txt, list) else _txt
            _ax.annotate(f"{_txt:.3f}", (x_data[i] + x_offset, y_data[i] + y_offset))


# Legend for the upper-left (SDSS SFR79) panel. Each entry is a tuple of small proxy
# markers in a few colours of the SFR79 colour map, drawn side by side by HandlerTuple.

# marker properties read from the last scatter call through the public getters
_marker_edgecolor = handle_scatter.get_edgecolor()[0]
_marker_size = np.sqrt(handle_scatter.get_sizes()[0])   # scatter sizes are areas -> sqrt gives markersize

# make the legend markers for scatter plot (circles with the scatter edge colour)
handle_scatter_custom = tuple(
    mpl.lines.Line2D([0], [0], color=cmap_sfr79(_frac), ls='', marker='o',
                     markeredgecolor=_marker_edgecolor, markersize=_marker_size)
    for _frac in (.75, .5, .25)
)

# legend markers standing in for the pcolormesh (squares)
handle_pcolormesh_custom = tuple(
    mpl.lines.Line2D([0], [0], color=cmap_sfr79(_frac), ls='', marker='s', markersize=_marker_size)
    for _frac in (.65, .45)
)

# proxy line with the colour and line style of the plotted main sequence (a single handle, not a tuple)
handle_GMS_custom = (
    mpl.lines.Line2D([0], [0], color=handle_GMS[0].get_color(), linestyle=handle_GMS[0].get_linestyle())
)


ax_sfr79.legend(
    [handle_pcolormesh_custom, handle_scatter_custom, handle_GMS_custom],
    [im_sfr79_obs.get_label(), handle_scatter.get_label(), handle_GMS[0].get_label()],
    handler_map={tuple: mpl.legend_handler.HandlerTuple(ndivide=None)},
    loc='lower right',
    framealpha=0.87,
)


In [None]:
# save heat-map plot to disk (into plot_dir); the file name carries a timestamp,
# so every run writes a new file instead of overwriting an earlier one
fig.savefig(plot_dir / f'SFR79_SDSSobs_vs_GMSsims_{datetime.now().strftime("%Y.%m.%d-%H.%M.%S")}.png', dpi=300)

### Deviation of Sims from xCG Obs

In [None]:
# Deviation of every simulated galaxy from the xCOLD GASS detections that lie close to it
# in the (log Mstar, log SFR) plane: Δ = simulated value - median of the nearby observed values.
# Left panel: Δ SFR79, middle panel: Δ Mgas, right: colour bar with the number of xCG partners.
fig, (ax_sfr79, ax_mgas, ax_n_cb) = plt.subplots(
    1, 3,
    gridspec_kw={
        'width_ratios': (18, 18, 1.2),
        'height_ratios': (1,),
        'hspace': 0.05
    },
    figsize=(7.2*2.0*0.7, 5.5*0.7),
    constrained_layout=True
)


_proxim = 0.15   # in dex
_proxim_mode = "rect"   # "rect": box with half-width _proxim; "circ": circle with radius _proxim

_sampling = False   # whether the comparison is done with all nearby xCG galaxies or just a subsample drawn from it
# seeded generator used for ALL random draws below (sub-sampling and bootstrap),
# so that a fresh run of this cell reproduces the same numbers
_sampling_rng = np.random.default_rng(1234567)
_n_sample = 5

_bootstrapping = True   # if we want a bootstrap error sampling
_bootstrap_n = 50   # number of sets/random draws, NOT number of items per draw/set
_bootstrap_delta_sfr79_median_l = []
_bootstrap_delta_sfr79_mad_l = []
_bootstrap_delta_mgas_median_l = []
_bootstrap_delta_mgas_mad_l = []

_delta_sfr79_l = []
_delta_mgas_l = []
_mstar_l = []
_n_l = []
for (_x, _y, _sfr79, _mgas) in zip(x_data, y_data, sfr79_data_sim, mgas_data_sim):
    # --- which xCG galaxies count as "close" to this simulated galaxy? ---
    _mode = _proxim_mode.casefold()
    if _mode.startswith("rect"):
        _closeto = ((x_data_xCG >= (_x - _proxim)) & (x_data_xCG <= (_x + _proxim)) &   # rectangle for compare (in log space)
                    (y_data_xCG >= (_y - _proxim)) & (y_data_xCG <= (_y + _proxim)))
    elif _mode.startswith("circ"):
        _closeto = (((x_data_xCG - _x)**2 + (y_data_xCG - _y)**2) <= _proxim**2)   # circular comparison (in log space)
    else:
        raise ValueError(
            f"Wrong keyword for selection of mode of proximity (_proxim_mode): '{_proxim_mode}'"
        )

    # all nearby observed values (may be empty, in which case the medians below are NaN)
    _nearby_sfr79 = xCG_df_selected.loc[_closeto, "SFR79values"]
    _nearby_mgas = xCG_df_selected.loc[_closeto, "LOGMH2"]

    # --- values that enter the comparison: everything nearby, or a random subsample ---
    if _sampling:
        _n = min(len(_nearby_sfr79), _n_sample)
        # the two quantities are drawn independently, as before
        _closevals_sfr79 = _nearby_sfr79.sample(n=_n, replace=False, random_state=_sampling_rng)
        _closevals_mgas = _nearby_mgas.sample(n=_n, replace=False, random_state=_sampling_rng)
    else:
        _n = len(_nearby_sfr79)
        _closevals_sfr79 = _nearby_sfr79
        _closevals_mgas = _nearby_mgas

    _closevals_sfr79_median = np.median(_closevals_sfr79)
    _delta_sfr79 = _sfr79 - _closevals_sfr79_median

    print("median obs. SFR79:", _closevals_sfr79_median, f" (from {_n} detections)")
    print("Δ SFR79:", _delta_sfr79)

    _closevals_mgas_median = np.median(_closevals_mgas)
    _delta_mgas = _mgas - _closevals_mgas_median

    print("median obs. Mgas:", _closevals_mgas_median, f" (from {_n} detections)")
    print("Δ Mgas:", _delta_mgas)

    print()

    # --- bootstrap: resample the nearby galaxies with replacement to estimate the scatter of Δ ---
    if _bootstrapping:
        _subsample_delta_sfr79_l = []
        _subsample_delta_mgas_l = []

        for _i in range(_bootstrap_n):

            _subsample_sfr79 = _nearby_sfr79.sample(frac=1, replace=True, random_state=_sampling_rng)
            _subsample_sfr79_median = np.median(_subsample_sfr79)
            _subsample_delta_sfr79 = _sfr79 - _subsample_sfr79_median

            _subsample_mgas = _nearby_mgas.sample(frac=1, replace=True, random_state=_sampling_rng)
            _subsample_mgas_median = np.median(_subsample_mgas)
            _subsample_delta_mgas = _mgas - _subsample_mgas_median

            _subsample_delta_sfr79_l.append(_subsample_delta_sfr79)
            _subsample_delta_mgas_l.append(_subsample_delta_mgas)

        # median and (unscaled) median absolute deviation over the bootstrap sets
        _bootstrap_delta_sfr79_median = np.median(_subsample_delta_sfr79_l)
        _bootstrap_delta_sfr79_mad = stats.median_abs_deviation(_subsample_delta_sfr79_l, scale=1)
        _bootstrap_delta_sfr79_median_l.append(_bootstrap_delta_sfr79_median)
        _bootstrap_delta_sfr79_mad_l.append(_bootstrap_delta_sfr79_mad)
        print("median Δ SFR79:", _bootstrap_delta_sfr79_median)
        print("MAD of Δ SFR79:", _bootstrap_delta_sfr79_mad)

        _bootstrap_delta_mgas_median = np.median(_subsample_delta_mgas_l)
        _bootstrap_delta_mgas_mad = stats.median_abs_deviation(_subsample_delta_mgas_l, scale=1)
        _bootstrap_delta_mgas_median_l.append(_bootstrap_delta_mgas_median)
        _bootstrap_delta_mgas_mad_l.append(_bootstrap_delta_mgas_mad)
        print("median Δ Mgas:", _bootstrap_delta_mgas_median)
        print("MAD of Δ Mgas:", _bootstrap_delta_mgas_mad)
        print()

    _delta_sfr79_l.append(_delta_sfr79)
    _delta_mgas_l.append(_delta_mgas)
    _mstar_l.append(_x)
    _n_l.append(_n)



# colormap/norm etc. for the number of comparison partners
n_range = (np.nanmin(_n_l), np.nanmax(_n_l))
cmap_n = mpl.cm.Oranges
norm_n = mpl.colors.Normalize(vmin=n_range[0], vmax=n_range[1])
mapper_n = mpl.cm.ScalarMappable(norm=norm_n, cmap=cmap_n)


# Δ versus log Mstar, colour-coded by the number of xCG comparison partners
ax_sfr79.scatter(_mstar_l, _delta_sfr79_l, c=_n_l, cmap=cmap_n, norm=norm_n)
ax_mgas.scatter(_mstar_l, _delta_mgas_l, c=_n_l, cmap=cmap_n, norm=norm_n)

# error bars: MAD of the bootstrap Δ values, drawn behind the points
if _bootstrapping:
    ax_sfr79.errorbar(
        _mstar_l,
        _delta_sfr79_l,
        yerr=_bootstrap_delta_sfr79_mad_l,
        fmt='none',
        ls='',
        ecolor="xkcd:greyish teal",
        alpha=0.3,
        capsize=mpl.rcParams['lines.markersize'],
        zorder=-10
    )
    ax_mgas.errorbar(
        _mstar_l,
        _delta_mgas_l,
        yerr=_bootstrap_delta_mgas_mad_l,
        fmt='none',
        ls='',
        ecolor="xkcd:greyish teal",
        alpha=0.3,
        capsize=mpl.rcParams['lines.markersize'],
        zorder=-10
    )


# reference line at zero deviation
for _ax in [ax_sfr79, ax_mgas]:
    _ax.axhline(0., color="xkcd:light teal", ls="--")


# plot colorbar
fig.colorbar(mpl.cm.ScalarMappable(norm=norm_n, cmap=cmap_n),
             cax=ax_n_cb,
             fraction=0.8,
             extend='both',
             label=r'N (comparison partners in xCG)')


# figure labelling etc
for _ax, _ylabel in zip(
        [ax_sfr79, ax_mgas],
        [r'$\Delta$ SFR79', r'$\Delta$ $M_\mathrm{gas}$']
):
    _ax.set_xlabel(r'log $M_\star$ [$M_\odot$]')
    _ax.set_ylabel(_ylabel)
    _ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)


In [None]:
# save delta plots to disk; the file name encodes the proximity radius and mode,
# whether sub-sampling and/or bootstrap errors were switched on, and a timestamp
fig.savefig(plot_dir / f'SFR79_Mgas_sims_vs_xCG_obs_within_{_proxim}dex_{_proxim_mode}_{f"sampling{_n_sample}_" if _sampling else ""}{f"with_bootstrap_errs_" if _bootstrapping else ""}{datetime.now().strftime("%Y.%m.%d-%H.%M.%S")}.png', dpi=300)

### Fit final Sims data points (to get relation and scatter)

In [None]:
# Fit the final simulated (log Mstar, log SFR) points with a polynomial (recover a relation
# similar to the GMS?) and colour-code each point by its offset from the Leslie+20 GMS,
# evaluated for the snapshot chosen with `which_snp` below.
fit_degree = 1
# x-range used for the fit; e.g. [6, 10] or [6, 9.2] restrict it, infinite bounds use everything
fit_range_x = [-np.inf, np.inf]


# how many galaxies are out of bounds? (infinity or nan)
print("Number of out-of-bounds objects:", len(x_data) - np.sum(np.isfinite(x_data)))
# assert that that's the same for SFR and Mstar
assert np.all(np.isfinite(x_data) == np.isfinite(y_data))


# set filter/selector for sim data
data_select = np.isfinite(x_data) & (x_data < 15) & (y_data < 10)
x_data_select = x_data[data_select]
y_data_select = y_data[data_select]
# set filter/selector for the fitting (can be narrower to exclude certain regions)
fit_select = (x_data_select > fit_range_x[0]) & (x_data_select < fit_range_x[1])


# colour-coding: offset of each galaxy from the GMS at snapshot `which_snp`
# (0 -> first snapshot, -1 -> last snapshot)
which_snp = 0
c_data = []
for i in range(0, len(gal_grid), 1):
    _mstar = np.log10(gal_grid[i, which_snp].data['mstar'])
    _sfr = np.log10(gal_grid[i, which_snp].data['SFR']) - 9     # conversion from Gyr^-1 to yr^-1
    _z = gal_grid[i, which_snp].data['z']
    _sfr_on_gms = sgm.GMS_Leslie2020(mstar=_mstar, z=_z, log=True)
    c_data.append(_sfr - _sfr_on_gms)
c_data = np.array(c_data)
c_data_select = c_data[data_select]


# colour mapping: symmetric range around zero, wide enough for the largest |offset|.
# (Previously the larger of min and max was used, which clips or even inverts the
#  range whenever the negative offsets dominate.) Non-finite offsets are ignored.
delta_sfr_range_minmax = np.max(np.abs(c_data[np.isfinite(c_data)]))
delta_sfr_range = [-delta_sfr_range_minmax, delta_sfr_range_minmax]
cmap_delta_sfr = mpl.cm.PuOr_r
norm_delta_sfr = mpl.colors.Normalize(vmin=delta_sfr_range[0], vmax=delta_sfr_range[1])
mapper_delta_sfr = mpl.cm.ScalarMappable(norm=norm_delta_sfr, cmap=cmap_delta_sfr)


# fit
p_fit = np.polynomial.Polynomial.fit(x_data_select[fit_select], y_data_select[fit_select], deg=fit_degree)

# scatter of ALL selected points around the fitted relation
delta_sfr = np.array(y_data_select) - p_fit(np.array(x_data_select))
delta_sfr_sigma = np.std(delta_sfr)


fig, (ax, ax_cbar) = plt.subplots(1, 2,
                                  figsize=(7.2, 5.5),
                                  gridspec_kw={
                                      'width_ratios': (18, 1.2),
                                      'height_ratios': (1,),
                                      'hspace': 0.05
                                  },
                                  constrained_layout=True
                                  )
x_lin = np.linspace(np.nanmin(x_data_select), np.nanmax(x_data_select), num=1000)

ax.plot(x_lin, sgm.GMS_Leslie2020(mstar=x_lin, z=0, log=True), color='xkcd:green', label='Leslie+20 GMS at z=0')
ax.plot(x_lin, p_fit(x_lin), color='xkcd:dark red', label=f'{"Linear" if fit_degree == 1 else f"{fit_degree}-order"} fit to simulations (σ={delta_sfr_sigma:.3f})', ls="--")
ax.scatter(x_data_xCG, y_data_xCG, c='xkcd:light grey', label='xCOLD GASS with gas detections')
ax.scatter(x_data_select, y_data_select,
           c=mapper_delta_sfr.to_rgba(c_data_select),
           label=f'Simulated galaxies ({len(x_data_select)}/{len(x_data)})',
           edgecolors='xkcd:grey',
           s=mpl.rcParams['lines.markersize'] ** 2 * 1.7,
           linewidths=mpl.rcParams["lines.linewidth"] * 0.3
           )


# colour bar
# NOTE: _z is the redshift of the LAST galaxy handled in the loop above
fig.colorbar(mappable=mapper_delta_sfr,
             cax=ax_cbar,
             fraction=0.8,
             label=f'ΔSFR from GMS at z={_z:.1f} [dex]')


ax.set_xlabel(r'log $M_\mathrm{star}$ [$M_\odot$]')
ax.set_ylabel(r'log $SFR$ [$M_\odot$ yr$^{-1}$]')
ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)
ax.legend()

In [None]:
# save fit plot to disk (into plot_dir, with a timestamp in the file name)
fig.savefig(plot_dir / f'Fit_and_Std_sims_vs_xCG_obs_{datetime.now().strftime("%Y.%m.%d-%H.%M.%S")}.png', dpi=300)

### M_halo analysis

### Fit final Sims data points (to get relation and scatter)

In [None]:
# Fit the final simulated (log Mstar, log SFR) points with a polynomial and colour-code each
# point by the offset of its halo mass from the halo mass that the SiGMo helper
# iter_mhalo_from_mstar assigns to its stellar mass (both at snapshot `which_snp`).
fit_degree = 1
# x-range used for the fit; e.g. [6, 10] or [6, 9.2] restrict it, infinite bounds use everything
fit_range_x = [-np.inf, np.inf]


# how many galaxies are out of bounds? (infinity or nan)
print("Number of out-of-bounds objects:", len(x_data) - np.sum(np.isfinite(x_data)))
# assert that that's the same for SFR and Mstar
assert np.all(np.isfinite(x_data) == np.isfinite(y_data))


# set filter/selector for sim data
data_select = np.isfinite(x_data) & (x_data < 15) & (y_data < 10)
x_data_select = x_data[data_select]
y_data_select = y_data[data_select]
# set filter/selector for the fitting (can be narrower to exclude certain regions)
fit_select = (x_data_select > fit_range_x[0]) & (x_data_select < fit_range_x[1])


# colour-coding: log halo mass minus the log halo mass expected for the galaxy's stellar mass
# (0 -> first snapshot, -1 -> last snapshot)
which_snp = 0
c_data = []
for i in range(0, len(gal_grid), 1):
    _mhalo = np.log10(halo_grid[i, which_snp].data['mtot'])
    _mstar = np.log10(gal_grid[i, which_snp].data['mstar'])
    _z = gal_grid[i, which_snp].data['z']
    _mhalo_noscatter = np.log10(sgm.iter_mhalo_from_mstar(10.**_mstar, z=_z, try_lookup=False, interpolate=True))
    c_data.append(_mhalo - _mhalo_noscatter)
c_data = np.array(c_data)
c_data_select = c_data[data_select]


# colour mapping: symmetric range around zero, wide enough for the largest |offset|.
# (Previously the larger of min and max was used, which clips or even inverts the
#  range whenever the negative offsets dominate.) Non-finite offsets are ignored.
mhalo_scatter_range_minmax = np.max(np.abs(c_data[np.isfinite(c_data)]))
mhalo_scatter_range = [-mhalo_scatter_range_minmax, mhalo_scatter_range_minmax]
print(mhalo_scatter_range)
cmap_mhalo_scatter = mpl.cm.PiYG_r
norm_mhalo_scatter = mpl.colors.Normalize(vmin=mhalo_scatter_range[0], vmax=mhalo_scatter_range[1])
mapper_mhalo_scatter = mpl.cm.ScalarMappable(norm=norm_mhalo_scatter, cmap=cmap_mhalo_scatter)


# fit
p_fit = np.polynomial.Polynomial.fit(x_data_select[fit_select], y_data_select[fit_select], deg=fit_degree)

# scatter of ALL selected points around the fitted relation
delta_sfr = np.array(y_data_select) - p_fit(np.array(x_data_select))
delta_sfr_sigma = np.std(delta_sfr)


fig, (ax, ax_cbar) = plt.subplots(1, 2,
                                  figsize=(7.2, 5.5),
                                  gridspec_kw={
                                      'width_ratios': (18, 1.2),
                                      'height_ratios': (1,),
                                      'hspace': 0.05
                                  },
                                  constrained_layout=True
                                  )
x_lin = np.linspace(np.nanmin(x_data_select), np.nanmax(x_data_select), num=1000)

ax.plot(x_lin, sgm.GMS_Leslie2020(mstar=x_lin, z=0, log=True), color='xkcd:green', label='Leslie+20 GMS at z=0')
ax.plot(x_lin, p_fit(x_lin), color='xkcd:dark red', label=f'{"Linear" if fit_degree == 1 else f"{fit_degree}-order"} fit to simulations (σ={delta_sfr_sigma:.3f})', ls="--")
ax.scatter(x_data_xCG, y_data_xCG, c='xkcd:light grey', label='xCOLD GASS with gas detections')
ax.scatter(x_data_select, y_data_select,
           c=mapper_mhalo_scatter.to_rgba(c_data_select),
           label=f'Simulated galaxies ({len(x_data_select)}/{len(x_data)})',
           edgecolors='xkcd:grey',
           s=mpl.rcParams['lines.markersize'] ** 2 * 1.7,
           linewidths=mpl.rcParams["lines.linewidth"] * 0.3
           )


# colour bar
# NOTE: _z is the redshift of the LAST galaxy handled in the loop above
fig.colorbar(mappable=mapper_mhalo_scatter,
             cax=ax_cbar,
             fraction=0.8,
             label=r'$\Delta \; M_\mathrm{halo}$ from Moster+2010 SHMR' + f' at z={_z:.1f} [dex]')


ax.set_xlabel(r'log $M_\mathrm{star}$ [$M_\odot$]')
ax.set_ylabel(r'log $SFR$ [$M_\odot$ yr$^{-1}$]')
ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)
ax.legend()

In [None]:
# Export the fit figure as a PNG (300 dpi) into plot_dir.
# The file name ends in a timestamp with one-second resolution.
fig.savefig(
    plot_dir / f'Fit_and_Std_sims_vs_xCG_obs_{datetime.now():%Y.%m.%d-%H.%M.%S}.png',
    dpi=300,
)

### Short-term fluctuations analysis

### Fit final Sims data points (to get relation and scatter)

In [None]:
# --- fit configuration ---------------------------------------------------------
# Polynomial degree of the fit to the simulated galaxies (does it recover a GMS-like relation?)
fit_degree = 1
# x-range used for the fit; narrower windows tried before: [6, 10] and [6, 9.2]
fit_range_x = [-np.inf, np.inf]


# --- sanity checks -------------------------------------------------------------
# Report how many objects have a non-finite (inf or nan) x value ...
print("Number of out-of-bounds objects:", len(x_data) - np.sum(np.isfinite(x_data)))
# ... and require the SFR values to be non-finite for exactly the same objects as Mstar
assert np.all(np.isfinite(x_data) == np.isfinite(y_data))


# --- selections ----------------------------------------------------------------
# Simulated galaxies that are shown: finite x, and below fixed upper bounds in x and y
data_select = (
    np.isfinite(x_data)
    & (x_data < 15)
    & (y_data < 10)
)
x_data_select, y_data_select = x_data[data_select], y_data[data_select]
# Subset of the shown galaxies that enters the fit (can be narrower than the plot selection)
fit_select = (fit_range_x[0] < x_data_select) & (x_data_select < fit_range_x[1])


# --- colour-coding quantity ----------------------------------------------------
# Snapshot index whose sMIR scaling is used for the colours (alternative: -1)
which_snp = 0
c_data = np.array([
    halo_grid[i, which_snp].data['sMIR_scaling']
    for i in range(len(gal_grid))
])
c_data_select = c_data[data_select]


# --- colour mapping ------------------------------------------------------------
# The colour range spans the full (NaN-ignoring) range of c_data, not only the selected objects
scatter_range = [np.nanmin(c_data), np.nanmax(c_data)]
print(scatter_range)
cmap_scatter = mpl.cm.BrBG   # alternative: mpl.cm.PuOr_r
norm_scatter = mpl.colors.Normalize(*scatter_range)
mapper_scatter = mpl.cm.ScalarMappable(norm=norm_scatter, cmap=cmap_scatter)


# --- fit and scatter around it -------------------------------------------------
p_fit = np.polynomial.Polynomial.fit(
    x_data_select[fit_select],
    y_data_select[fit_select],
    deg=fit_degree,
)

# Residuals of ALL selected objects (not only the fitted ones) and their standard deviation
delta_sfr = np.array(y_data_select) - p_fit(np.array(x_data_select))
delta_sfr_sigma = np.std(delta_sfr)


# --- figure: main panel plus a slim axis for the colour bar ----------------------
fig, (ax, ax_cbar) = plt.subplots(
    nrows=1, ncols=2,
    figsize=(7.2, 5.5),
    gridspec_kw=dict(width_ratios=(18, 1.2), height_ratios=(1,), hspace=0.05),
    constrained_layout=True,
)
x_lin = np.linspace(np.nanmin(x_data_select), np.nanmax(x_data_select), 1000)

# reference relation, fit, observations and simulations (in this drawing order)
ax.plot(x_lin, sgm.GMS_Leslie2020(mstar=x_lin, z=0, log=True),
        color='xkcd:green',
        label='Leslie+20 GMS at z=0')
ax.plot(x_lin, p_fit(x_lin),
        color='xkcd:dark red',
        ls="--",
        label=f'{"Linear" if fit_degree == 1 else f"{fit_degree}-order"} fit to simulations (σ={delta_sfr_sigma:.3f})')
ax.scatter(x_data_xCG, y_data_xCG,
           c='xkcd:light grey',
           label='xCOLD GASS with gas detections')
ax.scatter(
    x_data_select, y_data_select,
    c=mapper_scatter.to_rgba(c_data_select),
    edgecolors='xkcd:grey',
    s=1.7 * mpl.rcParams['lines.markersize'] ** 2,
    linewidths=0.3 * mpl.rcParams["lines.linewidth"],
    label=f'Simulated galaxies ({len(x_data_select)}/{len(x_data)})',
)


# colour bar, drawn into the dedicated axis
fig.colorbar(
    mappable=mapper_scatter,
    cax=ax_cbar,
    fraction=0.8,
    label=f'Current sMIR scaling factor at z={_z:.1f}',
)


# axis cosmetics: labels, inward ticks on all four sides, legend
ax.set(
    xlabel=r'log $M_\mathrm{star}$ [$M_\odot$]',
    ylabel=r'log $SFR$ [$M_\odot$ yr$^{-1}$]',
)
ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)
ax.legend()

In [None]:
# Export the fit figure as a PNG (300 dpi) into plot_dir.
# The file name ends in a timestamp with one-second resolution.
fig.savefig(
    plot_dir / f'Fit_and_Std_sims_vs_xCG_obs_{datetime.now():%Y.%m.%d-%H.%M.%S}.png',
    dpi=300,
)

### Prototype: setting up a Gaussian distribution around GMS ICs

In [None]:
from numpy.random import default_rng
from scipy.stats import median_abs_deviation

# Seeded generator, so the draws below are reproducible
rng = default_rng(12345)
# number of mock objects and the redshift at which the GMS is evaluated
_n = 100
_z = 2.

# Mstar values: uniform random distribution between the two bounds
_mstar_min, _mstar_max = 6., 10.
_mstar_vals = rng.uniform(_mstar_min, _mstar_max, _n)

# SFR values: Gaussian distribution around _sfr_mean, with _sfr_width taken to be 3 sigma
_sfr_mean = 0
_sfr_width = 0.3
_sfr_sigma = _sfr_width / 3.
_sfr_vals = rng.normal(_sfr_mean, _sfr_sigma, _n)

# summary statistics of the drawn SFR values, followed by the values themselves
print('min:', _sfr_vals.min(), '  max:', _sfr_vals.max())
print('mean:', _sfr_vals.mean(), '  std:', _sfr_vals.std())
print('median:', np.median(_sfr_vals), '  MAD:', median_abs_deviation(_sfr_vals))
print(_sfr_vals)


# transformed: add the Leslie+20 GMS value at each Mstar to the Gaussian draws
_gms = sgm.GMS_Leslie2020(_mstar_vals, z=_z, log=True)
_sfr_vals_tranf = _sfr_vals + _gms

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# left panel: the Gaussian draws, with the input mean as a reference line
ax[0].axhline(
    y=_sfr_mean,
    color='xkcd:green', ls='--', zorder=-1,
    label=f'In: mean={_sfr_mean:.3f}, std={_sfr_sigma:.3f}',
)
ax[0].scatter(
    _mstar_vals, _sfr_vals,
    label=f'Out: mean={np.mean(_sfr_vals):.3f}, std={np.std(_sfr_vals):.3f}, N={_n}',
)

# right panel: the same draws after adding the GMS, with the GMS itself as a reference line
_mstar_lin = np.linspace(_mstar_min, _mstar_max, 1000)
ax[1].plot(
    _mstar_lin, sgm.GMS_Leslie2020(mstar=_mstar_lin, z=_z, log=True),
    color='xkcd:green', ls='--', zorder=-1,
    label=f'Leslie+20 GMS at z={_z}',
)
ax[1].scatter(_mstar_vals, _sfr_vals_tranf, label='Transformed to GMS')

# shared cosmetics for both panels
for _ax in ax:
    _ax.set(
        xlabel=r'log $M_\mathrm{star}$ [$M_\odot$]',
        ylabel=r'log $SFR$ [$M_\odot$ yr$^{-1}$]',
    )
    _ax.tick_params(axis='both', which='both', direction='in', bottom=True, top=True, left=True, right=True)
    _ax.legend()

# optional redshift annotation (disabled):
# ax[1].text(x=0.08, y=0.88, s=f'z = {_z}', transform=ax[1].transAxes)

In [None]:
# Export the two-panel "distribution around the GMS" figure as a PNG (300 dpi) into plot_dir.
# The file name ends in a timestamp with one-second resolution.
fig.savefig(
    plot_dir / f'Generate_Distribution_around_GMS_{datetime.now():%Y.%m.%d-%H.%M.%S}.png',
    dpi=300,
)

### **select galaxies/halos: plot quantities and differences for individual param combinations over time**

In [None]:
# Plot the time evolution of one quantity for a handful of simulated galaxies (top panel)
# and their final SFR79 value (bottom panel), then save the figure.

# define which data to plot
gal_grid = gal_grid_GMSsim

# define what to plot
x_type = 'lookbacktime'
y_type = 'SFR'
# which_objects = [0, 100, 200, 300, 400, 500, 600, 700, 800]
# which_objects = [0, 50, 100, 150, 200, 250, 300]
# which_objects = [0, 50, 100, 150, 200, 250, -1]

# Pick N_graphs indices spread evenly over the grid (first and last object included).
# If the grid holds no more than N_graphs objects, simply plot all of them.
N_gg = len(gal_grid)
N_graphs = 7
if N_gg > N_graphs:
    which_objects = [int(i/(N_graphs - 1) * (N_gg - 1)) for i in range(N_graphs)]
else:
    # must be a flat list of ints: `[range(N_gg)]` would be a one-element list holding
    # a range object, which cannot be used as an index in the loops below
    which_objects = list(range(N_gg))
print(f"plotting halos with indices {', '.join(str(i) for i in which_objects)}")

# grab plotting data: one x/y series (over all snapshots) and one legend label per object
x_data = []
y_data = []
label_l = []
for i in tqdm(which_objects):
    x_data.append([gal.data[x_type] for gal in gal_grid[i][:]])
    y_data.append([gal.data[y_type] for gal in gal_grid[i][:]])
    # y_data.append([halo.data[y_type] for halo in halo_grid[i][:]])
    # label shows Mstar and SFR of the first snapshot
    # NOTE(review): the division by 10 ** 9 presumably converts per-Gyr to per-yr -- confirm
    label_l.append(r"$\mathrm{M}_\star$" + f"= {gal_grid[i, 0].data['mstar']:.2e} \t SFR = {gal_grid[i, 0].data['SFR'] / 10 ** 9:.2e}")

# initialise plot
fig, ax = plt.subplots(nrows=2, figsize=(9, 9), constrained_layout=True)

# plot actual values
for x, y, label in zip(x_data, y_data, label_l):
    ax[0].plot(x, y, label=label)


# plot one quantity like SFR79 (last snapshot) against the object's grid index
for i, label in zip(which_objects, label_l):
    ax[1].plot(i, SFR79_grid_GMSsim[i, -1], 'o', label=label)

# additional fig and axes config
fig.suptitle(f'{y_type}: comparing {len(which_objects)} simulated galaxies', fontsize=16)

ax[0].invert_xaxis()
ax[0].set_xlabel(x_type)
ax[0].set_yscale('log')
ax[0].set_ylabel(y_type)

ax[1].set_xlabel('simulation number (arb. index)')
ax[1].set_ylabel('log SFR79')

ax[1].legend()

# save to disk
fig.savefig(plot_dir / f'comparing_{y_type}_of_{len(which_objects)}_simulated_galaxies'
                       f'_{datetime.now().strftime("%Y.%m.%d-%H.%M.%S")}.png')

### **all galaxies/halos: plot quantities and differences for individual param combinations over time**

In [None]:
# Plot the time evolution of one quantity for ALL simulated galaxies (top panel), colour-coded
# by the ratio mtot / mgas at snapshot c_snp, and their final SFR79 value against that same
# ratio (bottom panel).

# # define which data to plot
# gal_grid = gal_grid_xCGsim
# halo_grid = halo_grid_xCGsim
# env_grid = env_grid_xCGsim
# SFR79_grid = SFR79_grid_xCGsim

# # define which data to plot
# gal_grid = gal_grid_SDSSsim
# halo_grid = halo_grid_SDSSsim
# env_grid = env_grid_SDSSsim
# SFR79_grid = SFR79_grid_SDSSsim

# define which data to plot
gal_grid = gal_grid_GMSsim
halo_grid = halo_grid_GMSsim
env_grid = env_grid_GMSsim
SFR79_grid = SFR79_grid_GMSsim

# define what to plot
x_type = 'lookbacktime'
y_type = 'SFR'
# y_type = 'sMIR_scaling'
# c_type = 'mtot'
# c_type = 'mtot_over_mstar'
# NOTE: c_type is only used for labelling; the colour quantity itself is hard-coded
# as mtot / mgas in the loop below
c_type = 'mtot_over_mgas'
c_snp = 0
# c_snp = -1
c_alpha = 0.25
which_objects = range(0, len(gal_grid), 1)

# grab plotting data: per object one x/y series (over all snapshots), one single-element
# colour value and one label
x_data = []
y_data = []
c_data = []
label_l = []
for i in tqdm(which_objects):
    x_data.append([gal.data[x_type] for gal in gal_grid[i][:]])
    y_data.append([gal.data[y_type] for gal in gal_grid[i][:]])
    # y_data.append([halo.data[y_type] for halo in halo_grid[i][:]])
    if gal_grid[i][c_snp].data['mgas'] == 0:
        # no gas -> the ratio is undefined; this also covers mtot == 0 (0/0), which would
        # otherwise raise ZeroDivisionError or emit a warning
        _c_data_i = [np.nan]
    else:
        _c_data_i = [halo_grid[i][c_snp].data['mtot'] / gal_grid[i][c_snp].data['mgas']]
    # c_data.append([halo_grid[i][c_snp].data[c_type]])
    c_data.append(_c_data_i)
    # NOTE(review): the division by 10 ** 9 presumably converts per-Gyr to per-yr -- confirm
    label_l.append(r"$\mathrm{M}_\star$" + f"= {gal_grid[i, 0].data['mstar']:.2e} \t SFR = {gal_grid[i, 0].data['SFR'] / 10 ** 9:.2e}")


# normalise the colour data (logarithmically) to colour the curves according to it
c_data_range = (np.nanmin(c_data), np.nanmax(c_data))
cmap = mpl.cm.viridis_r
norm = mpl.colors.LogNorm(vmin=c_data_range[0], vmax=c_data_range[1])
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)


# initialise plot
fig, ax = plt.subplots(nrows=2, figsize=(9, 9), constrained_layout=True)

# plot actual values
for x, y, c, label in zip(x_data, y_data, c_data, label_l):
    # c is a one-element list, so to_rgba returns a (1, 4) array; overwrite its alpha channel
    c_mapped = mapper.to_rgba(c)
    c_mapped[:, -1] = c_alpha
    ax[0].plot(x, y, c=c_mapped, label=label)


# plot one quantity like SFR79 (last snapshot) against the colour quantity
for i, c, label in zip(which_objects, c_data, label_l):
    c_mapped = mapper.to_rgba(c)
    c_mapped[:, -1] = c_alpha
    # use the positionally matched colour value `c`: c_data is ordered like which_objects,
    # so indexing it with the grid index `i` is only right when which_objects is a full range
    ax[1].plot(c, SFR79_grid[i, -1], c=c_mapped, marker='o', label=label)


# additional fig and axes config
fig.suptitle(f'{y_type}: comparing {len(which_objects)} simulated galaxies', fontsize=16)


ax[0].invert_xaxis()
ax[0].set_xlabel(x_type)
ax[0].set_yscale('log')
ax[0].set_ylabel(y_type)

ax[1].set_xscale('log')
ax[1].set_xlabel(c_type)
ax[1].set_ylabel('log SFR79')
# annotate the lookback time of the snapshot the colour quantity was taken from
ax[1].text(0.95, 0.05,
           f"{c_type} at lookbacktime = {env_grid[0, c_snp].data['lookbacktime']:.3f}",
           transform=ax[1].transAxes,
           va='bottom', ha='right')


In [None]:
# Export the comparison figure as a PNG into plot_dir. The file name encodes the plotted
# quantity, the number of objects and a timestamp with one-second resolution.
fig.savefig(
    plot_dir / (
        f'comparing_{y_type}_of_{len(which_objects)}_simulated_galaxies'
        f'_{datetime.now():%Y.%m.%d-%H.%M.%S}.png'
    )
)

### Residuals between xCG SFR79 from observations and from simulations

In [None]:
# Show the xCG table for inspection (a bare last expression renders via the notebook's rich display)
xCG_df

In [None]:
# Array copies of the plotting data and of the observed / simulated colour data
x_data_a, y_data_a, c_data_obs_a, c_data_sim_a = (
    np.array(x_data),
    np.array(y_data),
    np.array(c_data_obs),
    np.array(c_data_sim),
)

# Pull three xCG columns as squeezed arrays, restricted to the rows that pass both the
# minimal selector and the gas-detection selector
xCG_gas_mass_fraction, xCG_SFRexcessGMS, xCG_median_SFR79_in_bin = (
    np.squeeze(xCG_df.loc[xCG_minimal_selector & xCG_gasdetect_selector, _column].to_numpy())
    for _column in ("gas_mass_fraction", "SFRexcessGMS", "median_SFR79_in_bin")
)


In [None]:
# Residuals between observed and simulated SFR79 values, shown against the gas mass fraction
# (top panel) and against the SFR offset from the GMS (bottom panel). Each panel is
# colour-coded by the other panel's x quantity and overlaid with a linear fit.
fig, ax = plt.subplots(2, 1, figsize=(12, 12), constrained_layout=True)

c_data_plot_a = c_data_obs_a - c_data_sim_a

# --- top panel: residual vs. gas mass fraction, coloured by the SFR offset from the GMS ---
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=np.nanmin(xCG_SFRexcessGMS), vmax=np.nanmax(xCG_SFRexcessGMS))
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
ax[0].scatter(xCG_gas_mass_fraction, c_data_plot_a, c=xCG_SFRexcessGMS, cmap=cmap, norm=norm, marker='o', s=50)
# raw string: the LaTeX backslashes are not valid Python escape sequences
ax[0].set_xlabel(r"Gas Mass Fraction $\log(M_\mathrm{H2} \; / \; M_{\star})$")
fig.colorbar(mapper, ax=ax[0], label=r"Δ SFR [dex] above/below GMS")
# linear fit (degree-1 polynomial), drawn behind the data points
exp_fit = np.polyfit(x=xCG_gas_mass_fraction, y=c_data_plot_a, deg=1)
p = np.poly1d(exp_fit)
x_lin = np.linspace(np.nanmin(xCG_gas_mass_fraction), np.nanmax(xCG_gas_mass_fraction), 1000)
ax[0].plot(x_lin, p(x_lin), zorder=-10, c='xkcd:grey', ls='--')

# --- bottom panel: residual vs. SFR offset from the GMS, coloured by the gas mass fraction ---
cmap = mpl.cm.magma
norm = mpl.colors.Normalize(vmin=np.nanmin(xCG_gas_mass_fraction), vmax=np.nanmax(xCG_gas_mass_fraction))
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
# pass the same norm as the colour bar uses, so points and colour bar are guaranteed to agree
ax[1].scatter(xCG_SFRexcessGMS, c_data_plot_a, c=xCG_gas_mass_fraction, cmap=cmap, norm=norm, marker='o', s=50)
ax[1].set_xlabel(r"Δ SFR [dex] above/below GMS")
fig.colorbar(mapper, ax=ax[1], label=r"Gas Mass Fraction $\log(M_\mathrm{H2} \; / \; M_{\star})$")
# linear fit (degree-1 polynomial), drawn behind the data points
exp_fit = np.polyfit(x=xCG_SFRexcessGMS, y=c_data_plot_a, deg=1)
p = np.poly1d(exp_fit)
x_lin = np.linspace(np.nanmin(xCG_SFRexcessGMS), np.nanmax(xCG_SFRexcessGMS), 1000)
ax[1].plot(x_lin, p(x_lin), zorder=-10, c='xkcd:grey', ls='--')

# --- shared y-axis setup ---
# (lower, upper) y-limits; None leaves that side to matplotlib
ylim_vals = (None, 2.6)

# Count the residuals cut off by the y-limits. Both panels share the same y data, so this
# is computed once instead of once per axis.
out_of_plot_down = 0 if ylim_vals[0] is None else int(np.sum(c_data_plot_a < ylim_vals[0]))
out_of_plot_up = 0 if ylim_vals[1] is None else int(np.sum(c_data_plot_a > ylim_vals[1]))
out_of_plot = out_of_plot_down + out_of_plot_up

for ax_i in ax:
    ax_i.set_ylabel(r'log(SFR79$_\mathrm{obs}$) - log(SFR79$_\mathrm{sim}$)')
    ax_i.set_ylim(*ylim_vals)
    # annotate how many points fall outside the visible y-range (above / below)
    ax_i.text(0.03, 0.94,
              (f"data points out of plot: {out_of_plot} ({out_of_plot_up}" +
               r'$\uparrow$ + ' + f"{out_of_plot_down}" + r'$\downarrow$)'),
              transform=ax_i.transAxes,
              va='top', ha='left')

In [None]:
# Export the SFR79 residuals figure (observations vs. simulations) as a PNG into plot_dir.
# The file name ends in a timestamp with one-second resolution.
fig.savefig(
    plot_dir / f'residuals_SFR79_obs_vs_sims_{datetime.now():%Y.%m.%d-%H.%M.%S}.png'
)

In [None]:
# SFR offset from the GMS against the gas mass fraction for the selected xCG galaxies,
# colour-coded by the observed SFR79 values and overlaid with a linear fit.
fig, ax = plt.subplots(figsize=(12, 9), constrained_layout=True)

# fixed colour range; values outside of it saturate (hence extend='both' on the colour bar)
sfr79_range = (-2, 2)
cmap = mpl.cm.RdBu
norm = mpl.colors.Normalize(vmin=sfr79_range[0], vmax=sfr79_range[1])
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)

ax.scatter(xCG_gas_mass_fraction, xCG_SFRexcessGMS, c=c_data_obs_a, cmap=cmap, norm=norm, marker='o', edgecolor='xkcd:grey', s=50)
fig.colorbar(mapper, ax=ax, label=r"log SFR79 (obs)", extend='both')
# ax.set_facecolor('xkcd:grey')

# linear fit (degree-1 polynomial), drawn behind the data points
exp_fit = np.polyfit(x=xCG_gas_mass_fraction, y=xCG_SFRexcessGMS, deg=1)
p = np.poly1d(exp_fit)
x_lin = np.linspace(np.nanmin(xCG_gas_mass_fraction), np.nanmax(xCG_gas_mass_fraction), 1000)
ax.plot(x_lin, p(x_lin), zorder=-10, c='xkcd:grey', ls='--')

# formatting to figure
# raw string: the LaTeX backslashes are not valid Python escape sequences
ax.set_xlabel(r"Gas Mass Fraction $\log(M_\mathrm{H2} \; / \; M_{\star})$")
ax.set_ylabel(r"Δ SFR [dex] above/below GMS")

In [None]:
# Export the gas-mass-fraction vs. Δ SFR correlation figure (colour-coded by observed SFR79)
# as a PNG into plot_dir. The file name ends in a timestamp with one-second resolution.
fig.savefig(
    plot_dir / f'correlation_gasmassfrac_to_deltaSFR_cc_SFR79obs_{datetime.now():%Y.%m.%d-%H.%M.%S}.png'
)