In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from astropy.table import Column
from matplotlib import pyplot as plt

from get_data import (get_merged_table, get_bohlin78, get_shull2021)
from plot_fuse_results import plot_results_scatter, match_comments, plot_results_fit

# Switch pyplot's interactive mode off for the figures created below.
plt.ioff()

import matplotlib as mpl

# Render figures at 120 dpi.
mpl.rcParams['figure.dpi'] = 120
# Select the interactive widget backend (ipympl).
%matplotlib widget

# Get the data

### data for Gordon09 extinction curve sightlines

In [None]:
# data = get_merged_table()

# # add comments for certain stars here
# data.add_column(Column(['no'] * len(data), dtype='<U16', name='comment'))
# def set_comment(name, s):
#     data['comment'][data['Name'] == name] = s

# for name in ["HD200775", "HD164906", "HD045314", "HD206773"]:
#     set_comment(name, "lo_h_av")

# set_comment("HD096675", "hi_h_av")

# # write out in fits format so we can look at it in topcat
# data.write('merged_table.fits', overwrite=True)
# data

def set_comment(name, s):
    """Write the label ``s`` into the comment column for one star.

    Operates on the notebook-level ``data`` table: every row whose "Name"
    entry equals ``name`` gets its "comment" entry replaced by ``s``.
    """
    is_target = data["Name"] == name
    data["comment"][is_target] = s


# main data and comments to help marking some points
data = get_merged_table()
comp = get_merged_table(True)

# every row starts out with the placeholder comment "none"
default_comments = Column(["none"] * len(data), dtype="<U16", name="comment")
data.add_column(default_comments)

# the one sightline tagged "hi_h_av"
set_comment("HD096675", "hi_h_av")
# the 4 low outliers
for star in ("HD045314", "HD164906", "HD200775", "HD206773"):
    set_comment(star, "lo_h_av")



### data for Gordon09 comparison star sightlines

In [None]:
# Table returned by get_merged_table with comp=True; passed as `data_comp` to the plots below.
data_comp = get_merged_table(comp=True)

### data for Bohlin 78 Copernicus sightlines

In [None]:
# Table returned by get_bohlin78; passed as `data_bohlin` to the plotting calls below.
bohlin = get_bohlin78()

### Data for Shull 2021 sightlines

In [None]:
# Table returned by get_shull2021; passed as `data_shull` to the E(B-V) plots below.
# The last expression displays the table as the cell output.
data_shull = get_shull2021()
data_shull

# 1. Basic gas-to-dust slopes
## Gas-to-A(V)
- 4 strong outliers in the NH-AV relation. Including them breaks the fit. The effect on NH-E(B-V) is not that strong.
- Need to investigate these points, as they might also have a strong effect on the fit result of other plots.
- gas-to-A(V) narrower than gas-to-E(B-V)

In [None]:
from paper_scatter import finalize_double_grid

OUTPUT_TYPE = "pdf"
# switch to enable marking of low NH/AV points
MARK4 = True
MARK_STRING = ["lo_h_av"] if MARK4 else None

# 3x3 grid; panels in the same row share y, panels in the same column share x
fig, axs = plt.subplots(nrows=3, ncols=3, sharex="col", sharey="row")

# Lookup tables from quantity name to grid position, so that columns and
# rows can easily be swapped around.
middle = "A2900"
col = dict(zip(("AV", middle, "A1000"), range(3)))
row = dict(zip(("nhtot", "nhi", "nh2"), range(3)))

def choose_ax(x, y):
    """Return the grid panel for x-quantity ``x`` and y-quantity ``y``.

    ``x`` is looked up in ``col`` and ``y`` in ``row``; the resulting pair
    indexes the ``axs`` array.
    """
    row_index = row[y]
    col_index = col[x]
    return axs[row_index, col_index]

def _scatter_panel(xparam, yparam, **scatter_kwargs):
    """Draw the ``yparam``-vs-``xparam`` scatter in its grid panel.

    The panel is picked with ``choose_ax``; ``scatter_kwargs`` are forwarded
    to ``plot_results_scatter`` together with the main table ``data`` and the
    Bohlin table.  Returns ``(ax, xs, ys, covs)`` so that a fit can be drawn
    on the same panel afterwards.
    """
    panel = choose_ax(xparam, yparam)
    xs, ys, covs = plot_results_scatter(
        panel, data, xparam, yparam, data_bohlin=bohlin, **scatter_kwargs
    )
    return panel, xs, ys, covs


# --- A(V) column ---
# Unlike every other panel, this one is drawn without mark_comments.
ax, xs, ys, covs = _scatter_panel("AV", "nhtot", report_rho=False)
# Row indices of the hand-tagged sightlines; handed to the fits as `outliers`.
out = np.where(match_comments(data, ["lo_h_av", "hi_h_av"]))[0]
r = plot_results_fit(
    xs, ys, covs, ax, report_rho=True, outliers=out, auto_outliers=True
)
for yparam in ("nhi", "nh2"):
    ax, xs, ys, covs = _scatter_panel("AV", yparam, mark_comments=MARK_STRING)

# --- middle column ---
ax, xs, ys, covs = _scatter_panel(
    middle, "nhtot", mark_comments=MARK_STRING, report_rho=False
)
r = plot_results_fit(
    xs, ys, covs, ax, outliers=out, auto_outliers=True, report_rho=True
)
for yparam in ("nhi", "nh2"):
    ax, xs, ys, covs = _scatter_panel(middle, yparam, mark_comments=MARK_STRING)

# --- A1000 column ---
for yparam in ("nhtot", "nhi"):
    ax, xs, ys, covs = _scatter_panel("A1000", yparam, mark_comments=MARK_STRING)
ax, xs, ys, covs = _scatter_panel(
    "A1000", "nh2", mark_comments=MARK_STRING, report_rho=False
)
r = plot_results_fit(
    xs,
    ys,
    covs,
    ax,
    auto_outliers=True,
    fit_includes_outliers=True,
    report_rho=True,
)

# Hide the y-axis offset text on the first-column panels of the 2nd and 3rd row.
for ax in axs[1:, 0]:
    ax.yaxis.offsetText.set_visible(False)

axs[0][0].legend(bbox_to_anchor=(1.5, 1), loc="lower center", ncol=4)

fig.tight_layout()
# NOTE: the output name is hard-coded; update it by hand if `middle` is changed.
finalize_double_grid(fig, axs, "column_vs_column_a2900")


In [None]:
# Stand-alone nhtot-vs-AV scatter of the main sample on a fresh figure.
plt.figure()
single_ax = plt.gca()
xs, ys, covs = plot_results_scatter(single_ax, data, "AV", "nhtot", report_rho=True)
plt.show()

In [None]:
# NOTE(review): plot_results2 is never imported in this notebook, so it has to
# be brought into scope before this cell can run — TODO confirm which module
# provides it.
# denhtot vs AV, drawn twice: the second call additionally passes
# ignore_comments for the two hand-assigned tags.
fig1 = plot_results2(
    data,
    "AV",
    "denhtot",
    pxrange=[0.0, 3.5],
    pyrange=[0.0, 0.8e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
)
fig2 = plot_results2(
    data,
    "AV",
    "denhtot",
    pxrange=[0.0, 3.5],
    pyrange=[0.0, 0.8e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    ignore_comments=["lo_h_av", "hi_h_av"],
)

In [None]:
# nh2 vs nhtot for the main sample, default options
_ = plot_results2(data, "nhtot", "nh2")

In [None]:
# denh2 vs denhtot for the main sample, default options
_ = plot_results2(data, "denhtot", "denh2")

The number densities look related, but the relation might not be as significant as it appears, because of the common distance factor, which probably has a rather high uncertainty.

## Gas-to-E(B-V)


In [None]:
# nhtot vs EBV with all comparison samples, drawn twice: the second call
# additionally passes ignore_comments / mark_comments for the tagged sightlines.
fig1 = plot_results2(
    data,
    "EBV",
    "nhtot",
    pyrange=[0.0, 0.8e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
)
fig2 = plot_results2(
    data,
    "EBV",
    "nhtot",
    pyrange=[0.0, 0.8e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

In [None]:
# nhtot vs the "d" column of the main sample
_ = plot_results2(
    data,
    "d",
    "nhtot",
    pyrange=[0.0, 0.8e22],
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

# 2. Gas-to-dust vs other things

## VS dust column and gas column

Before making any big conclusions here, the correlation between NH / AV and AV needs to be implemented properly. Preliminarily, we have:

### Using A(V)
The bad points mentioned above seem to pull down NH/AV - AV pretty badly. Not including them makes the downward slope insignificant. 

With them included:

NH/AV goes down with AV, but up with NH! Despite the fact that AV and NH have a well-defined slope in the plots above!

In [None]:
# NH_AV vs AV, drawn twice: first with the "lo_h_av" sightlines marked,
# then with them passed to ignore_comments as well.
# Both results are bound to fig1, so the first handle is overwritten.
fig1 = plot_results2(
    data,
    "AV",
    "NH_AV",
    pyrange=[0.0, 0.5e22],
    pxrange=[0, 4],
    data_comp=data_comp,
    data_bohlin=bohlin,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)
fig1 = plot_results2(
    data,
    "AV",
    "NH_AV",
    pyrange=[0.0, 0.5e22],
    pxrange=[0, 4],
    data_comp=data_comp,
    data_bohlin=bohlin,
    ignore_comments=["hi_h_av", "lo_h_av"],
)
#fig2 = plot_results2(data, 'nhtot', 'NH_AV', pyrange=[0.0,0.5e22], pxrange=[0, 1.2e22],
 #           data_comp=data_comp,data_bohlin=bohlin, ignore_comments=['hi_h_av'], mark_comments=['lo_h_av'])

### Using E(B-V)

In this case, the slope of NH / EBV is unclear, but NH / EBV does seem to go up with EBV!

In [None]:
# NH_EBV against EBV (fig1) and against nhtot (fig2), same samples and tags.
fig1 = plot_results2(
    data,
    "EBV",
    "NH_EBV",
    pyrange=[0.0, 1.5e22],
    pxrange=[-0.1, 1],
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)
fig2 = plot_results2(
    data,
    "nhtot",
    "NH_EBV",
    pyrange=[0.0, 1.5e22],
    pxrange=[0, 1.2e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

## VS average grain size (RV)

In [None]:
# NH_AV vs RV, main sample plus comparison stars
_ = plot_results2(
    data,
    "RV",
    "NH_AV",
    pxrange=[2.5, 6.0],
    pyrange=[0.0, 0.5e22],
    data_comp=data_comp,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

In [None]:
# Display the raw values of the "1_RV" column.
data["1_RV"].data

In [None]:
# NH_AV vs 1_RV, main sample plus comparison stars
_ = plot_results2(
    data,
    "1_RV",
    "NH_AV",
    pyrange=[0.0, 0.5e22],
    data_comp=data_comp,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

## VS density (NH)

In [None]:
# NH_AV against denhtot, then against nhi, with identical options.
_ = plot_results2(
    data,
    "denhtot",
    "NH_AV",
    pyrange=[0.0, 0.5e22],
    data_comp=data_comp,
    ignore_comments=["hi_h_av", "lo_h_av"],
    mark_comments=["lo_h_av"],
)
_ = plot_results2(
    data,
    "nhi",
    "NH_AV",
    pyrange=[0.0, 0.5e22],
    data_comp=data_comp,
    ignore_comments=["hi_h_av", "lo_h_av"],
    mark_comments=["lo_h_av"],
)

## VS molecular fraction (fH2)
### Using A(V)

In [None]:
# NH_AV vs fh2, with the comparison-star and Bohlin tables
_ = plot_results2(
    data,
    "fh2",
    "NH_AV",
    pxrange=[0.0, 0.7],
    pyrange=[0.0, 1e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    ignore_comments=["hi_h_av", "lo_h_av"],
    mark_comments=["lo_h_av"],
)

### Using E(B-V)

In [None]:
# NH_EBV vs fh2, with the comparison-star, Bohlin and Shull tables
_ = plot_results2(
    data,
    "fh2",
    "NH_EBV",
    pxrange=[0.0, 0.7],
    pyrange=[0.0, 1.5e22],
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
    ignore_comments=["hi_h_av"],
    mark_comments=["lo_h_av"],
)

# 3. Molecular fraction vs others 

E(B-V) seems a better predictor of fh2 than A(V)

## VS E(B-V) dust column

In [None]:
# fh2 vs EBV, with the comparison-star, Bohlin and Shull tables
_ = plot_results2(
    data,
    "EBV",
    "fh2",
    data_comp=data_comp,
    data_bohlin=bohlin,
    data_shull=data_shull,
    mark_comments=["lo_h_av"],
)

## VS A(V) dust column

In [None]:
# fh2 vs AV, with the comparison-star and Bohlin tables
_ = plot_results2(
    data,
    "AV",
    "fh2",
    data_comp=data_comp,
    data_bohlin=bohlin,
    mark_comments=["hi_h_av", "lo_h_av"],
)

## VS average grain size (RV)

In [None]:
# fh2 vs RV, main sample plus comparison stars
_ = plot_results2(
    data,
    "RV",
    "fh2",
    pxrange=[2.0, 6.0],
    pyrange=[0, 1],
    data_comp=data_comp,
    ignore_comments=["lo_h_av"],
    mark_comments=["hi_h_av"],
)

In [None]:
# fh2 vs 1_RV, main sample plus comparison stars
_ = plot_results2(
    data,
    "1_RV",
    "fh2",
    pxrange=[0.1, 0.5],
    pyrange=[0, 1],
    data_comp=data_comp,
    ignore_comments=["lo_h_av"],
    mark_comments=["hi_h_av", "lo_h_av"],
)

# 4. Basic extinction relations

In [None]:
# RV vs EBV with the "lo_h_av" sightlines marked
_ = plot_results2(data, "EBV", "RV", mark_comments=["lo_h_av"])

In [None]:
# RV vs AV with the "lo_h_av" sightlines marked
_ = plot_results2(data, "AV", "RV", mark_comments=["lo_h_av"])

# Other ideas

longitude versus rv and distance versus rv (and versus fh2)

In [None]:
# fh2 vs denhtot with the "lo_h_av" sightlines marked
_ = plot_results2(data, "denhtot", "fh2", mark_comments=["lo_h_av"])

In [None]:
# 1_RV vs denhtot, main sample plus comparison stars
_ = plot_results2(
    data,
    "denhtot",
    "1_RV",
    mark_comments=["lo_h_av"],
    data_comp=data_comp,
)

COS FUV needs 2x10-13 ergs/blah blah

E140H for CO (maybe E140M), S/N 15...

E(B-V)/distance versus fH2.  Should also do A(V)/d.  Maybe n(H) as well.