In [1]:
import numpy as np
import pandas
import pyperclip

## Load data files

In [2]:
# load data from Hill papers into pandas.DataFrame objects
# set index to be the (C1, C3, C6, ...) labels
df_Hill_2020 = pandas.read_fwf('data_from_papers/Hill_2020.csv').set_index('label')
df_Hill_2021 = pandas.read_fwf('data_from_papers/Hill_2021.csv').set_index('label')

In [3]:
# construct a dictionary mapping from the old lettered
# labels (A, B, C, ...) to the new Cx labels (C1, C3, C6, ...)
# uses contents of Hill_2020.csv file
label_map = {k:v for v,k in df_Hill_2020['label2'][~df_Hill_2020['label2'].isna()].iteritems()}
label_map

{'A': 'C1',
 'J': 'C2',
 'B': 'C3',
 'D': 'C4',
 'F': 'C5',
 'C': 'C6',
 'K': 'C7',
 'E': 'C8',
 'I': 'C9',
 'H': 'C10',
 'L': 'C11',
 'G': 'C13',
 'N': 'C14',
 'M': 'C17'}

In [4]:
# load data from Miller paper into pandas.DataFrame object
df_Miller_2018 = pandas.read_fwf('data_from_papers/Miller_2018.csv')
# add a new column to the Miller data with the new (C1, C3, C6, ...) labels
df_Miller_2018['label'] = df_Miller_2018['label2'].map(label_map)
# set the index of the Miller DataFrame to be the (C1, C3, C6, ...) labels
df_Miller_2018 = df_Miller_2018.set_index('label')

In [5]:
# combine all the data into a single pandas.DataFrame object
# nested column names segregate based on data source
df = pandas.concat((df_Hill_2020, df_Hill_2021, df_Miller_2018), keys=('Hill_2020', 'Hill_2021', 'Miller_2018'), axis=1)

In [6]:
# check that all of the old labels (A, B, C, ...) line up in the data from each paper
assert(all(df['Hill_2020', 'label2'].isna() | (df['Hill_2020', 'label2'] == df['Hill_2021', 'label2']) & (df['Hill_2020', 'label2'] == df['Miller_2018', 'label2'])))

# df.loc[:,[('Hill_2020', 'label2'), ('Hill_2021', 'label2'), ('Miller_2018', 'label2')]]

In [7]:
# add a column to the Hill_2020 data with the method used for computing gas estimates
# all values Hill reported were computed using the CO34 method
# method: https://stackoverflow.com/questions/67477744/pandas-numpy-where-and-numpy-nan/
df['Hill_2020', 'method'] = np.where(df['Hill_2020', 'Mgas'].notna(), 'CO43', None)
df['Hill_2020', 'method'].fillna(np.nan, inplace=True) # convert None to NaN

In [8]:
# compute percent uncertainties, add as columns to df
df['Hill_2020', 'pu_Mdyn'] = df['Hill_2020', 'u_Mdyn'] / df['Hill_2020', 'Mdyn']
df['Hill_2020', 'pu_Mgas'] = df['Hill_2020', 'u_Mgas'] / df['Hill_2020', 'Mgas']
df['Hill_2021', 'pu_M*'] = df['Hill_2021', 'u_M*'] / df['Hill_2021', 'M*']
df['Miller_2018', 'pu_Mgas'] = df['Hill_2020', 'u_Mgas'] / df['Miller_2018', 'Mgas']

In [9]:
# compute ratio of Miller's Mgas to Hill's Mgas, add as a column to df
df['derived', 'Miller/Hill'] = df['Miller_2018', 'Mgas'] / df['Hill_2020', 'Mgas']

In [10]:
# let's look at what we've got
df

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2021,...,Miller_2018,Miller_2018,Miller_2018,Miller_2018,Hill_2020,Hill_2020,Hill_2020,Hill_2021,Miller_2018,derived
Unnamed: 0_level_1,label2,Mdyn,u_Mdyn,Mgas,u_Mgas,FCii,u_FCii,FCO43,u_FCO43,label2,...,Mdyn,u_Mgas,V,method,method,pu_Mdyn,pu_Mgas,pu_M*,pu_Mgas,Miller/Hill
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
C1,A,27.0,1.1,7.5,0.7,16.86,0.2,0.98,0.03,A,...,115.0,2.1,-90.0,CO43,CO43,0.040741,0.093333,0.935135,0.058333,1.6
C2,J,6.9,0.3,2.1,0.2,8.82,0.13,0.27,0.02,J,...,23.0,0.5,-481.0,CO43,CO43,0.043478,0.095238,0.353846,0.090909,1.047619
C3,B,9.9,1.0,4.3,0.4,7.89,0.12,0.56,0.02,B,...,84.0,2.0,-124.0,CO43,CO43,0.10101,0.093023,,0.035714,2.604651
C4,D,13.8,1.7,3.0,0.3,5.9,0.15,0.38,0.02,D,...,175.0,1.5,-33.0,CO43,CO43,0.123188,0.1,,0.035714,2.8
C5,F,18.0,1.5,1.1,0.2,5.19,0.15,0.14,0.02,F,...,124.0,0.7,395.0,CO43,CO43,0.083333,0.181818,,0.058824,3.090909
C6,C,3.7,0.2,3.4,0.3,5.16,0.07,0.44,0.02,C,...,14.0,1.2,603.0,CO43,CO43,0.054054,0.088235,0.309353,0.044776,1.970588
C7,K,3.6,1.4,1.0,0.1,3.76,0.09,0.13,0.01,K,...,15.0,1.4,631.0,Cii,CO43,0.388889,0.1,0.75,0.032258,3.1
C8,E,2.1,0.2,2.4,0.3,3.68,0.12,0.31,0.02,E,...,24.0,0.9,84.0,CO43,CO43,0.095238,0.125,0.5,0.0625,2.0
C9,I,10.8,0.8,0.9,0.1,3.35,0.11,0.12,0.02,I,...,53.0,0.5,310.0,CO43,CO43,0.074074,0.111111,,0.045455,2.444444
C10,H,3.6,0.4,1.1,0.2,2.96,0.1,0.14,0.02,H,...,44.0,2.0,-719.0,Cii,CO43,0.111111,0.181818,0.777778,0.045455,4.0


## Boost Hill's gas masses to agree with Miller's gas masses

Hill's gas masses are likely underestimates since Hill chose $\alpha_{CO} = 1$ (p. 13). Instead, $\alpha_{CO}$ is probably around 2 or 3. We boost all of Hill's gas estimates by a common factor to correct for this.

To estimate the boost factor, we take the mean ratio of Miller's data to Hill's data. When computing this boost factor, we only consider galaxies for which we have good mass estimates from Hill and Miller based on $CO_{3-4}$ emission (as opposed to the less-reliable $[C\,II]$ method).

In [11]:
# find indeces in df where both Miller's and Hill's gas mass estimates were computed from CO34 emission
indices_Mill_Hill_CO = (df['Miller_2018', 'method'] == 'CO43') & (df['Hill_2020', 'method'] == 'CO43')
# find mean ratio of Miller's gas masses to Hill's, for these galaxies
Miller_Hill_ratio = df.loc[indices_Mill_Hill_CO, ('derived', 'Miller/Hill')].mean()
print('Miller_Hill_ratio:', Miller_Hill_ratio)

Miller_Hill_ratio: 2.1947764976321746


In [12]:
# add a new column to df that is Hill's gas measurements, but boosted by this factor
df['Hill_2020', 'Mgas_boosted'] = df['Hill_2020', 'Mgas'] * Miller_Hill_ratio
df

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2021,...,Miller_2018,Miller_2018,Miller_2018,Hill_2020,Hill_2020,Hill_2020,Hill_2021,Miller_2018,derived,Hill_2020
Unnamed: 0_level_1,label2,Mdyn,u_Mdyn,Mgas,u_Mgas,FCii,u_FCii,FCO43,u_FCO43,label2,...,u_Mgas,V,method,method,pu_Mdyn,pu_Mgas,pu_M*,pu_Mgas,Miller/Hill,Mgas_boosted
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
C1,A,27.0,1.1,7.5,0.7,16.86,0.2,0.98,0.03,A,...,2.1,-90.0,CO43,CO43,0.040741,0.093333,0.935135,0.058333,1.6,16.460824
C2,J,6.9,0.3,2.1,0.2,8.82,0.13,0.27,0.02,J,...,0.5,-481.0,CO43,CO43,0.043478,0.095238,0.353846,0.090909,1.047619,4.609031
C3,B,9.9,1.0,4.3,0.4,7.89,0.12,0.56,0.02,B,...,2.0,-124.0,CO43,CO43,0.10101,0.093023,,0.035714,2.604651,9.437539
C4,D,13.8,1.7,3.0,0.3,5.9,0.15,0.38,0.02,D,...,1.5,-33.0,CO43,CO43,0.123188,0.1,,0.035714,2.8,6.584329
C5,F,18.0,1.5,1.1,0.2,5.19,0.15,0.14,0.02,F,...,0.7,395.0,CO43,CO43,0.083333,0.181818,,0.058824,3.090909,2.414254
C6,C,3.7,0.2,3.4,0.3,5.16,0.07,0.44,0.02,C,...,1.2,603.0,CO43,CO43,0.054054,0.088235,0.309353,0.044776,1.970588,7.46224
C7,K,3.6,1.4,1.0,0.1,3.76,0.09,0.13,0.01,K,...,1.4,631.0,Cii,CO43,0.388889,0.1,0.75,0.032258,3.1,2.194776
C8,E,2.1,0.2,2.4,0.3,3.68,0.12,0.31,0.02,E,...,0.9,84.0,CO43,CO43,0.095238,0.125,0.5,0.0625,2.0,5.267464
C9,I,10.8,0.8,0.9,0.1,3.35,0.11,0.12,0.02,I,...,0.5,310.0,CO43,CO43,0.074074,0.111111,,0.045455,2.444444,1.975299
C10,H,3.6,0.4,1.1,0.2,2.96,0.1,0.14,0.02,H,...,2.0,-719.0,Cii,CO43,0.111111,0.181818,0.777778,0.045455,4.0,2.414254


## Estimate gas masses based on Hill's Cii when CO43 flux is too weak

Hill (2020) calculated gas masses based on the measured $CO_{3-4}$ flux FCO43, but sometimes this flux was too weak to get a good result. We can estimate what the gas mass should have been by:
1. Coming up with a scaling factor relating the $[C\,II]$ flux FCii and the $CO_{3-4}$ flux FCO43.
2. Using the scaling factor to estimate what the $CO_{3-4}$ flux should have been based on the $[C\,II]$ flux.
3. Estimating the gas mass using the scaling factor that relates gas mass to $CO_{3-4}$ flux.

Note: we want to compute what the gas estimate should be *after* we have applied the boost above.

In [13]:
# add column of ratio of CO43 to Cii
df['Hill_2020', 'CO43/Cii'] = df['Hill_2020', 'FCO43'] / df['Hill_2020', 'FCii']
# compute mean ratio for galaxies where CO43 flux was large enough to compute gas mass
ratio_CO43_Cii = df.loc[df['Hill_2020', 'method'] == 'CO43', ('Hill_2020', 'CO43/Cii')].mean()

# add column of ratio of Mgas to CO43 flux
# this is discussed in the paper, but it is more convenient to grab it from the table
df['Hill_2020', 'Mgas/CO43'] = df['Hill_2020', 'Mgas'] / df['Hill_2020', 'FCO43']
# compute mean ratio for galaxies where CO43 flux was large enough to compute gas mass
ratio_Mgas_CO43 = df.loc[df['Hill_2020', 'method'] == 'CO43', ('Hill_2020', 'Mgas/CO43')].mean()

# add column of Mgas estimates based on CO43
df['Hill_2020', 'Mgas_CO43'] = Miller_Hill_ratio * ratio_Mgas_CO43 * ratio_CO43_Cii * df['Hill_2020', 'FCii']
# observe that we also apply the boost from above, to bring in line with Miller's values


In [14]:
# let's have a look at all of the gas mass estimates we have
summary_gas = df.loc[:,[('Hill_2020', 'Mgas'), ('Hill_2020', 'Mgas_boosted'), ('Hill_2020', 'Mgas_CO43'), ('Miller_2018', 'Mgas'), ('Miller_2018', 'method')]]

In [15]:
summary_gas.to_latex('table_gas_mass/table_generated.tex', float_format='%.1f', na_rep=' ', bold_rows=True)

In [16]:
df.loc['C11',:].T

Hill_2020    label2                 L
             Mdyn                 3.7
             u_Mdyn               0.3
             Mgas                 0.3
             u_Mgas               0.1
             FCii                 2.7
             u_FCii              0.11
             FCO43               0.04
             u_FCO43             0.01
Hill_2021    label2                 L
             M*                   2.0
             u_M*                 0.7
Miller_2018  label2                 L
             Mgas                 3.3
             Mdyn                24.0
             u_Mgas               1.5
             V                 -379.0
             method               Cii
Hill_2020    method              CO43
             pu_Mdyn         0.081081
             pu_Mgas         0.333333
Hill_2021    pu_M*               0.35
Miller_2018  pu_Mgas         0.030303
derived      Miller/Hill         11.0
Hill_2020    Mgas_boosted    0.658433
             CO43/Cii        0.014815
            

In [17]:
df = df.sort_index(axis=1, level=0)

In [18]:
df

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,...,Hill_2021,Hill_2021,Miller_2018,Miller_2018,Miller_2018,Miller_2018,Miller_2018,Miller_2018,Miller_2018,derived
Unnamed: 0_level_1,CO43/Cii,FCO43,FCii,Mdyn,Mgas,Mgas/CO43,Mgas_CO43,Mgas_boosted,label2,method,...,pu_M*,u_M*,Mdyn,Mgas,V,label2,method,pu_Mgas,u_Mgas,Miller/Hill
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
C1,0.058126,0.98,16.86,27.0,7.5,7.653061,13.625205,16.460824,A,CO43,...,0.935135,17.3,115.0,12.0,-90.0,A,CO43,0.058333,2.1,1.6
C2,0.030612,0.27,8.82,6.9,2.1,7.777778,7.127776,4.609031,J,CO43,...,0.353846,2.3,23.0,2.2,-481.0,J,CO43,0.090909,0.5,1.047619
C3,0.070976,0.56,7.89,9.9,4.3,7.678571,6.376208,9.437539,B,CO43,...,,,84.0,11.2,-124.0,B,CO43,0.035714,2.0,2.604651
C4,0.064407,0.38,5.9,13.8,3.0,7.894737,4.768013,6.584329,D,CO43,...,,,175.0,8.4,-33.0,D,CO43,0.035714,1.5,2.8
C5,0.026975,0.14,5.19,18.0,1.1,7.857143,4.194236,2.414254,F,CO43,...,,,124.0,3.4,395.0,F,CO43,0.058824,0.7,3.090909
C6,0.085271,0.44,5.16,3.7,3.4,7.727273,4.169991,7.46224,C,CO43,...,0.309353,4.3,14.0,6.7,603.0,C,CO43,0.044776,1.2,1.970588
C7,0.034574,0.13,3.76,3.6,1.0,7.692308,3.038598,2.194776,K,CO43,...,0.75,1.8,15.0,3.1,631.0,K,Cii,0.032258,1.4,3.1
C8,0.084239,0.31,3.68,2.1,2.4,7.741935,2.973947,5.267464,E,CO43,...,0.5,2.5,24.0,4.8,84.0,E,CO43,0.0625,0.9,2.0
C9,0.035821,0.12,3.35,10.8,0.9,7.5,2.707262,1.975299,I,CO43,...,,,53.0,2.2,310.0,I,CO43,0.045455,0.5,2.444444
C10,0.047297,0.14,2.96,3.6,1.1,7.857143,2.392088,2.414254,H,CO43,...,0.777778,0.7,44.0,4.4,-719.0,H,Cii,0.045455,2.0,4.0


In [19]:
summary_gas

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Miller_2018,Miller_2018
Unnamed: 0_level_1,Mgas,Mgas_boosted,Mgas_CO43,Mgas,method
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
C1,7.5,16.460824,13.625205,12.0,CO43
C2,2.1,4.609031,7.127776,2.2,CO43
C3,4.3,9.437539,6.376208,11.2,CO43
C4,3.0,6.584329,4.768013,8.4,CO43
C5,1.1,2.414254,4.194236,3.4,CO43
C6,3.4,7.46224,4.169991,6.7,CO43
C7,1.0,2.194776,3.038598,3.1,Cii
C8,2.4,5.267464,2.973947,4.8,CO43
C9,0.9,1.975299,2.707262,2.2,CO43
C10,1.1,2.414254,2.392088,4.4,Cii
