In [1]:
import numpy as np
import pandas
import pyperclip

What this code does:

1. Load data files
2. Compute gas masses of galaxies
3. Compute stellar masses of galaxies
4. Compute halo virulent masses of galaxies
5. Save masses to a CSV file

# 1. Load data files

In [2]:
# load data from Hill papers into pandas.DataFrame objects
# set index to be the (C1, C3, C6, ...) labels
df_Hill_2020 = pandas.read_fwf('data_from_papers/Hill_2020.csv').set_index('label')
df_Hill_2021 = pandas.read_fwf('data_from_papers/Hill_2021.csv').set_index('label')

In [3]:
# construct a dictionary mapping from the old lettered
# labels (A, B, C, ...) to the new Cx labels (C1, C3, C6, ...)
# uses contents of Hill_2020.csv file
label_map = {k:v for v,k in df_Hill_2020['label2'][~df_Hill_2020['label2'].isna()].iteritems()}
label_map

{'A': 'C1',
 'J': 'C2',
 'B': 'C3',
 'D': 'C4',
 'F': 'C5',
 'C': 'C6',
 'K': 'C7',
 'E': 'C8',
 'I': 'C9',
 'H': 'C10',
 'L': 'C11',
 'G': 'C13',
 'N': 'C14',
 'M': 'C17'}

In [4]:
# load data from Miller paper into pandas.DataFrame object
df_Miller_2018 = pandas.read_fwf('data_from_papers/Miller_2018.csv')
# add a new column to the Miller data with the new (C1, C3, C6, ...) labels
df_Miller_2018['label'] = df_Miller_2018['label2'].map(label_map)
# set the index of the Miller DataFrame to be the (C1, C3, C6, ...) labels
df_Miller_2018 = df_Miller_2018.set_index('label')

In [5]:
# combine all the data into a single pandas.DataFrame object
# nested column names segregate based on data source
df = pandas.concat((df_Hill_2020, df_Hill_2021, df_Miller_2018), keys=('Hill_2020', 'Hill_2021', 'Miller_2018'), axis=1)

In [6]:
# check that all of the old labels (A, B, C, ...) line up in the data from each paper
assert(all(df['Hill_2020', 'label2'].isna() | (df['Hill_2020', 'label2'] == df['Hill_2021', 'label2']) & (df['Hill_2020', 'label2'] == df['Miller_2018', 'label2'])))

# df.loc[:,[('Hill_2020', 'label2'), ('Hill_2021', 'label2'), ('Miller_2018', 'label2')]]

In [7]:
# add a column to the Hill_2020 data with the method used for computing gas estimates
# all values Hill reported were computed using the CO34 method
# method: https://stackoverflow.com/questions/67477744/pandas-numpy-where-and-numpy-nan/
df['Hill_2020', 'method'] = np.where(df['Hill_2020', 'Mgas'].notna(), 'CO43', None)
df['Hill_2020', 'method'].fillna(np.nan, inplace=True) # convert None to NaN

In [8]:
# compute percent uncertainties, add as columns to df
df['Hill_2020', 'pu_Mdyn'] = df['Hill_2020', 'u_Mdyn'] / df['Hill_2020', 'Mdyn']
df['Hill_2020', 'pu_Mgas'] = df['Hill_2020', 'u_Mgas'] / df['Hill_2020', 'Mgas']
df['Hill_2021', 'pu_M*'] = df['Hill_2021', 'u_M*'] / df['Hill_2021', 'M*']
df['Miller_2018', 'pu_Mgas'] = df['Hill_2020', 'u_Mgas'] / df['Miller_2018', 'Mgas']

In [9]:
# compute ratio of Miller's Mgas to Hill's Mgas, add as a column to df
df['derived', 'Miller/Hill'] = df['Miller_2018', 'Mgas'] / df['Hill_2020', 'Mgas']

In [10]:
# let's look at what we've got
df

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,...,Miller_2018,Miller_2018,Miller_2018,Miller_2018,Hill_2020,Hill_2020,Hill_2020,Hill_2021,Miller_2018,derived
Unnamed: 0_level_1,label2,Mdyn,u_Mdyn,Mgas,u_Mgas,FCii,u_FCii,FCO43,u_FCO43,SFR,...,Mdyn,u_Mgas,V,method,method,pu_Mdyn,pu_Mgas,pu_M*,pu_Mgas,Miller/Hill
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
C1,A,27.0,1.1,7.5,0.7,16.86,0.2,0.98,0.03,980.0,...,115.0,2.1,-90.0,CO43,CO43,0.040741,0.093333,0.935135,0.058333,1.6
C2,J,6.9,0.3,2.1,0.2,8.82,0.13,0.27,0.02,200.0,...,23.0,0.5,-481.0,CO43,CO43,0.043478,0.095238,0.353846,0.090909,1.047619
C3,B,9.9,1.0,4.3,0.4,7.89,0.12,0.56,0.02,840.0,...,84.0,2.0,-124.0,CO43,CO43,0.10101,0.093023,,0.035714,2.604651
C4,D,13.8,1.7,3.0,0.3,5.9,0.15,0.38,0.02,540.0,...,175.0,1.5,-33.0,CO43,CO43,0.123188,0.1,,0.035714,2.8
C5,F,18.0,1.5,1.1,0.2,5.19,0.15,0.14,0.02,400.0,...,124.0,0.7,395.0,CO43,CO43,0.083333,0.181818,,0.058824,3.090909
C6,C,3.7,0.2,3.4,0.3,5.16,0.07,0.44,0.02,590.0,...,14.0,1.2,603.0,CO43,CO43,0.054054,0.088235,0.309353,0.044776,1.970588
C7,K,3.6,1.4,1.0,0.1,3.76,0.09,0.13,0.01,78.0,...,15.0,1.4,631.0,Cii,CO43,0.388889,0.1,0.75,0.032258,3.1
C8,E,2.1,0.2,2.4,0.3,3.68,0.12,0.31,0.02,460.0,...,24.0,0.9,84.0,CO43,CO43,0.095238,0.125,0.5,0.0625,2.0
C9,I,10.8,0.8,0.9,0.1,3.35,0.11,0.12,0.02,170.0,...,53.0,0.5,310.0,CO43,CO43,0.074074,0.111111,,0.045455,2.444444
C10,H,3.6,0.4,1.1,0.2,2.96,0.1,0.14,0.02,180.0,...,44.0,2.0,-719.0,Cii,CO43,0.111111,0.181818,0.777778,0.045455,4.0


# 2. Gas masses

## Boost Hill's gas masses to agree with Miller's gas masses

Hill's gas masses are likely underestimates since Hill chose $\alpha_{CO} = 1$ (p. 13). Instead, $\alpha_{CO}$ is probably around 2 or 3. We boost all of Hill's gas estimates by a common factor to correct for this.

To estimate the boost factor, we take the mean ratio of Miller's data to Hill's data. When computing this boost factor, we only consider galaxies for which we have good mass estimates from Hill and Miller based on $CO_{3-4}$ emission (as opposed to the less-reliable $[C\,II]$ method).

In [11]:
# find indeces in df where both Miller's and Hill's gas mass estimates were computed from CO34 emission
indices_Mill_Hill_CO = (df['Miller_2018', 'method'] == 'CO43') & (df['Hill_2020', 'method'] == 'CO43')
# find mean ratio of Miller's gas masses to Hill's, for these galaxies
Miller_Hill_ratio = df.loc[indices_Mill_Hill_CO, ('derived', 'Miller/Hill')].mean()
print('Miller_Hill_ratio:', Miller_Hill_ratio)

Miller_Hill_ratio: 2.1947764976321746


In [12]:
# add a new column to df that is Hill's gas measurements, but boosted by this factor
df['Hill_2020', 'Mgas_boosted'] = df['Hill_2020', 'Mgas'] * Miller_Hill_ratio
df

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,Hill_2020,...,Miller_2018,Miller_2018,Miller_2018,Hill_2020,Hill_2020,Hill_2020,Hill_2021,Miller_2018,derived,Hill_2020
Unnamed: 0_level_1,label2,Mdyn,u_Mdyn,Mgas,u_Mgas,FCii,u_FCii,FCO43,u_FCO43,SFR,...,u_Mgas,V,method,method,pu_Mdyn,pu_Mgas,pu_M*,pu_Mgas,Miller/Hill,Mgas_boosted
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
C1,A,27.0,1.1,7.5,0.7,16.86,0.2,0.98,0.03,980.0,...,2.1,-90.0,CO43,CO43,0.040741,0.093333,0.935135,0.058333,1.6,16.460824
C2,J,6.9,0.3,2.1,0.2,8.82,0.13,0.27,0.02,200.0,...,0.5,-481.0,CO43,CO43,0.043478,0.095238,0.353846,0.090909,1.047619,4.609031
C3,B,9.9,1.0,4.3,0.4,7.89,0.12,0.56,0.02,840.0,...,2.0,-124.0,CO43,CO43,0.10101,0.093023,,0.035714,2.604651,9.437539
C4,D,13.8,1.7,3.0,0.3,5.9,0.15,0.38,0.02,540.0,...,1.5,-33.0,CO43,CO43,0.123188,0.1,,0.035714,2.8,6.584329
C5,F,18.0,1.5,1.1,0.2,5.19,0.15,0.14,0.02,400.0,...,0.7,395.0,CO43,CO43,0.083333,0.181818,,0.058824,3.090909,2.414254
C6,C,3.7,0.2,3.4,0.3,5.16,0.07,0.44,0.02,590.0,...,1.2,603.0,CO43,CO43,0.054054,0.088235,0.309353,0.044776,1.970588,7.46224
C7,K,3.6,1.4,1.0,0.1,3.76,0.09,0.13,0.01,78.0,...,1.4,631.0,Cii,CO43,0.388889,0.1,0.75,0.032258,3.1,2.194776
C8,E,2.1,0.2,2.4,0.3,3.68,0.12,0.31,0.02,460.0,...,0.9,84.0,CO43,CO43,0.095238,0.125,0.5,0.0625,2.0,5.267464
C9,I,10.8,0.8,0.9,0.1,3.35,0.11,0.12,0.02,170.0,...,0.5,310.0,CO43,CO43,0.074074,0.111111,,0.045455,2.444444,1.975299
C10,H,3.6,0.4,1.1,0.2,2.96,0.1,0.14,0.02,180.0,...,2.0,-719.0,Cii,CO43,0.111111,0.181818,0.777778,0.045455,4.0,2.414254


## Estimate gas masses based on Hill's Cii when CO43 flux is too weak

Hill (2020) calculated gas masses based on the measured $CO_{3-4}$ flux FCO43, but sometimes this flux was too weak to get a good result. We can estimate what the gas mass should have been by:
1. Coming up with a scaling factor relating the $[C\,II]$ flux FCii and the $CO_{3-4}$ flux FCO43.
2. Using the scaling factor to estimate what the $CO_{3-4}$ flux should have been based on the $[C\,II]$ flux.
3. Estimating the gas mass using the scaling factor that relates gas mass to $CO_{3-4}$ flux.

Note: we want to compute what the gas estimate should be *after* we have applied the boost above.

In [13]:
# find the indices where Hill had sufficient CO43 flux to estimate gas mass
indices_Hill_CO43 = df['Hill_2020', 'method'] == 'CO43'

# add column of ratio of CO43 flux to Cii flux
# leave values as NaN if CO43 flux was too small to estimate gas mass
df['Hill_2020', 'CO43/Cii'] = np.where(indices_Hill_CO43, 
                                       df['Hill_2020', 'FCO43'] / df['Hill_2020', 'FCii'],
                                       np.nan)
# compute mean ratio for galaxies where CO43 flux was large enough to compute gas mass
ratio_CO43_Cii = df['Hill_2020', 'CO43/Cii'].mean()

# add column of ratio of Mgas to CO43 flux
# this is discussed in the paper, but it is more convenient to grab it from the table
# leave values as NaN if CO43 flux was too small to estimate gas mass
df['Hill_2020', 'Mgas/CO43'] = np.where(indices_Hill_CO43, 
                                       df['Hill_2020', 'Mgas'] / df['Hill_2020', 'FCO43'],
                                       np.nan)
# compute mean ratio for galaxies where CO43 flux was large enough to compute gas mass
ratio_Mgas_CO43 = df['Hill_2020', 'Mgas/CO43'].mean()

# add column of Mgas estimates based on Cii
df['Hill_2020', 'Mgas_Cii'] = Miller_Hill_ratio * ratio_Mgas_CO43 * ratio_CO43_Cii * df['Hill_2020', 'FCii']
# observe that we also apply the boost from above, to bring in line with Miller's values


In [14]:
df['Hill_2020', 'Mgas/CO43']

label
C1     7.653061
C2     7.777778
C3     7.678571
C4     7.894737
C5     7.857143
C6     7.727273
C7     7.692308
C8     7.741935
C9     7.500000
C10    7.857143
C11    7.500000
C12         NaN
C13    8.000000
C14    6.666667
C15         NaN
C16         NaN
C17         NaN
C18         NaN
C19         NaN
C20         NaN
C21         NaN
C22         NaN
C23         NaN
Name: (Hill_2020, Mgas/CO43), dtype: float64

In [15]:
6.4 / np.nan

nan

## Choose "final" gas mass

For the simulation gas masses, we will use the boosted Hill estimates as these are considered more reliable than Miller's. We will use CO43 where available and Cii otherwise.

In [16]:
# use CO43 estimates where Hill was able to use this method, use Cii estimates otherwise
df['final', 'Mgas'] = np.where(
    df['Hill_2020', 'method'] == 'CO43', # condition: method was CO43
    df['Hill_2020', 'Mgas_boosted'], # result when Mgas calculated from CO43
    df['Hill_2020', 'Mgas_Cii']) # result when Mgas calculated from Cii
df['final', 'Mgas_method'] = np.where(
    df['Hill_2020', 'method'] == 'CO43', # condition: method was CO43
    'CO43', # result when Mgas calculated from CO43
    'Cii') # result when Mgas calculated from Cii
df['final', 'Mgas_source'] = 'Hill_2020'

# check that we have Mgas results for all galaxies
assert(all(df['final', 'Mgas'].notna()))

In [17]:
df['final']

Unnamed: 0_level_0,Mgas,Mgas_method,Mgas_source
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,16.460824,CO43,Hill_2020
C2,4.609031,CO43,Hill_2020
C3,9.437539,CO43,Hill_2020
C4,6.584329,CO43,Hill_2020
C5,2.414254,CO43,Hill_2020
C6,7.46224,CO43,Hill_2020
C7,2.194776,CO43,Hill_2020
C8,5.267464,CO43,Hill_2020
C9,1.975299,CO43,Hill_2020
C10,2.414254,CO43,Hill_2020


## Save gas masses to LaTeX table

In [18]:
# let's sort the DataFrame by the paper name
df = df.sort_index(axis=1, level=0)

In [19]:
# let's have a look at all of the gas mass estimates we have
summary_gas = df.loc[:,[('Hill_2020', 'Mgas'), ('Hill_2020', 'Mgas_boosted'), ('Hill_2020', 'Mgas_Cii'), ('Miller_2018', 'Mgas'), ('Miller_2018', 'method')]]
summary_gas

Unnamed: 0_level_0,Hill_2020,Hill_2020,Hill_2020,Miller_2018,Miller_2018
Unnamed: 0_level_1,Mgas,Mgas_boosted,Mgas_Cii,Mgas,method
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
C1,7.5,16.460824,13.625205,12.0,CO43
C2,2.1,4.609031,7.127776,2.2,CO43
C3,4.3,9.437539,6.376208,11.2,CO43
C4,3.0,6.584329,4.768013,8.4,CO43
C5,1.1,2.414254,4.194236,3.4,CO43
C6,3.4,7.46224,4.169991,6.7,CO43
C7,1.0,2.194776,3.038598,3.1,Cii
C8,2.4,5.267464,2.973947,4.8,CO43
C9,0.9,1.975299,2.707262,2.2,CO43
C10,1.1,2.414254,2.392088,4.4,Cii


In [20]:
summary_gas.to_latex('table_gas_mass/table_generated.tex', float_format='%.1f', na_rep=' ', bold_rows=True)

# 3. Stellar masses

Where Hill (2021) has good estimates of stellar mass, we use these, and otherwise we use Doug's conversion factor to compute stellar masses from gas masses.

In [21]:
# scale factor: ratio of stellar mass to gas mass
# assumes a gas mass fraction of 0.7
# derived in Rennehan 2020
ratio_stellar_gas = 0.428

In [22]:
# the maximum fractional uncertainty we allow in Hill's stellar masses
# stellar masses with more than this uncertainty will be rejected;
# we'll use Doug's conversion factor for those galaxies
max_stellar_pu = 0.6

In [23]:
# use Hill's estimates where Hill had little uncertainty, use scale factor otherwise
df['final', 'M*'] = np.where(
    df['Hill_2021', 'pu_M*'] <= max_stellar_pu, # condition: fractional uncertainty small
    df['Hill_2021', 'M*'], # small uncertainty: use Hill 2021
    ratio_stellar_gas * df['final', 'Mgas']) # large uncertainty: use scale factor
df['final', 'M*_method'] = np.where(
    df['Hill_2021', 'pu_M*'] <= max_stellar_pu, # condition: fractional uncertainty small
    'SEDfit', # small uncertainty: use Hill 2021
    'scale') # large uncertainty: use scale factor
df['final', 'M*_source'] = np.where(
    df['Hill_2021', 'pu_M*'] <= max_stellar_pu, # condition: fractional uncertainty small
    'Hill_2021', # small uncertainty: use Hill 2021
    df['final', 'Mgas_source']) # large uncertainty: use scale factor

# check that we have M* results for all galaxies
assert(all(df['final', 'M*'].notna()))

# 4. Halo masses

We follow Rennehan 2018 in assuming Mvir = 100 M*, where Mvir is the virulent mass of the dark matter halo.

In [24]:
ratio_halo_stellar = 100.
df['final', 'Mvir'] = ratio_halo_stellar * df['final', 'M*']

In [25]:
df['final']

Unnamed: 0_level_0,Mgas,Mgas_method,Mgas_source,M*,M*_method,M*_source,Mvir
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C1,16.460824,CO43,Hill_2020,7.045233,scale,Hill_2020,704.523256
C2,4.609031,CO43,Hill_2020,6.5,SEDfit,Hill_2021,650.0
C3,9.437539,CO43,Hill_2020,4.039267,scale,Hill_2020,403.926667
C4,6.584329,CO43,Hill_2020,2.818093,scale,Hill_2020,281.809302
C5,2.414254,CO43,Hill_2020,1.033301,scale,Hill_2020,103.330078
C6,7.46224,CO43,Hill_2020,13.9,SEDfit,Hill_2021,1390.0
C7,2.194776,CO43,Hill_2020,0.939364,scale,Hill_2020,93.936434
C8,5.267464,CO43,Hill_2020,5.0,SEDfit,Hill_2021,500.0
C9,1.975299,CO43,Hill_2020,0.845428,scale,Hill_2020,84.542791
C10,2.414254,CO43,Hill_2020,1.033301,scale,Hill_2020,103.330078


# 5. Save to CSV

In [26]:
df['final'].to_csv('galaxy_masses.csv')