In [1]:
import copy
import glob
import h5py
import itertools
import numpy as np
import os
import pandas as pd
import scipy
import scipy.interpolate
import tqdm

In [2]:
import contextlib
import io
import sys

In [3]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import palettable

In [4]:
import yt
import trident
import unyt as u

In [5]:
import kalepy as kale

In [6]:
import verdict
import trove

# Parameters

In [7]:
# Load parameters
# Links this notebook (script id 'nb.1') to the trove config file; the returned
# object `pm` is indexed by parameter name throughout the rest of the notebook.
# NOTE(review): the config path is absolute and machine-specific, so the
# notebook cannot run on another machine without editing it.
pm = trove.link_params_to_config(
    '/Users/zhafen/repos/cgm_modeling_challenge/sample2.trove',
    script_id = 'nb.1',
)

No data directory at /Users/zhafen/repos/cgm_modeling_challenge/data/trove/sample2/individual_lsfs
Creating one.


In [8]:
# Analysis parameters
# The configured seed initialises both a dedicated Generator and the legacy
# global NumPy random state.
seed = pm['seed']
rng = np.random.default_rng( seed )
np.random.seed( seed )

# Run flags
load_existing_sightlines, verbose = True, False

In [9]:
# Directory that is globbed two levels deep for the extracted sightline files.
# Relative path, so it is resolved against the kernel's working directory.
extracted_sightlines_dir = './data/Mandelker2020'

In [10]:
# Output locations for the saved rays and the generated spectra
ray_data_dir = os.path.join( pm['data_dir'], 'rays' )
spectra_data_dir = os.path.join( pm['data_dir'], 'spectra' )

# Create both directories up front; existing ones are left alone
for _output_dir in ( ray_data_dir, spectra_data_dir ):
    os.makedirs( _output_dir, exist_ok=True )

In [11]:
# Redshift used to shift rest wavelengths to the observed frame and stored on
# every ray as its 'redshift' field.
redshift = pm['redshift']

In [12]:
# Spectra parameters

# Ions handed to trident when adding ion fields and making spectra
ions = [
    'H I', 'O I',
    'C II', 'C III', 'C IV',
    'N II', 'N III',
    'Si II', 'Si III', 'Si IV',
    'N V', 'O VI', 'Mg II',
]

# Number-density field names, listed in the same order as `ions`
fields = [
    'H_p0_number_density', 'O_p0_number_density',
    'C_p1_number_density', 'C_p2_number_density', 'C_p3_number_density',
    'N_p1_number_density', 'N_p2_number_density',
    'Si_p1_number_density', 'Si_p2_number_density', 'Si_p3_number_density',
    'N_p4_number_density', 'O_p5_number_density', 'Mg_p1_number_density',
]

# Signal-to-noise ratio passed to add_gaussian_noise for every spectrum
snr = 30

In [13]:
# Extra (element, ion_state) pairs that each get their own spectrum generator,
# and the LSF FWHM applied to those generators (later treated as km/s).
bonus_ions = pm['bonus_ions']
bonus_ions_lsf_fwhm = pm['bonus_ions_lsf_fwhm']

# Setup Spectrum Generator and Line Database

In [14]:
# Objects for use
# Line database shared by every spectrum generator and line lookup below.
# NOTE(review): passing None presumably selects trident's default line list -- confirm.
ldb = trident.LineDatabase(None)

read_sets: Using set file -- 
  /Users/zhafen/repos/linetools/linetools/lists/sets/llist_v1.3.ascii
Loading abundances from Asplund2009
Abundances are relative by number on a logarithmic scale with H=12


In [15]:
# Spectrum generators for the two COS instrument presets, both backed by the
# shared line database.
sg_cos = trident.SpectrumGenerator('COS-G130M', line_database=ldb )
sg_cos_160 = trident.SpectrumGenerator('COS-G160M', line_database=ldb )

yt : [INFO     ] 2022-02-08 11:50:52,410 Setting instrument to COS-G130M


yt : [INFO     ] 2022-02-08 11:50:52,413 Setting instrument to COS-G160M


In [16]:
# Spectrum Generator for Mg II from ground

# Observed-frame wavelength of the first line the database returns for Mg II 2796
mgii_line = ldb.select_lines( 'Mg', 'II', 2796 )[0]
lambda_mg = mgii_line.wavelength * ( 1. + redshift )

# Custom generator spanning 30 on either side of that wavelength
sg_mg = trident.SpectrumGenerator(
    line_database = ldb,
    dlambda = 0.01,
    lambda_min = lambda_mg - 30.,
    lambda_max = lambda_mg + 30.,
)

# Units are attached only after the bare number was used for the limits above
lambda_mg *= u.angstrom

yt : [INFO     ] 2022-02-08 11:50:52,419 Setting instrument to Custom


In [17]:
# Per-generator filename tags and reference wavelengths, index-aligned with the
# generator list built further down (G130M, G160M, Mg II, then bonus entries).
# sg_lambdas is only read when a velocity FWHM is converted to a wavelength
# width, so the two COS entries are None.
spectrum_sg_tags = [ '_G130', '_G160', '_MgII']
sg_lambdas = [ None, None, lambda_mg ]

In [18]:
# One custom spectrum generator per bonus ion, centred on the line with the
# largest f_value.
# NOTE(review): sg_lambdas is appended to but never reset here, so re-running
# this cell on its own leaves it longer than the generator list.
sgs_bonus_ions = []
sgs_bonus_ions_tags = []
for element, ion_state in tqdm.tqdm( bonus_ions, bar_format=pm['bar_format'] ):

    # Pick the line with the largest f_value
    lines = ldb.select_lines( element, ion_state )
    i_line = np.argmax([ line.f_value for line in lines ])
    strongest_line = lines[i_line]

    # Observed-frame wavelength and a window of 40 on either side of it
    lambda_ion = strongest_line.wavelength * ( 1. + redshift )
    window = dict(
        lambda_min = lambda_ion - 40.,
        lambda_max = lambda_ion + 40.,
    )
    sg_ion = trident.SpectrumGenerator(
        dlambda = 0.01,
        line_database = ldb,
        **window
    )

    # Keep generator, filename tag and reference wavelength index-aligned
    sgs_bonus_ions.append( sg_ion )
    sgs_bonus_ions_tags.append(
        '_{}-{}-{:.1f}'.format( element, ion_state, strongest_line.wavelength )
    )
    sg_lambdas.append( lambda_ion * u.angstrom )

       0%|          | 0/0 [00:00<?, ?it/s]

       0%|          | 0/0 [00:00<?, ?it/s]




In [19]:
if pm['include_individual_HI_lines']:

    # Add Hydrogen: one dedicated generator per H I line, skipping lines whose
    # rest wavelength is below 916
    lines = ldb.select_lines( 'H', 'I' )
    for line in lines:
        if line.wavelength < 916.:
            continue

        # Observed-frame wavelength and a window of 40 on either side of it
        lambda_line = line.wavelength * ( 1. + redshift )
        window = dict(
            lambda_min = lambda_line - 40.,
            lambda_max = lambda_line + 40.,
        )
        sg_line = trident.SpectrumGenerator(
            dlambda = 0.01,
            line_database = ldb,
            **window
        )

        # Stored alongside the bonus ions so they share the same LSF handling
        sgs_bonus_ions.append( sg_line )
        sgs_bonus_ions_tags.append( '_H-I-{:.1f}'.format( line.wavelength ) )
        sg_lambdas.append( lambda_line * u.angstrom )

In [20]:
# Full, index-aligned lists of spectrum generators and their filename tags
base_sgs = [ sg_cos, sg_cos_160, sg_mg ]
sgs = base_sgs + sgs_bonus_ions

# Rebuild the tag list instead of extending it in place: `+=` appended the
# bonus tags again on every re-run of this cell, which misaligned the tags
# with `sgs`. The first len( base_sgs ) entries are the base tags.
spectrum_sg_tags = spectrum_sg_tags[:len( base_sgs )] + sgs_bonus_ions_tags

In [21]:
# Replace error functions with versions that give small errors
def _fractional_flux_error( x ):
    '''Return the configured fraction pm['flux_error'] of the input x.'''
    return x * pm['flux_error']

# Only override the generators' error functions when a fraction is configured
if pm['flux_error'] is not None:
    for sg in sgs:
        sg.error_func = _fractional_flux_error

In [22]:
# Choose lsfs
# One entry per generator, in the same order as the generator list:
#   None     -> apply_lsf() is called with no arguments
#   a number -> Gaussian LSF with that FWHM, interpreted as km/s
# These entries are ignored when pm['use_avg_lsf'] is set.
sg_lsfs = [ None, None, 7., ] + [ bonus_ions_lsf_fwhm, ] * len( sgs_bonus_ions )

In [23]:
def plot_ion( sg, element, ion_state, width=6., sightline_index=None ):
    '''Save a plot and an HDF5 copy of the spectrum around each line of one ion.

    Output goes to <pm['data_dir']>/ion_spectra/<element><ion_state>/, one
    png/h5 pair per line whose window fits inside the generator's range.

    Args:
        sg: Spectrum generator holding an already-made spectrum. Must provide
            lambda_min, lambda_max, plot_spectrum and save_spectrum.
        element (str): Element symbol passed to the line database, e.g. 'Mg'.
        ion_state (str): Ionization state passed to the line database, e.g. 'II'.
        width (float): Full width of the plotted wavelength window.
        sightline_index (int): Sightline number written into the filenames.
            When None, the notebook-global loop variable `i` is used, which
            is what the function relied on implicitly before.
    '''

    if sightline_index is None:
        # Backward-compatible fallback for callers that do not pass the index
        sightline_index = i

    lines = ldb.select_lines( element, ion_state )

    # Rest wavelengths shifted to the observed frame
    wavelengths = np.array([ _.wavelength for _ in lines ])
    adjusted_wavelengths = wavelengths * ( 1 + redshift )

    data_subdir = '{}/ion_spectra/{}{}'.format( pm['data_dir'], element, ion_state, )
    os.makedirs( data_subdir, exist_ok=True )

    half_width = width / 2.
    for lambda_a in adjusted_wavelengths:

        # Skip lines whose window is not fully covered by this generator
        lambda_low = lambda_a - half_width
        lambda_high = lambda_a + half_width
        if lambda_low < sg.lambda_min or lambda_high > sg.lambda_max:
            continue

        # The trailing '{}' is left as a placeholder for the file extension
        spectrum_fp = '{}/spectrum_{:.1f}_sl{:04d}.{}'.format(
            data_subdir, lambda_a, sightline_index, '{}'
        )
        sg.plot_spectrum(
            spectrum_fp.format( 'png' ),
            lambda_limits = [ lambda_low, lambda_high ]
        )
        sg.save_spectrum(
            spectrum_fp.format( 'h5' ),
        )

# Generate Rays and Spectra

In [24]:
# Sightline filepaths
# glob returns matches in arbitrary order, but the position in this list is
# used both to select sightlines and to name the output files, so sort it to
# make the index -> file mapping identical on every run and machine.
# NOTE(review): indices in pm['selected_sightlines'] and any outputs written
# with the previous unsorted order may refer to different files -- confirm.
sightline_fps = sorted( glob.glob( os.path.join( extracted_sightlines_dir, '*', '*' ) ) )

In [25]:
# Waaaay too much output otherwise
# Turns off yt's stream logging before the ray and spectrum loop below.
yt.utilities.logger.disable_stream_logging()

In [26]:
# For every selected sightline: read the extracted profile, save it as a yt
# light-ray dataset, then generate, convolve, add noise to and save one
# spectrum per spectrum generator.
for i, sightline_fp in enumerate( sightline_fps ):

    # Only process selected sightlines; None means no selection, i.e. all of them.
    # This single guard also covers the spectra step further down, which
    # previously repeated the membership test without the None check and
    # raised a TypeError when no selection was configured.
    selected_sightlines = pm['selected_sightlines']
    if selected_sightlines is not None and i not in selected_sightlines:
        continue

    print( '{} / {}'.format( i, len( sightline_fps ) ) )

    # Load data
    columns = [ 'x', 'density', 'temperature', 'metallicity', 'velocity_los' ]
    units = [ 'kpc', 'g/cm**3', 'K', 'Zsun', 'km/s' ]
    # Raw string: '\s' is not a valid escape in a normal string literal
    ray_df = pd.read_csv( sightline_fp, sep=r'\s+', names=columns )

    # Convert metallicity to Zsun
    ray_df['metallicity'] /= 0.02

    # Turn into a dictionary for creating a yt dataset
    ray_dict = {}
    for j, key in enumerate( columns ):

        # We add this separately
        if key == 'x':
            continue

        ray_dict[key] = yt.YTArray( ray_df[key].values, units[j] )

    # Get sightline location in space, encoded in the parent directory name
    locstring = os.path.split( os.path.split( sightline_fp )[0] )[1]
    _, x1, y1, z1, _, x2, y2, z2 = locstring.split( '_' )

    # Format as floats before ordering: the pieces are strings, and comparing
    # strings orders them lexicographically ('10' < '9', '-0.5' > '-0.3').
    start = np.array([ x1, y1, z1 ]).astype( float )
    end = np.array([ x2, y2, z2 ]).astype( float )

    # Check order: per axis, the smaller coordinate goes to `start`
    # NOTE(review): ordering each axis independently can change the ray's
    # orientation (its length is preserved) -- confirm this is intended.
    start, end = np.minimum( start, end ), np.maximum( start, end )

    # Code to physical conversion
    length_code_units = np.linalg.norm( end - start )
    length_kpc = ray_df['x'].values[-1] - ray_df['x'].values[0]
    position_code_units_to_kpc = length_kpc / length_code_units

    # Add dl, taken from the first two samples and used for every element
    dl = ray_df['x'][1] - ray_df['x'][0]
    ray_dict['dl'] = yt.YTArray(
        np.full( ray_df['x'].shape, dl ),
        'kpc',
    )

    # Add location parameters
    for j, key in enumerate( [ 'x', 'y', 'z' ]):
        ray_dict[key] = yt.YTArray(
            np.linspace( start[j], end[j], ray_df['x'].size ) * position_code_units_to_kpc,
            'kpc',
        )
        dxi_value = ray_dict[key][1] - ray_dict[key][0]
        ray_dict['d' + key] = yt.YTArray(
            np.full( ray_dict[key].shape, dxi_value ),
            'kpc',
        )

    # Add redshift parameters
    ray_dict['redshift'] = np.full( ray_dict['density'].shape, redshift )
    # Relativistic Doppler shift from the line-of-sight velocity
    z_vel = np.sqrt( ( 1 + ray_dict['velocity_los'] / u.c) / ( 1 - ray_dict['velocity_los'] / u.c) ) - 1.
    ray_dict['redshift_eff'] = ( 1. + redshift )*( 1. + z_vel ) - 1.

    # Other needed terms
    extra_attrs = {"data_type": "yt_light_ray", "dimensionality": 3}
    field_types = dict([(field, "grid") for field in ray_dict.keys()])

    # Format dataset dict
    ds = {
        "current_time": 0.,
        "current_redshift": 0.,
        "cosmological_simulation": 0.,
        "domain_left_edge": np.array([ 0., 0., 0. ]) * position_code_units_to_kpc * u.kpc,
        "domain_right_edge": np.array([ 1., 1., 1. ]) * position_code_units_to_kpc * u.kpc,
        "periodicity": [True]*3,
    }

    # Save as a dataset
    ray_filename = os.path.join( ray_data_dir, 'ray_{:03d}.h5'.format( i ) )
    yt.save_as_dataset(
        ds,
        ray_filename,
        ray_dict,
        field_types = field_types,
        extra_attrs = extra_attrs,
    )

    # Reload
    ray = yt.load( ray_filename )

    # temporary fix for yt-4.0 ytdata selection issue
    ray.domain_left_edge = ray.domain_left_edge.to('code_length')
    ray.domain_right_edge = ray.domain_right_edge.to('code_length')

    trident.add_ion_fields(ray, ions=ions, line_database=ldb)

    # Loop through spectra generators
    for m, sg in enumerate( tqdm.tqdm( sgs, bar_format=pm['bar_format'] ) ):

        # Work on a copy so the shared generator is not rebound per sightline
        sg = copy.copy( sg )

        # Without the redirection the logging may still happen despite asking it not to (nicely).... Not sure why
        f = io.StringIO()
        with contextlib.redirect_stderr( f ):
            # Make the spectrum
            sg.make_spectrum( ray, lines=ions, store_observables=True, min_tau=1e-4 )

        # Plot individual ions for inspection (before LSF and noise are applied)
        for ion in ions:
            plot_ion( sg, *ion.split() )

        # LSF
        if pm['use_avg_lsf']:
            sg.apply_lsf( filename='avg_COS.txt' )
        else:
            lsf = sg_lsfs[m]
            if lsf is None:
                sg.apply_lsf()
            else:
                # Convert the velocity FWHM to a Gaussian sigma in wavelength
                # bins: FWHM = 2 sqrt( 2 ln 2 ) sigma
                fwhm_v = lsf * u.km / u.s
                fwhm_lambda = sg_lambdas[m] * fwhm_v / u.c
                width_lambda = fwhm_lambda / ( 2 * np.sqrt( 2. * np.log( 2 ) ) )
                bin_width = width_lambda / sg.dlambda
                sg.apply_lsf( function='gaussian', width=bin_width )

        # Noise
        sg.add_gaussian_noise( snr )

        # Save
        sg.save_spectrum(
            '{}/spectrum{}_sl{:04d}.h5'.format( spectra_data_dir, spectrum_sg_tags[m], i )
        )
        sg.plot_spectrum(
            '{}/spectrum{}_sl{:04d}.png'.format( spectra_data_dir, spectrum_sg_tags[m], i )
        )

3 / 20


       0%|          | 0/3 [00:00<?, ?it/s]

      33%|███▎      | 1/3 [00:21<00:42, 21.32s/it]

      67%|██████▋   | 2/3 [00:41<00:20, 20.55s/it]

     100%|██████████| 3/3 [01:01<00:00, 20.28s/it]

     100%|██████████| 3/3 [01:01<00:00, 20.43s/it]




In [27]:
# Plot ray properties
# One figure per selected sightline, with a stacked panel for every quantity
# (including a derived column density) as a function of position along the ray.
for i, sightline_fp in enumerate( tqdm.tqdm( sightline_fps, bar_format=pm['bar_format'] ) ):

    # Only plot selected sightlines; None means no selection, i.e. all of them
    selected_sightlines = pm['selected_sightlines']
    if selected_sightlines is not None and i not in selected_sightlines:
        continue

    # Load data
    columns = [ 'x', 'density', 'temperature', 'metallicity', 'velocity_los' ]
    units = [ 'kpc', 'g/cm**3', 'K', 'Zsun', 'km/s' ]
    # Raw string: '\s' is not a valid escape in a normal string literal
    ray_df = pd.read_csv( sightline_fp, sep=r'\s+', names=columns )

    # Convert metallicity to Zsun
    ray_df['metallicity'] /= 0.02

    # Create a column density column: density times the step between the
    # first two samples
    ray_df['column_density'] = (
        yt.YTArray( ray_df['density'].values, 'g/cm**3' ) *
        ( ray_df['x'][1] - ray_df['x'][0] ) * u.kpc
    ).to( 'mp/cm**2.' )
    columns.append( 'column_density' )
    units.append( 'mp/cm**2' )

    # Plot everything except the position column itself
    y_values = columns[1:]
    n_rows = len( y_values )
    fig = plt.figure( figsize=(12, 6*n_rows), facecolor='w' )

    # Panels are added to the figure explicitly. The previous plt.gca() call
    # created an extra full-figure Axes that recent matplotlib versions leave
    # drawn behind the panels.
    gs = gridspec.GridSpec(n_rows,1)

    for j, key in enumerate( y_values ):
        ax = fig.add_subplot( gs[j,:] )

        ax.plot(
            ray_df['x'],
            ray_df[key],
            color = 'k',
            linewidth = 3,
        )

        # Velocities can be negative, so that panel stays linear
        if key != 'velocity_los':
            ax.set_yscale( 'log' )

        # units is aligned with columns, hence the +1 offset past 'x'
        ax.set_ylabel( '{} [{}]'.format( key, units[j+1]), fontsize=22 )
        ax.set_xlabel( 'x [kpc]', fontsize=22 )

        ax.set_xlim( ray_df['x'].min(), ray_df['x'].max() )

        ax.tick_params(
            labelsize = 14,
            size = 10,
            width = 2,
        )
        ax.tick_params(
            which = 'minor',
            size = 5,
            width = 1.5,
        )

    savefile = os.path.join( pm['data_dir'], 'rays', 'ray_{:03d}.png'.format( i ) )
    fig.savefig(
        savefile,
        bbox_inches = 'tight',
    )

    # Close this specific figure so figures do not accumulate across the loop
    plt.close( fig )

       0%|          | 0/20 [00:00<?, ?it/s]

      20%|██        | 4/20 [00:01<00:04,  3.45it/s]

     100%|██████████| 20/20 [00:01<00:00, 17.25it/s]


