In [1]:
import pandas as pd
import re
import numpy as np

# Define a Function for Reading in the .DAT Files #

In [2]:
def read_snana_dat_table(path):
    """Read the observation table of an SNANA .DAT file into a DataFrame.

    Column names come from the first line that (after stripping whitespace)
    starts with "VARLIST:". Every later line that starts with "OBS:" becomes
    one row. Reading stops at the first line starting with "END_PHOTOMETRY",
    or at the end of the file if no such line exists.

    Parameters
    ----------
    path : str or path-like
        Location of the .DAT file to read.

    Returns
    -------
    pandas.DataFrame
        One row per "OBS:" line, one column per VARLIST name. Columns whose
        values all parse as numbers are converted with ``pd.to_numeric``;
        the others stay as strings and a message is printed for each.

    Raises
    ------
    ValueError
        If the file contains no "VARLIST:" line, so the column names are
        unknown.
    """
    varlist_tag = "VARLIST:"
    obs_tag = "OBS:"

    varnames = None
    split_rows = []
    with open(path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()

            # Everything before the VARLIST header is ignored.
            if varnames is None:
                if line.startswith(varlist_tag):
                    # Drop only the leading tag; the rest is the
                    # whitespace-separated list of column names.
                    varnames = line[len(varlist_tag):].split()
                continue

            # End of the photometry table.
            if line.startswith("END_PHOTOMETRY"):
                break

            if line.startswith(obs_tag):
                # Drop the leading "OBS:" tag and split the values.
                split_rows.append(line[len(obs_tag):].split())

    if varnames is None:
        # Without this check the failure would surface later as an
        # unrelated-looking error about the missing header position.
        raise ValueError(
            f"No 'VARLIST:' line found in {path!r}; cannot determine column names."
        )

    df = pd.DataFrame(split_rows, columns=varnames)

    # The file is read as text, so every value starts out as a string.
    # Convert each column to a numeric dtype when all of its values allow it.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except ValueError:
            print(f"Column '{col}' could not be converted to numeric, keeping as string.")

    return df


# Read in the NOSCATTER DATA #

In [3]:
# Parse the photometry table of the first NOSCATTER light curve (SN000001).
noscatter_dat = read_snana_dat_table(
    '/Users/pittsburghgraduatestudent/repos/pippin_learn/MC_BAYSN_NOSCATTER/MC_BAYSN_NOSCATTER_SN000001.DAT'
)
# Plain-text preview of the first rows, followed by the rich display of the full table.
print(noscatter_dat.head())
noscatter_dat

Column 'BAND' could not be converted to numeric, keeping as string.
Column 'FIELD' could not be converted to numeric, keeping as string.
          MJD    BAND FIELD   FLUXCAL  FLUXCALERR  PHOTFLAG  GAIN    ZPT  \
0  61794.0825  LSST-r   DDF  132721.0      5.2012      4096   1.0  36.73   
1  61795.0967  LSST-g   DDF  168515.0      2.7386      6144   1.0  38.38   
2  61795.1033  LSST-i   DDF  109874.0      1.5482      4096   1.0  39.16   
3  61795.1130  LSST-r   DDF  150376.0      1.7502      4096   1.0  39.23   
4  61800.1048  LSST-r   DDF  199163.0      6.4314      4096   1.0  36.71   

    PSF  SKY_SIG  SIM_MAGOBS  
0  1.60    243.8     14.6835  
1  1.59    332.6     14.4213  
2  1.38   1156.0     14.8982  
3  1.49    786.2     14.5610  
4  2.02    246.5     14.2556  


Unnamed: 0,MJD,BAND,FIELD,FLUXCAL,FLUXCALERR,PHOTFLAG,GAIN,ZPT,PSF,SKY_SIG,SIM_MAGOBS
0,61794.0825,LSST-r,DDF,132721.0,5.2012,4096,1.0,36.73,1.6,243.8,14.6835
1,61795.0967,LSST-g,DDF,168515.0,2.7386,6144,1.0,38.38,1.59,332.6,14.4213
2,61795.1033,LSST-i,DDF,109874.0,1.5482,4096,1.0,39.16,1.38,1156.0,14.8982
3,61795.113,LSST-r,DDF,150376.0,1.7502,4096,1.0,39.23,1.49,786.2,14.561
4,61800.1048,LSST-r,DDF,199163.0,6.4314,4096,1.0,36.71,2.02,246.5,14.2556
5,61800.1287,LSST-g,DDF,219159.0,7.1556,4096,1.0,36.58,2.03,148.5,14.1452
6,61802.0967,LSST-g,DDF,218760.0,3.1493,4096,1.0,38.36,1.89,334.2,14.1539
7,61802.1033,LSST-i,DDF,111339.0,1.5759,4096,1.0,39.14,1.73,1166.0,14.8767
8,61802.113,LSST-r,DDF,199301.0,2.0444,4096,1.0,39.2,2.13,790.1,14.246
9,61808.0995,LSST-r,DDF,162949.0,3.9417,4096,1.0,37.56,1.52,674.6,14.4763


# Read in the PLUSSCATTER DATA #

In [4]:
# Parse the photometry table of the first PLUSSCATTER light curve (SN000001).
plusscatter_dat = read_snana_dat_table(
    '/Users/pittsburghgraduatestudent/repos/pippin_learn/MC_BAYSN_PLUSSCATTER/MC_BAYSN_PLUSSCATTER_SN000001.DAT'
)
# Plain-text preview of the first rows, followed by the rich display of the full table.
print(plusscatter_dat.head())
plusscatter_dat

Column 'BAND' could not be converted to numeric, keeping as string.
Column 'FIELD' could not be converted to numeric, keeping as string.
          MJD    BAND FIELD   FLUXCAL  FLUXCALERR  PHOTFLAG  GAIN    ZPT  \
0  61794.0825  LSST-r   DDF  147745.0      5.4869      4096   1.0  36.73   
1  61795.0967  LSST-g   DDF  186588.0      2.8815      6144   1.0  38.38   
2  61795.1033  LSST-i   DDF  115119.0      1.5844      4096   1.0  39.16   
3  61795.1130  LSST-r   DDF  163372.0      1.8241      4096   1.0  39.23   
4  61800.1048  LSST-r   DDF  204407.0      6.5153      4096   1.0  36.71   

    PSF  SKY_SIG  SIM_MAGOBS  
0  1.60    243.8     14.5788  
1  1.59    332.6     14.3153  
2  1.38   1156.0     14.8429  
3  1.49    786.2     14.4724  
4  2.02    246.5     14.2208  


Unnamed: 0,MJD,BAND,FIELD,FLUXCAL,FLUXCALERR,PHOTFLAG,GAIN,ZPT,PSF,SKY_SIG,SIM_MAGOBS
0,61794.0825,LSST-r,DDF,147745.0,5.4869,4096,1.0,36.73,1.6,243.8,14.5788
1,61795.0967,LSST-g,DDF,186588.0,2.8815,6144,1.0,38.38,1.59,332.6,14.3153
2,61795.1033,LSST-i,DDF,115119.0,1.5844,4096,1.0,39.16,1.38,1156.0,14.8429
3,61795.113,LSST-r,DDF,163372.0,1.8241,4096,1.0,39.23,1.49,786.2,14.4724
4,61800.1048,LSST-r,DDF,204407.0,6.5153,4096,1.0,36.71,2.02,246.5,14.2208
5,61800.1287,LSST-g,DDF,232146.0,7.3643,4096,1.0,36.58,2.03,148.5,14.0964
6,61802.0967,LSST-g,DDF,223239.0,3.1813,4096,1.0,38.36,1.89,334.2,14.1218
7,61802.1033,LSST-i,DDF,120822.0,1.6409,4096,1.0,39.14,1.73,1166.0,14.8054
8,61802.113,LSST-r,DDF,204698.0,2.0718,4096,1.0,39.2,2.13,790.1,14.2166
9,61808.0995,LSST-r,DDF,169415.0,4.0186,4096,1.0,37.56,1.52,674.6,14.423


# Isolate Only the g Band from Both the No-Scatter and Plus-Scatter Data #

In [5]:
# Keep only the rows whose BAND value is exactly 'LSST-g'.
# The original row labels are preserved (the index is not reset).
plsusscatter_g = plusscatter_dat.loc[plusscatter_dat['BAND'].eq('LSST-g')]
print(plsusscatter_g)

noscatter_g = noscatter_dat.loc[noscatter_dat['BAND'].eq('LSST-g')]
print(noscatter_g)

           MJD    BAND FIELD   FLUXCAL  FLUXCALERR  PHOTFLAG  GAIN    ZPT  \
1   61795.0967  LSST-g   DDF  186588.0      2.8815      6144   1.0  38.38   
5   61800.1287  LSST-g   DDF  232146.0      7.3643      4096   1.0  36.58   
6   61802.0967  LSST-g   DDF  223239.0      3.1813      4096   1.0  38.36   
11  61813.0993  LSST-g   DDF  116657.0      2.1083      4096   1.0  38.57   
15  61817.0707  LSST-g   DDF   87102.8      2.0080      4096   1.0  38.34   
19  61822.1866  LSST-g   DDF   58621.5      4.6023      4096   1.0  36.12   
21  61824.0354  LSST-g   DDF   51425.5      1.5143      4096   1.0  38.38   
24  61827.0407  LSST-g   DDF   43186.0      3.1369      4096   1.0  36.61   
26  61832.0284  LSST-g   DDF   33616.5      1.2084      4096   1.0  38.42   

     PSF  SKY_SIG  SIM_MAGOBS  
1   1.59    332.6     14.3153  
5   2.03    148.5     14.0964  
6   1.89    334.2     14.1218  
11  1.64   1387.0     14.8267  
15  2.14    336.4     15.1493  
19  3.04    139.1     15.5747  
21  1

### **Standard Deviation Formula**

$\huge \sigma = \sqrt{\frac{\sum_{i=1}^{N} (x_i - \mu)^2}{N}}$

Where:

- **$\sigma$** = population standard deviation  
- **$N$** = size of the population  
- **$x_i$** = each value in the population  
- **$\mu$** = population mean

### **RMS Scatter of Fluxes**


$\huge \sigma_{\text{scatter}} = \sqrt{\frac{1}{N}\sum_{i=1}^{N}(F_{+,i} - F_{-,i})^2}$  

Where: 

- **$\sigma_{\text{scatter}}$** = RMS deviation between scatter and no-scatter fluxes  
- **$N$** = number of observations (data points)  
- **$F_{+,i}$** = flux with scatter for the $i^{\text{th}}$ observation  
- **$F_{-,i}$** = flux without scatter for the $i^{\text{th}}$ observation  

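**Conceptually:** this measures how far the fluxes from the scatter simulation deviate from the no-scatter (baseline) fluxes. Unlike the standard deviation above, each difference is taken between a pair of matching observations rather than about a mean.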

In [6]:
# Root-mean-square of the g-band FLUXCAL difference (plus-scatter minus no-scatter).
# The two Series are paired by their index labels, not by position.
sigma_scatter_g = np.sqrt(
    plsusscatter_g['FLUXCAL'].sub(noscatter_g['FLUXCAL']).pow(2).mean()
)
print(f"Standard Deviation of Scatter g-band: {sigma_scatter_g}")

Standard Deviation of Scatter g-band: 9618.830917817173


# For Loop that Automates the Above For Each Band g, r, i #

In [10]:
bands = ["LSST-g", "LSST-r", "LSST-i"]

for band in bands:

    # Select this band's observations from each table. The index is reset so
    # the subtraction below pairs rows by their position within the band.
    plus_band = plusscatter_dat[plusscatter_dat['BAND'] == band].reset_index(drop=True)
    no_band = noscatter_dat[noscatter_dat['BAND'] == band].reset_index(drop=True)

    # Subtracting Series of unequal length yields NaN at the unmatched
    # positions, and the mean then skips those NaNs without warning, so the
    # RMS would silently cover only part of the data. Fail loudly instead.
    if len(plus_band) != len(no_band):
        raise ValueError(
            f"Cannot pair observations for {band}: "
            f"{len(plus_band)} plus-scatter rows vs {len(no_band)} no-scatter rows."
        )

    # Compute RMS scatter
    sigma_scatter = np.sqrt(np.mean((plus_band['FLUXCAL'] - no_band['FLUXCAL'])**2))

    print(f"RMS Scatter for {band}: {sigma_scatter:.4f}")

RMS Scatter for LSST-g: 9618.8309
RMS Scatter for LSST-r: 7889.7502
RMS Scatter for LSST-i: 8666.9915


# CHECK THE AV AND THETA Values #

We start with `read_dump_file`, a slightly modified version of `read_snana_dat_table`: the parsing is similar, but this time there is no `END_PHOTOMETRY` delimiter that terminates the data, so rows are read until the end of the file.

In [None]:

def read_dump_file(path):
    """Read the table stored in a DUMP file into a DataFrame.

    Column names come from the first line that starts with "VARNAMES:".
    Every later line that starts with "SN:" becomes one row; all other lines
    are ignored. Rows are read until the end of the file.

    Parameters
    ----------
    path : str or path-like
        Location of the DUMP file to read.

    Returns
    -------
    pandas.DataFrame
        One row per "SN:" line, one column per VARNAMES entry. Columns whose
        values all parse as numbers are converted with ``pd.to_numeric``;
        the others are left as strings.

    Raises
    ------
    ValueError
        If the file contains no "VARNAMES:" line, so the column names are
        unknown.
    """
    names_tag = "VARNAMES:"
    row_tag = "SN:"

    varnames = None
    data_lines = []
    with open(path, 'r') as f:
        for line in f:
            # Everything before the VARNAMES header is ignored.
            if varnames is None:
                if line.startswith(names_tag):
                    varnames = line[len(names_tag):].split()
                continue

            if line.startswith(row_tag):
                # Strip only the leading tag. Using str.replace here would
                # also delete any "SN:" that appears inside a field value.
                data_lines.append(line[len(row_tag):].split())

    if varnames is None:
        raise ValueError(
            f"No 'VARNAMES:' line found in {path!r}; cannot determine column names."
        )

    # Load into DataFrame
    df = pd.DataFrame(data_lines, columns=varnames)

    # Values are read as text; convert each column to numeric where possible.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except ValueError:
            # Deliberate best-effort: a column with non-numeric entries is
            # kept as strings without reporting it.
            pass

    return df

In [19]:
# ---- Read the DUMP table of each simulation (no-scatter and plus-scatter) ----
noscatter_df = read_dump_file(
    "/Users/pittsburghgraduatestudent/repos/pippin_learn/MC_BAYSN_NOSCATTER/MC_BAYSN_NOSCATTER.DUMP"
)
plusscatter_df = read_dump_file(
    "/Users/pittsburghgraduatestudent/repos/pippin_learn/MC_BAYSN_PLUSSCATTER/MC_BAYSN_PLUSSCATTER.DUMP"
)
# Display the plus-scatter table.
plusscatter_df

Unnamed: 0,CID,GENTYPE,SNTYPE,NON1A_INDEX,GENZ,LIBID,RA,DEC,MWEBV,MU,...,RV,THETA,DELTAM,PEAKMJD,SNRMAX,PEAKMAG_u,PEAKMAG_g,PEAKMAG_r,PEAKMAG_i,PEAKMAG_z
0,1,1,1,0,0.01,273,60.245327,-50.091457,0.006813,33.1753,...,2.61,0.554415,-9,61800.83,98802.9,-9,14.099,14.2128,14.765,-9
1,2,1,1,0,0.01,747,64.476082,-49.020351,0.015744,33.1752,...,2.61,-0.567146,-9,61816.105,106208.0,-9,14.0818,14.0742,14.7428,-9
2,3,1,1,0,0.01,907,63.167053,-49.799702,0.012889,33.1753,...,2.61,-1.02228,-9,61005.574,93976.7,-9,14.1967,14.3052,15.0011,-9
3,4,1,1,0,0.01,935,150.292969,1.716146,0.016677,33.1725,...,2.61,-1.70118,-9,61883.101,175257.0,-9,13.9598,14.053,14.8159,-9
4,5,1,1,0,0.01,936,53.876953,-27.11186,0.009305,33.1759,...,2.61,-0.678682,-9,61096.072,115757.0,-9,14.0387,14.0719,14.7896,-9
5,6,1,1,0,0.01,1143,52.294922,-28.29155,0.006838,33.1759,...,2.61,-0.261059,-9,61750.187,164658.0,-9,13.8833,14.0088,14.6915,-9
6,7,1,1,0,0.01,1157,8.814433,-44.498932,0.008977,33.1766,...,2.61,1.58577,-9,61704.436,131983.0,-9,14.3086,14.3643,14.9436,-9
7,8,1,1,0,0.01,1311,148.974609,0.596842,0.023197,33.1726,...,2.61,-0.679735,-9,61762.015,108136.0,-9,13.9527,14.022,14.649,-9
8,11,1,1,0,0.01,1529,63.75,-48.532253,0.010303,33.1753,...,2.61,0.458364,-9,61363.857,98268.7,-9,14.2029,14.283,14.8488,-9
9,12,1,1,0,0.01,1627,36.298828,-3.882372,0.025242,33.1768,...,2.61,0.201035,-9,61390.223,121624.0,-9,14.5168,14.5275,14.9899,-9


In [23]:
# ---- Join the two DUMP tables on CID and flag matching AV / THETA ----
# Columns present in both tables get the suffix "_no" (no-scatter) or
# "_plus" (plus-scatter). Each *_match column is True where the two values
# are equal and False otherwise.
merged_df = pd.merge(
    noscatter_df,
    plusscatter_df,
    on="CID",
    suffixes=("_no", "_plus"),
).assign(
    AV_match=lambda m: m["AV_no"].eq(m["AV_plus"]),
    THETA_match=lambda m: m["THETA_no"].eq(m["THETA_plus"]),
)

# ---- Summary ----
# Count the rows where the two values differ (each True counts as 1).
n_mismatch_av = merged_df["AV_no"].ne(merged_df["AV_plus"]).sum()
n_mismatch_theta = merged_df["THETA_no"].ne(merged_df["THETA_plus"]).sum()

print(f"AV mismatches: {n_mismatch_av}")
print(f"THETA mismatches: {n_mismatch_theta}")

merged_df

AV mismatches: 0
THETA mismatches: 0


Unnamed: 0,CID,GENTYPE_no,SNTYPE_no,NON1A_INDEX_no,GENZ_no,LIBID_no,RA_no,DEC_no,MWEBV_no,MU_no,...,DELTAM_plus,PEAKMJD_plus,SNRMAX_plus,PEAKMAG_u_plus,PEAKMAG_g_plus,PEAKMAG_r_plus,PEAKMAG_i_plus,PEAKMAG_z_plus,AV_match,THETA_match
0,1,1,1,0,0.01,273,60.245327,-50.091457,0.006813,33.1753,...,-9,61800.83,98802.9,-9,14.099,14.2128,14.765,-9,True,True
1,2,1,1,0,0.01,747,64.476082,-49.020351,0.015744,33.1752,...,-9,61816.105,106208.0,-9,14.0818,14.0742,14.7428,-9,True,True
2,3,1,1,0,0.01,907,63.167053,-49.799702,0.012889,33.1753,...,-9,61005.574,93976.7,-9,14.1967,14.3052,15.0011,-9,True,True
3,4,1,1,0,0.01,935,150.292969,1.716146,0.016677,33.1725,...,-9,61883.101,175257.0,-9,13.9598,14.053,14.8159,-9,True,True
4,5,1,1,0,0.01,936,53.876953,-27.11186,0.009305,33.1759,...,-9,61096.072,115757.0,-9,14.0387,14.0719,14.7896,-9,True,True
5,6,1,1,0,0.01,1143,52.294922,-28.29155,0.006838,33.1759,...,-9,61750.187,164658.0,-9,13.8833,14.0088,14.6915,-9,True,True
6,7,1,1,0,0.01,1157,8.814433,-44.498932,0.008977,33.1766,...,-9,61704.436,131983.0,-9,14.3086,14.3643,14.9436,-9,True,True
7,8,1,1,0,0.01,1311,148.974609,0.596842,0.023197,33.1726,...,-9,61762.015,108136.0,-9,13.9527,14.022,14.649,-9,True,True
8,11,1,1,0,0.01,1529,63.75,-48.532253,0.010303,33.1753,...,-9,61363.857,98268.7,-9,14.2029,14.283,14.8488,-9,True,True
9,12,1,1,0,0.01,1627,36.298828,-3.882372,0.025242,33.1768,...,-9,61390.223,121624.0,-9,14.5168,14.5275,14.9899,-9,True,True
