In [1]:
# Accumulators for the column names found in the spectra files:
#   allcols   -> each distinct column name once
#   allcolsls -> every column name occurrence, duplicates kept
allcols, allcolsls = set(), []

In [2]:
import numpy as np
import pandas as pd
import pylab as plt
from tqdm.auto import tqdm
from astropy.io import ascii
import warnings
import os

In [3]:
import sys

# Add the parent directory to the module search path before importing the
# helper module used throughout this notebook.
# NOTE(review): presumably sidhelpers.py lives one directory up, which is why
# the append has to run before the import — confirm.
sys.path.append("../")
import sidhelpers

## 1. See how many spectra (%) can be opened by Astropy

In [4]:
# Load the combined spectra metadata table that the rest of the notebook
# iterates over.
metadata_csv = "../1. download ALL wise data/wiserep_spectra_combined.csv"
spectra_info = pd.read_csv(metadata_csv)

# Optional subsetting for quick experiments (kept disabled):
#   keep P60 only  -> spectra_info = spectra_info[spectra_info["Telescope"] == "P60"]
#   first 500 rows -> spectra_info = spectra_info.iloc[:500]

In [5]:
# Number of spectra per object type, most frequent first.
spectra_info.loc[:, "Obj. Type"].value_counts()

Obj. Type
SN Ia         26168
SN II          6925
SN IIn         2328
SN Ic          1854
SN IIP         1835
              ...  
TDE-H-He          2
NA/Unknown        2
Light-Echo        1
SN Ien            1
Blazar            1
Name: count, Length: 65, dtype: int64

In [6]:
# Array of the "Ascii file" entries, in the same order as the metadata table.
filenames = spectra_info.loc[:, "Ascii file"].values

In [7]:
# df = ascii.read(f"../1. download ALL wise data/wiserep_data/spectra/{fn}").to_pandas()

In [8]:
# spectra_info["Spec. units"].value_counts().index[0]

In [9]:
# Validate every spectrum listed in `spectra_info`: read the file, run the
# sidhelpers checks on it, and record the errors and warnings raised per file.
#
# zip() over the needed columns is used as the efficient way to iterate rows:
# https://stackoverflow.com/questions/7837722/what-is-the-most-efficient-way-to-loop-through-dataframes-with-pandas

# Directory holding the spectra files named in the "Ascii file" column.
SPECTRA_DIR = "../1. download ALL wise data/wiserep_data/spectra"

# Every accumulator is reset here so that re-running this cell starts from a
# clean slate (otherwise the column-name collections would keep growing and
# `allcolsls` would contain duplicated entries from the previous run).
allcols = set()  # distinct column names seen in any file
allcolsls = []  # every column name occurrence, duplicates kept
flux_counter = 0  # errors whose message contains "Error 11."
readerror_counter = 0  # ValueErrors whose message contains "Error 2."
errs = []  # [file name, exception] for every failed file
captured_warnings = []  # one dict per warning emitted while processing

# NOTE: `flux_ucoeff` is unpacked but not used by the checks below.
for fn, wl_unit, spec_unit, flux_ucoeff, lambda_min, lambda_max, del_lambda in tqdm(
    zip(
        spectra_info["Ascii file"],
        spectra_info["WL Units"],
        spectra_info["Spec. units"],
        spectra_info["Flux Unit Coefficient"],
        spectra_info["Lambda-min"],
        spectra_info["Lambda-max"],
        spectra_info["Del-Lambda"],
    ),
    total=len(spectra_info),
):
    # Record (instead of printing) every warning raised while handling this file.
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always")

        try:
            # --- File Reading ---
            df = sidhelpers.read_spectra(f"{SPECTRA_DIR}/{fn}")
            allcols.update(df.columns)
            # extend() appends in place; `list + list` would copy the whole
            # accumulated list on every iteration.
            allcolsls.extend(df.columns)

            # --- Validation ---
            # Kept inside the `with` block because the check can also emit
            # warnings (e.g. unit conversion, per the original notes).
            sidhelpers.check_spectrafile(
                df, wl_unit, spec_unit, lambda_min, lambda_max, del_lambda
            )

        except Exception as e:
            message = str(e)
            if "Error 11." in message:  # flux unit errors (11.1 or 11.2)
                flux_counter += 1
            elif isinstance(e, ValueError) and "Error 2." in message:  # read errors (2.1 or 2.2)
                readerror_counter += 1
            # Every error, counted above or not, is logged for later inspection.
            errs.append([fn, e])

    # Deliberately no `continue` in the except branch: warnings emitted before
    # a file failed are logged too, instead of being silently discarded.
    for w in warning_list:
        captured_warnings.append({
            'spectra_fn': fn,
            'warning_category': w.category.__name__,
            'warning_message': str(w.message),
            'source_file': os.path.basename(w.filename),  # file name only, no directory
            'source_lineno': w.lineno
        })


  0%|          | 0/54005 [00:00<?, ?it/s]

In [10]:
# Turn the collected [file name, exception] pairs into a DataFrame and save it.
# The output path lives in a single variable so the log message always names
# the file that was actually written (previously the message said "18april"
# while the file was saved as "19april").
errdf_path = "errdf_19april.csv"

errdf = pd.DataFrame(errs, columns=["Ascii file", "error"])
if not errdf.empty:
    errdf = errdf.set_index("Ascii file")
    errdf.to_csv(errdf_path)
    print(f"Saved {len(errdf)} errors to {errdf_path}")
else:
    print("No errors encountered.")

Saved 8943 errors to errdf_18april.csv


In [11]:
# Turn the captured warning records into a DataFrame and save it.
# The output path lives in a single variable so the log message always names
# the file that was actually written (previously the message said "18april"
# while the file was saved as "19april").
warnings_path = "warnings_log_19april.csv"

warnings_df = pd.DataFrame(captured_warnings)
if not warnings_df.empty:
    warnings_df.to_csv(warnings_path, index=False)
    print(f"Saved {len(warnings_df)} warnings to {warnings_path}")
else:
    print("No warnings captured.")



In [12]:
# Save the distinct column names found across all spectra files.
# A set has no stable iteration order, so the names are sorted to make the CSV
# identical from run to run; key=str keeps the sort well-defined even if the
# names are not all strings.
df_unique = pd.DataFrame({"columns": sorted(allcols, key=str)})
df_unique.to_csv("unique_columns_ALL_19april.csv", index=False)

In [13]:
# Final tallies for the validation run, printed as one report.
# The read/flux counters only cover the specific error codes matched in the
# loop, so they need not add up to the total number of logged errors.
summary_lines = [
    "Summary Counts:",
    f"\nFiles Processed: {len(spectra_info)}",
    f"Total Validation Errors (logged in errdf): {len(errdf)}",
    f"-\tRead Errors (counted): {readerror_counter}",
    f"-\tFlux Unit Validation Errors (counted): {flux_counter}",
    f"\nWarnings Captured (logged in warnings_df): {len(warnings_df)}",
]
print("\n".join(summary_lines))


Summary Counts:

Files Processed: 54005
Total Validation Errors (logged in errdf): 8943
-	Read Errors (counted): 975
-	Flux Unit Validation Errors (counted): 5241



In [14]:
# import astropy.units as u
# import astropy.constants as const
# from astropy.units import equivalencies

# # Define the flux in mJy
# flux_mJy = 1.0 * u.mJy  # Example value of 1 mJy

# # To convert between frequency and wavelength units, we need to specify the wavelength
# wavelength = 5500 * u.AA  # Example: optical wavelength at 5500 Angstroms

# # Create the equivalency between frequency and wavelength flux densities
# # This accounts for the non-linear relationship between frequency and wavelength
# fnu_to_flambda = u.spectral_density(wavelength)

# # Do the conversion
# flux_flambda = flux_mJy.to(u.erg / u.s / u.cm**2 / u.AA, equivalencies=fnu_to_flambda)

# print(f"Flux density at {wavelength}:")
# print(f"{flux_mJy} = {flux_flambda}")