In [8]:
# Fetch Host Redshift from TNS for each object and append to ztf_cleansed.csv
from pathlib import Path
import pandas as pd
import requests
from bs4 import BeautifulSoup

project_root = Path.cwd().parent
folder_name = input("Enter the run folder name: ").strip()
run_folder = project_root / "runs" / folder_name
if not run_folder.exists():
    raise FileNotFoundError(f"Run folder not found: {run_folder}")

ztf_cleansed_path = project_root / "ztf_cleansed.csv"
ztf_df = pd.read_csv(ztf_cleansed_path)
print(f"Loaded {len(ztf_df)} objects from ztf_cleansed.csv")

Loaded 9 objects from ztf_cleansed.csv


In [9]:
# HTTP headers required by TNS
TNS_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

def get_tns_id(iauid):
    s = str(iauid).strip()
    if s.upper().startswith("SN"):
        return s[2:]
    return s

def _parse_field_value(html_text, field_class):
    """Extract value from TNS field div. E.g. field_class='host_redshift' or 'redshift'.
    TNS uses <div class=\"field field-{name}\"><span class=\"name\">...</span><div class=\"value\"><b>0.022</b></div></div>
    """
    soup = BeautifulSoup(html_text, "html.parser")
    field = soup.find("div", class_=lambda c: c and field_class in (c if isinstance(c, str) else " ".join(c)))
    if field:
        value_div = field.find("div", class_="value")
        if value_div:
            return value_div.get_text(strip=True)
    return None

def parse_host_redshift(html_text):
    """Extract Host Redshift from TNS object page; fall back to Redshift if missing."""
    z = _parse_field_value(html_text, "host_redshift")
    if z is not None:
        return z
    return _parse_field_value(html_text, "redshift")

session = requests.Session()
session.headers.update(TNS_HEADERS)
host_redshifts = []
num_total = len(ztf_df)

for idx, row in enumerate(ztf_df.itertuples(index=False), 1):
    tns_id = get_tns_id(row.IAUID)
    obj_url = f"https://www.wis-tns.org/object/{tns_id}"
    print(f"[{idx}/{num_total}] {row.ZTFID} (TNS {tns_id})...", end=" ")
    try:
        resp = session.get(obj_url, timeout=30)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed: {e}")
        host_redshifts.append(None)
        continue
    z = parse_host_redshift(resp.text)
    host_redshifts.append(z)
    print(z if z else "—")

ztf_df["host_redshift"] = host_redshifts
ztf_df.to_csv(ztf_cleansed_path, index=False)
print(f"\nAppended host_redshift to {ztf_cleansed_path}")

[1/9] ZTF19aaagfme (TNS 2022okv)... —
[2/9] ZTF19aaairqh (TNS 2019bp)... —
[3/9] ZTF19aaarhtg (TNS 2020pki)... —
[4/9] ZTF19aabmybj (TNS 2019on)... —
[5/9] ZTF19aabvfwn (TNS 2019jf)... —
[6/9] ZTF19aabyheu (TNS 2019kf)... —
[7/9] ZTF19aabyppp (TNS 2019kg)... —
[8/9] ZTF19aabyuze (TNS 2019ro)... —
[9/9] ZTF19aacgslb (TNS 2019np)... Failed: 429 Client Error: Too Many Requests for url: https://www.wis-tns.org/object/2019np

Appended host_redshift to /Users/david/Code/msc/ztf_cleansed.csv
