# 3. TNS host redshifts

Fetches the **host redshift** (falling back to the object's redshift when no host redshift is listed) from the [Transient Name Server](https://www.wis-tns.org/) for each object in the catalogue. Results are cached in `tns_data.csv` at the project root (columns: ZTFID, IAUID, host_redshift) and merged into `ztf_cleansed.csv` as the column **tns_redshift**.

In [3]:
# Fetch Host Redshift from TNS for each object and append to tns_data.csv
from pathlib import Path
import time
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# The project root is the parent of the current working directory.
project_root = Path.cwd().parent

# Ask which run to work on and stop immediately if its folder does not exist.
folder_name = input("Enter the run folder name: ").strip()
run_folder = project_root.joinpath("runs", folder_name)
if not run_folder.exists():
    raise FileNotFoundError(f"Run folder not found: {run_folder}")

# Catalogue to enrich, and the on-disk cache of earlier TNS lookups.
ztf_cleansed_path = project_root / "ztf_cleansed.csv"
tns_data_path = project_root / "tns_data.csv"
ztf_df = pd.read_csv(ztf_cleansed_path)
print(f"Loaded {len(ztf_df)} objects from ztf_cleansed.csv")

# Reuse the cache file when present; otherwise start from an empty frame
# that has the cache's three columns.
tns_cache = (
    pd.read_csv(tns_data_path)
    if tns_data_path.exists()
    else pd.DataFrame(columns=["ZTFID", "IAUID", "host_redshift"])
)

# Only objects whose ZTFID is absent from the cache still need to be retrieved.
# IDs are compared as strings on both sides.
cached_ztfids = set(tns_cache["ZTFID"].astype(str))
already_cached = ztf_df["ZTFID"].astype(str).isin(cached_ztfids)
to_fetch = ztf_df.loc[~already_cached]
indices_to_fetch = to_fetch.index.tolist()
num_total = len(ztf_df)
num_to_fetch = len(indices_to_fetch)
print(f"In tns_data cache: {len(cached_ztfids)}. To fetch: {num_to_fetch}")

Enter the run folder name: run3
Loaded 4999 objects from ztf_cleansed.csv
In tns_data cache: 5010. To fetch: 0


In [4]:
# Browser-like HTTP headers sent with requests to TNS.
# Accept-Encoding deliberately does not advertise "br": requests only decodes
# Brotli-compressed bodies when an optional brotli package is installed, and an
# undecoded body would be parsed as a page without any redshift field.
TNS_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

def get_tns_id(iauid):
    """Return the identifier used in the TNS object URL for an IAU name.

    The value is converted to ``str`` and surrounding whitespace is removed.
    A leading ``SN`` prefix (any letter case) is dropped together with any
    whitespace that separated it from the rest of the name, so ``"SN2020abc"``
    and ``"SN 2020abc"`` both give ``"2020abc"``. Names without that prefix are
    returned stripped but otherwise unchanged.
    """
    name = str(iauid).strip()
    if name.upper().startswith("SN"):
        # lstrip() keeps a space after the prefix out of the returned identifier.
        return name[2:].lstrip()
    return name

def _parse_field_value(html_text, field_class):
    """Extract the text of a field's ``value`` element from an HTML page.

    The page is parsed with ``html.parser``. The field is the first ``<div>``
    whose class contains ``field_class`` as a substring (so ``"redshift"``
    also matches a class such as ``"host_redshift"``). Inside it, the first
    ``<div class="value">`` supplies the result.

    Returns the whitespace-stripped text of that value element, or ``None``
    when either the field or its value element is missing.
    """
    def _has_field_class(css_class):
        # Missing or empty class attribute never matches.
        if not css_class:
            return False
        # Accept either a single class string or a sequence of class names.
        joined = css_class if isinstance(css_class, str) else " ".join(css_class)
        return field_class in joined

    document = BeautifulSoup(html_text, "html.parser")
    field_div = document.find("div", class_=_has_field_class)
    if not field_div:
        return None

    value_div = field_div.find("div", class_="value")
    if not value_div:
        return None
    return value_div.get_text(strip=True)

def parse_host_redshift(html_text):
    """Return the host redshift text from an object page, else the redshift text.

    The ``host_redshift`` field is looked up first. If it is missing *or
    empty*, the ``redshift`` field is used instead. An empty host-redshift
    element therefore no longer blocks the fallback.

    Returns the first non-empty text found. When neither lookup yields text,
    the result is ``""`` if either field was present but empty, otherwise
    ``None``.
    """
    host_z = _parse_field_value(html_text, "host_redshift")
    if host_z:
        return host_z

    # NOTE(review): _parse_field_value matches class names by substring, so this
    # lookup can also land on the host-redshift element - confirm against the
    # actual TNS markup.
    fallback_z = _parse_field_value(html_text, "redshift")
    # Keep the previous return value ("" or None) when the fallback finds nothing.
    return fallback_z if fallback_z is not None else host_z

# Pauses in seconds: after each successful request, after a generic request
# failure, and after an HTTP 429 (rate limit) response. The rate-limit pause
# now matches the value announced in the log message.
REQUEST_PAUSE_S = 5
FAILURE_PAUSE_S = 5
RATE_LIMIT_PAUSE_S = 60

# One session so the TNS headers are applied to every request.
session = requests.Session()
session.headers.update(TNS_HEADERS)
fetched_this_run = 0

for i, idx in enumerate(indices_to_fetch, 1):
    row = ztf_df.loc[idx]
    tns_id = get_tns_id(row.IAUID)
    obj_url = f"https://www.wis-tns.org/object/{tns_id}"
    print(f"[{i}/{num_to_fetch}] {row.ZTFID} (TNS {tns_id})...", end=" ")
    try:
        resp = session.get(obj_url, timeout=30)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed: {e}")
        # A failed object is not added to the cache, so it is still counted as
        # "to fetch" the next time the notebook runs.
        failed_resp = getattr(e, "response", None)
        if failed_resp is not None and failed_resp.status_code == 429:
            print(f" Rate limited, waiting {RATE_LIMIT_PAUSE_S}s...", flush=True)
            time.sleep(RATE_LIMIT_PAUSE_S)
        else:
            time.sleep(FAILURE_PAUSE_S)
        continue

    # Missing or non-numeric redshift text is stored as NaN.
    z_str = parse_host_redshift(resp.text)
    z_val = pd.to_numeric(z_str, errors="coerce") if z_str else np.nan
    new_row = pd.DataFrame([{"ZTFID": row.ZTFID, "IAUID": row.IAUID, "host_redshift": z_val}])
    tns_cache = pd.concat([tns_cache, new_row], ignore_index=True)

    # Checkpoint the whole cache after every object so an interrupted run
    # keeps everything fetched so far.
    tns_data_path.parent.mkdir(parents=True, exist_ok=True)
    tns_cache.to_csv(tns_data_path, index=False)
    fetched_this_run += 1
    print(z_str if z_str else "-")
    time.sleep(REQUEST_PAUSE_S)

# Attach the cached redshift to every catalogue row as "tns_redshift", then
# overwrite ztf_cleansed.csv. When a ZTFID occurs more than once in the cache,
# its last entry is the one used.
latest_cache_rows = tns_cache.drop_duplicates("ZTFID", keep="last")
redshift_by_ztfid = latest_cache_rows.set_index("ZTFID")["host_redshift"]
ztf_df["tns_redshift"] = ztf_df["ZTFID"].map(redshift_by_ztfid)
ztf_df.to_csv(ztf_cleansed_path, index=False)

# Summary: objects that were already cached before this run, objects fetched
# now, and rows that ended up with a non-null redshift.
total_filled = ztf_df["tns_redshift"].notna().sum()
n_already = num_total - num_to_fetch
print(f"\nAlready in cache: {n_already}, fetched this run: {fetched_this_run}, total with tns_redshift: {total_filled}")
print(f"TNS cache: {tns_data_path}")
print(f"ztf_cleansed: {ztf_cleansed_path}")


Already in cache: 4999, fetched this run: 0, total with tns_redshift: 4989
TNS cache: /Users/david/Code/msc/tns_data.csv
ztf_cleansed: /Users/david/Code/msc/ztf_cleansed.csv
