In [1]:
from pathlib import Path
import pandas as pd
import sys

# Treat the parent of the current working directory as the repository root and
# put it on the import path, so the `src` package imported just below resolves.
REPO_ROOT = Path.cwd().parent.resolve()
repo_root_entry = str(REPO_ROOT)
if repo_root_entry not in sys.path:
    # Guarded append: re-running the cell does not add a duplicate entry.
    sys.path.append(repo_root_entry)
from src.qc_data import load_csv_safely, build_object_level_dataset, phaseB_report

# File locations for this step, all anchored at the repository root.
IN_PATH = REPO_ROOT / "data/qc_finalized_data_patched.csv"    # CSV read below
OUT_PATH = REPO_ROOT / "data/model_dataset_object_level.csv"  # object-level CSV written below
REPORT_PATH = REPO_ROOT / "results/data_card_phaseB.txt"      # text report written below

# Load the input table and print its shape plus the number of distinct
# `varname` values (the object identifier used in this notebook).
df = load_csv_safely(IN_PATH)
print(f"Loaded: {IN_PATH} shape: {df.shape}")
print(f"Unique objects (varname): {df['varname'].nunique()}")

# Build the object-level dataset.
# Option A: median aggregation, with the Hβ rows used for the features.
build_kwargs = dict(
    agg="median",
    use_hbeta_rows_for_features=True,
    add_optional_features=True,  # will include fwhm/logL if available
)
df_obj = build_object_level_dataset(df, **build_kwargs)

# Quick visual check: shape and the first ten rows of the result.
print(f"Object-level dataset shape: {df_obj.shape}")
display(df_obj.head(10))

# Write the object-level table, creating its folder if it does not exist yet.
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df_obj.to_csv(OUT_PATH, index=False)  # index=False: the row index is not written
print(f"Saved: {OUT_PATH}")

# Generate the Phase B report text from both tables and write it as UTF-8,
# again creating the destination folder when needed.
REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
report_text = phaseB_report(df, df_obj)
REPORT_PATH.write_text(report_text, encoding="utf-8")
print(f"Saved report: {REPORT_PATH}")


Loaded: /home/arsalan_gh/qc-project/data/qc_finalized_data_patched.csv shape: (261, 80)
Unique objects (varname): 67
Object-level dataset shape: (67, 9)


Unnamed: 0,varname,tau_cent_median,sigma_line_median,fwhm_median,logL_median,n_measurements_total,n_hbeta_rows,target_hbeta_log10,target_hbeta_source
0,1,13.4,948.0,1396.5,43.7,6,4,7.222456,website
1,2,89.8,1783.0,4165.0,44.75,3,1,8.379306,website
2,3,17.4,3787.0,6901.0,43.92,4,1,8.320873,website
3,4,24.75,1251.0,2115.0,43.425,4,4,7.569725,website
4,5,27.9,1514.0,2539.0,43.94,7,3,7.710117,website
5,6,47.1,1921.5,5410.0,43.745,3,3,8.068037,website
6,7,16.0,1868.5,4652.5,43.6,4,4,7.611829,website
7,8,146.9,1971.0,2012.0,44.85,3,1,8.680118,website
8,10,23.85,1115.0,1504.5,43.62,10,4,7.292478,website
9,11,150.1,1306.0,3002.0,45.13,2,1,8.331994,website


Saved: /home/arsalan_gh/qc-project/data/model_dataset_object_level.csv
Saved report: /home/arsalan_gh/qc-project/results/data_card_phaseB.txt
