In [1]:
import pandas as pd
import os
from psycopg2 import connect
from munch import munchify
from yaml import safe_load

def connect_db(table_name: str = "maps_apr25_original") -> pd.DataFrame:
    """Load the rows of ``table_name`` whose ``map_quality`` equals 1.

    Connection settings are read from the YAML file named by the
    ``CONFIG_PATH`` environment variable; its ``db`` section must provide
    ``host``, ``dbname``, ``user`` and ``psswd``.

    Parameters
    ----------
    table_name : str
        Table to query. The name is interpolated into the SQL text verbatim,
        so it must come from trusted code, never from external input.

    Returns
    -------
    pd.DataFrame
        The ``file_name`` and ``map_quality`` columns of the selected rows.

    Raises
    ------
    RuntimeError
        If ``CONFIG_PATH`` is unset or empty.
    """
    config_path = os.getenv("CONFIG_PATH")
    if not config_path:
        # Fail early with an actionable message instead of the opaque
        # TypeError that open(None) would raise.
        raise RuntimeError(
            "CONFIG_PATH environment variable is not set; "
            "it must point to the YAML configuration file"
        )
    with open(config_path) as f:
        cfg = munchify(safe_load(f))
    config_db = cfg.db
    cnx = connect(
        host=config_db.host,
        dbname=config_db.dbname,
        user=config_db.user,
        password=config_db.psswd,
    )
    try:
        query = f"select file_name, map_quality from {table_name} where map_quality = 1;"
        maps = pd.read_sql(query, con=cnx)
    finally:
        # Using a psycopg2 connection as a context manager only scopes a
        # transaction and does not close it, so release it explicitly --
        # also when the query fails.
        cnx.close()
    return maps

In [2]:
# Fetch the quality-selected maps and order them by file name, with a fresh
# 0..n-1 index, in a single chain.
quality_df = (
    connect_db()
    .sort_values(by="file_name")
    .reset_index(drop=True)
)
quality_df.head()

  maps = pd.read_sql(query, con=cnx)


Unnamed: 0,file_name,map_quality
0,J0000+0248_C_2016_01_03_pet_map.fits,1
1,J0000+0248_S_2022_01_07_pet_map.fits,1
2,J0000+0248_X_2016_01_03_pet_map.fits,1
3,J0000+0248_X_2022_01_07_pet_map.fits,1
4,J0000+0307_C_2015_12_23_pet_map.fits,1


In [3]:
# Per-map classifier output; the first CSV column is used as the index.
classification = pd.read_csv("/home/zagorulia/ml/classification.csv", index_col=0)
# The source name is the part of the file name before the first underscore,
# e.g. "J0000+0248_C_2016_01_03_pet_map.fits" -> "J0000+0248".
classification["source"] = classification["file"].map(
    lambda file_name: file_name.split("_")[0]
)
classification.head()

Unnamed: 0,file,prob,pred,source
0,J0000+0248_C_2016_01_03_pet_map.fits,8.9e-05,0,J0000+0248
1,J0000+0248_S_2022_01_07_pet_map.fits,2.4e-05,0,J0000+0248
2,J0000+0248_X_2016_01_03_pet_map.fits,0.08169,0,J0000+0248
3,J0000+0248_X_2022_01_07_pet_map.fits,0.001513,0,J0000+0248
4,J0000+0307_C_2015_12_23_pet_map.fits,0.027874,0,J0000+0307


In [4]:
# Left join: every quality-selected map is kept; maps without a matching
# classifier row get NaN in prob / pred / source.
classification_filtered = pd.merge(
    quality_df,
    classification,
    how="left",
    left_on="file_name",
    right_on="file",
).loc[:, ["file_name", "map_quality", "prob", "pred", "source"]]
classification_filtered.head()

Unnamed: 0,file_name,map_quality,prob,pred,source
0,J0000+0248_C_2016_01_03_pet_map.fits,1,8.9e-05,0,J0000+0248
1,J0000+0248_S_2022_01_07_pet_map.fits,1,2.4e-05,0,J0000+0248
2,J0000+0248_X_2016_01_03_pet_map.fits,1,0.08169,0,J0000+0248
3,J0000+0248_X_2022_01_07_pet_map.fits,1,0.001513,0,J0000+0248
4,J0000+0307_C_2015_12_23_pet_map.fits,1,0.027874,0,J0000+0307


In [5]:
# One row per source: mean probability, mean predicted label, and the number
# of maps that contributed to the group.
agg = classification_filtered.groupby("source", as_index=False).agg(
    mean_prob=pd.NamedAgg(column="prob", aggfunc="mean"),
    mean_pred=pd.NamedAgg(column="pred", aggfunc="mean"),
    n=pd.NamedAgg(column="prob", aggfunc="size"),
)
agg.head()

Unnamed: 0,source,mean_prob,mean_pred,n
0,J0000+0248,0.020829,0.0,4
1,J0000+0307,0.31772,0.285714,7
2,J0000+030B,0.365602,0.5,2
3,J0000+0816,0.999955,1.0,1
4,J0000+1139,0.499542,0.5,2


In [6]:
# Fixed-width text table that is parsed with pd.read_fwf further down.
path = "../data/rfc_vs_4fgl_dr4.txt"


def cs(a, b):
    """Return the column spec ``(a - 1, b)`` for the column range ``a``..``b``.

    Shifting only the start by one turns a 1-based, inclusive range into the
    0-based, half-open interval that ``pd.read_fwf`` takes in ``colspecs``.
    """
    start = a - 1
    return start, b

# One entry per field: (column name, first column, last column).
# Positions are 1-based and inclusive; cs() converts them for read_fwf.
_fermi_fields = [
    ("Bname", 1, 8),               # Bname
    ("Jname", 10, 19),             # Jname

    ("vlbi_ra_h", 22, 23),         # VLBI RA hour
    ("vlbi_ra_m", 25, 26),         # VLBI RA min
    ("vlbi_ra_s", 28, 36),         # VLBI RA sec

    ("vlbi_dec_sign", 38, 38),     # VLBI Dec sign
    ("vlbi_dec_deg", 39, 40),      # VLBI Dec deg
    ("vlbi_dec_arcmin", 42, 43),   # VLBI Dec arcmin
    ("vlbi_dec_arcsec", 45, 52),   # VLBI Dec arcsec

    ("fermi_name", 57, 73),        # Fermi name
    ("nsig", 76, 80),              # Nsig
    ("dist_arcmin", 84, 88),       # Dist (arcmin)
]

# Derive both lists from the same table so names and positions stay aligned.
colspecs = [cs(first, last) for _, first, last in _fermi_fields]
names = [field_name for field_name, _, _ in _fermi_fields]

# Lines starting with "#" are comments; the file has no header row.
fermi = pd.read_fwf(path, colspecs=colspecs, names=names, comment="#", header=None)
fermi.head()

Unnamed: 0,Bname,Jname,vlbi_ra_h,vlbi_ra_m,vlbi_ra_s,vlbi_dec_sign,vlbi_dec_deg,vlbi_dec_arcmin,vlbi_dec_arcsec,fermi_name,nsig,dist_arcmin
0,2358+474,J0001+4742,0,1,19.041802,+,47,42,0.72074,4FGL J0001.2+4741,0.829,1.095
1,2358-080,J0001-0746,0,1,18.024917,-,7,46,26.92209,4FGL J0001.2-0747,2.099,1.503
2,2358-004,J0001-0011,0,1,21.466915,-,0,11,40.31503,4FGL J0001.4-0010,1.683,2.434
3,2358+209,J0001+2113,0,1,32.370489,+,21,13,36.27481,4FGL J0001.5+2113,0.562,0.537
4,2358-422,J0001-4155,0,1,32.757127,-,41,55,25.33111,4FGL J0001.6-4156,1.355,1.757


In [7]:
# Attach the Fermi name to each aggregated source. The left join keeps sources
# without a match (fermi_name is NaN for them); the duplicate join key coming
# from the right-hand table is dropped afterwards.
final = pd.merge(
    agg,
    fermi[["Jname", "fermi_name"]],
    how="left",
    left_on="source",
    right_on="Jname",
).drop(columns="Jname")

In [8]:
# Switch to the column names used in the exported table.
final = final.rename(columns=dict(mean_prob="probability", source="Jname"))
final.head()

Unnamed: 0,Jname,probability,mean_pred,n,fermi_name
0,J0000+0248,0.020829,0.0,4,
1,J0000+0307,0.31772,0.285714,7,
2,J0000+030B,0.365602,0.5,2,
3,J0000+0816,0.999955,1.0,1,
4,J0000+1139,0.499542,0.5,2,


In [9]:
# Export only the three published columns, without the row index.
final.loc[:, ["Jname", "probability", "fermi_name"]].to_csv(
    "morph_rfc_fermi_ass_full_filtered.csv",
    index=False,
)