In [18]:
import os

import pandas as pd

from cambio.methods_prs import PRSConfig, ComplementPRS
# Locus label -> name of the genotype column in the input table.
# The same labels prefix the "<label>_riskcount" columns of the result.
locus_cols = {
    "CFH_A": "C___8355565_10",
    "CFH_B": "C___2530278_10",
    "C3":    "C__26330755_10",
    "CFI":   "C__34681305_20",
    "ARMS2": "C__29934973_20",
}
# Risk allele per locus, keyed by the same labels as locus_cols.
cfg = PRSConfig(risk_alleles={"CFH_A":"T","CFH_B":"A","C3":"C","CFI":"C","ARMS2":"T"})

# Location of the input table. Set the CAMBIO_DATA_CSV environment variable to
# run the notebook on another machine; the fallback is the original local path,
# so existing runs behave exactly as before.
DATA_CSV = os.environ.get(
    "CAMBIO_DATA_CSV",
    "/Users/hannes/Documents/cambio-analysis/2025_07_24_CAMBio_Tabelle_für_Hannes_Urbat.csv",
)

prs = ComplementPRS(
    data=DATA_CSV,
    locus_cols=locus_cols,
    config=cfg,
    id_col="Alias",
    aggregate=True,         # summarise per patient
    missing="zero",         # count missing markers as 0 (pragmatic)
    normalize=False         # optionally True: divide by the number of observed markers
)
prs_patients = prs.compute_patientlevel()
prs_patients.head()

Unnamed: 0,Alias,C___8355565_10,C___2530278_10,C__26330755_10,C__34681305_20,C__29934973_20,CFH_A_riskcount,CFH_B_riskcount,C3_riskcount,CFI_riskcount,ARMS2_riskcount,n_observed_markers,pPRS_raw,pPRS_z,pPRS_quintile
0,1,CT,C,G,C,GT,1.0,,,,1.0,2,2.0,-0.025613,
1,10,CT,AC,G,T,GT,1.0,1.0,,,1.0,3,3.0,0.794002,
2,100,,,,,,,,,,,0,0.0,-1.664843,
3,101,,,,,,,,,,,0,0.0,-1.664843,
4,102,,,,,,,,,,,0,0.0,-1.664843,


In [19]:
# Sort patients numerically by Alias (so "2" comes before "10"). Aliases that
# cannot be parsed as numbers become NaN, are placed last, and are tie-broken
# by the Alias column itself.
# The helper column is added with assign(), which returns a new frame, so the
# frame produced by the PRS step is never modified in place.
prs_patients = (
    prs_patients
    .assign(Alias_num=lambda frame: pd.to_numeric(frame["Alias"], errors="coerce"))
    .sort_values(["Alias_num", "Alias"], na_position="last")
    .drop(columns="Alias_num")
)
prs_patients.head(10)


Unnamed: 0,Alias,C___8355565_10,C___2530278_10,C__26330755_10,C__34681305_20,C__29934973_20,CFH_A_riskcount,CFH_B_riskcount,C3_riskcount,CFI_riskcount,ARMS2_riskcount,n_observed_markers,pPRS_raw,pPRS_z,pPRS_quintile
0,1,CT,C,G,C,GT,1.0,,,,1.0,2,2.0,-0.025613,
13,2,T,AC,C,TC,G,,1.0,,1.0,,2,2.0,-0.025613,
23,3,CT,AC,G,C,G,1.0,1.0,,,,2,2.0,-0.025613,
44,5,,,,,,,,,,,0,0.0,-1.664843,
54,6,C,C,G,C,G,,,,,,0,0.0,-1.664843,
64,7,T,A,G,C,GT,,,,,1.0,1,1.0,-0.845228,
75,8,T,AC,CG,C,G,,1.0,1.0,,,2,2.0,-0.025613,
85,9,C,C,CG,T,GT,,,1.0,,1.0,2,2.0,-0.025613,
1,10,CT,AC,G,T,GT,1.0,1.0,,,1.0,3,3.0,0.794002,
5,11,CT,AC,CG,C,G,1.0,1.0,1.0,,,3,3.0,0.794002,


In [8]:
# Restrict the view to the derived columns: every "*_riskcount" column in its
# existing order, followed by the per-patient summary columns.
cols = list(filter(lambda name: name.endswith("_riskcount"), prs_patients.columns))
cols.extend(["n_observed_markers", "pPRS_raw", "pPRS_z", "pPRS_quintile"])
prs_patients[cols].head()

Unnamed: 0,CFH_A_riskcount,CFH_B_riskcount,C3_riskcount,CFI_riskcount,ARMS2_riskcount,n_observed_markers,pPRS_raw,pPRS_z,pPRS_quintile
0,1.0,,,,1.0,2,2.0,-0.025613,
1,1.0,1.0,,,1.0,3,3.0,0.794002,
2,,,,,,0,0.0,-1.664843,
3,,,,,,0,0.0,-1.664843,
4,,,,,,0,0.0,-1.664843,


In [9]:
# Per-marker sanity check: tabulate the values of each risk-count column
# (NaN included), then report the fraction of missing entries per marker.
riskcount_cols = [col for col in prs_patients.columns if col.endswith("_riskcount")]

for marker_col in riskcount_cols:
    print("\n", marker_col)
    print(prs_patients[marker_col].value_counts(dropna=False))

print("\nAnteil fehlend pro Marker:")
missing_share = prs_patients[riskcount_cols].isna().mean()
print(missing_share.round(3))


 CFH_A_riskcount
CFH_A_riskcount
NaN    54
1.0    42
Name: count, dtype: int64

 CFH_B_riskcount
CFH_B_riskcount
1.0    48
NaN    47
0.0     1
Name: count, dtype: int64

 C3_riskcount
C3_riskcount
NaN    53
1.0    43
Name: count, dtype: int64

 CFI_riskcount
CFI_riskcount
NaN    59
1.0    35
0.0     2
Name: count, dtype: int64

 ARMS2_riskcount
ARMS2_riskcount
NaN    68
1.0    27
0.0     1
Name: count, dtype: int64

Anteil fehlend pro Marker:
CFH_A_riskcount    0.562
CFH_B_riskcount    0.490
C3_riskcount       0.552
CFI_riskcount      0.615
ARMS2_riskcount    0.708
dtype: float64


In [17]:
# Pick one alias to inspect (e.g. 1 or 102); adjust as needed.
aid = "25"

# locus_cols is the locus -> genotype-column mapping defined in the PRS setup
# cell. It is reused here rather than pasted a second time, so the two cells
# cannot drift apart.
risk_cols = [f"{m}_riskcount" for m in locus_cols]
orig_cols = list(locus_cols.values())

# Compare as strings so the lookup works whether aid is written as 25 or "25";
# a type mismatch would otherwise silently select zero rows.
alias_matches = prs_patients["Alias"].astype(str) == str(aid)
row = prs_patients.loc[alias_matches, orig_cols + risk_cols + ["pPRS_raw","n_observed_markers"]]

# NOTE(review): in the recorded output for alias 25, the single-letter
# genotypes "T" (CFH_A) and "A" (CFH_B) equal the configured risk alleles yet
# their risk counts are NaN. Presumably single-letter calls are homozygous and
# are being treated as missing -- confirm in ComplementPRS.
row.T  # display transposed

Unnamed: 0,19
C___8355565_10,T
C___2530278_10,A
C__26330755_10,CG
C__34681305_20,TC
C__29934973_20,
CFH_A_riskcount,
CFH_B_riskcount,
C3_riskcount,1.0
CFI_riskcount,1.0
ARMS2_riskcount,
