In [34]:
import pandas as pd
from src.config import RAW_DATA_DIR, TABLES_DIR

In [35]:
df = pd.read_csv(RAW_DATA_DIR / 'tabular_data/kfre_plosmed_v2_0.csv')

In [36]:
df = df[["age", "female", "time", "dm", "htn", "hf", "cvd",
        "neph_known", "epi_egfr", "acr_mgmmol", "death", "esrd"]]

In [37]:
# Rename columns
df = df.rename(columns={
    "epi_egfr": "egfr",
    "acr_mgmmol": "acr",
})

In [38]:
# Filter
df = df[df["neph_known"].notna()]

In [39]:
# Encoding
binary_map = {
    1: True,
    0: False
}

df["female"] = df["female"].apply(lambda x: binary_map[x])
df["htn"] = df["htn"].apply(lambda x: binary_map[x])
df["dm"] = df["dm"].apply(lambda x: binary_map[x])
df["cvd"] = df["cvd"].apply(lambda x: binary_map[x])
df["hf"] = df["hf"].apply(lambda x: binary_map[x])
df["neph_known"] = df["neph_known"].apply(lambda x: binary_map[x])

In [40]:
df.to_csv(TABLES_DIR / 'kfre.csv', index=False)
df

Unnamed: 0,age,female,time,dm,htn,hf,cvd,neph_known,egfr,acr,death,esrd
0,80,True,3226,False,True,False,False,False,57.0,4.2,0,0
2,92,True,2344,False,False,False,True,False,32.0,0.9,0,0
3,67,True,699,True,True,False,False,False,54.0,6.5,0,0
8,64,True,2933,False,False,False,False,True,15.0,116.9,0,0
9,81,False,2771,True,True,False,True,False,55.0,3.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
35528,86,True,2722,False,True,False,False,False,36.0,7.4,0,0
35532,60,True,552,False,False,False,False,False,58.0,3.9,0,0
35533,85,False,1840,False,True,False,False,False,58.0,1.4,0,0
35534,71,False,78,False,False,False,True,False,59.0,2.6,1,0


In [42]:
import math

def kidney_failure_risk(age, sex, egfr, acr, years=5):
    """
    Calculate the Kidney Failure Risk Equation (KFRE).
    
    Parameters:
    age (int or float): Age in years
    sex (str): 'male' or 'female'
    egfr (float): Estimated GFR (mL/min/1.73m²)
    acr (float): Urine albumin-to-creatinine ratio (mg/g)
    years (int): Prediction horizon (2 or 5 years)
    
    Returns:
    float: Predicted risk of kidney failure (0–1)
    """
    
    # Encode sex: male=1, female=0
    sex_val = 1 if sex.lower() == "male" else 0
    
    # Coefficients (simplified example; actual coefficients differ by horizon)
    if years == 2:
        intercept = -0.554
    elif years == 5:
        intercept = -1.957
    else:
        raise ValueError("Only 2-year or 5-year risk supported")
    
    # Linear predictor (example coefficients)
    x = (0.220 * age) + (0.246 * sex_val) + (0.451 * egfr) + (0.556 * math.log1p(acr)) + intercept
    
    # Risk equation
    risk = 1 - math.exp(-math.exp(x))
    return risk

# Example usage:
age = 71
sex = "female"
egfr = 10
acr = 113

risk_5yr = kidney_failure_risk(age, sex, egfr, acr, years=5)
print(f"Predicted 5-year kidney failure risk: {risk_5yr:.2%}")


Predicted 5-year kidney failure risk: 100.00%
