In [1]:
import pandas as pd
import statsmodels.formula.api as smf
from sklearn.utils import resample

# ─── 1) Load & prep ──────────────────────────────────────────────
# Read the workbook into a DataFrame.
df = pd.read_excel("C:/Users/Admin/Desktop/Proiect grasime abdominala/RAW_DATA_ANONIMIZED/ANONIMIZED_DATABASE.xlsx")

# Stratum label "<SEX>_<AGE_CATEGORY>", built from the string form of both columns.
df['STRATUM'] = df['SEX'].astype(str).str.cat(df['AGE_CATEGORY'].astype(str), sep="_")

# Inverse-frequency weight: every row gets 1 / (number of rows in its stratum).
counts = df['STRATUM'].value_counts()
df['WEIGHT'] = df['STRATUM'].map(1 / counts)

# Element-wise product of SEX and AGE_CATEGORY, stored as its own column.
df['SEX_AGE_INT'] = df['SEX'].mul(df['AGE_CATEGORY'])

# Integer copy of the dichotomous weight-status column.
df['OBESE2'] = df['WEIGHT_STATUS_DICHOTOMIAL'].astype(int)

# ─── (optional) 2) Bootstrap-balance ─────────────────────────────
def balance_bootstrap(df, group_col, seed=42):
    """Upsample every group of ``group_col`` to the size of the largest group.

    Each group is resampled with replacement to ``n`` rows, where ``n`` is the
    row count of the most frequent value in ``group_col``. The resampled
    groups are then concatenated and given a fresh 0..N-1 index.

    Args:
        df: DataFrame to balance.
        group_col: Name of the column whose values define the groups.
        seed: Seed for the random draws; anything accepted by
            ``sklearn.utils.check_random_state`` (``None``, an int, or a
            ``numpy.random.RandomState``).

    Returns:
        A new DataFrame holding ``n`` resampled rows per group.
    """
    # Function-scope import so the block does not depend on a file-level edit.
    from sklearn.utils import check_random_state

    n = df[group_col].value_counts().max()

    # One generator shared by all groups. Handing the same integer seed to
    # every resample() call would restart the random stream for each group,
    # so groups with the same row count would be drawn at identical row
    # positions instead of independently.
    rng = check_random_state(seed)

    parts = [
        resample(sub, replace=True, n_samples=n, random_state=rng)
        for _, sub in df.groupby(group_col)
    ]
    return pd.concat(parts, ignore_index=True)

# if you want to bootstrap instead of weighting, uncomment:
# df = balance_bootstrap(df, 'STRATUM')

# ─── 3) Extended WLS formulas ────────────────────────────────────
# Both formulas share the same right-hand side apart from the *_THRESH
# predictor; adjacent string literals are concatenated into one formula.
formula_vat = (
    "VAT_SURFACE_MAN ~ VAT_SURFACE_THRESH"   # outcome ~ matching THRESH column
    "+ OBESE2 + SEX + AGE_CATEGORY"          # main effects
    " + SEX_AGE_INT"                         # precomputed SEX * AGE_CATEGORY column
    "+ OBESE2:SEX + OBESE2:AGE_CATEGORY"     # OBESE2 interaction terms
)
formula_sat = (
    "SAT_SURFACE_MAN ~ SAT_SURFACE_THRESH"   # outcome ~ matching THRESH column
    "+ OBESE2 + SEX + AGE_CATEGORY"          # main effects
    " + SEX_AGE_INT"                         # precomputed SEX * AGE_CATEGORY column
    "+ OBESE2:SEX + OBESE2:AGE_CATEGORY"     # OBESE2 interaction terms
)

# ─── 4) Fit on full data using inverse-frequency weights ─────────
# Each model is fitted on every row of df, weighted by the WEIGHT column.
wls_vat = smf.wls(
    formula=formula_vat,
    data=df,
    weights=df['WEIGHT'],
).fit()
wls_sat = smf.wls(
    formula=formula_sat,
    data=df,
    weights=df['WEIGHT'],
).fit()

# ─── 5) Print full coefficient tables ────────────────────────────
# Table 1 of summary2() is the per-term coefficient table; only the
# estimate, standard error and p-value columns are shown.
print("\n— VAT WLS (with OBESE×SEX and OBESE×AGE interactions) —")
print(wls_vat.summary2().tables[1].loc[:, ['Coef.', 'Std.Err.', 'P>|t|']])

print("\n— SAT WLS (with OBESE×SEX and OBESE×AGE interactions) —")
print(wls_sat.summary2().tables[1].loc[:, ['Coef.', 'Std.Err.', 'P>|t|']])
# ─── 6) Print Bland–Altman for VAT and SAT ────────────────────────
def _bland_altman(y_true, y_pred):
    """Return ``(diff, bias, sd, loa)`` for two equal-length arrays.

    ``diff`` is ``y_true - y_pred``, ``bias`` is its mean, ``sd`` is its
    sample standard deviation (``ddof=1``) and ``loa`` is ``1.96 * sd``, the
    half-width of the limits of agreement around the bias.
    """
    diff = y_true - y_pred
    sd = diff.std(ddof=1)
    return diff, diff.mean(), sd, 1.96 * sd


def _print_bland_altman(label, bias, loa):
    """Print the bias and the lower/upper limits of agreement for ``label``."""
    print(f"\n{label} Bland–Altman:")
    print(f"  Bias       = {bias:.2f} cm²")
    print(f"  LoA lower  = {bias - loa:.2f} cm²")
    print(f"  LoA upper  = {bias + loa:.2f} cm²")


# VAT Bland–Altman: observed values vs. in-sample predictions of wls_vat.
y_vat_true = df['VAT_SURFACE_MAN'].values
y_vat_pred = wls_vat.predict(df).values
diff_vat, bias_vat, sd_vat, loa_vat = _bland_altman(y_vat_true, y_vat_pred)
_print_bland_altman("VAT", bias_vat, loa_vat)

# SAT Bland–Altman: observed values vs. in-sample predictions of wls_sat.
y_sat_true = df['SAT_SURFACE_MAN'].values
y_sat_pred = wls_sat.predict(df).values
diff_sat, bias_sat, sd_sat, loa_sat = _bland_altman(y_sat_true, y_sat_pred)
_print_bland_altman("SAT", bias_sat, loa_sat)


— VAT WLS (with OBESE×SEX and OBESE×AGE interactions) —
                        Coef.  Std.Err.         P>|t|
Intercept            4.257409  2.932818  1.518095e-01
VAT_SURFACE_THRESH   1.111252  0.033876  5.065768e-40
OBESE2               6.930376  6.497752  2.904343e-01
SEX                  4.285945  7.630637  5.764290e-01
AGE_CATEGORY         1.036674  4.836606  8.310094e-01
SEX_AGE_INT         -3.831637  7.301263  6.016622e-01
OBESE2:SEX           1.711959  6.962413  8.066098e-01
OBESE2:AGE_CATEGORY -2.166261  7.257525  7.663644e-01

— SAT WLS (with OBESE×SEX and OBESE×AGE interactions) —
                        Coef.  Std.Err.         P>|t|
Intercept            1.799515  1.460389  2.226752e-01
SAT_SURFACE_THRESH   1.034140  0.007219  8.924563e-78
OBESE2              -1.897819  3.166230  5.511670e-01
SEX                 -0.549954  3.155034  8.622086e-01
AGE_CATEGORY         0.497594  2.001859  8.045453e-01
SEX_AGE_INT          0.457811  2.942061  8.768637e-01
OBESE2:SEX           1