# CAMBIO
Statistical analysis of CAMBIO study results.
This workbench is intended to give a first overview of CAMBIO results.
We start from the (preliminary) output data file from CAMBIO. We first iterate over all numerical columns and perform a t test (to compare AMD and Control). We discard data from rows that are empty or that lack valid numerical results. We do not model left/right eye data; this is therefore only a very first look and is not intended to be a final analysis. We then perform an analogous analysis on the categorical data using a chi-squared test.

### Import necessary Python libraries (our library is called cambio)

In [1]:
from pathlib import Path
import pandas as pd
from statsmodels.stats.multitest import multipletests
from cambio import Cambio
import numpy as np

### Input path to data using dialog

In [2]:
# input() ## enter path to CAMBio_Tabelle

### Convert relative path to absolute path and initialize Cambio object

In [4]:
# Location of the CAMBIO export table. A relative path is resolved against the
# current working directory of the notebook kernel.
# An earlier version assigned a second, machine-specific path first and then
# immediately overwrote it; that dead assignment is kept here only for reference:
#   /Users/hannes/Documents/cambio-analysis/2025_07_24_CAMBio_Tabelle_für_Hannes_Urbat.csv
input_file = "../../../Desktop/HPOstuff/2025_07_24_CAMBio_Tabelle.txt"
input_path = Path(input_file).resolve()  # convert to an absolute path
cambio = Cambio(cambio_path=input_path)

  df[col] = pd.to_numeric(df[col], errors='ignore')


### Perform t tests on all numerical columns

In [5]:
# Run the t tests. The result is iterated in the following cell, where each
# entry is read via the keys 'column', 't_stat', 'p_val' and 'diag'.
t_test_d_list = cambio.perform_t_tests()

### Print sorted results

In [6]:
# One header line per tested column (t statistic and p value), followed by an
# indented line of descriptive statistics for every group, then a blank line.
for result in t_test_d_list:
    column_name = result['column']
    t_statistic = result['t_stat']
    p_value = result['p_val']
    print(f"{column_name:<20} t={t_statistic:>8.4f}  p={p_value:>10.4g}")
    diagnostics = result['diag']
    for group_name, group_mean in diagnostics['means'].items():
        # stds and n_obs are looked up by the same group key as means
        group_std = diagnostics['stds'][group_name]
        group_size = diagnostics['n_obs'][group_name]
        print(f"    {group_name:<10} mean={group_mean:.3f}  std={group_std:.3f}  n={group_size}")
    print()

Age at Admission     t=  6.4667  p= 4.746e-09
    AMD        mean=79.093  std=5.584  n=43
    Control    mean=70.385  std=7.523  n=52

FI Particle Counts CD9 (fraction of total) t=  2.9345  p=  0.005288
    AMD        mean=0.095  std=0.051  n=27
    Control    mean=0.061  std=0.031  n=23

FI Particle Counts CD81 (fraction of total) t=  2.4085  p=   0.02163
    AMD        mean=0.202  std=0.177  n=28
    Control    mean=0.117  std=0.061  n=26

Fluorescence Intensity C3 CD9 (ExoView) t=  2.2031  p=   0.03164
    AMD        mean=1.059  std=0.042  n=33
    Control    mean=1.039  std=0.029  n=29

Fluorescence per particle FH CD9 (ExoView) t=  2.1309  p=   0.03811
    AMD        mean=187.847  std=145.285  n=34
    Control    mean=127.879  std=70.454  n=29

Height (cm)          t= -2.0692  p=   0.04409
    AMD        mean=162.805  std=26.759  n=43
    Control    mean=171.481  std=6.944  n=52

FI Particle Counts CD41a (fraction of total) t=  2.0160  p=   0.05074
    AMD        mean=0.295  std=0

In [None]:
results = cambio.perform_t_tests()

# Print, for every tested column, the number of observations per group
for entry in results:
    column_name = entry['column']
    group_sizes = entry['diag']['n_obs']
    print(f"{column_name}: {group_sizes}")

Age at Admission: {'AMD': 43, 'Control': 52}
FI Particle Counts CD9 (fraction of total): {'AMD': 27, 'Control': 23}
FI Particle Counts CD81 (fraction of total): {'AMD': 28, 'Control': 26}
Fluorescence Intensity C3 CD9 (ExoView): {'AMD': 33, 'Control': 29}
Fluorescence per particle FH CD9 (ExoView): {'AMD': 34, 'Control': 29}
Height (cm): {'AMD': 43, 'Control': 52}
FI Particle Counts CD41a (fraction of total): {'AMD': 23, 'Control': 18}
Fluorescence per particle FH CD41a (ExoView): {'AMD': 34, 'Control': 29}
C3 Particle Counts CD9 (fraction of total): {'AMD': 27, 'Control': 23}
FI Particle Counts CD63 (fraction of total): {'AMD': 24, 'Control': 18}
Fluorescence per particle C3 CD41a (ExoView): {'AMD': 34, 'Control': 29}
RPE lift (Drusen) area 5 mm circle (mm2): {'AMD': 15, 'Control': 17}
FH Particle Counts CD41a (fraction of total): {'AMD': 24, 'Control': 18}
RPE lift (Drusen) area 3 mm circle (mm2): {'AMD': 15, 'Control': 17}
FH Particle Counts CD81 (fraction of total): {'AMD': 28, 'Co

In [None]:
# Chi-squared tests for all categorical variables
chi2_results = cambio.perform_chi2_tests()

In [None]:
# Report in the same layout as the t-test output: a header line per column,
# followed by the table of counts and a blank separator line.
for chi2_entry in chi2_results:
    header = f"{chi2_entry['column']:<20} test={chi2_entry['test']:<12} p={chi2_entry['p_val']:.4g}"
    print(header)
    contingency_table = chi2_entry['diag']['counts']  # contingency table
    print(contingency_table)
    print()

No AMD               test=chi2         p=1.767e-35
No AMD            No  Yes
Group                    
AMD               78    0
Control            1   83
Control (Gliose)   0    2

Healthy Retina       test=chi2         p=2.151e-33
Healthy Retina    No  Yes
Group                    
AMD               78    0
Control            3   79
Control (Gliose)   0    2

Early AMD            test=chi2         p=6.682e-12
Early AMD         No  Yes
Group                    
AMD               38   40
Control           82    2
Control (Gliose)   2    0

Dry AMD              test=chi2         p=8.243e-09
Dry AMD           No  Yes
Group                    
AMD               50   28
Control           84    0
Control (Gliose)   2    0

Geographic Atrophy   test=chi2         p=0.0002164
Geographic Atrophy  No  Yes
Group                      
AMD                 64   14
Control             84    0
Control (Gliose)     2    0

Job (most recent)    test=chi2         p=0.000478
Job (most recent)  Altenpflege

In [None]:
# Sanity check: were person_df and eye_df assembled correctly?
print("=== PERSON DATAFRAME ===")
person_frame = cambio.person_df
print("Anzahl Zeilen:", len(person_frame))
print("Anzahl eindeutiger Aliase:", person_frame["Alias"].nunique())
print(person_frame.head())

print("\n=== EYE DATAFRAME ===")
eye_frame = cambio.eye_df
print("Anzahl Zeilen:", len(eye_frame))
print("Anzahl eindeutiger Aliase:", eye_frame["Alias"].nunique())
print(eye_frame.head())

# Verify that no alias occurs more than once in person_df
person_aliases = cambio.person_df["Alias"]
is_repeated = person_aliases.duplicated()
duplicates = person_aliases[is_repeated]
print("\nDoppelte Aliase in person_df:", duplicates.unique())

# Verify that every column listed in eye_vars is actually present in eye_df
missing_eye_vars = [
    name
    for name in cambio.eye_vars
    if name not in cambio.eye_df.columns
]
print("\nFehlende Eye_Vars-Spalten:", missing_eye_vars)

=== PERSON DATAFRAME ===
Anzahl Zeilen: 96
Anzahl eindeutiger Aliase: 96
   Alias    Group     Status Healthy Retina No AMD    Group Early AMD Dry AMD  \
0      1  Control  Completed            Yes    Yes  Control        No      No   
2      2  Control  Completed            Yes    Yes  Control        No      No   
4      3  Control  Completed            Yes    Yes  Control        No      No   
6      5      AMD  Completed             No     No      AMD       Yes      No   
8      6  Control  Completed            Yes    Yes  Control        No      No   

  Geographic Atrophy  Age at Admission  ... Genotyped C___8355565_10  \
0                 No                79  ...       Yes             CT   
2                 No                70  ...       Yes              T   
4                 No                70  ...       Yes             CT   
6                 No                84  ...        No            NaN   
8                 No                77  ...       Yes              C   

  C___2

In [None]:
# Check: does person_df hold exactly one row per alias?
person_table = cambio.person_df
print("person_df Aliase:", person_table["Alias"].nunique(), "Zeilen:", len(person_table))

# Check: does eye_df hold at most two distinct eyes per alias (L/R)?
eyes_by_alias = cambio.eye_df.groupby("Alias")["Eye"]
eye_counts = eyes_by_alias.nunique()
print("Maximale Anzahl Augen pro Alias:", eye_counts.max())

# List the aliases for which only a single eye is recorded
single_eye_mask = eye_counts.eq(1)
one_eye = eye_counts.loc[single_eye_mask].index.tolist()
print("Aliase mit nur einem Auge:", one_eye)

person_df Aliase: 96 Zeilen: 96
Maximale Anzahl Augen pro Alias: 2
Aliase mit nur einem Auge: [6, 7, 9, 10, 12, 14, 18, 21, 22, 24, 25, 32, 34, 40, 42, 43, 46, 48, 50, 55, 60, 69, 83, 87, 91, 93, 94, 95]


In [None]:
# List every column name in person_vars together with its detected type
for column_name in cambio.person_vars:
    detected_type = cambio._col_type.get(column_name)
    print(column_name, detected_type)

Status categorical
Healthy Retina categorical
No AMD categorical
Group categorical
Early AMD categorical
Dry AMD categorical
Geographic Atrophy categorical
Age at Admission numeric
Birthday categorical
Gender categorical
Weight (kg) categorical
Height (cm) numeric
Alcohol Consumption (0-7 days/week) categorical
Smoking Status categorical
Job (most recent) categorical
Sun Exposure   (1-5) categorical
Food Vegetables categorical
Food Fruit categorical
Food Carbs categorical
Food OliveOil categorical
Food Nuts categorical
Food Dairy categorical
Food Sweets categorical
Food Fish categorical
Disease High Blood Pressure categorical
Disease Diabetes categorical
Disease RA categorical
Disease PNH categorical
Disease Crohns categorical
Disease Bullous Pemphigoid categorical
Disease Dermatomyositis categorical
Disease Psoriasis categorical
Disease Quinckes Edema categorical
Disease NMOSD categorical
Disease MS categorical
Disease Alzheimers categorical
Disease Guillain Barre Syndrome categorical