# 📅 **Sarcopenia Workshop - Day 2: Model Optimization and Clinical Integration**

## **🔖 Step 1: Recap and Objective Setting**


---



In [38]:
## **🔖 Step 1: Load Cleaned Data and Trained Models**
import pandas as pd
import joblib

# Input artefacts, resolved relative to the notebook's working directory.
CLEANED_DATA_CSV = "Cleaned_Sarcopenia_Data.csv"
MODEL_FILES = {"female": "female_model.pkl", "male": "male_model.pkl"}

# Read the cleaned dataset and report its (rows, columns) shape.
df = pd.read_csv(CLEANED_DATA_CSV)
print(f"✅ Cleaned dataset loaded. Shape: {df.shape}")

# Restore the previously saved models, female first, then male.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
pipeline_female, pipeline_male = (
    joblib.load(MODEL_FILES[cohort]) for cohort in ("female", "male")
)
print("✅ Trained models for male and female loaded.")

# Quick data overview: show the first rows, then confirm readiness.
print(df.head())
print("📊 Data and models are ready for workflow analysis and improvements.")


✅ Cleaned dataset loaded. Shape: (1132, 15)
✅ Trained models for male and female loaded.
   ID   Age  Gender  Weight  Height   DM    CST   HT    BMI  Exercise  \
0   0  56.0       0    83.0   159.0  0.0   9.10  1.0  32.80         0   
1   1  52.0       0    75.0   162.0  1.0   9.30  0.0  28.58         0   
2   2  67.0       0    79.0   150.0  1.0  11.32  1.0  35.11         0   
3   3  60.0       0    60.0   155.0  0.0  11.20  0.0  24.97         2   
4   4  66.0       0    84.0   157.0  1.0  13.40  1.0  34.00         0   

   Education  Smoking  STAR   HGS  Sarc  
0          1      0.0  1.26  34.0   0.0  
1          4      0.0  1.63  28.0   0.0  
2          1      0.0  1.03  22.0   0.0  
3          1      0.0  1.78  28.0   0.0  
4          1      0.0  0.74  16.0   1.0  
📊 Data and models are ready for workflow analysis and improvements.


In [39]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
# Model input columns (grouped as numeric / categorical) and the label column.
numeric_features = ['Age', 'Weight', 'Height', 'CST', 'HGS', 'BMI']
categorical_features = ['DM', 'HT', 'Exercise', 'Education', 'Smoking']
target = 'Sarc'


def _features_and_target(frame):
    """Return (X, y): the model input columns and the target column of `frame`."""
    return frame[numeric_features + categorical_features], frame[target]


# Gender-specific subsets: Gender == 0 feeds the female cohort, Gender == 1 the male cohort.
df_female = df.loc[df['Gender'] == 0]
df_male = df.loc[df['Gender'] == 1]

# Female cohort: 80/20 hold-out split with a fixed seed for repeatability.
X_female, y_female = _features_and_target(df_female)
X_train_female, X_test_female, y_train_female, y_test_female = train_test_split(
    X_female, y_female, test_size=0.2, random_state=42
)

# Male cohort: same split settings as the female cohort.
X_male, y_male = _features_and_target(df_male)
X_train_male, X_test_male, y_train_male, y_test_male = train_test_split(
    X_male, y_male, test_size=0.2, random_state=42
)

## **🔖 Step 2: Improving the Model**

In [40]:

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Candidate hyperparameters for the pipeline step named 'classifier'
# (3 x 3 x 3 = 27 combinations).
param_grid = {
    'classifier__n_estimators': [100, 200, 300],
    'classifier__learning_rate': [0.01, 0.1, 0.2],
    'classifier__max_depth': [3, 5, 7],
}

# Exhaustive 5-fold cross-validated search over the grid, scored by ROC AUC,
# fitted on the female training split. `fit` returns the search object itself.
grid_search = GridSearchCV(
    pipeline_female,
    param_grid,
    scoring='roc_auc',
    cv=5,
).fit(X_train_female, y_train_female)

best_female_params = grid_search.best_params_
print(f"✅ Best Parameters (Female Model): {best_female_params}")



✅ Best Parameters (Female Model): {'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__n_estimators': 200}


---

## **🔖 Step 3: Predicting for Sample Patients**


In [41]:
# Three illustrative patients: the model input columns plus 'Gender'
# (0 / 1, the same coding used to split the dataset into female / male).
sample_patients = pd.DataFrame({
    'Age': [70, 62, 55],
    'Weight': [68, 85, 72],
    'Height': [160, 175, 168],
    'CST': [13, 11, 10],
    'HGS': [18, 28, 34],
    'BMI': [26.5, 29.2, 25.4],
    'DM': [1, 0, 1],
    'HT': [0, 1, 0],
    'Exercise': [1, 0, 2],
    'Education': [3, 2, 4],
    'Smoking': [0, 1, 0],
    'Gender':[1,0,1]
})

# Select the model input columns explicitly. `.loc[:'Gender']` would slice
# *rows* by label (and fails on the default integer index); it never selects
# columns, so 'Gender' has to be excluded by naming the feature columns.
feature_columns = numeric_features + categorical_features

# NOTE(review): grid_search was fitted on the female training split only, yet
# two of the sample patients have Gender == 1 -- confirm this is intended.
# Column 1 of predict_proba is taken as the risk (presumably the Sarc == 1 class).
predicted_risk = grid_search.best_estimator_.predict_proba(sample_patients[feature_columns])[:, 1]
print(f"🔍 Predicted Sarcopenia Risk for Sample Patients:\n{predicted_risk}")



🔍 Predicted Sarcopenia Risk for Sample Patients:
[5.58556046e-03 2.67397672e-06 1.42543863e-06]


---

## **🔖 Step 4: Implementing the Current Clinical Workflow**


In [47]:
def clinical_workflow(patient):
    """Return the rule-based screening decision for a single patient.

    Parameters
    ----------
    patient : mapping-like (e.g. a DataFrame row)
        Must provide 'Age', 'CST', 'HGS', 'Gender', 'HT' and 'DM'.

    Returns
    -------
    str
        "Monitor with Ultrasound" or "No Sarcopenia".
    """
    ultrasound = "Monitor with Ultrasound"
    no_sarcopenia = "No Sarcopenia"

    # Only patients aged 65+ or with the HT / DM flag set are assessed further;
    # everyone else is cleared immediately.
    needs_assessment = patient['Age'] >= 65 or patient['HT'] == 1 or patient['DM'] == 1
    if not needs_assessment:
        return no_sarcopenia

    # A CST value of 12 or more triggers ultrasound on its own.
    if patient['CST'] >= 12:
        return ultrasound

    # Otherwise fall back to the gender-specific HGS cut-off
    # (below 32 for Gender == 1, below 19 for Gender == 0).
    gender = patient['Gender']
    below_hgs_cutoff = (gender == 1 and patient['HGS'] < 32) or (gender == 0 and patient['HGS'] < 19)
    return ultrasound if below_hgs_cutoff else no_sarcopenia

# Evaluate the rule-based workflow one row at a time and store the outcome
# next to the patient data, then show the resulting table.
clinical_decisions = sample_patients.apply(clinical_workflow, axis=1)
sample_patients['Clinical Decision'] = clinical_decisions
sample_patients


Unnamed: 0,Age,Weight,Height,CST,HGS,BMI,DM,HT,Exercise,Education,Smoking,Gender,Clinical Decision,AI Decision
0,70,68,160,13,18,26.5,1,0,1,3,0,1,Monitor with Ultrasound,No Sarcopenia
1,62,85,175,11,28,29.2,0,1,0,2,1,0,No Sarcopenia,No Sarcopenia
2,55,72,168,10,34,25.4,1,0,2,4,0,1,No Sarcopenia,No Sarcopenia


---

## **🔖 Step 5: AI-Augmented Workflow with Dual Thresholds**


In [48]:
# Dual decision thresholds on the predicted probability. These are the
# defaults used by the workflow function below when no override is passed.
high_risk_threshold = 0.7  # risk >= this value -> "Sarcopenia"
low_risk_threshold = 0.3   # risk <= this value -> "No Sarcopenia"

# 📌 Updated AI-Augmented Workflow (Gender-Specific Models)
def ai_augmented_workflow_dual_threshold_gender(
    patient_row,
    high_threshold=None,
    low_threshold=None,
    female_model=None,
    male_model=None,
):
    """Classify one patient from a gender-specific model's predicted risk.

    Parameters
    ----------
    patient_row : pandas.Series
        One patient; must contain 'Gender' and the eleven feature columns
        listed in `features` below.
    high_threshold, low_threshold : float, optional
        Decision thresholds. When omitted, the notebook-level
        `high_risk_threshold` / `low_risk_threshold` are used.
    female_model, male_model : object with `predict_proba`, optional
        Models used for Gender == 0 and for every other Gender value,
        respectively. When omitted, the notebook-level `pipeline_female` /
        `pipeline_male` are used.

    Returns
    -------
    str
        "Sarcopenia" if risk >= high threshold, "No Sarcopenia" if
        risk <= low threshold, otherwise "Monitor with Ultrasound".
    """
    features = ['Age', 'Weight', 'Height', 'CST', 'HGS', 'BMI', 'DM', 'HT', 'Exercise', 'Education', 'Smoking']
    # Single-row frame so the model receives named columns.
    patient_features = pd.DataFrame([patient_row[features]])

    # Defaults are resolved at call time so that rebinding the notebook-level
    # names in a later cell still takes effect.
    if high_threshold is None:
        high_threshold = high_risk_threshold
    if low_threshold is None:
        low_threshold = low_risk_threshold

    # Use the appropriate model based on gender (only the one needed is resolved).
    if patient_row['Gender'] == 0:  # Female
        model = pipeline_female if female_model is None else female_model
    else:  # Male
        model = pipeline_male if male_model is None else male_model

    # Column 1 of predict_proba for the single row is used as the risk score.
    risk = model.predict_proba(patient_features)[:, 1][0]

    # Apply dual thresholds; the band strictly between them is the uncertain zone.
    if risk >= high_threshold:
        return "Sarcopenia"
    elif risk <= low_threshold:
        return "No Sarcopenia"
    else:
        return "Monitor with Ultrasound"

# Apply the gender-specific AI workflow to each patient in the sample.
# The callable must be ai_augmented_workflow_dual_threshold_gender: no function
# named ai_augmented_workflow_dual_threshold is defined in this notebook, so
# referencing it raises NameError on a fresh kernel.
sample_patients['AI Decision'] = sample_patients.apply(ai_augmented_workflow_dual_threshold_gender, axis=1)

# Display the updated DataFrame (key inputs next to both decisions)
print(sample_patients[['Age', 'CST', 'HGS', 'Gender', 'Clinical Decision', 'AI Decision']])


   Age  CST  HGS  Gender        Clinical Decision    AI Decision
0   70   13   18       1  Monitor with Ultrasound  No Sarcopenia
1   62   11   28       0            No Sarcopenia  No Sarcopenia
2   55   10   34       1            No Sarcopenia  No Sarcopenia


---

## **🔖 Step 6: Comparing Current vs. AI-Augmented Workflow**


In [50]:
# 📊 Run both workflows over every row of the cleaned dataset.
# NOTE(review): `train_data` is a copy of the whole `df`, not only the training
# split, so it includes rows the models may have been fitted on -- confirm
# whether a held-out comparison is wanted instead.
train_data = df.copy()

for decision_column, workflow in (
    ('Clinical Decision', clinical_workflow),
    ('AI Decision', ai_augmented_workflow_dual_threshold_gender),
):
    train_data[decision_column] = train_data.apply(workflow, axis=1)

# 📈 Count the rows each workflow labels "Monitor with Ultrasound".
ultrasound_label = "Monitor with Ultrasound"
clinical_ultrasound_count = int((train_data['Clinical Decision'] == ultrasound_label).sum())
ai_monitor_count = int((train_data['AI Decision'] == ultrasound_label).sum())

print(f"🔍 Clinical Workflow - Patients sent to Ultrasound: {clinical_ultrasound_count}")
print(f"📊 AI-Augmented Workflow - Patients monitored with Ultrasound: {ai_monitor_count}")

🔍 Clinical Workflow - Patients sent to Ultrasound: 434
📊 AI-Augmented Workflow - Patients monitored with Ultrasound: 84


---

## **🔖 Step 7: Benefits of AI in Clinical Decision-Making**


In [52]:
# Summary bullets shown under the heading, one per output line.
ai_benefits = (
    "- Reduced unnecessary ultrasound scans.",
    "- Earlier detection of high-risk patients.",
    "- Optimized resource allocation.",
)

print("📌 AI Benefits:")
for benefit in ai_benefits:
    print(benefit)



📌 AI Benefits:
- Reduced unnecessary ultrasound scans.
- Earlier detection of high-risk patients.
- Optimized resource allocation.


---

## **🔖 Step 8: Final Thoughts and Q&A**


In [53]:
# Closing message printed at the end of the notebook.
print("✅ Workshop Completed! Thank you for participating.")


✅ Workshop Completed! Thank you for participating.
