In [1]:
"""
Enhanced Medical Delivery Mode Prediction System
Optimized for high accuracy (92-97% range)
"""

import pandas as pd
import numpy as np
from datetime import datetime
import joblib
import logging
import os
from typing import Dict, Any

# ML imports
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

class MedicalPredictor:
    def __init__(self, model_path: str = 'medical_models'):
        """Initialize the predictor"""
        self.model_path = model_path
        self.model = None
        self.preprocessor = None
        self.feature_names = None
        self.random_state = 42

        os.makedirs(self.model_path, exist_ok=True)
        logger.info("Initialized MedicalPredictor")

    def generate_synthetic_data(self, n_samples: int = 10000) -> pd.DataFrame:
        """Generate synthetic medical data"""
        np.random.seed(self.random_state)

        data = {
            'maternal_age': np.random.normal(30, 5, n_samples),
            'bmi': np.random.normal(25, 4, n_samples),
            'blood_pressure_systolic': np.random.normal(120, 10, n_samples),
            'blood_pressure_diastolic': np.random.normal(80, 8, n_samples),
            'blood_sugar': np.random.normal(100, 15, n_samples),
            'hemoglobin': np.random.normal(12, 1, n_samples),
            'pregnancy_week': np.random.randint(37, 42, n_samples),
            'fetal_heart_rate': np.random.normal(140, 20, n_samples),
            'previous_pregnancies': np.random.randint(0, 4, n_samples),
            'fetal_position': np.random.choice(['normal', 'breech'], n_samples, p=[0.9, 0.1]),
            'previous_cesarean': np.random.choice(['yes', 'no'], n_samples, p=[0.2, 0.8]),
            'diabetes': np.random.choice(['yes', 'no'], n_samples, p=[0.1, 0.9]),
            'hypertension': np.random.choice(['yes', 'no'], n_samples, p=[0.08, 0.92])
        }

        df = pd.DataFrame(data)

        # Create target variable
        conditions = (
            (df['bmi'] > 30) |
            (df['blood_pressure_systolic'] > 140) |
            (df['blood_pressure_diastolic'] > 90) |
            (df['fetal_position'] == 'breech') |
            (df['previous_cesarean'] == 'yes') |
            (df['diabetes'] == 'yes') |
            (df['hypertension'] == 'yes') |
            ((df['fetal_heart_rate'] > 160) & (df['pregnancy_week'] < 38)) |
            ((df['previous_pregnancies'] >= 3) & (df['maternal_age'] > 35))
        )

        df['delivery_mode'] = np.where(conditions, 1, 0)

        self.feature_names = df.columns.tolist()[:-1]
        logger.info(f"Generated synthetic dataset with {n_samples} samples")
        return df

    def create_preprocessing_pipeline(self) -> ColumnTransformer:
        """Create preprocessing pipeline"""
        numeric_features = [
            'maternal_age', 'bmi', 'blood_pressure_systolic',
            'blood_pressure_diastolic', 'blood_sugar', 'hemoglobin',
            'pregnancy_week', 'fetal_heart_rate', 'previous_pregnancies'
        ]

        categorical_features = [
            'fetal_position', 'previous_cesarean',
            'diabetes', 'hypertension'
        ]

        numeric_transformer = Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ])

        categorical_transformer = Pipeline([
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
        ])

        self.preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)
            ])

        return self.preprocessor

    def train_model(self, X_train: np.ndarray, X_test: np.ndarray,
                   y_train: np.ndarray, y_test: np.ndarray) -> Dict[str, Any]:
        """Train and evaluate model"""
        # Handle class imbalance
        smote = SMOTE(random_state=self.random_state)
        X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

        # Initialize model with optimized parameters
        self.model = XGBClassifier(
            n_estimators=200,
            max_depth=6,
            learning_rate=0.01,
            subsample=0.8,
            colsample_bytree=0.8,
            min_child_weight=3,
            random_state=self.random_state
        )

        # Cross-validation
        cv_scores = cross_val_score(
            self.model,
            X_train_balanced,
            y_train_balanced,
            cv=StratifiedKFold(n_splits=5),
            scoring='accuracy'
        )

        # Train final model
        eval_set = [(X_test, y_test)]
        self.model.fit(
            X_train_balanced,
            y_train_balanced,
            eval_set=eval_set,
            verbose=False
        )

        # Make predictions
        y_pred = self.model.predict(X_test)

        # Calculate metrics
        results = {
            'cv_scores': cv_scores,
            'cv_mean': cv_scores.mean(),
            'cv_std': cv_scores.std(),
            'test_accuracy': accuracy_score(y_test, y_pred),
            'test_roc_auc': roc_auc_score(y_test, y_pred),
            'classification_report': classification_report(y_test, y_pred)
        }

        logger.info(f"Model training completed. Accuracy: {results['test_accuracy']:.3f}")
        return results

    def save_model(self, metrics: Dict[str, Any], version: str):
        """Save model and metrics"""
        save_dict = {
            'model': self.model,
            'preprocessor': self.preprocessor,
            'metrics': metrics,
            'feature_names': self.feature_names,
            'timestamp': datetime.now(),
            'version': version
        }

        filename = os.path.join(self.model_path, f'model_v{version}.pkl')
        joblib.dump(save_dict, filename)
        logger.info(f"Model saved as version {version}")

def main():
    """Main execution function"""
    try:
        # Initialize predictor
        predictor = MedicalPredictor()

        # Generate data
        df = predictor.generate_synthetic_data()

        # Split data
        X = df.drop('delivery_mode', axis=1)
        y = df['delivery_mode']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=0.2,
            random_state=42,
            stratify=y
        )

        # Preprocess data
        preprocessor = predictor.create_preprocessing_pipeline()
        X_train_processed = preprocessor.fit_transform(X_train)
        X_test_processed = preprocessor.transform(X_test)

        # Train and evaluate
        results = predictor.train_model(
            X_train_processed, X_test_processed,
            y_train, y_test
        )

        # Save model
        predictor.save_model(results, version="1.0")

        return predictor, results

    except Exception as e:
        logger.error(f"Pipeline error: {str(e)}")
        raise

if __name__ == "__main__":
    # Execute pipeline
    predictor, results = main()

    # Print results
    print("\nModel Performance:")
    print(f"Mean CV Accuracy: {results['cv_mean']:.3f} (±{results['cv_std']:.3f})")
    print(f"Test Accuracy: {results['test_accuracy']:.3f}")
    print(f"Test ROC-AUC: {results['test_roc_auc']:.3f}")
    print("\nClassification Report:")
    print(results['classification_report'])



Model Performance:
Mean CV Accuracy: 0.967 (±0.003)
Test Accuracy: 0.966
Test ROC-AUC: 0.969

Classification Report:
              precision    recall  f1-score   support

           0       0.93      1.00      0.96       871
           1       1.00      0.94      0.97      1129

    accuracy                           0.97      2000
   macro avg       0.96      0.97      0.97      2000
weighted avg       0.97      0.97      0.97      2000



In [2]:
# First run the main training pipeline
predictor, results = main()

# Create sample new data (similar structure to training data)
new_data = pd.DataFrame({
    'maternal_age': [28, 35, 31, 42],
    'bmi': [24.5, 31.2, 28.7, 33.5],
    'blood_pressure_systolic': [118, 145, 125, 135],
    'blood_pressure_diastolic': [75, 95, 82, 88],
    'blood_sugar': [95, 140, 105, 120],
    'hemoglobin': [11.5, 12.8, 13.2, 11.8],
    'pregnancy_week': [38, 39, 37, 40],
    'fetal_heart_rate': [135, 155, 142, 148],
    'previous_pregnancies': [0, 2, 1, 3],
    'fetal_position': ['normal', 'breech', 'normal', 'normal'],
    'previous_cesarean': ['no', 'yes', 'no', 'yes'],
    'diabetes': ['no', 'yes', 'no', 'no'],
    'hypertension': ['no', 'yes', 'no', 'yes']
})

# Make predictions
predictions = predictor.model.predict(
    predictor.preprocessor.transform(new_data)
)

# Get prediction probabilities
prediction_probs = predictor.model.predict_proba(
    predictor.preprocessor.transform(new_data)
)

# Create results DataFrame
results_df = pd.DataFrame({
    'Patient': range(1, len(new_data) + 1),
    'Predicted_Delivery_Mode': ['Cesarean' if p == 1 else 'Vaginal' for p in predictions],
    'Cesarean_Probability': prediction_probs[:, 1]
})

print("\nPredictions for new patients:")
print(results_df)

# Print detailed patient analysis
for i, (pred, prob) in enumerate(zip(predictions, prediction_probs[:, 1]), 1):
    print(f"\nPatient {i}:")
    print(f"Patient Details:")
    for col in new_data.columns:
        print(f"- {col}: {new_data.iloc[i-1][col]}")
    print(f"Prediction: {'Cesarean' if pred == 1 else 'Vaginal'} Delivery")
    print(f"Probability of Cesarean: {prob:.2%}")



Predictions for new patients:
   Patient Predicted_Delivery_Mode  Cesarean_Probability
0        1                 Vaginal              0.128161
1        2                Cesarean              0.933030
2        3                 Vaginal              0.138230
3        4                Cesarean              0.932871

Patient 1:
Patient Details:
- maternal_age: 28
- bmi: 24.5
- blood_pressure_systolic: 118
- blood_pressure_diastolic: 75
- blood_sugar: 95
- hemoglobin: 11.5
- pregnancy_week: 38
- fetal_heart_rate: 135
- previous_pregnancies: 0
- fetal_position: normal
- previous_cesarean: no
- diabetes: no
- hypertension: no
Prediction: Vaginal Delivery
Probability of Cesarean: 12.82%

Patient 2:
Patient Details:
- maternal_age: 35
- bmi: 31.2
- blood_pressure_systolic: 145
- blood_pressure_diastolic: 95
- blood_sugar: 140
- hemoglobin: 12.8
- pregnancy_week: 39
- fetal_heart_rate: 155
- previous_pregnancies: 2
- fetal_position: breech
- previous_cesarean: yes
- diabetes: yes
- hypertensio

In [3]:
!pip install streamlit plotly xgboost pyngrok

Collecting streamlit
  Downloading streamlit-1.43.1-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.1-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m64.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m73.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.2.3 streamlit-1.43.1


In [4]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import joblib

# Define categorical columns globally
categorical_cols = ['fetal_position', 'previous_cesarean', 'diabetes', 'hypertension']

# Page config
st.set_page_config(
    page_title="Medical Delivery Mode Predictor",
    page_icon="🏥",
    layout="wide"
)

# Custom CSS
st.markdown("""
    <style>
    .main {
        background-color: #f8f9fa;
    }
    .stButton>button {
        background-color: #007bff;
        color: white;
        border-radius: 15px;
        padding: 15px 30px;
        font-weight: bold;
    }
    .prediction-box {
        padding: 20px;
        border-radius: 10px;
        margin: 10px 0;
        text-align: center;
    }
    .high-risk {
        background-color: #dc3545;
        color: white;
    }
    .low-risk {
        background-color: #28a745;
        color: white;
    }
    </style>
""", unsafe_allow_html=True)

# Function to generate synthetic data

def generate_synthetic_data(n_samples=10000):
    np.random.seed(42)
    data = {
        'maternal_age': np.random.normal(30, 5, n_samples),
        'bmi': np.random.normal(25, 4, n_samples),
        'blood_pressure_systolic': np.random.normal(120, 10, n_samples),
        'blood_pressure_diastolic': np.random.normal(80, 8, n_samples),
        'blood_sugar': np.random.normal(100, 15, n_samples),
        'hemoglobin': np.random.normal(12, 1, n_samples),
        'pregnancy_week': np.random.randint(37, 42, n_samples),
        'fetal_heart_rate': np.random.normal(140, 20, n_samples),
        'previous_pregnancies': np.random.randint(0, 4, n_samples),
        'fetal_position': np.random.choice(['normal', 'breech'], n_samples, p=[0.9, 0.1]),
        'previous_cesarean': np.random.choice(['yes', 'no'], n_samples, p=[0.2, 0.8]),
        'diabetes': np.random.choice(['yes', 'no'], n_samples, p=[0.1, 0.9]),
        'hypertension': np.random.choice(['yes', 'no'], n_samples, p=[0.08, 0.92])
    }
    df = pd.DataFrame(data)

    # Create target variable
    conditions = (
        (df['bmi'] > 30) |
        (df['blood_pressure_systolic'] > 140) |
        (df['blood_pressure_diastolic'] > 90) |
        (df['fetal_position'] == 'breech') |
        (df['previous_cesarean'] == 'yes') |
        (df['diabetes'] == 'yes') |
        (df['hypertension'] == 'yes')
    )
    df['delivery_mode'] = np.where(conditions, 1, 0)
    return df

# Initialize session state for model
if 'model' not in st.session_state:
    # Generate data and train model
    df = generate_synthetic_data()
    X = df.drop('delivery_mode', axis=1)
    y = df['delivery_mode']

    # Prepare data
    X_encoded = pd.get_dummies(X, columns=categorical_cols)

    # Scale numerical features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_encoded)

    # Train model
    model = XGBClassifier(random_state=42)
    model.fit(X_scaled, y)

    st.session_state['model'] = model
    st.session_state['scaler'] = scaler
    st.session_state['feature_names'] = X_encoded.columns

# Main interface
st.title("🏥 Medical Delivery Mode Predictor")

# Create tabs
tab1, tab2, tab3 = st.tabs(["Prediction", "Data Analysis", "About"])

with tab1:
    st.write("Enter patient information to predict delivery mode")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.subheader("Patient Information")
        maternal_age = st.number_input("Maternal Age", 15, 50, 30)
        bmi = st.number_input("BMI", 15.0, 45.0, 25.0)
        previous_pregnancies = st.number_input("Previous Pregnancies", 0, 10, 0)

    with col2:
        st.subheader("Vital Signs")
        bp_systolic = st.number_input("Systolic BP", 90, 200, 120)
        bp_diastolic = st.number_input("Diastolic BP", 60, 120, 80)
        blood_sugar = st.number_input("Blood Sugar", 70, 200, 100)
        hemoglobin = st.number_input("Hemoglobin", 7.0, 18.0, 12.0)

    with col3:
        st.subheader("Pregnancy Details")
        pregnancy_week = st.slider("Pregnancy Week", 20, 42, 37)
        fetal_heart_rate = st.number_input("Fetal Heart Rate", 100, 200, 140)
        fetal_position = st.selectbox("Fetal Position", ["normal", "breech"])
        previous_cesarean = st.selectbox("Previous Cesarean", ["no", "yes"])
        diabetes = st.selectbox("Diabetes", ["no", "yes"])
        hypertension = st.selectbox("Hypertension", ["no", "yes"])

    if st.button("Predict Delivery Mode"):
        # Create input dataframe
        input_data = pd.DataFrame({
            'maternal_age': [maternal_age],
            'bmi': [bmi],
            'blood_pressure_systolic': [bp_systolic],
            'blood_pressure_diastolic': [bp_diastolic],
            'blood_sugar': [blood_sugar],
            'hemoglobin': [hemoglobin],
            'pregnancy_week': [pregnancy_week],
            'fetal_heart_rate': [fetal_heart_rate],
            'previous_pregnancies': [previous_pregnancies],
            'fetal_position': [fetal_position],
            'previous_cesarean': [previous_cesarean],
            'diabetes': [diabetes],
            'hypertension': [hypertension]
        })

        # Prepare input data
        input_encoded = pd.get_dummies(input_data, columns=categorical_cols)
        # Ensure all columns from training are present
        for col in st.session_state['feature_names']:
            if col not in input_encoded.columns:
                input_encoded[col] = 0
        input_encoded = input_encoded[st.session_state['feature_names']]

        # Scale input
        input_scaled = st.session_state['scaler'].transform(input_encoded)

        # Make prediction
        prediction = st.session_state['model'].predict_proba(input_scaled)[0]

        # Display prediction
        col1, col2 = st.columns(2)

        with col1:
            if prediction[1] >= 0.5:
                st.markdown(f"""
                    <div class="prediction-box high-risk">
                        <h3>High Risk - Cesarean Delivery Recommended</h3>
                        <h2>{prediction[1]*100:.1f}%</h2>
                    </div>
                """, unsafe_allow_html=True)
            else:
                st.markdown(f"""
                    <div class="prediction-box low-risk">
                        <h3>Low Risk - Natural Delivery Possible</h3>
                        <h2>{prediction[0]*100:.1f}%</h2>
                    </div>
                """, unsafe_allow_html=True)

        with col2:
            fig = go.Figure(go.Indicator(
                mode = "gauge+number",
                value = prediction[1]*100,
                title = {'text': "Cesarean Probability"},
                gauge = {
                    'axis': {'range': [0, 100]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 50], 'color': "lightgreen"},
                        {'range': [50, 100], 'color': "lightcoral"}
                    ],
                    'threshold': {
                        'line': {'color': "red", 'width': 4},
                        'thickness': 0.75,
                        'value': 50
                    }
                }
            ))
            st.plotly_chart(fig)

with tab2:
    st.subheader("Data Analysis")
    df = generate_synthetic_data(10000)

    # Key metrics
    st.subheader("Key Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Average Maternal Age", f"{df['maternal_age'].mean():.1f}")
    with col2:
        st.metric("Average BMI", f"{df['bmi'].mean():.1f}")
    with col3:
        st.metric("Cesarean Rate", f"{(df['delivery_mode'].mean()*100):.1f}%")
    with col4:
        st.metric("Total Cases", len(df))

    # Visualizations
    col1, col2 = st.columns(2)

    with col1:
        fig = px.histogram(df, x='maternal_age', color='delivery_mode',
                          title='Age Distribution by Delivery Mode')
        st.plotly_chart(fig)

    with col2:
        fig = px.box(df, x='delivery_mode', y='bmi',
                     title='BMI Distribution by Delivery Mode')
        st.plotly_chart(fig)

with tab3:
    st.subheader("About This App")
    st.write("""
    This medical delivery mode prediction system uses machine learning to assess the likelihood
    of requiring a cesarean delivery. The model takes into account various maternal and fetal
    factors to make its predictions.

    **Key Features:**
    - Real-time prediction
    - Multiple risk factors analysis
    - Interactive visualizations
    - Data analysis dashboard

    **Note:** This is a demonstration model trained on synthetic data and should not be used
    for actual medical decisions. Always consult with healthcare professionals for medical advice.
    """)

# Footer
st.markdown("---")
st.markdown("Created with ❤️ for medical professionals")




Writing app.py


In [None]:
# Install required libraries
!pip install streamlit pyngrok

import streamlit as st
from pyngrok import ngrok
import os
import threading
import time

# Define the path to the ngrok configuration file
ngrok_config_path = "/root/.config/ngrok/ngrok.yml"

# Set the authtoken if you have one
authtoken = "2pTAsS3CjSHnFHypzOJHJsnetn0_5SQxU8di3ydudAwekU9YZ"  # Replace with your ngrok authtoken if needed

# Configure ngrok
from pyngrok import conf
conf.get_default().auth_token = authtoken

# Function to run Streamlit
def run_streamlit():
    os.system('streamlit run app.py --server.port 8501 --server.enableCORS false --server.enableXsrfProtection false')

# Kill any existing Streamlit processes
!kill -9 $(ps -ef | grep streamlit | grep -v grep | awk '{print $2}') 2>/dev/null

# Run Streamlit in the background using a separate thread
streamlit_thread = threading.Thread(target=run_streamlit)
streamlit_thread.start()

# Wait for Streamlit to start
time.sleep(5)  # Adjust the sleep time as needed

# Create ngrok tunnel
ngrok_tunnel = ngrok.connect(addr="8501", proto="http", bind_tls=True)
print(f"Public URL: {ngrok_tunnel.public_url}")

# Keep the notebook running
print("Streamlit is ready to use")
while True:
    time.sleep(60)


Public URL: https://7add-35-230-118-220.ngrok-free.app                                              
Streamlit is ready to use
