Set Up and Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

In [None]:
# Seed NumPy's global (legacy) random generator. Every np.random.* call in the
# data-generation functions below draws from this shared state, so the
# synthetic dataset is reproducible only when cells run in order from here.
np.random.seed(42)

Generate Synthetic Genetic Data (First Section)

In [None]:
def generate_synthetic_genetic_data(n_samples=1000):
    """
    Draw a random cohort of patients with genetic markers and clinical factors.

    All values come from NumPy's global random generator, so the result is
    reproducible only if the caller seeds np.random beforehand.

    Args:
        n_samples: Number of patient rows to generate.

    Returns:
        pandas.DataFrame with the five genetic-marker columns, the nine
        clinical columns and a derived 'bmi' column, in that order.
    """
    draw = np.random
    columns = {}

    # Genetic markers: CYP2D6 (drug metabolism), IFITM3 (flu susceptibility),
    # IL17 (immune response), ACE2 receptor level and HLA type.
    # NOTE: the draws below must stay in this order to keep seeded output stable.
    columns['CYP2D6_activity'] = draw.choice(
        ['poor', 'intermediate', 'normal', 'ultrarapid'], n_samples)
    columns['IFITM3_rs12252'] = draw.choice(['CC', 'CT', 'TT'], n_samples)
    columns['IL17_expression'] = draw.normal(50, 15, n_samples)
    columns['ACE2_receptor'] = draw.normal(100, 25, n_samples)
    columns['HLA_type'] = draw.choice(['A', 'B', 'C', 'DR', 'DQ'], n_samples)

    # Clinical factors; the normal draws are floored/capped to keep them plausible.
    columns['age'] = draw.randint(1, 90, n_samples)
    columns['weight_kg'] = np.clip(draw.normal(70, 15, n_samples), 10, None)
    columns['height_cm'] = np.clip(draw.normal(170, 15, n_samples), 50, None)
    columns['sex'] = draw.choice(['M', 'F'], n_samples)
    columns['liver_function'] = np.clip(draw.normal(1.0, 0.2, n_samples), 0.3, 1.5)
    columns['kidney_function'] = np.clip(draw.normal(1.0, 0.2, n_samples), 0.3, 1.5)
    columns['symptom_severity'] = draw.randint(1, 11, n_samples)
    columns['fever_celsius'] = draw.normal(38.5, 1.0, n_samples)
    columns['days_since_symptoms'] = draw.randint(0, 8, n_samples)

    cohort = pd.DataFrame(columns)

    # BMI = weight (kg) / height (m) squared
    height_m = cohort['height_cm'] / 100
    cohort['bmi'] = cohort['weight_kg'] / (height_m ** 2)

    return cohort

Generate Synthetic Genetic Data (Medication Rules)

In [None]:
def add_medication_recommendations(df):
    """
    Label each patient with a rule-based medication choice (simplified demo logic).

    Rules are tried in order and the first match wins; patients matching no
    rule are labelled 'supportive_care'.

    Args:
        df: Patient DataFrame with 'CYP2D6_activity', 'kidney_function',
            'liver_function', 'symptom_severity', 'days_since_symptoms', 'age'.

    Returns:
        The same DataFrame object, with a 'recommended_medication' column added
        in place.
    """
    metabolizer = df['CYP2D6_activity']
    kidney = df['kidney_function']
    liver = df['liver_function']
    severity = df['symptom_severity']

    # Oseltamivir (Tamiflu): normal/ultrarapid metabolizers with good kidney
    # function and at least moderate symptoms.
    suits_oseltamivir = (
        metabolizer.isin(['normal', 'ultrarapid'])
        & (kidney > 0.7)
        & (severity >= 4)
    )

    # Zanamivir (Relenza): poor metabolizers, or reduced kidney/liver function.
    suits_zanamivir = (
        (metabolizer == 'poor')
        | (kidney <= 0.7)
        | (liver <= 0.7)
    )

    # Baloxavir (Xofluza): early treatment (<= 2 days) for patients aged 12+
    # with notable symptoms and adequate liver function.
    suits_baloxavir = (
        (df['days_since_symptoms'] <= 2)
        & (df['age'] >= 12)
        & (severity >= 5)
        & (liver > 0.8)
    )

    # Order matters: np.select picks the first rule whose mask is True.
    rule_table = [
        ('oseltamivir', suits_oseltamivir),
        ('zanamivir', suits_zanamivir),
        ('baloxavir', suits_baloxavir),
    ]
    masks = [mask for _, mask in rule_table]
    drug_names = [drug for drug, _ in rule_table]

    df['recommended_medication'] = np.select(masks, drug_names, default='supportive_care')
    return df

Generate Synthetic Genetic Data (Dosage Calculations)

In [None]:
def calculate_dosages(df):
    """
    Add per-drug dosage columns and the dosage matching the recommended drug.

    Args:
        df: Patient DataFrame containing 'age', 'weight_kg', 'CYP2D6_activity',
            'kidney_function', 'symptom_severity' and 'recommended_medication'.

    Returns:
        The same DataFrame object with 'oseltamivir_dosage', 'zanamivir_dosage',
        'baloxavir_dosage' and 'recommended_dosage' (mg) added in place.
        'recommended_dosage' is 0 for supportive care.
    """
    activity = df['CYP2D6_activity']
    chosen_drug = df['recommended_medication']

    # Oseltamivir base dose: 2 mg/kg for children under 12, flat 75 mg otherwise.
    is_child = df['age'] < 12
    base_oseltamivir = np.where(is_child, df['weight_kg'] * 2, 75)

    # Scale by metabolizer status; unknown statuses fall back to 1.0.
    activity_levels = ['poor', 'intermediate', 'normal', 'ultrarapid']
    activity_multipliers = [0.7, 0.85, 1.0, 1.15]
    metabolism_factor = np.select(
        [activity == level for level in activity_levels],
        activity_multipliers,
        default=1.0
    )

    # Kidney function only ever reduces the dose (factor limited to 0.5-1.0);
    # severity shifts it by 5% per point away from the midpoint of 5.
    kidney_factor = np.clip(df['kidney_function'], 0.5, 1.0)
    severity_factor = 1.0 + (df['symptom_severity'] - 5) * 0.05

    unrounded = base_oseltamivir * metabolism_factor * kidney_factor * severity_factor
    # Round to the nearest 10 mg, then keep within the 30-90 mg window.
    df['oseltamivir_dosage'] = np.round(unrounded, -1).clip(lower=30, upper=90)

    # Zanamivir: fixed 10 mg inhaled dose.
    df['zanamivir_dosage'] = 10

    # Baloxavir: 40 mg below 80 kg body weight, 80 mg otherwise.
    df['baloxavir_dosage'] = np.where(df['weight_kg'] < 80, 40, 80)

    # Pick the dosage column that corresponds to each patient's recommended drug.
    dosage_column_for = {
        'oseltamivir': 'oseltamivir_dosage',
        'zanamivir': 'zanamivir_dosage',
        'baloxavir': 'baloxavir_dosage',
    }
    df['recommended_dosage'] = np.select(
        [chosen_drug == drug for drug in dosage_column_for],
        [df[column] for column in dosage_column_for.values()],
        default=0  # Supportive care has no medication dosage
    )

    return df

Generate Synthetic Genetic Data (Complete Function)

In [None]:
def generate_complete_dataset(n_samples=1000):
    """
    Produce the full synthetic dataset used for model training.

    Chains patient generation, rule-based medication labelling and dosage
    calculation, then attaches a simulated treatment-effectiveness score.

    Args:
        n_samples: Number of patient rows to generate.

    Returns:
        pandas.DataFrame restricted to the feature columns plus
        'recommended_medication', 'recommended_dosage' and
        'treatment_effectiveness'.
    """
    # Patients -> medication label -> dosages
    patients = generate_synthetic_genetic_data(n_samples)
    patients = add_medication_recommendations(patients)
    patients = calculate_dosages(patients)

    # Simulated effectiveness (percent): patients given a drug draw from a
    # higher, tighter distribution than those on supportive care. Both samples
    # are drawn for every row (in this order) before one is selected.
    medicated_response = np.random.normal(0.8, 0.1, n_samples)
    unmedicated_response = np.random.normal(0.5, 0.2, n_samples)
    receives_drug = patients['recommended_medication'] != 'supportive_care'
    patients['treatment_effectiveness'] = np.where(
        receives_drug, medicated_response, unmedicated_response) * 100

    # Drop the per-drug helper columns; keep features and targets only.
    genetic_columns = [
        'CYP2D6_activity', 'IFITM3_rs12252', 'IL17_expression', 'ACE2_receptor', 'HLA_type']
    clinical_columns = [
        'age', 'weight_kg', 'height_cm', 'sex', 'bmi', 'liver_function', 'kidney_function',
        'symptom_severity', 'fever_celsius', 'days_since_symptoms']
    target_columns = [
        'recommended_medication', 'recommended_dosage', 'treatment_effectiveness']

    return patients[genetic_columns + clinical_columns + target_columns]

# Generate and save dataset
# `df` is a module-level name: the data-preparation cell further down reads it.
df = generate_complete_dataset(1500)
# Written relative to the current working directory; index=False keeps the
# row index out of the CSV.
df.to_csv('flu_medication_genetic_dataset.csv', index=False)
print("Dataset generated and saved as 'flu_medication_genetic_dataset.csv'")

Dataset generated and saved as 'flu_medication_genetic_dataset.csv'


Data Preparation for Model Training

In [None]:
# Prepare data for model training
def prepare_data(df):
    """
    Split the dataset into train/test features and targets and build a preprocessor.

    Args:
        df: Complete dataset containing the feature columns plus
            'recommended_medication', 'recommended_dosage' and
            'treatment_effectiveness'.

    Returns:
        Tuple (X_train, X_test, y_med_train, y_med_test, y_dosage_train,
        y_dosage_test, preprocessor). The preprocessor is an unfitted
        ColumnTransformer that scales numeric columns and one-hot encodes
        categorical ones.
    """
    target_columns = ['recommended_medication', 'recommended_dosage', 'treatment_effectiveness']

    # Everything that is not a target is a model input.
    X = df.drop(columns=target_columns)
    y_med = df['recommended_medication']
    y_dosage = df['recommended_dosage']

    # Categorical inputs are fixed by name; all remaining inputs are numeric.
    categorical_features = ['CYP2D6_activity', 'IFITM3_rs12252', 'HLA_type', 'sex']
    numeric_features = []
    for column in X.columns:
        if column not in categorical_features:
            numeric_features.append(column)

    scale_numeric = ('num', StandardScaler(), numeric_features)
    # handle_unknown='ignore' encodes categories unseen at fit time as all zeros.
    encode_categorical = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    preprocessor = ColumnTransformer(transformers=[scale_numeric, encode_categorical])

    # One shared 80/20 split so both targets stay aligned with the same rows.
    splits = train_test_split(X, y_med, y_dosage, test_size=0.2, random_state=42)
    X_train, X_test, y_med_train, y_med_test, y_dosage_train, y_dosage_test = splits

    return X_train, X_test, y_med_train, y_med_test, y_dosage_train, y_dosage_test, preprocessor

# Prepare the data
# Uses the module-level `df` produced by the dataset-generation cell above; the
# resulting names are consumed by the model-building cells that follow.
X_train, X_test, y_med_train, y_med_test, y_dosage_train, y_dosage_test, preprocessor = prepare_data(df)

Medication Recommendation Model

In [None]:
def build_medication_model(X_train, y_med_train, preprocessor):
    """
    Build and train the medication recommendation model.

    Args:
        X_train: Training feature DataFrame.
        y_med_train: Training labels (recommended medication per row).
        preprocessor: Unfitted transformer (e.g. the ColumnTransformer from
            prepare_data). It is cloned, so the caller's object is left
            untouched and can be reused for other pipelines.

    Returns:
        Fitted sklearn Pipeline (preprocessor -> RandomForestClassifier).
    """
    # Imported locally so this cell does not depend on an edit to the import cell.
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Without the clone, this pipeline
    # would hold the very same transformer object that is later handed to
    # build_dosage_model, and that second fit (on a subset of rows) would
    # silently overwrite the preprocessing inside this already-trained model.
    med_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
    ])

    # Train model
    med_pipeline.fit(X_train, y_med_train)

    return med_pipeline

# Build medication model
med_model = build_medication_model(X_train, y_med_train, preprocessor)

# Evaluate medication model
# score() is evaluated on the held-out test split. The labels were produced by
# deterministic rules over these same features, so a very high score is
# expected and says little about real-world performance.
med_accuracy = med_model.score(X_test, y_med_test)
print(f"Medication recommendation model accuracy: {med_accuracy:.4f}")

# Save model
# The whole pipeline (preprocessing + classifier) is written to the current
# working directory; predict_flu_medication() loads it from this path.
joblib.dump(med_model, 'flu_medication_model.pkl')
print("Medication model saved as 'flu_medication_model.pkl'")

Medication recommendation model accuracy: 1.0000
Medication model saved as 'flu_medication_model.pkl'


Dosage Prediction Model

In [None]:
def build_dosage_model(X_train, y_med_train, y_dosage_train, preprocessor):
    """
    Build and train the dosage prediction model (only for patients who need medication).

    Args:
        X_train: Training feature DataFrame.
        y_med_train: Training medication labels, used to filter out
            'supportive_care' rows (which carry no dosage).
        y_dosage_train: Training dosage targets, aligned with X_train.
        preprocessor: Unfitted transformer (e.g. the ColumnTransformer from
            prepare_data). It is cloned, so the caller's object is left
            untouched.

    Returns:
        Fitted sklearn Pipeline (preprocessor -> RandomForestRegressor), or
        None when no training row has a medication recommendation.
    """
    # Imported locally so this cell does not depend on an edit to the import cell.
    from sklearn.base import clone

    # Only train on rows where medication is recommended (not supportive care)
    mask_train = y_med_train != 'supportive_care'

    if not mask_train.any():  # Nothing to train on
        print("No medication recommendations in training data.")
        return None

    # Pipeline.fit fits its steps in place. Fitting the caller's transformer
    # here (on a row subset) would overwrite the state of any other pipeline
    # built from the same object, such as the medication model; a clone keeps
    # the two models independent.
    dosage_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
    ])

    # Train model
    dosage_pipeline.fit(X_train[mask_train], y_dosage_train[mask_train])

    return dosage_pipeline

# Build dosage model
dosage_model = build_dosage_model(X_train, y_med_train, y_dosage_train, preprocessor)

# Evaluate dosage model
# build_dosage_model returns None when there were no medicated training rows;
# in that case nothing is evaluated and no dosage model file is written.
if dosage_model is not None:
    # Score only on test rows that carry a real dosage, mirroring the
    # supportive-care filter applied during training.
    mask_test = y_med_test != 'supportive_care'
    dosage_r2 = dosage_model.score(X_test[mask_test], y_dosage_test[mask_test])
    print(f"Dosage prediction model R² score: {dosage_r2:.4f}")

    # Save model
    joblib.dump(dosage_model, 'flu_dosage_model.pkl')
    print("Dosage model saved as 'flu_dosage_model.pkl'")

Dosage prediction model R² score: 0.9973
Dosage model saved as 'flu_dosage_model.pkl'


Prediction Function for New Patients

In [None]:
def predict_flu_medication(patient_data):
    """
    Predict medication and dosage for a new patient

    Only the first row of `patient_data` is used for the result.

    Args:
        patient_data: DataFrame containing patient's genetic and clinical data
                      with the same columns as the training data

    Returns:
        dict: 'recommended_medication' (str), 'recommended_dosage' (mg; 0 for
              supportive care) and 'recommendation' (human-readable text)

    Raises:
        FileNotFoundError: if 'flu_medication_model.pkl' is missing, or if a
            drug is recommended and 'flu_dosage_model.pkl' is missing.
    """
    # SECURITY NOTE: joblib.load unpickles the file and can execute arbitrary
    # code; only load model files produced by this notebook / a trusted source.
    med_model = joblib.load('flu_medication_model.pkl')

    # Predict medication
    medication = med_model.predict(patient_data)[0]

    # If supportive care, no dosage needed. Returning before the dosage model
    # is loaded means this path works even when no dosage model was ever saved
    # (training skips the save when there were no medicated rows).
    if medication == 'supportive_care':
        return {
            'recommended_medication': medication,
            'recommended_dosage': 0,
            'recommendation': "Supportive care only (rest, fluids, antipyretics as needed)"
        }

    # Predict dosage (model loaded lazily, only when a drug is recommended).
    dosage_model = joblib.load('flu_dosage_model.pkl')
    # float() turns the NumPy scalar into a plain Python float for callers.
    dosage = round(float(dosage_model.predict(patient_data)[0]), 1)

    # Format recommendation
    instructions = {
        'oseltamivir': f"{dosage}mg twice daily for 5 days",
        'zanamivir': f"{dosage}mg by inhalation twice daily for 5 days",
        'baloxavir': f"Single dose of {dosage}mg"
    }

    return {
        'recommended_medication': medication,
        'recommended_dosage': dosage,
        'recommendation': f"{medication.capitalize()}: {instructions[medication]}"
    }

Interactive Patient Input (Console Prompts)

In [None]:
def get_genetic_information():
    """
    Interactively collect the patient's genetic markers from the console.

    Every question is repeated until the reply parses as a number and lies in
    the accepted range.

    Returns:
        dict with keys 'CYP2D6_activity', 'IFITM3_rs12252', 'IL17_expression',
        'ACE2_receptor' and 'HLA_type'.
    """
    def pick_from_menu(codes, prompt, range_message):
        # Re-ask until the reply is an integer between 1 and len(codes);
        # return the code at that 1-based position.
        while True:
            try:
                reply = int(input(prompt))
            except ValueError:
                print("Please enter a valid number.")
                continue
            if 1 <= reply <= len(codes):
                return codes[reply - 1]
            print(range_message)

    def read_level(prompt, upper, range_message):
        # Re-ask until the reply is a float within [0, upper].
        while True:
            try:
                level = float(input(prompt))
            except ValueError:
                print("Please enter a valid number.")
                continue
            if 0 <= level <= upper:
                return level
            print(range_message)

    def show(lines):
        # Print a block of menu text, one line per entry.
        for line in lines:
            print(line)

    print("\n--- Genetic Information ---")

    # CYP2D6 activity
    show([
        "\nCYP2D6 activity (affects drug metabolism):",
        "1. Poor metabolizer",
        "2. Intermediate metabolizer",
        "3. Normal metabolizer",
        "4. Ultrarapid metabolizer",
    ])
    cyp2d6_activity = pick_from_menu(
        ('poor', 'intermediate', 'normal', 'ultrarapid'),
        "Enter option (1-4): ",
        "Please enter a number between 1 and 4.")

    # IFITM3 variant
    show([
        "\nIFITM3 rs12252 variant (affects flu susceptibility):",
        "1. CC (higher risk)",
        "2. CT (moderate risk)",
        "3. TT (lower risk)",
    ])
    ifitm3_rs12252 = pick_from_menu(
        ('CC', 'CT', 'TT'),
        "Enter option (1-3): ",
        "Please enter a number between 1 and 3.")

    # IL17 expression
    il17_expression = read_level(
        "\nIL17 expression level (normal range 35-65): ",
        100,
        "Please enter a value between 0 and 100.")

    # ACE2 receptor
    ace2_receptor = read_level(
        "\nACE2 receptor density (normal range 75-125): ",
        200,
        "Please enter a value between 0 and 200.")

    # HLA type
    show([
        "\nHLA type (immune system marker):",
        "1. Type A",
        "2. Type B",
        "3. Type C",
        "4. Type DR",
        "5. Type DQ",
    ])
    hla_type = pick_from_menu(
        ('A', 'B', 'C', 'DR', 'DQ'),
        "Enter option (1-5): ",
        "Please enter a number between 1 and 5.")

    return {
        'CYP2D6_activity': cyp2d6_activity,
        'IFITM3_rs12252': ifitm3_rs12252,
        'IL17_expression': il17_expression,
        'ACE2_receptor': ace2_receptor,
        'HLA_type': hla_type
    }

In [None]:
def get_basic_clinical_information():
    """
    Interactively collect age, weight, height and sex, and derive BMI.

    Every question is repeated until the reply parses as a number and lies in
    the accepted range.

    Returns:
        dict with keys 'age', 'weight_kg', 'height_cm', 'sex' ('M' or 'F')
        and 'bmi'.
    """
    def read_in_range(cast, prompt, low, high, range_message):
        # Re-ask until cast(reply) succeeds and low <= value <= high.
        while True:
            try:
                reading = cast(input(prompt))
            except ValueError:
                print("Please enter a valid number.")
                continue
            if low <= reading <= high:
                return reading
            print(range_message)

    print("\n--- Clinical Information ---")

    # Age
    age = read_in_range(
        int, "\nPatient age (years): ", 0, 120,
        "Please enter a valid age between 0 and 120.")

    # Weight
    weight_kg = read_in_range(
        float, "\nPatient weight (kg): ", 2, 300,
        "Please enter a valid weight between 2 and 300 kg.")

    # Height
    height_cm = read_in_range(
        float, "\nPatient height (cm): ", 40, 250,
        "Please enter a valid height between 40 and 250 cm.")

    # Sex
    print("\nBiological sex:")
    print("1. Male")
    print("2. Female")
    sex_by_option = {1: 'M', 2: 'F'}
    sex = None
    while sex is None:
        try:
            sex = sex_by_option.get(int(input("Enter option (1-2): ")))
        except ValueError:
            print("Please enter a valid number.")
            continue
        if sex is None:
            print("Please enter 1 for Male or 2 for Female.")

    # BMI = weight (kg) / height (m) squared
    height_m = height_cm / 100
    bmi = weight_kg / (height_m ** 2)

    return {
        'age': age,
        'weight_kg': weight_kg,
        'height_cm': height_cm,
        'sex': sex,
        'bmi': bmi
    }

In [None]:
def get_medical_status_information():
    """
    Interactively collect organ-function scores and current symptom details.

    Every question is repeated until the reply parses as a number and lies in
    the accepted range.

    Returns:
        dict with keys 'liver_function', 'kidney_function',
        'symptom_severity', 'fever_celsius' and 'days_since_symptoms'.
    """
    def read_in_range(cast, prompt, low, high, range_message):
        # Re-ask until cast(reply) succeeds and low <= value <= high.
        while True:
            try:
                reading = cast(input(prompt))
            except ValueError:
                print("Please enter a valid number.")
                continue
            if low <= reading <= high:
                return reading
            print(range_message)

    status = {}

    # Liver function
    status['liver_function'] = read_in_range(
        float, "\nLiver function score (0.3-1.5, where 1.0 is normal): ", 0.3, 1.5,
        "Please enter a value between 0.3 and 1.5.")

    # Kidney function
    status['kidney_function'] = read_in_range(
        float, "\nKidney function score (0.3-1.5, where 1.0 is normal): ", 0.3, 1.5,
        "Please enter a value between 0.3 and 1.5.")

    # Symptom severity
    status['symptom_severity'] = read_in_range(
        int, "\nSymptom severity (1-10, where 10 is most severe): ", 1, 10,
        "Please enter a value between 1 and 10.")

    # Fever
    status['fever_celsius'] = read_in_range(
        float, "\nFever temperature (°C): ", 35, 42,
        "Please enter a valid temperature between 35 and 42 °C.")

    # Days since symptoms
    status['days_since_symptoms'] = read_in_range(
        int, "\nDays since symptoms first appeared: ", 0, 14,
        "Please enter a value between 0 and 14 days.")

    return status

In [None]:
def get_user_input():
    """
    Prompt for every patient field and return them as a one-row DataFrame.

    Returns:
        pandas.DataFrame with a single row whose columns are the genetic,
        basic clinical and medical-status fields, in that order.
    """
    print("\n==== FLU MEDICATION RECOMMENDATION SYSTEM ====")
    print("Please enter the patient's information:")

    # Ask the three questionnaires in sequence, accumulating one flat record.
    patient_data = dict(get_genetic_information())
    patient_data.update(get_basic_clinical_information())
    patient_data.update(get_medical_status_information())

    # Wrap every value in a one-element list so each key becomes a column
    # of a single-row frame.
    single_row_columns = {}
    for field, answer in patient_data.items():
        single_row_columns[field] = [answer]

    return pd.DataFrame(single_row_columns)

In [None]:
def user_prediction_interface():
    """
    Interactive interface for user to input patient data and get predictions

    Loops: collect one patient's data, predict, display, then ask whether to
    continue. Any answer other than 'y' (case-insensitive, surrounding
    whitespace ignored) ends the session. Returns None.
    """
    # Imported locally so this cell does not depend on an edit to the import cell.
    import os

    print("\n" + "="*50)
    print("FLU MEDICATION AND DOSAGE PREDICTION SYSTEM")
    print("="*50)

    # Check that the model files exist. Only their presence is tested here:
    # predict_flu_medication() loads the models itself, so unpickling them
    # here just to throw the objects away would be wasted work.
    model_files = ('flu_medication_model.pkl', 'flu_dosage_model.pkl')
    if not all(os.path.isfile(path) for path in model_files):
        print("Error: Model files not found. Please run the system to generate models first.")
        return

    while True:
        # Get patient data from user
        patient_data = get_user_input()

        # Make prediction
        result = predict_flu_medication(patient_data)

        # Display results
        display_results(result)

        # Ask if user wants to make another prediction; strip() so that an
        # answer such as "y " or " Y" is still treated as yes.
        again = input("\nWould you like to enter another patient? (y/n): ").strip().lower()
        if again != 'y':
            print("\nThank you for using the Flu Medication Prediction System.")
            break

In [None]:
def display_results(result):
    """
    Print a prediction result as a framed, human-readable report.

    Args:
        result: dict with 'recommended_medication', 'recommended_dosage' and
            'recommendation', as returned by predict_flu_medication.
    """
    rule = "="*50

    # Header
    print("\n" + rule)
    print("RECOMMENDATION RESULTS")
    print(rule)

    # Medication, plus its dosage unless no drug is recommended
    medication = result['recommended_medication']
    print(f"Recommended Medication: {medication.upper()}")
    has_dosage = medication != 'supportive_care'
    if has_dosage:
        print(f"Recommended Dosage: {result['recommended_dosage']} mg")

    # Full recommendation text
    print(f"\nFull Recommendation: {result['recommendation']}")

    # Disclaimer and closing rule
    print("\nIMPORTANT: This is a research model only. Always consult a healthcare professional.")
    print(rule)

In [None]:
def main():
    """
    Run the complete system from data generation to model building and user prediction interface

    Side effects: writes 'flu_medication_genetic_dataset.csv',
    'flu_medication_model.pkl' and (when a dosage model could be trained)
    'flu_dosage_model.pkl' to the current working directory, then starts the
    interactive console session.
    """
    # Generate dataset
    df = generate_complete_dataset(1500)
    df.to_csv('flu_medication_genetic_dataset.csv', index=False)

    # Prepare data
    print("\nPreparing data for model training...")
    X_train, X_test, y_med_train, y_med_test, y_dosage_train, y_dosage_test, preprocessor = prepare_data(df)

    # Build and evaluate medication model
    print("\nBuilding medication recommendation model...")
    med_model = build_medication_model(X_train, y_med_train, preprocessor)
    med_accuracy = med_model.score(X_test, y_med_test)
    print(f"Medication recommendation model accuracy: {med_accuracy:.4f}")

    # Persist the model right away. The prediction interface loads models from
    # disk, so without this it would either fail with "Model files not found"
    # or silently use stale files from an earlier run. Saving before the
    # dosage model is built also snapshots this pipeline before the shared
    # preprocessor object is handed to another builder.
    joblib.dump(med_model, 'flu_medication_model.pkl')
    print("Medication model saved as 'flu_medication_model.pkl'")

    # Build and evaluate dosage model
    print("\nBuilding dosage prediction model...")
    dosage_model = build_dosage_model(X_train, y_med_train, y_dosage_train, preprocessor)
    if dosage_model is not None:
        # Score only on test rows that actually carry a dosage.
        mask_test = y_med_test != 'supportive_care'
        dosage_r2 = dosage_model.score(X_test[mask_test], y_dosage_test[mask_test])
        print(f"Dosage prediction model R² score: {dosage_r2:.4f}")

        joblib.dump(dosage_model, 'flu_dosage_model.pkl')
        print("Dosage model saved as 'flu_dosage_model.pkl'")

    # Launch user prediction interface
    user_prediction_interface()



In [None]:
# Entry point: run the full pipeline, or only the prediction interface when
# the first command-line argument is "--predict-only" (models must already exist).
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    predict_only = bool(cli_args) and cli_args[0] == "--predict-only"

    if not predict_only:
        main()
    else:
        # Skip dataset generation and model training
        print("Skipping dataset generation and model training...")
        user_prediction_interface()


Preparing data for model training...

Building medication recommendation model...
Medication recommendation model accuracy: 0.9867

Building dosage prediction model...
Dosage prediction model R² score: 0.9665

FLU MEDICATION AND DOSAGE PREDICTION SYSTEM

==== FLU MEDICATION RECOMMENDATION SYSTEM ====
Please enter the patient's information:

--- Genetic Information ---

CYP2D6 activity (affects drug metabolism):
1. Poor metabolizer
2. Intermediate metabolizer
3. Normal metabolizer
4. Ultrarapid metabolizer
