In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
import joblib

# Train models and save them
def train_and_save_models(train_file='Training_with_estimated_time.csv',
                          prognosis_model_path='model_prognosis.pkl',
                          time_model_path='model_time.pkl',
                          label_encoder_path='label_encoder.pkl',
                          random_state=None):
    """Train the prognosis classifier and the time regressor, then save them.

    Every column of ``train_file`` other than ``prognosis`` and
    ``estimated_time`` is used as a feature. The ``prognosis`` labels are
    integer-encoded with a ``LabelEncoder`` before fitting the classifier;
    ``estimated_time`` is the regression target. The two fitted models and
    the fitted encoder are written with ``joblib.dump``.

    Args:
        train_file: Path of the training CSV. It must contain the
            ``prognosis`` and ``estimated_time`` columns.
        prognosis_model_path: Destination file for the fitted classifier.
        time_model_path: Destination file for the fitted regressor.
        label_encoder_path: Destination file for the fitted label encoder.
        random_state: Seed forwarded to both XGBoost estimators. ``None``
            (the default) leaves XGBoost's own default seeding in place,
            which is what the estimators use when constructed with no
            arguments.

    Returns:
        None. The artifacts are written to disk and a confirmation line is
        printed.

    Raises:
        KeyError: If ``prognosis`` or ``estimated_time`` is missing from the
            CSV (raised by ``DataFrame.drop``).
    """
    # Load training data
    train_data = pd.read_csv(train_file)

    # Features and target columns
    X_train = train_data.drop(columns=['prognosis', 'estimated_time'])
    y_train_prognosis = train_data['prognosis']
    y_train_time = train_data['estimated_time']

    # Encode prognosis labels; the fitted encoder is saved below so that
    # predictions can be mapped back to the original label strings later.
    label_encoder = LabelEncoder()
    y_train_prognosis_encoded = label_encoder.fit_transform(y_train_prognosis)

    # Train models (both share the same feature matrix)
    model_prognosis = xgb.XGBClassifier(random_state=random_state)
    model_prognosis.fit(X_train, y_train_prognosis_encoded)

    model_time = xgb.XGBRegressor(random_state=random_state)
    model_time.fit(X_train, y_train_time)

    # Save models and label encoder
    joblib.dump(model_prognosis, prognosis_model_path)
    joblib.dump(model_time, time_model_path)
    joblib.dump(label_encoder, label_encoder_path)

    print("Models and label encoder saved successfully!")

# Train on the default CSV and write the three .pkl artifacts using their
# default relative file names. Note: this re-trains every time the cell runs.
train_and_save_models()


Models and label encoder saved successfully!


In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, mean_absolute_error
import joblib

# Load the persisted classifier, regressor and label encoder as module-level
# names; evaluate_models() below reads these globals directly rather than
# taking them as arguments.
# NOTE: joblib.load unpickles the files, so only load artifacts you trust.
model_prognosis = joblib.load('model_prognosis.pkl')
model_time = joblib.load('model_time.pkl')
label_encoder = joblib.load('label_encoder.pkl')

# Score the loaded models against a labelled test CSV
def evaluate_models(test_file='Testing_with_estimated_time.csv'):
    """Print accuracy and MAE for the loaded models, plus a preview table.

    Reads ``test_file`` and treats every column other than ``prognosis`` and
    ``estimated_time`` as a feature. The module-level ``model_prognosis``,
    ``model_time`` and ``label_encoder`` objects are used for prediction and
    label encoding. Nothing is returned; the metrics and the first rows of
    an actual-vs-predicted table are printed.
    """
    frame = pd.read_csv(test_file)

    # Separate the model inputs from the ground-truth columns
    target_columns = ['prognosis', 'estimated_time']
    features = frame.drop(columns=target_columns)
    actual_prognosis = frame['prognosis']
    actual_time = frame['estimated_time']

    # Ground-truth labels in the encoder's integer space
    actual_codes = label_encoder.transform(actual_prognosis)

    # Classifier output is integer-coded; decode it for display
    predicted_codes = model_prognosis.predict(features)
    predicted_prognosis = label_encoder.inverse_transform(predicted_codes)
    predicted_time = model_time.predict(features)

    # Metrics: accuracy on the encoded labels, MAE on the time target
    prognosis_accuracy = accuracy_score(actual_codes, predicted_codes)
    time_mae = mean_absolute_error(actual_time, predicted_time)

    print(f"Prognosis Accuracy: {prognosis_accuracy * 100:.2f}%")
    print(f"Mean Absolute Error for Estimated Time: {time_mae:.2f} minutes")

    # Side-by-side table of truth vs. prediction (times rounded to 2 places)
    rounded_times = [round(value, 2) for value in predicted_time]
    comparison = pd.DataFrame({
        'Actual Prognosis': actual_prognosis,
        'Predicted Prognosis': predicted_prognosis,
        'Actual Estimated Time': actual_time,
        'Predicted Estimated Time': rounded_times,
    })

    print("\nSample Predictions:")
    print(comparison.head())

# Score the loaded models on the default test CSV and print the metrics
# together with a preview of the predictions
evaluate_models()



Prognosis Accuracy: 97.62%
Mean Absolute Error for Estimated Time: 0.22 minutes

Sample Predictions:
      Actual Prognosis  Predicted Prognosis  Actual Estimated Time  \
0     Fungal infection     Fungal infection                     10   
1              Allergy              Allergy                     25   
2                 GERD                 GERD                     30   
3  Chronic cholestasis  Chronic cholestasis                     35   
4        Drug Reaction        Drug Reaction                     20   

   Predicted Estimated Time  
0                      10.0  
1                      25.0  
2                      30.0  
3                      35.0  
4                      20.0  
