In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

file_path = r'expanded_synthetic_health_data (1).csv'
data = pd.read_csv(file_path)


label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])


X = data.drop(columns='Severity')
y = data['Severity']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support Vector Machine (SVM)": SVC(),
    "k-Nearest Neighbors (k-NN)": KNeighborsClassifier()
}



for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)


    y_pred = model.predict(X_test)


    print(f"Classification Report for {model_name}:\n")
    print(classification_report(y_test, y_pred, target_names=label_encoders['Severity'].classes_))
    print("\n" + "="*80 + "\n")


conversion_dict = {
    "Gender": {"Female": 0, "Male": 1, "Unknown": 2},
    "Smoking": {"No": 0, "Yes": 1},
    "Pain Scale": {str(i): i for i in range(11)},
    "Age Group": {"False": 0, "True": 1},

}


def convert_input(column, value):

    for key in conversion_dict:
        if key in column:
            return conversion_dict[key].get(value, value)

    try:
        return float(value)
    except ValueError:

        return label_encoders[column].transform([value])[0]


def get_user_input():
    print("Select a model for prediction:")
    for i, model_name in enumerate(models.keys(), 1):
        print(f"{i}. {model_name}")

    model_choice = int(input("Enter the number corresponding to your choice: ")) - 1
    selected_model_name = list(models.keys())[model_choice]
    selected_model = models[selected_model_name]

    print(f"\nYou selected: {selected_model_name}\n")

    print("Please enter the following details for prediction:")
    user_data = {}


    for column in X.columns:
        if "Gender" in column:
            print(f"Options for {column}: Female, Male, Unknown")
        elif "Age Group" in column:
            print(f"Options for {column}: False, True")
        elif column == "Smoking":
            print(f"Options for {column}: No, Yes")
        elif column == "Pain Scale":
            print(f"Enter value for {column} (Example: 0 to 10)")
        elif column in label_encoders:
            # Convert all classes to strings to avoid TypeError
            options = [str(item) for item in label_encoders[column].classes_]
            print(f"Options for {column}: {', '.join(options)}")
        else:
            print(f"Enter value for {column}")

        value = input(f"Value for {column}: ")
        user_data[column] = [convert_input(column, value)]

    user_df = pd.DataFrame(user_data)


    severity_prediction = selected_model.predict(user_df)
    severity_class = label_encoders['Severity'].inverse_transform(severity_prediction)

    print(f"\nPredicted Severity: {severity_class[0]}")


get_user_input()


Classification Report for Logistic Regression:

              precision    recall  f1-score   support

        Mild       0.93      0.92      0.93       255
    Moderate       0.76      0.62      0.68       231
      Severe       0.85      0.93      0.89       514

    accuracy                           0.86      1000
   macro avg       0.85      0.82      0.83      1000
weighted avg       0.85      0.86      0.85      1000



Classification Report for Support Vector Machine (SVM):

              precision    recall  f1-score   support

        Mild       0.89      1.00      0.94       255
    Moderate       0.99      0.43      0.60       231
      Severe       0.79      0.94      0.86       514

    accuracy                           0.84      1000
   macro avg       0.89      0.79      0.80      1000
weighted avg       0.86      0.84      0.82      1000



Classification Report for k-Nearest Neighbors (k-NN):

              precision    recall  f1-score   support

        Mild       

ValueError: y contains previously unseen labels: 'sudden'

In [4]:
import joblib

# Example using the Logistic Regression model
model = models['Logistic Regression']

# Save the trained model to a file
joblib.dump(model, 'logistic_regression_model.pkl')

# Save label encoders too, if needed
joblib.dump(label_encoders, 'label_encoders.pkl')


['label_encoders.pkl']

In [5]:
joblib.dump(model, 'logistic_regression_model.pkl')


['logistic_regression_model.pkl']