# In [None]:
from transformers import pipeline
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch

# Step 1: Pick the compute device, then build the zero-shot pipeline on it
# (0 selects the GPU when CUDA is available, -1 selects the CPU)
if torch.cuda.is_available():
    device = 0
else:
    device = -1
zero_shot_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

# Step 2: Read the training and testing splits from CSV
train_df, test_df = (pd.read_csv(path) for path in ("Training.csv", "Testing.csv"))
print(train_df.head())  # Preview the first few training rows


# Step 3: Prepare the Input Text (Symptoms -> Template)
def prepare_input_text(row, feature_columns):
    """
    Converts binary symptom columns into a single readable string of symptoms.

    Args:
        row: Record indexed by column name (e.g. a DataFrame row or a dict).
        feature_columns: Column names to inspect; output follows this order.

    Returns:
        The names of the columns whose value equals 1, with underscores
        replaced by spaces and entries separated by ", ". An empty string
        when no column is set. Lookup errors from ``row[col]`` propagate.
    """
    # dict.fromkeys drops repeated column names while keeping first-seen
    # order, so a column listed twice is not reported twice.
    active = [col for col in dict.fromkeys(feature_columns) if row[col] == 1]

    # Column names are identifiers such as "spotting_ urination"; turn
    # underscores into spaces and collapse runs of whitespace so each symptom
    # reads as plain words. A comma separator keeps multi-word symptoms
    # distinguishable from one another.
    return ", ".join(" ".join(col.replace("_", " ").split()) for col in active)

# Step 4: Apply Zero-Shot Classification
def zero_shot_classification(df, feature_columns, candidate_labels):
    """
    Predicts one label per row of ``df`` using the zero-shot classifier.

    For every row, the active symptoms are embedded in a fixed prompt
    sentence, the module-level ``zero_shot_classifier`` is called with that
    sentence and ``candidate_labels``, and the first entry of the returned
    'labels' list is kept as the prediction.

    Returns:
        A list of predicted labels, one per row, in row order.
    """
    def classify(record):
        # Build the prompt sentence for this row
        symptom_text = prepare_input_text(record, feature_columns)
        prompt = f"This person has the symptoms of: {symptom_text}. What disease could this be?"

        # Score the prompt against all candidate labels
        outcome = zero_shot_classifier(prompt, candidate_labels=candidate_labels)

        # The first label is taken as the top-ranked prediction
        return outcome['labels'][0]

    return [classify(record) for _, record in df.iterrows()]

# Step 5: Evaluate the Model Performance
def evaluate_model(y_true, y_pred):
    """
    Calculates and prints accuracy, precision, recall, and F1 score.

    Precision, recall and F1 are computed with ``average='weighted'``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    Returns:
        A dict with the keys "accuracy", "precision", "recall" and "f1",
        holding the same values that are printed.
    """
    # zero_division=0: a label whose metric is undefined (for example a label
    # that is never predicted has no defined precision) is scored as 0.
    # Scoring it as 1 would inflate the weighted averages.
    averaging = {"average": "weighted", "zero_division": 0}

    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, **averaging),
        "recall": recall_score(y_true, y_pred, **averaging),
        "f1": f1_score(y_true, y_pred, **averaging),
    }

    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")

    return metrics

# Step 6: Define Feature Columns and Candidate Labels
# Symptom indicator columns used to build the prompt text. The names are used
# verbatim as column keys, so their exact spelling (including embedded spaces
# such as "spotting_ urination") must be preserved.
# NOTE(review): "fluid_overload" used to be listed twice, which read the same
# column twice and repeated the symptom in the generated text; the repeat has
# been removed. If the CSV header itself repeats that name, pandas would expose
# the second column as "fluid_overload.1" -- confirm whether it belongs here.
feature_columns = [
    "itching", "skin_rash", "nodal_skin_eruptions", "continuous_sneezing", "shivering", "chills",
    "joint_pain", "stomach_pain", "acidity", "ulcers_on_tongue", "muscle_wasting", "vomiting",
    "burning_micturition", "spotting_ urination", "fatigue", "weight_gain", "anxiety",
    "cold_hands_and_feets", "mood_swings", "weight_loss", "restlessness", "lethargy",
    "patches_in_throat", "irregular_sugar_level", "cough", "high_fever", "sunken_eyes",
    "breathlessness", "sweating", "dehydration", "indigestion", "headache", "yellowish_skin",
    "dark_urine", "nausea", "loss_of_appetite", "pain_behind_the_eyes", "back_pain",
    "constipation", "abdominal_pain", "diarrhoea", "mild_fever", "yellow_urine",
    "yellowing_of_eyes", "acute_liver_failure", "fluid_overload", "swelling_of_stomach",
    "swelled_lymph_nodes", "malaise", "blurred_and_distorted_vision", "phlegm",
    "throat_irritation", "redness_of_eyes", "sinus_pressure", "runny_nose", "congestion",
    "chest_pain", "weakness_in_limbs", "fast_heart_rate", "pain_during_bowel_movements",
    "pain_in_anal_region", "bloody_stool", "irritation_in_anus", "neck_pain", "dizziness",
    "cramps", "bruising", "obesity", "swollen_legs", "swollen_blood_vessels", "puffy_face_and_eyes",
    "enlarged_thyroid", "brittle_nails", "swollen_extremeties", "excessive_hunger",
    "extra_marital_contacts", "drying_and_tingling_lips", "slurred_speech", "knee_pain",
    "hip_joint_pain", "muscle_weakness", "stiff_neck", "swelling_joints", "movement_stiffness",
    "spinning_movements", "loss_of_balance", "unsteadiness", "weakness_of_one_body_side",
    "loss_of_smell", "bladder_discomfort", "foul_smell_of urine", "continuous_feel_of_urine",
    "passage_of_gases", "internal_itching", "toxic_look_(typhos)", "depression", "irritability",
    "muscle_pain", "altered_sensorium", "red_spots_over_body", "belly_pain", "abnormal_menstruation",
    "dischromic _patches", "watering_from_eyes", "increased_appetite", "polyuria", "family_history",
    "mucoid_sputum", "rusty_sputum", "lack_of_concentration", "visual_disturbances",
    "receiving_blood_transfusion", "receiving_unsterile_injections", "coma", "stomach_bleeding",
    "distention_of_abdomen", "history_of_alcohol_consumption",
    "blood_in_sputum", "prominent_veins_on_calf", "palpitations", "painful_walking",
    "pus_filled_pimples", "blackheads", "scurring", "skin_peeling", "silver_like_dusting",
    "small_dents_in_nails", "inflammatory_nails", "blister", "red_sore_around_nose",
    "yellow_crust_ooze",
]

# Label space offered to the classifier: every distinct 'prognosis' value in the training set
candidate_labels = train_df['prognosis'].unique().tolist()

# Ground-truth labels of the test rows
y_true = test_df['prognosis'].tolist()

# Step 7: Run zero-shot prediction over the test rows
test_predictions = zero_shot_classification(
    df=test_df,
    feature_columns=feature_columns,
    candidate_labels=candidate_labels,
)

# Step 8: Report the metrics for the predictions against the ground truth
evaluate_model(y_true=y_true, y_pred=test_predictions)
