In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import classification_report, log_loss
import matplotlib.pyplot as plt

# Read the nutrition research workbook from its fixed location on disk.
df = pd.read_excel(
    r'C:\\Users\\HP\\Desktop\\updated_research_nutrition_data.xlsx',
    engine='openpyxl',
)

# Predictors: every column except the label, with categorical columns
# expanded into indicator (dummy) columns.
X = pd.get_dummies(df.drop(columns=['Viral_Load_Status']))

# Target: the 'Viral_Load_Status' column mapped to integer class codes.
# `le` is kept so the codes can be mapped back to the original labels.
le = LabelEncoder()
y = le.fit_transform(df['Viral_Load_Status'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only
# and the same fitted transformation is then applied to both partitions,
# so no statistics from the test rows influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define the models to be trained.
#
# Every estimator with a stochastic component is given an explicit seed
# (the same value used for the train/test split) so that repeated runs of
# the script produce the same fitted models, metrics and plot. SVC is
# seeded as well because probability=True fits its probability model with
# an internal, randomized cross-validation.
RANDOM_STATE = 42

models = {
    "Logistic Regression": LogisticRegression(max_iter=2000),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "Support Vector Machine": SVC(probability=True, random_state=RANDOM_STATE),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
    # releases and may only trigger a warning there -- confirm against the
    # installed version before removing it.
    "XGBoost": xgb.XGBClassifier(
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=RANDOM_STATE,
    ),
    "LightGBM": lgb.LGBMClassifier(random_state=RANDOM_STATE),
}

# Train and evaluate each model.
#
# Outputs (all module-level, consumed by the plotting/printing code below):
#   results     -- model name -> {"Classification Report": str,
#                                 "Log Loss": float}
#   log_losses  -- log loss per model, in the iteration order of `models`
#   model_names -- model names, parallel to `log_losses`
results = {}
log_losses = []
model_names = []
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):
        y_prob = model.predict_proba(X_test)
        # Pass the classes the model was trained on explicitly: the columns
        # of `y_prob` follow `model.classes_`, and without `labels` log_loss
        # infers the classes from `y_test` alone, which fails when the test
        # split happens to miss one of the training classes.
        model_log_loss = log_loss(y_test, y_prob, labels=model.classes_)
    else:
        # A model that only exposes decision_function returns unbounded
        # margin scores, not probabilities, so a log loss computed from
        # them would be meaningless. Record NaN instead of a bogus number.
        y_prob = None
        model_log_loss = float("nan")

    # Store evaluation metrics and log loss score
    results[model_name] = {
        "Classification Report": classification_report(y_test, y_pred),
        "Log Loss": model_log_loss,
    }

    log_losses.append(model_log_loss)
    model_names.append(model_name)

# Horizontal bar chart comparing the log loss of every trained model,
# one bar per entry of `model_names` / `log_losses`.
fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.barh(model_names, log_losses, color='skyblue')
ax.set_xlabel('Log Loss')
ax.set_title('Log Loss Scores for Different Models')

# Write each score (two decimals) at the right-hand end of its bar,
# vertically centred on the bar.
for rect, score in zip(bars, log_losses):
    bar_end = rect.get_width()
    bar_middle = rect.get_y() + rect.get_height() / 2
    ax.text(bar_end, bar_middle, f'{score:.2f}',
            va='center', ha='left', fontsize=10)

plt.show()

# Print the stored classification report of every model, each preceded by
# a "Model: <name>" header line.
for model_name, result in results.items():
    report = result["Classification Report"]
    print(f"Model: {model_name}", report, sep="\n")