# Import Required Classes

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import seaborn as sns

# Use seaborn's "darkgrid" theme for the plots drawn later in this notebook.
sns.set_style("darkgrid")

# Make the project modules under ../src importable. The entry is relative, so
# it depends on the current working directory. The membership check keeps
# sys.path free of duplicate entries when this cell is run more than once.
if '../src' not in sys.path:
    sys.path.append('../src')

In [None]:
import os

# Report the directory the kernel is running in; the relative paths used in
# this notebook ('../src', '../data', '../models') depend on it.
working_dir = os.getcwd()
print("Current working directory:", working_dir)

In [None]:
from LightGBMBaselineClassifier import LightGBMBaselineClassifier
from LightGBMTuner import LightGBMTuner
from DiskIO import DiskIO

# Run Baseline LightGBM Classifier

In [None]:
# Create the baseline classifier (project class imported from ../src) and ask
# it to load and prepare its data. Which data it reads is decided inside the
# class and is not visible from this notebook.
baseline = LightGBMBaselineClassifier()
baseline.load_and_prepare_data()

In [None]:
# Train the baseline model.
# NOTE(review): presumably relies on load_and_prepare_data() having been run first — confirm.
baseline.train()

In [None]:
# Unpack the three values returned by evaluate(): the report (consumed as a
# dict in the next cell), the confusion matrix passed to the plot below, and
# a dict of summary stats.
report, cm, stats = baseline.evaluate()

# NOTE(review): the heading says "Classification Report", but the loop prints
# the entries of `stats`, each followed by a percent sign.
print("Classification Report:")
for k, v in stats.items():
    print(f"{k}: {v}%")

# Plot confusion matrix
baseline.plot_confusion_matrix(cm)

In [None]:
# Turn the classification report into a table WITHOUT modifying `report`.
# Popping 'accuracy' out of the dict would make a second run of this cell
# silently lose the accuracy row, so the value is read with .get() instead.
accuracy = report.get('accuracy')

# 'accuracy' is not a dict like the other entries, so it is kept out of the
# DataFrame constructor and appended as its own row further down.
per_label = {label: scores for label, scores in report.items() if label != 'accuracy'}

# Transpose to have labels as rows, then round float values for display
df_report = pd.DataFrame(per_label).T.round(3)

# Add accuracy if present
if accuracy is not None:
    # The value goes into the last column; the remaining columns stay empty.
    # NOTE(review): confirm the last column is where accuracy should be shown.
    df_report.loc['accuracy'] = [None] * (df_report.shape[1] - 1) + [round(accuracy, 3)]

df_report

In [None]:
# NOTE(review): model_name is assigned here but is neither passed to
# save_model() nor read anywhere else in this notebook — confirm whether it is
# still needed.
model_name = 'baseline'
# Save the baseline pipeline together with its label encoder.
baseline.save_model(pipeline=baseline.pipeline, label_encoder=baseline.label_encoder)

# Tuning - Nested CV

In [None]:
# Initialize and tune
# NOTE(review): n_trials=30 is presumably the number of search trials — confirm in LightGBMTuner.
tuner = LightGBMTuner(n_trials=30)
tuner.tune_hyperparameters()

In [None]:
# Print best parameters
# (best_params is read from the tuner after tune_hyperparameters() ran in the previous cell)
print("Best Parameters Found:")
print(tuner.best_params)

# Train final model
# NOTE(review): presumably refits using best_params — confirm in LightGBMTuner.
tuner.train_final_model()

# Plot learning curve
tuner.plot_learning_curve()

In [None]:
# Train on full Hao datasets and save final model
# The 'final' suffix is reused later in this notebook when the model is
# evaluated on the external test set and when it is loaded through DiskIO.
tuner.train_and_save_final_model_on_full_data(suffix='final')

In [None]:
# Evaluate on external data (Kotliarov dataset)
# This rebinds report, cm and stats, replacing the baseline results that the
# earlier evaluation cell stored under the same names.
report, cm, stats = tuner.evaluate_on_external_testset("../data/kotliarov.csv", suffix='final')

In [None]:
# Print results

# Turn the classification report into a table WITHOUT modifying `report`.
# Popping 'accuracy' out of the dict would make a second run of this cell
# silently lose the accuracy row, so the value is read with .get() instead.
accuracy = report.get('accuracy')

# 'accuracy' is not a dict like the other entries, so it is kept out of the
# DataFrame constructor and appended as its own row further down.
per_label = {label: scores for label, scores in report.items() if label != 'accuracy'}

# Transpose to have labels as rows, then round float values for display
df_report = pd.DataFrame(per_label).T.round(3)

# Add accuracy if present
if accuracy is not None:
    # The value goes into the last column; the remaining columns stay empty.
    # NOTE(review): confirm the last column is where accuracy should be shown.
    df_report.loc['accuracy'] = [None] * (df_report.shape[1] - 1) + [round(accuracy, 3)]

df_report

In [None]:
# Print stats
# `stats` and `cm` here are the values returned by
# evaluate_on_external_testset() above, i.e. the tuned model's results on the
# external test set.
print("Tuned Model - Classification Report:")
for k, v in stats.items():
    print(f"{k}: {v}%")

# Plot confusion matrix
tuner.plot_confusion_matrix(cm)

# Baseline vs Tuned

In [None]:
# Read the Hao training table; the first CSV column becomes the index.
hao = pd.read_csv("../data/hao.csv", index_col=0)

# Every column except "label" is a feature. Keep the names in file order so
# other datasets can be aligned to the same column layout.
expected_features = hao.columns.drop("label")

In [None]:
# Read the Kotliarov test table (same format as hao.csv) and split it into
# feature columns and the label column.
kotliarov = pd.read_csv("../data/kotliarov.csv", index_col=0)
X_kot = kotliarov.drop(columns=["label"])
y_kot = kotliarov["label"]

# Select the training features, in training order, from the Kotliarov columns.
X_kot_aligned = X_kot.loc[:, expected_features]

In [None]:
# Load the saved artifacts from ../models through the project's DiskIO helper.
io = DiskIO("../models")

# NOTE(review): the baseline is loaded without a suffix while the tuned model
# uses the "final" suffix; confirm these names match what save_model() and
# train_and_save_final_model_on_full_data() actually wrote.
baseline_model = io.load("LightGBM")
tuned_model = io.load("LightGBM", "final")
# NOTE(review): here "_final" is part of the single name argument rather than
# a separate suffix argument as on the line above — verify against DiskIO.load.
label_encoder = io.load("label_encoder_final")

In [None]:
# Classes stored on the loaded label encoder
known_labels = label_encoder.classes_

# Boolean mask marking the rows whose label is one of those classes. Rows with
# any other label are dropped, because the encoder has no class to map them to.
valid_mask = y_kot.isin(known_labels)

# Keep only the valid rows and, in the same indexing step, put the columns in
# the training feature order ('expected_features' must match the training set).
X_kot_aligned = X_kot.loc[valid_mask, expected_features]

# Apply the same row filter to the labels so features and labels stay aligned
y_kot_filtered = y_kot.loc[valid_mask]

# Encode the remaining string labels with the loaded encoder
y_kot_encoded = label_encoder.transform(y_kot_filtered)

In [None]:
from MetricsCore import MetricsCalculator

metrics_calculator = MetricsCalculator()

# Score both loaded models on the same filtered Kotliarov rows and encoded labels.
# NOTE(review): the labels were encoded with "label_encoder_final"; this assumes
# the baseline model uses the same class-to-integer mapping — confirm.
baseline_metrics = metrics_calculator.compute_from_model(baseline_model, X_kot_aligned, y_kot_encoded)
tuned_metrics = metrics_calculator.compute_from_model(tuned_model, X_kot_aligned, y_kot_encoded)

In [None]:
# Collect both result sets in one table: one column per model.
df = pd.DataFrame({'Baseline': baseline_metrics, 'Tuned': tuned_metrics})

# Transpose so each model becomes a group on the x-axis, then draw grouped bars.
df.T.plot.bar(figsize=(10, 6), rot=0)
plt.title("Model Performance on Kotliarov Dataset - LightGBM")
plt.ylabel("Metric Score (%)")
plt.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
# Display the Baseline/Tuned metrics table built in the previous cell.
df

In [None]:
# Extract the list of metric names from the baseline results (e.g., Accuracy, F1, etc.)
metrics = list(baseline_metrics.keys())

# Collect metric values for baseline and tuned models in the same order.
# Indexing tuned_metrics with the baseline's names means a metric missing from
# the tuned results raises KeyError instead of being silently skipped.
baseline_values = [baseline_metrics[m] for m in metrics]
tuned_values = [tuned_metrics[m] for m in metrics]

# Compute the difference in performance for each metric (Tuned - Baseline)
# This helps identify which metrics improved and by how much
diff = [t - b for t, b in zip(tuned_values, baseline_values)]

# Bars above the dashed zero line are metrics where the tuned model scored higher.
plt.figure(figsize=(10, 5))
bars = plt.bar(metrics, diff, color='skyblue')
plt.axhline(0, color='black', linestyle='--')
plt.title("Tuned - Baseline Metric Differences on Kotliarov")
plt.ylabel("Difference in Score (%)")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Predict labels on the Kotliarov test set using both baseline and tuned models
baseline_preds = baseline_model.predict(X_kot_aligned)
tuned_preds = tuned_model.predict(X_kot_aligned)

# Fraction (0-1) of samples for which both models made the same prediction:
# the mean of the element-wise equality between the two prediction arrays
agreement = np.mean(baseline_preds == tuned_preds)

# Display model agreement in percentage format (the fraction is scaled by 100 here)
print(f"The two models agree on {agreement * 100:.2f}% of predictions.")

# Interpretation - SHAP

In [None]:
import shap

# Look the pipeline steps up once and reuse them below.
scaler = tuned_model.named_steps["scaler"]

# Estimator step of the tuned pipeline. This notebook tunes LightGBM models,
# so the variable is named accordingly.
lgbm_model = tuned_model.named_steps["model"]

# Prepare background data for SHAP explainer: up to 100 rows, scaled the same
# way the model's inputs are. The size is capped at the number of available
# rows because sampling more rows than exist raises ValueError.
n_background = min(100, len(X_kot_aligned))
background_data = scaler.transform(X_kot_aligned.sample(n_background, random_state=42))

# Create SHAP explainer for the tuned model
explainer = shap.Explainer(lgbm_model, background_data)

# Compute SHAP values for the (scaled) test set
X_kot_scaled = scaler.transform(X_kot_aligned)
shap_values = explainer(X_kot_scaled)

In [None]:
# Plot SHAP summary
# Bar-type summary limited to 20 features (max_display=20), labelled with the
# training feature names.
# NOTE(review): the SHAP values were computed on the scaled matrix, while the
# unscaled X_kot_aligned is passed here as the feature data — confirm this is intended.
shap.summary_plot(shap_values, X_kot_aligned, feature_names=expected_features, max_display=20, plot_type="bar")