In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Accuracy per classifier, without and with SMOTE (same model order in both lists)
model_names = ['KNN', 'XGBoost', 'Logistic Regression', 'RandomForest', 'SVM']
accuracies_without_smote = [0.7675, 0.8546, 0.3222, 0.8860, 0.8801]
accuracies_with_smote = [0.9963, 0.8546, 0.1121, 0.9623, 0.9134]

# Canvas first, then the bar geometry: one integer slot per model
fig, ax = plt.subplots(figsize=(7, 5))
bar_width = 0.3
x = np.arange(len(model_names))

# Each series is shifted half a bar-width to one side of its slot,
# so every pair of bars ends up centred on the model's tick.
bars1, bars2 = (
    ax.bar(x + shift, values, width=bar_width, label=label,
           color=color, edgecolor='black')
    for shift, values, label, color in (
        (-bar_width / 2, accuracies_without_smote, 'Without SMOTE', 'steelblue'),
        (+bar_width / 2, accuracies_with_smote, 'With SMOTE', 'orange'),
    )
)

# Annotate every bar with its value (always black text)
def add_labels(bars):
    """Write each bar's height, to four decimals, centred just above the bar.

    Draws on the notebook-level ``ax``, so the axes must exist before this
    is called.

    Parameters
    ----------
    bars : iterable
        Bar artists exposing ``get_x()``, ``get_width()`` and ``get_height()``.
    """
    text_style = dict(ha='center', va='bottom', fontsize=5,
                      color='black', fontweight='bold')
    for rect in bars:
        value = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2
        # Fixed gap of 0.015 in data units between the bar top and the text baseline
        ax.text(centre_x, value + 0.015, f'{value:.4f}', **text_style)

# Value annotations for both series
for bar_group in (bars1, bars2):
    add_labels(bar_group)

# Axis titles share one bold style
axis_title_style = {'fontsize': 16, 'fontweight': 'bold'}
ax.set_xlabel("Models", **axis_title_style)
ax.set_ylabel("Accuracy", **axis_title_style)

# X axis: one tick per model, names slightly rotated
ax.set_xticks(x)
ax.set_xticklabels(model_names, fontsize=13, rotation=15)

# Y axis: ticks in steps of 0.1 starting at 0, with dashed horizontal guide lines
ax.set_yticks(np.arange(0, 1.1, 0.1))
ax.tick_params(axis='y', labelsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Legend
ax.legend(loc='lower right', fontsize=11, frameon=True)

# Fit all elements inside the figure, then render it
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Models and their log loss, without and with SMOTE (same model order in all lists)
models = ['KNN', 'Random Forest', 'XGBoost', 'Logistic Regression', 'SVM']
log_loss_without_smote = [0.0905, 0.0221, 0.0003, 0.4345, 0.6504]
log_loss_with_smote = [0.006049, 3.0433e-05, 4.4951e-05, 0.396829, 0.1062]

# Setup
# One unit-spaced slot per model, derived from the list itself so the bar
# positions cannot fall out of step with the data if a model is added or removed.
x = np.arange(len(models))
bar_width = 0.25
fig, ax = plt.subplots(figsize=(8, 5.5))

# Bar plots: the two series sit half a bar-width either side of each slot.
# log=True puts the y axis on a log scale, since the values span several
# orders of magnitude.
bars1 = ax.bar(x - bar_width/2, log_loss_without_smote, width=bar_width, label='Without SMOTE',
               color='skyblue', edgecolor='black', log=True)
bars2 = ax.bar(x + bar_width/2, log_loss_with_smote, width=bar_width, label='With SMOTE',
               color='orange', edgecolor='black', log=True)

# Value annotations for the log-loss bars
def add_labels(bars):
    """Write each bar's height, to six decimals, centred above the bar.

    The text is placed at a multiple of the bar height rather than a fixed
    distance above it: 1.4x for heights below 0.01, 1.08x otherwise.
    Draws on the notebook-level ``ax``.

    Parameters
    ----------
    bars : iterable
        Bar artists exposing ``get_x()``, ``get_width()`` and ``get_height()``.
    """
    for rect in bars:
        value = rect.get_height()
        # Very small bars get a larger multiplicative lift than the rest
        if value < 0.01:
            lift = 1.4
        else:
            lift = 1.08
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            value * lift,
            f'{value:.6f}',
            ha='center',
            fontsize=6,
            fontweight='light',
            color='black',
        )

# Annotate both series with their values
for bar_group in (bars1, bars2):
    add_labels(bar_group)

# Axis titles (shared bold style)
axis_title_style = {'fontsize': 14, 'fontweight': 'bold'}
ax.set_xlabel('Models', **axis_title_style)
ax.set_ylabel('Log Loss (log scale)', **axis_title_style)

# One tick per model, names slightly rotated
ax.set_xticks(x)
ax.set_xticklabels(models, fontsize=12, rotation=15)

# Y axis: larger tick labels, dashed guides on major and minor ticks
ax.tick_params(axis='y', labelsize=12)
ax.grid(axis='y', which='both', linestyle='--', alpha=0.7)

# Legend, then fit everything inside the figure and render
ax.legend(loc='upper left', fontsize=12)
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Per-model results, one row per model: (name, average F1 score, time taken in seconds)
result_columns = ('Model', 'Avg F1 Score', 'Time Taken (s)')
result_rows = [
    ('XGBoost', 0.9946, 172.79),
    ('KNN', 0.9780, 7.49),
    ('Random Forest', 0.9675, 94.29),
    ('SVM (RBF)', 0.8308, 5210.39),
    ('Logistic Regression', 0.7813, 326.36),
]

# Transpose the rows into the column-oriented mapping the DataFrame is built from
data = {
    column: list(values)
    for column, values in zip(result_columns, zip(*result_rows))
}

# Tabular form used by the plotting code
df = pd.DataFrame(data)

# Set style
sns.set_style('whitegrid')

# Create figure and twin axis: ax1 carries the F1 bars, ax2 (sharing the x axis)
# carries the timing line on its own y scale.
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()
# The 'whitegrid' style switches the grid on for the twin axis as well. Its lines
# follow the time scale, do not line up with the F1 grid, and are drawn on top of
# the bars, so keep only the primary axis grid.
ax2.grid(False)

# Bar plot for F1 Score
# Request exactly one pastel colour per row instead of the full default palette,
# so the palette length always matches the number of bars.
# NOTE(review): recent seaborn releases reportedly warn when `palette` is passed
# without `hue`; confirm against the installed version before changing the call.
colors = sns.color_palette("pastel", n_colors=len(df))
bars = sns.barplot(x='Model', y='Avg F1 Score', data=df, ax=ax1, palette=colors)

# Line plot for Time Taken (same model order as the bars, right-hand y axis)
line = ax2.plot(df['Model'], df['Time Taken (s)'], color='black', marker='o', linewidth=2, label='Time (s)')

# Titles and labels
ax1.set_ylabel('Avg F1 Score', fontsize=12)
ax2.set_ylabel('Time Taken (s)', fontsize=12)
ax1.set_xlabel('Model', fontsize=12)
plt.title('Avg F1 Score and Time per Model (with SMOTE)', fontsize=14)

# Legends (only the timing line carries a label)
ax2.legend(loc='upper right')

# Show plot
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.metrics import precision_recall_curve, average_precision_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# Step 1: Synthetic 5-class dataset with unequal class weights
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_classes=5,
    weights=[0.1, 0.2, 0.3, 0.25, 0.15],
    random_state=42,
)

# Step 2: Stratified 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Step 3: Resample the training split only with SMOTE; the test split is left untouched
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Step 4: Binarize the test labels, one column per class seen in training
class_labels = np.unique(y_train)
n_classes = len(class_labels)
y_test_bin = label_binarize(y_test, classes=class_labels)

# Step 5: Define classifiers
# Estimators that draw random numbers are seeded with the same value used for the
# data generation, split and SMOTE in this cell, so the fitted models (and the AP
# values shown in the legend) are reproducible from run to run.
models = {
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    # NOTE(review): `use_label_encoder` is reportedly deprecated in newer xgboost
    # releases; confirm against the installed version before removing it.
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    # LinearSVC is wrapped one-vs-rest: one binary classifier per class
    "Linear SVM": OneVsRestClassifier(LinearSVC(max_iter=10000, random_state=42))
}

# Step 6: Plot Precision-Recall Curves
plt.figure(figsize=(8, 4))  # Reduced size
average_precisions = {}  # model name -> macro-averaged average precision

# Every per-class curve is resampled onto this shared recall grid before averaging.
recall_grid = np.linspace(0.0, 1.0, 101)

for name, model in models.items():
    model.fit(X_train, y_train)

    # Prefer class probabilities; estimators without predict_proba fall back to
    # their decision_function scores.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)
    else:
        y_score = model.decision_function(X_test)

    precision = dict()
    recall = dict()
    ap_score = []

    # One-vs-rest curve and average precision for each class column
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test_bin[:, i], y_score[:, i])
        ap_score.append(average_precision_score(y_test_bin[:, i], y_score[:, i]))

    # Macro-average curve: interpolate precision as a function of recall.
    # np.interp requires increasing x-coordinates. precision_recall_curve returns
    # recall in decreasing order, so the reversed recall array qualifies; precision
    # is not monotonic and therefore cannot be used as the interpolation axis.
    mean_precision = np.zeros_like(recall_grid)
    for i in range(n_classes):
        mean_precision += np.interp(recall_grid, recall[i][::-1], precision[i][::-1])
    mean_precision /= n_classes

    macro_ap = np.mean(ap_score)
    average_precisions[name] = macro_ap

    plt.plot(recall_grid, mean_precision, label=f"{name} (AP = {macro_ap:.4f})")

# Final styling with darker text, applied to the axes the curves were drawn on
pr_axes = plt.gca()
pr_axes.set_title("Macro-Average Precision-Recall Curves (After SMOTE)", color='black', fontsize=14)
pr_axes.set_xlabel("Recall", color='black', fontsize=12)
pr_axes.set_ylabel("Precision", color='black', fontsize=12)
pr_axes.tick_params(colors='black')  # Darker tick labels
pr_axes.grid(True)
pr_axes.legend(loc="lower left", fontsize=9, facecolor='white', edgecolor='black')

# Fit everything inside the figure, then render
plt.tight_layout()
plt.show()


In [None]:
pip install scikit-learn xgboost matplotlib
