# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Seed NumPy's global RNG so that every later np.random call in this script
# (index shuffling, random K selection) gives the same result on each run.
np.random.seed(42)

### 1.a. Read the dataset

print("=" * 80)
print("PART 1(a): READING THE IRIS DATASET")
print("=" * 80)

# The path is relative, so 'iris.csv' is looked up in the current working directory.
df = pd.read_csv('iris.csv')
print(f"\nDataset shape: {df.shape}")
print("\nFirst few rows:")
print(df.head())
print("\nDataset info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
df.info()
print("\nSpecies distribution:")
print(df['Species'].value_counts())
print()

### 1.b. Normalize the features

print("=" * 80)
print("PART 1(b): FEATURE NORMALIZATION")
print("=" * 80)

# Extract features and target. The column list is kept in one place so the
# extraction and both range reports cannot drift apart.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

print("\nOriginal feature ranges:")
for i, col in enumerate(feature_columns):
    print(f"  {col}: [{X[:, i].min():.2f}, {X[:, i].max():.2f}]")

# Min-Max Normalization: rescale every column to [0, 1] via (x - min) / (max - min).
# The arithmetic is done on an explicit float copy: an output buffer created
# with np.zeros_like(X) would inherit X's dtype, so an integer-typed feature
# matrix would silently truncate every scaled value.
X_float = X.astype(float)
col_min = X_float.min(axis=0)
col_range = X_float.max(axis=0) - col_min
# A constant column has zero range; divide by 1 instead so it maps to 0.0
# rather than producing NaN from 0/0.
safe_range = np.where(col_range == 0, 1.0, col_range)
X_normalized = (X_float - col_min) / safe_range

print("\nNormalized feature ranges:")
for i, col in enumerate(feature_columns):
    print(f"  {col}: [{X_normalized[:, i].min():.2f}, {X_normalized[:, i].max():.2f}]")
print()

### 1.c. Split the dataset (80:20)

print("=" * 80)
print("PART 1(c): TRAIN-TEST SPLIT (80:20)")
print("=" * 80)

# Hand-rolled hold-out split: the first 80% of a random permutation of the
# row indices becomes the training set, the remainder the test set.
n_samples = len(X_normalized)
n_train = int(0.8 * n_samples)

indices = np.random.permutation(n_samples)
train_indices, test_indices = indices[:n_train], indices[n_train:]

X_train, y_train = X_normalized[train_indices], y[train_indices]
X_test, y_test = X_normalized[test_indices], y[test_indices]

print(f"\nTotal samples: {n_samples}")
print(f"Training samples: {len(X_train)} ({len(X_train)/n_samples*100:.1f}%)")
print(f"Testing samples: {len(X_test)} ({len(X_test)/n_samples*100:.1f}%)")

# Report how many samples of each species landed in each partition.
# np.unique(..., return_counts=True) yields (labels, counts); zip(*...) pairs them up.
print(f"\nTraining set species distribution:")
for species, count in zip(*np.unique(y_train, return_counts=True)):
    print(f"  {species}: {count}")
print(f"\nTest set species distribution:")
for species, count in zip(*np.unique(y_test, return_counts=True)):
    print(f"  {species}: {count}")
print()

### 1.d. Implement K-NN Classification

# Section banner for part 1(d): the title framed by two 80-character rules.
print("=" * 80, "PART 1(d): K-NN CLASSIFICATION IMPLEMENTATION", "=" * 80, sep="\n")

def euclidean_distance(point1, point2):
    """Calculate Euclidean distance between two points.

    Args:
        point1: Coordinates of the first point (NumPy array or any sequence
            of numbers).
        point2: Coordinates of the second point; should have the same length
            as ``point1``.

    Returns:
        The straight-line (L2) distance between the two points as a NumPy float.
    """
    # Coerce both inputs to float arrays before subtracting: plain lists and
    # tuples do not support "-", and unsigned-integer arrays would wrap around
    # instead of producing a negative difference.
    diff = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def knn_predict(X_train, y_train, X_test, k):
    """
    K-NN classification without weights.

    Each test point receives the most frequent label among its ``k`` nearest
    training points, measured by Euclidean distance. Training points at equal
    distance keep their original order, and when several classes collect the
    same number of votes the class encountered first (i.e. the one with the
    nearest member) wins.

    Args:
        X_train: Training features, array-like of shape (n_train, n_features)
        y_train: Training labels, one per row of X_train
        X_test: Test features, array-like of shape (n_test, n_features)
        k: Number of neighbors (>= 1); a value larger than n_train simply
            uses every training point

    Returns:
        predictions: Predicted labels for test set (NumPy array)

    Raises:
        ValueError: If k < 1, the training set is empty, or X_train and
            y_train differ in length.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train)

    # Reject k < 1 explicitly: a negative k would otherwise be read as a
    # "drop the last |k| entries" slice and silently use almost all points.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(X_train) != len(y_train):
        raise ValueError(
            f"X_train has {len(X_train)} samples but y_train has {len(y_train)} labels"
        )

    predictions = []

    for test_point in X_test:
        # Distance from this test point to every training point at once.
        distances = np.sqrt(np.sum((X_train - test_point) ** 2, axis=1))

        # Stable sort keeps equidistant training points in their original order.
        nearest = np.argsort(distances, kind='stable')[:k]

        # Majority voting; Counter preserves first-seen order among equal counts.
        votes = Counter(y_train[nearest].tolist())
        predictions.append(votes.most_common(1)[0][0])

    return np.array(predictions)

# Select 5 random K values where K <= sqrt(n)
n = len(X_train)
max_k = int(np.sqrt(n))
print(f"\nNumber of training samples (n): {n}")
print(f"Maximum K value (√n): {max_k}")

# Draw distinct K values from 1..max_k. Sampling without replacement cannot
# return more values than the range holds, so the count is capped at max_k
# to avoid a ValueError on small training sets.
n_k_values = min(5, max_k)
# .tolist() converts NumPy integers to plain Python ints so the list prints
# as e.g. [2, 5, 9] rather than as a list of NumPy scalar reprs.
k_values = sorted(
    np.random.choice(range(1, max_k + 1), size=n_k_values, replace=False).tolist()
)
print(f"Selected K values: {k_values}")
print()

### 1.e. Compute Metrics and Visualize Results

# Section banner for part 1(e): the title framed by two 80-character rules.
print("=" * 80, "PART 1(e): STANDARD K-NN PERFORMANCE EVALUATION", "=" * 80, sep="\n")

def compute_metrics(y_true, y_pred):
    """
    Compute precision, recall, and accuracy for multi-class classification.

    Precision and recall are computed one-vs-rest for every class that occurs
    in ``y_true``. A class with no predicted (precision) or no actual (recall)
    samples is reported as 0.0 instead of dividing by zero.

    Args:
        y_true: Ground-truth labels (array-like)
        y_pred: Predicted labels (array-like), same shape as y_true

    Returns:
        precision_dict: Mapping of class label -> precision
        recall_dict: Mapping of class label -> recall
        accuracy: Fraction of samples whose prediction equals the true label
            (0.0 when there are no samples)

    Raises:
        ValueError: If y_true and y_pred have different shapes.
    """
    # Coerce to arrays first: with plain lists, "labels == cls" is a single
    # Python bool rather than an element-wise mask, which would silently
    # turn every metric into 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    precision_dict = {}
    recall_dict = {}

    for cls in np.unique(y_true):
        is_true = y_true == cls
        is_pred = y_pred == cls

        tp = np.sum(is_pred & is_true)    # correctly predicted as cls
        fp = np.sum(is_pred & ~is_true)   # incorrectly predicted as cls
        fn = np.sum(~is_pred & is_true)   # cls samples predicted as something else

        precision_dict[cls] = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall_dict[cls] = tp / (tp + fn) if (tp + fn) > 0 else 0.0

    # Overall accuracy; guarded so an empty input does not evaluate 0/0.
    n_samples = y_true.size
    accuracy = np.sum(y_true == y_pred) / n_samples if n_samples > 0 else 0.0

    return precision_dict, recall_dict, accuracy

# Results of the unweighted classifier, keyed by K. Each entry keeps the raw
# predictions next to the metrics derived from them.
standard_results = {}

for k in k_values:
    print(f"\nEvaluating K={k}...")

    # Classify the held-out set, then score it against the true labels.
    y_pred = knn_predict(X_train, y_train, X_test, k)
    precision_dict, recall_dict, accuracy = compute_metrics(y_test, y_pred)

    standard_results[k] = dict(
        predictions=y_pred,
        precision=precision_dict,
        recall=recall_dict,
        accuracy=accuracy,
    )

    print(
        f"  Accuracy: {accuracy:.4f}",
        f"  Precision per class: {precision_dict}",
        f"  Recall per class: {recall_dict}",
        sep="\n",
    )

# Create visualization: one 2x3 figure summarising the standard K-NN runs.
# The pyplot state-machine API is used throughout, so every plt.* call below
# acts on the subplot most recently selected with plt.subplot().
print("\nGenerating visualizations...")

# 1. Accuracy vs K
plt.figure(figsize=(15, 10))

plt.subplot(2, 3, 1)
accuracies = [standard_results[k]['accuracy'] for k in k_values]
plt.plot(k_values, accuracies, 'o-', linewidth=2, markersize=8)
plt.xlabel('K Value')
plt.ylabel('Accuracy')
plt.title('Standard K-NN: Accuracy vs K')
plt.grid(True, alpha=0.3)
plt.xticks(k_values)

# 2. Precision per class
plt.subplot(2, 3, 2)
# Class labels are taken from the precision dict of the first K; the same
# list is reused for every per-class plot and table further down.
species = list(standard_results[k_values[0]]['precision'].keys())
for cls in species:
    precisions = [standard_results[k]['precision'][cls] for k in k_values]
    plt.plot(k_values, precisions, 'o-', label=cls, linewidth=2, markersize=8)
plt.xlabel('K Value')
plt.ylabel('Precision')
plt.title('Standard K-NN: Precision per Class')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(k_values)

# 3. Recall per class
plt.subplot(2, 3, 3)
for cls in species:
    recalls = [standard_results[k]['recall'][cls] for k in k_values]
    plt.plot(k_values, recalls, 'o-', label=cls, linewidth=2, markersize=8)
plt.xlabel('K Value')
plt.ylabel('Recall')
plt.title('Standard K-NN: Recall per Class')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(k_values)

# 4. Average Precision and Recall (unweighted mean over the classes)
plt.subplot(2, 3, 4)
avg_precisions = [np.mean(list(standard_results[k]['precision'].values())) for k in k_values]
avg_recalls = [np.mean(list(standard_results[k]['recall'].values())) for k in k_values]
plt.plot(k_values, avg_precisions, 'o-', label='Avg Precision', linewidth=2, markersize=8)
plt.plot(k_values, avg_recalls, 's-', label='Avg Recall', linewidth=2, markersize=8)
plt.xlabel('K Value')
plt.ylabel('Score')
plt.title('Standard K-NN: Average Metrics')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(k_values)

# 5. Confusion Matrix for best K
# max() returns the first maximal element, so when several K values share the
# top accuracy the one appearing earliest in k_values is chosen.
best_k_standard = max(k_values, key=lambda k: standard_results[k]['accuracy'])
plt.subplot(2, 3, 5)
y_pred_best = standard_results[best_k_standard]['predictions']
# Rows index the actual class, columns the predicted class.
conf_matrix = np.zeros((len(species), len(species)), dtype=int)
for i, true_cls in enumerate(species):
    for j, pred_cls in enumerate(species):
        conf_matrix[i, j] = np.sum((y_test == true_cls) & (y_pred_best == pred_cls))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', 
            xticklabels=species, yticklabels=species)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title(f'Confusion Matrix (K={best_k_standard})')

# 6. Bar plot of metrics for best K
plt.subplot(2, 3, 6)
x = np.arange(len(species))
width = 0.35
precisions_best = [standard_results[best_k_standard]['precision'][cls] for cls in species]
recalls_best = [standard_results[best_k_standard]['recall'][cls] for cls in species]
# The two bar series are shifted half a bar width left/right of each tick so
# precision and recall sit side by side for every species.
plt.bar(x - width/2, precisions_best, width, label='Precision')
plt.bar(x + width/2, recalls_best, width, label='Recall')
plt.xlabel('Species')
plt.ylabel('Score')
plt.title(f'Metrics per Class (K={best_k_standard})')
plt.xticks(x, species, rotation=45)
plt.legend()
plt.grid(True, alpha=0.3, axis='y')

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('1e_standard_knn_results.png', dpi=100, bbox_inches='tight')
plt.show()

print(f"\nBest K value (standard K-NN): {best_k_standard}")
print(f"Best accuracy: {standard_results[best_k_standard]['accuracy']:.4f}")
print()

### 1.f. Distance-Weighted K-NN

# Section banner for part 1(f): the title framed by two 80-character rules.
print("=" * 80, "PART 1(f): DISTANCE-WEIGHTED K-NN IMPLEMENTATION", "=" * 80, sep="\n")

def weighted_knn_predict(X_train, y_train, X_test, k, weight_func='inverse'):
    """
    Distance-weighted K-NN classification.

    Each of the ``k`` nearest training points votes for its own label with a
    weight that shrinks with its Euclidean distance to the test point; the
    label with the largest total weight wins. If the nearest training point
    coincides exactly with the test point (distance 0), its label is returned
    directly, which also avoids dividing by zero.

    Args:
        X_train: Training features, array-like of shape (n_train, n_features)
        y_train: Training labels, one per row of X_train
        X_test: Test features, array-like of shape (n_test, n_features)
        k: Number of neighbors (>= 1); a value larger than n_train simply
            uses every training point
        weight_func: 'inverse' for 1/d or 'inverse_square' for 1/d²; any
            other value falls back to a uniform weight of 1.0 per neighbor

    Returns:
        predictions: Predicted labels for test set (NumPy array)

    Raises:
        ValueError: If k < 1, the training set is empty, or X_train and
            y_train differ in length.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train)

    # Reject k < 1 explicitly: a negative k would otherwise be read as a
    # "drop the last |k| entries" slice and silently use almost all points.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(X_train) != len(y_train):
        raise ValueError(
            f"X_train has {len(X_train)} samples but y_train has {len(y_train)} labels"
        )

    predictions = []

    for test_point in X_test:
        # Distance from this test point to every training point at once.
        distances = np.sqrt(np.sum((X_train - test_point) ** 2, axis=1))

        # Stable sort keeps equidistant training points in their original order.
        nearest = np.argsort(distances, kind='stable')[:k]
        labels = y_train[nearest].tolist()

        # Distances are non-negative and sorted ascending, so an exact match
        # can only sit at the front of the neighbor list.
        if distances[nearest[0]] == 0:
            predictions.append(labels[0])
            continue

        # Accumulate the weighted votes per class.
        class_weights = {}
        for idx, label in zip(nearest, labels):
            dist = distances[idx]
            if weight_func == 'inverse':
                weight = 1.0 / dist
            elif weight_func == 'inverse_square':
                weight = 1.0 / (dist ** 2)
            else:
                weight = 1.0  # fallback: unweighted vote
            class_weights[label] = class_weights.get(label, 0.0) + weight

        # Class with the maximum total weight; ties go to the class seen first.
        predictions.append(max(class_weights, key=class_weights.get))

    return np.array(predictions)

# Describe the two weighting schemes that are about to be evaluated.
print(f"\nWeight functions:")
print(f"  1. w = 1/d (inverse distance)")
print(f"  2. w = 1/d² (inverse square distance)")
print(f"\nUsing same K values: {k_values}")
print()

### 1.g. Weighted K-NN Performance Evaluation

print("=" * 80, "PART 1(g): WEIGHTED K-NN PERFORMANCE EVALUATION", "=" * 80, sep="\n")

# Results of the weighted classifier: weight scheme -> K -> predictions and metrics.
weighted_results = {scheme: {} for scheme in ('inverse', 'inverse_square')}

for weight_type in ('inverse', 'inverse_square'):
    # Sub-heading naming the weight function of this pass.
    if weight_type == 'inverse':
        heading = f"Weight function: 1/d"
    else:
        heading = f"Weight function: 1/d²"
    print(f"\n{'-'*40}")
    print(heading)
    print(f"{'-'*40}")

    for k in k_values:
        print(f"\nEvaluating K={k}...")

        # Classify the held-out set with this scheme, then score it.
        y_pred = weighted_knn_predict(X_train, y_train, X_test, k, weight_type)
        precision_dict, recall_dict, accuracy = compute_metrics(y_test, y_pred)

        weighted_results[weight_type][k] = dict(
            predictions=y_pred,
            precision=precision_dict,
            recall=recall_dict,
            accuracy=accuracy,
        )

        print(
            f"  Accuracy: {accuracy:.4f}",
            f"  Precision per class: {precision_dict}",
            f"  Recall per class: {recall_dict}",
            sep="\n",
        )

# Visualization for weighted K-NN: one 2x3 figure. Each panel rebinds `ax` to
# the axes it draws on, so the statements of a panel must stay together.
print("\nGenerating visualizations for weighted K-NN...")

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Distance-Weighted K-NN Performance', fontsize=16)

# 1. Accuracy comparison (standard vs. both weighting schemes, same K values)
ax = axes[0, 0]
acc_standard = [standard_results[k]['accuracy'] for k in k_values]
acc_inverse = [weighted_results['inverse'][k]['accuracy'] for k in k_values]
acc_inverse_sq = [weighted_results['inverse_square'][k]['accuracy'] for k in k_values]
ax.plot(k_values, acc_standard, 'o-', label='Standard', linewidth=2, markersize=8)
ax.plot(k_values, acc_inverse, 's-', label='1/d', linewidth=2, markersize=8)
ax.plot(k_values, acc_inverse_sq, '^-', label='1/d²', linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy Comparison')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# 2. Precision for 1/d (one line per class in `species`)
ax = axes[0, 1]
for cls in species:
    precisions = [weighted_results['inverse'][k]['precision'][cls] for k in k_values]
    ax.plot(k_values, precisions, 'o-', label=cls, linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Precision')
ax.set_title('Precision per Class (1/d)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# 3. Precision for 1/d²
ax = axes[0, 2]
for cls in species:
    precisions = [weighted_results['inverse_square'][k]['precision'][cls] for k in k_values]
    ax.plot(k_values, precisions, 'o-', label=cls, linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Precision')
ax.set_title('Precision per Class (1/d²)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# 4. Recall for 1/d
ax = axes[1, 0]
for cls in species:
    recalls = [weighted_results['inverse'][k]['recall'][cls] for k in k_values]
    ax.plot(k_values, recalls, 'o-', label=cls, linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Recall')
ax.set_title('Recall per Class (1/d)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# 5. Recall for 1/d²
ax = axes[1, 1]
for cls in species:
    recalls = [weighted_results['inverse_square'][k]['recall'][cls] for k in k_values]
    ax.plot(k_values, recalls, 'o-', label=cls, linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Recall')
ax.set_title('Recall per Class (1/d²)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# 6. Average metrics comparison (unweighted mean over the classes, per scheme)
ax = axes[1, 2]
avg_prec_inv = [np.mean(list(weighted_results['inverse'][k]['precision'].values())) for k in k_values]
avg_rec_inv = [np.mean(list(weighted_results['inverse'][k]['recall'].values())) for k in k_values]
avg_prec_inv_sq = [np.mean(list(weighted_results['inverse_square'][k]['precision'].values())) for k in k_values]
avg_rec_inv_sq = [np.mean(list(weighted_results['inverse_square'][k]['recall'].values())) for k in k_values]
ax.plot(k_values, avg_prec_inv, 'o-', label='Precision (1/d)', linewidth=2, markersize=8)
ax.plot(k_values, avg_rec_inv, 's-', label='Recall (1/d)', linewidth=2, markersize=8)
ax.plot(k_values, avg_prec_inv_sq, '^-', label='Precision (1/d²)', linewidth=2, markersize=8)
ax.plot(k_values, avg_rec_inv_sq, 'v-', label='Recall (1/d²)', linewidth=2, markersize=8)
ax.set_xlabel('K Value')
ax.set_ylabel('Score')
ax.set_title('Average Metrics Comparison')
ax.legend(fontsize=8)
ax.grid(True, alpha=0.3)
ax.set_xticks(k_values)

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('1g_weighted_knn_results.png', dpi=100, bbox_inches='tight')
plt.show()

# Pick, for each weighting scheme, the K with the highest test accuracy.
# max() keeps the first maximal element, so ties go to the K listed earliest.
inverse_runs = weighted_results['inverse']
inverse_sq_runs = weighted_results['inverse_square']
best_k_inverse = max(k_values, key=lambda cand: inverse_runs[cand]['accuracy'])
best_k_inverse_sq = max(k_values, key=lambda cand: inverse_sq_runs[cand]['accuracy'])

print(f"\n{'-'*80}", "BEST CONFIGURATIONS:", f"{'-'*80}", sep="\n")
print(
    f"Weighted K-NN (1/d):",
    f"  Best K: {best_k_inverse}",
    f"  Accuracy: {weighted_results['inverse'][best_k_inverse]['accuracy']:.4f}",
    sep="\n",
)
print(
    f"\nWeighted K-NN (1/d²):",
    f"  Best K: {best_k_inverse_sq}",
    f"  Accuracy: {weighted_results['inverse_square'][best_k_inverse_sq]['accuracy']:.4f}",
    sep="\n",
)
print()

### 1.h. Comparison and Interpretation

print("=" * 80, "PART 1(h): COMPARISON AND INTERPRETATION", "=" * 80, sep="\n")

# Summary table: one row per method, evaluated at that method's own best K.
table_rule = "-" * 100
print("\nSUMMARY TABLE - BEST K VALUES:")
print(table_rule)
print(f"{'Method':<30} {'Best K':<10} {'Accuracy':<12} {'Avg Precision':<15} {'Avg Recall':<15}")
print(table_rule)

# (display name, best K, result entry for that K) for each method.
methods = [
    ('Standard K-NN', best_k_standard, standard_results[best_k_standard]),
    ('Weighted K-NN (1/d)', best_k_inverse, weighted_results['inverse'][best_k_inverse]),
    ('Weighted K-NN (1/d²)', best_k_inverse_sq, weighted_results['inverse_square'][best_k_inverse_sq]),
]

for method_name, k, results in methods:
    # Unweighted mean over the classes for both per-class metrics.
    avg_prec, avg_rec = (
        np.mean(list(results[metric].values())) for metric in ('precision', 'recall')
    )
    print(f"{method_name:<30} {k:<10} {results['accuracy']:<12.4f} {avg_prec:<15.4f} {avg_rec:<15.4f}")

print(table_rule)

# Pairwise accuracy comparison of the three methods, each at its own best K.
print("\nDETAILED ANALYSIS:", "-" * 80, sep="\n")

best_acc_standard = standard_results[best_k_standard]['accuracy']
best_acc_inverse = weighted_results['inverse'][best_k_inverse]['accuracy']
best_acc_inverse_sq = weighted_results['inverse_square'][best_k_inverse_sq]['accuracy']

# A positive difference means the first-named weighted method is ahead.
print("\n1. STANDARD K-NN vs WEIGHTED K-NN (1/d):")
acc_diff_1 = best_acc_inverse - best_acc_standard
print(f"   Accuracy difference: {acc_diff_1:+.4f}")
if acc_diff_1 > 0:
    verdict = f"   → Weighted K-NN (1/d) performs {abs(acc_diff_1)*100:.2f}% better"
elif acc_diff_1 < 0:
    verdict = f"   → Standard K-NN performs {abs(acc_diff_1)*100:.2f}% better"
else:
    verdict = f"   → Both methods perform equally"
print(verdict)

print("\n2. STANDARD K-NN vs WEIGHTED K-NN (1/d²):")
acc_diff_2 = best_acc_inverse_sq - best_acc_standard
print(f"   Accuracy difference: {acc_diff_2:+.4f}")
if acc_diff_2 > 0:
    verdict = f"   → Weighted K-NN (1/d²) performs {abs(acc_diff_2)*100:.2f}% better"
elif acc_diff_2 < 0:
    verdict = f"   → Standard K-NN performs {abs(acc_diff_2)*100:.2f}% better"
else:
    verdict = f"   → Both methods perform equally"
print(verdict)

print("\n3. WEIGHTED K-NN (1/d) vs WEIGHTED K-NN (1/d²):")
acc_diff_3 = best_acc_inverse - best_acc_inverse_sq
print(f"   Accuracy difference: {acc_diff_3:+.4f}")
if acc_diff_3 > 0:
    verdict = f"   → 1/d weighting performs {abs(acc_diff_3)*100:.2f}% better"
elif acc_diff_3 < 0:
    verdict = f"   → 1/d² weighting performs {abs(acc_diff_3)*100:.2f}% better"
else:
    verdict = f"   → Both weightings perform equally"
print(verdict)

print("\n" + "-" * 80, "INTERPRETATION:", "-" * 80, sep="\n")

# Overall winner: (name, best accuracy, best K) per method. max() keeps the
# first maximal entry, so on a tie the method listed first is reported.
all_methods = [
    ('Standard K-NN', standard_results[best_k_standard]['accuracy'], best_k_standard),
    ('Weighted (1/d)', weighted_results['inverse'][best_k_inverse]['accuracy'], best_k_inverse),
    ('Weighted (1/d²)', weighted_results['inverse_square'][best_k_inverse_sq]['accuracy'], best_k_inverse_sq),
]
best_method = max(all_methods, key=lambda entry: entry[1])

print(
    f"\n✓ BEST PERFORMING METHOD: {best_method[0]}",
    f"  K value: {best_method[2]}",
    f"  Accuracy: {best_method[1]:.4f}",
    sep="\n",
)

# Fixed explanatory text; it does not depend on the measured results.
for insight_line in (
    "\nKEY INSIGHTS:",
    "1. Distance weighting tends to give more importance to closer neighbors,",
    "   which can improve classification when nearby points are more relevant.",
    "\n2. The 1/d² weighting scheme gives even more emphasis to very close points",
    "   compared to 1/d, which may help or hurt depending on data distribution.",
    "\n3. Standard K-NN uses equal voting, which can be more robust when the",
    "   feature space is well-normalized and all neighbors are equally informative.",
    "\n4. For the Iris dataset, the relatively simple decision boundaries may",
    "   not benefit significantly from distance weighting.",
):
    print(insight_line)

# Per-class precision/recall of every method at its own best K.
print("\n" + "-" * 80, "PER-CLASS PERFORMANCE (BEST K VALUES):", "-" * 80, sep="\n")

for cls in species:
    print(f"\n{cls}:")
    print(f"  {'Method':<25} {'Precision':<12} {'Recall':<12}")
    print(f"  {'-'*50}")
    for method_name, k, results in methods:
        prec, rec = results['precision'][cls], results['recall'][cls]
        print(f"  {method_name:<25} {prec:<12.4f} {rec:<12.4f}")

# Final visualization: Side-by-side comparison of the three methods, each at
# its own best K (accuracy, then per-class precision, then per-class recall).
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle('Best K Comparison: Standard vs Weighted K-NN', fontsize=16)

# Accuracy
ax = axes[0]
methods_names = ['Standard', '1/d', '1/d²']
# all_methods entries are (name, accuracy, K); index 1 is the accuracy.
accuracies_best = [m[1] for m in all_methods]
colors_bar = ['#1f77b4', '#ff7f0e', '#2ca02c']
bars = ax.bar(methods_names, accuracies_best, color=colors_bar, alpha=0.7, edgecolor='black')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy Comparison')
# Upper limit above 1.0 leaves room for the value labels drawn over the bars.
ax.set_ylim([0, 1.1])
ax.grid(True, alpha=0.3, axis='y')
# Write each accuracy value centred just above its bar.
for bar, acc in zip(bars, accuracies_best):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{acc:.3f}', ha='center', va='bottom', fontweight='bold')

# Precision per class
ax = axes[1]
x = np.arange(len(species))
width = 0.25
# Grouped bars: method i is offset by i bar widths within each species group.
for i, (method_name, k, results) in enumerate(methods):
    precisions = [results['precision'][cls] for cls in species]
    ax.bar(x + i*width, precisions, width, label=method_name, color=colors_bar[i], alpha=0.7, edgecolor='black')
ax.set_xlabel('Species')
ax.set_ylabel('Precision')
ax.set_title('Precision per Class')
# With three bars per group, x + width is the position of the middle bar.
ax.set_xticks(x + width)
ax.set_xticklabels(species, rotation=45)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

# Recall per class (same grouped-bar layout as the precision panel)
ax = axes[2]
for i, (method_name, k, results) in enumerate(methods):
    recalls = [results['recall'][cls] for cls in species]
    ax.bar(x + i*width, recalls, width, label=method_name, color=colors_bar[i], alpha=0.7, edgecolor='black')
ax.set_xlabel('Species')
ax.set_ylabel('Recall')
ax.set_title('Recall per Class')
ax.set_xticks(x + width)
ax.set_xticklabels(species, rotation=45)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('1h_comparison.png', dpi=100, bbox_inches='tight')
plt.show()

# Closing banner.
print("\n" + "=" * 80)
print("ANALYSIS COMPLETE")
print("=" * 80)