## 1️⃣ Setup & Mount Drive

In [None]:
# Clone repository
# Fetches the project sources into the current working directory and then makes
# the checkout the working directory for the rest of the notebook.
# NOTE(review): later cells import `data.loader_tf2` and `configs.config_v2`,
# presumably from this checkout — confirm. Re-running this cell after a
# successful clone will likely make `git clone` fail because the target
# directory already exists — verify before re-running.
!git clone https://github.com/dungdinhhaha/AIDetect.git
%cd AIDetect

In [None]:
# Install dependencies
# TensorFlow is pinned to 2.19.0; matplotlib, seaborn and scikit-learn are left
# unpinned. `-q` asks pip for quiet output.
!pip install -q tensorflow==2.19.0 matplotlib seaborn scikit-learn

In [None]:
# Mount Google Drive
# Exposes Drive under /content/drive; the model, dataset and output paths used
# by the later cells all live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Report the TensorFlow build and the GPUs it can see
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

# Query the device list once and reuse it for both the listing and the count.
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU available:", gpu_devices)
print("Num GPUs:", len(gpu_devices))

## 2️⃣ Load Model

In [None]:
# Show every saved model file in the Drive model folder together with its size
import os
from pathlib import Path

model_dir = Path('/content/drive/MyDrive/comparison_detector_models_v2')

separator = "-" * 60
bytes_per_mb = 1024 * 1024

print("üìÅ Available models:")
print(separator)
# One pass per file extension, in the same order as before: .h5 first, then .keras.
for pattern in ('*.h5', '*.keras'):
    for model_file in model_dir.glob(pattern):
        megabytes = model_file.stat().st_size / bytes_per_mb
        print(f"  {model_file.name}: {megabytes:.1f} MB")

print(separator)

In [None]:
# Choose the checkpoint to evaluate and load it.
# model_path stays a plain string: it is written to the JSON results later on.
model_path = '/content/drive/MyDrive/comparison_detector_models_v2/best_model.h5'
# Or use the final model:
# model_path = '/content/drive/MyDrive/comparison_detector_models_v2/final_model.keras'

print(f"üì¶ Loading model: {model_path}")
model = tf.keras.models.load_model(model_path)
print("‚úÖ Model loaded!")

# Print the input shape first, then the output shape.
for caption, attribute in (("Input", "input_shape"), ("Output", "output_shape")):
    print(f"   {caption} shape: {getattr(model, attribute)}")

In [None]:
# Model summary (optional)
# Prints the architecture of the model loaded above; nothing later in the
# notebook depends on this cell, so it can be skipped.
model.summary()

## 3️⃣ Load Test Dataset

In [None]:
from data.loader_tf2 import build_dataset
from configs.config_v2 import IMAGE_SIZE, NUM_CLASSES, BATCH_SIZE

# Build the test pipeline from the TFRecord stored on Drive
test_paths = ['/content/drive/MyDrive/content/data/tct/test.tfrecord']

print(f"üìä Loading test dataset from: {test_paths[0]}")

# Image size and batch size come from the project config.
# NOTE(review): is_training=False presumably disables shuffling/augmentation —
# confirm in data/loader_tf2.
loader_options = dict(
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    is_training=False,
)
test_ds = build_dataset(test_paths, **loader_options)

# Turn a (image, target-dict) element into an (image, label) pair
def extract_label(img, tgt):
    """Return ``img`` unchanged together with column 0 of ``tgt['labels']``.

    ``tgt['labels']`` is indexed as ``[:, 0]``, i.e. the first label entry of
    every row is kept and the remaining columns are dropped.
    NOTE(review): presumably rows are images and columns are per-object labels,
    so this uses the first annotated object as the image-level class — confirm
    against the dataset loader.
    """
    first_label_per_row = tgt['labels'][:, 0]
    return img, first_label_per_row

# Dataset variant that yields (images, labels), as evaluate()/predict loops expect
test_ds_labeled = test_ds.map(extract_label)

print("‚úÖ Test dataset loaded!")
# Echo the configuration values the pipeline was built with.
for caption, value in (
    ("Batch size", BATCH_SIZE),
    ("Image size", IMAGE_SIZE),
    ("Num classes", NUM_CLASSES),
):
    print(f"   {caption}: {value}")

## 4️⃣ Quick Evaluation

In [None]:
# Score the model on the first 100 batches of the test set
banner = "=" * 60

print("‚öôÔ∏è  Evaluating model on test set...")
print("This may take a few minutes...\n")

results = model.evaluate(test_ds_labeled.take(100), verbose=1)

# evaluate() output is read positionally: index 0 is used as the loss and
# index 1 as the accuracy.
# NOTE(review): this assumes the model was compiled with accuracy as its first
# metric — confirm against the training script.
test_loss, test_accuracy = results[0], results[1]

print("\n" + banner)
print("üìä TEST RESULTS")
print(banner)
print(f"Test Loss:     {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(banner)

## 5️⃣ Detailed Analysis with Predictions

In [None]:
import numpy as np

# Collect all predictions and ground truth
# For the first `num_batches` test batches, keep per image: the predicted class
# (argmax over the model output), the ground-truth label and the image itself.
# The images are reused by the visualisation cells further down.
print("üîÆ Collecting predictions...")

num_batches = 100  # Test on 100 batches

# One array per batch, joined once at the end. This avoids growing Python lists
# image by image and then copying everything again in np.array().
prediction_batches = []
label_batches = []
image_batches = []

for i, (images, labels) in enumerate(test_ds_labeled.take(num_batches)):
    if i % 10 == 0:
        print(f"  Processing batch {i}/{num_batches}...")

    # predict_on_batch runs a single inference pass on an already-batched
    # input. Model.predict() is designed for whole datasets and adds per-call
    # overhead when invoked repeatedly from a Python loop.
    preds = model.predict_on_batch(images)
    prediction_batches.append(np.argmax(preds, axis=1))
    label_batches.append(labels.numpy().astype(int))
    image_batches.append(images.numpy())

# np.concatenate rejects an empty sequence, so fall back to empty arrays when
# the dataset yielded no batches at all.
all_predictions = np.concatenate(prediction_batches) if prediction_batches else np.array([])
all_true_labels = np.concatenate(label_batches) if label_batches else np.array([])
all_images = np.concatenate(image_batches) if image_batches else np.array([])

print(f"\n‚úÖ Processed {len(all_predictions)} images")

In [None]:
# Overall accuracy: share of images whose predicted class equals the label
is_match = all_predictions == all_true_labels
accuracy = np.mean(is_match)
num_right = np.sum(is_match)
num_wrong = np.sum(all_predictions != all_true_labels)

print(f"\nüìä Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"   Correct: {num_right}")
print(f"   Incorrect: {num_wrong}")

## 6️⃣ Classification Report

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the collected predictions.
print("=" * 60)
print("üìã CLASSIFICATION REPORT")
print("=" * 60)

# Pin the label set to every class id this notebook uses (0 .. NUM_CLASSES-1).
# Without `labels`, scikit-learn derives the classes from the data and raises
# ValueError when their count differs from len(target_names). That happens as
# soon as one class is missing from the evaluated batches.
class_ids = list(range(NUM_CLASSES))
print(classification_report(
    all_true_labels,
    all_predictions,
    labels=class_ids,
    target_names=[f"Class {i}" for i in class_ids],
    zero_division=0  # classes with no samples report 0 instead of warning
))
print("=" * 60)

## 7️⃣ Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Calculate confusion matrix
# `labels` fixes the matrix at NUM_CLASSES x NUM_CLASSES so its rows/columns
# always line up with the tick labels, even when a class is absent from the
# evaluated batches.
class_ids = list(range(NUM_CLASSES))
tick_names = [f"C{i}" for i in class_ids]
cm = confusion_matrix(all_true_labels, all_predictions, labels=class_ids)

# Plot
fig = plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_names,
            yticklabels=tick_names)
plt.title('Confusion Matrix', fontsize=16)
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()

# Save to Drive BEFORE showing. With the notebook's inline backend plt.show()
# renders and closes the figure, so a later plt.savefig() writes a blank image.
save_path = '/content/drive/MyDrive/comparison_detector_models_v2/confusion_matrix.png'
fig.savefig(save_path, dpi=150, bbox_inches='tight')
plt.show()
print(f"\n‚úÖ Confusion matrix saved to: {save_path}")

## 8️⃣ Visualize Sample Predictions

In [None]:
# Split sample positions by whether the prediction matched the label
correct_mask = all_predictions == all_true_labels
incorrect_mask = np.invert(correct_mask)

# Positions (into all_images / all_predictions) of hits and misses.
correct_indices = np.nonzero(correct_mask)[0]
incorrect_indices = np.nonzero(incorrect_mask)[0]

num_hits, num_misses = len(correct_indices), len(incorrect_indices)
print(f"‚úÖ Correct predictions: {num_hits}")
print(f"‚ùå Incorrect predictions: {num_misses}")

In [None]:
# Visualize CORRECT predictions
# Up to 8 correctly classified images in a 2x4 grid, each titled with its true
# and predicted class.
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
fig.suptitle('‚úÖ CORRECT PREDICTIONS (Sample)', fontsize=16, color='green')

# zip() stops at the shorter sequence, so it handles both "more than 8 hits"
# and "fewer hits than panels" without an explicit bounds check.
for ax, idx in zip(axes.flat, correct_indices):
    ax.imshow(all_images[idx])
    ax.set_title(f"True: {all_true_labels[idx]}, Pred: {all_predictions[idx]}",
                 color='green', fontsize=10, fontweight='bold')

# Hide ticks/frames on every panel, including panels left empty.
for ax in axes.flat:
    ax.axis('off')

plt.tight_layout()

# Save BEFORE showing. With the notebook's inline backend plt.show() renders
# and closes the figure, so a later plt.savefig() writes a blank image.
save_path = '/content/drive/MyDrive/comparison_detector_models_v2/correct_predictions.png'
fig.savefig(save_path, dpi=150, bbox_inches='tight')
plt.show()
print(f"‚úÖ Saved to: {save_path}")

In [None]:
# Visualize INCORRECT predictions
# Up to 8 misclassified images in a 2x4 grid, each titled with its true and
# predicted class. Skipped entirely when there are no misclassifications.
if len(incorrect_indices) > 0:
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    fig.suptitle('‚ùå INCORRECT PREDICTIONS (Sample)', fontsize=16, color='red')

    # zip() stops at the shorter sequence, so no explicit min(8, ...) is needed.
    for ax, idx in zip(axes.flat, incorrect_indices):
        ax.imshow(all_images[idx])
        ax.set_title(f"True: {all_true_labels[idx]}, Pred: {all_predictions[idx]}",
                     color='red', fontsize=10, fontweight='bold')

    # Hide ticks/frames on every panel, including panels left empty.
    for ax in axes.flat:
        ax.axis('off')

    plt.tight_layout()

    # Save BEFORE showing. With the notebook's inline backend plt.show() renders
    # and closes the figure, so a later plt.savefig() writes a blank image.
    save_path = '/content/drive/MyDrive/comparison_detector_models_v2/incorrect_predictions.png'
    fig.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.show()
    print(f"‚úÖ Saved to: {save_path}")
else:
    print("üéâ Perfect accuracy! No incorrect predictions!")

## 9️⃣ Class Distribution Analysis

In [None]:
# Plot class distribution
# Side-by-side bar charts: how often each class occurs in the ground truth
# (left) versus how often the model predicts it (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# True labels distribution
unique_true, counts_true = np.unique(all_true_labels, return_counts=True)
ax1.bar(unique_true, counts_true, color='skyblue', edgecolor='black')
ax1.set_title('True Label Distribution', fontsize=14, fontweight='bold')
ax1.set_xlabel('Class', fontsize=12)
ax1.set_ylabel('Count', fontsize=12)
ax1.grid(axis='y', alpha=0.3)

# Predicted labels distribution
unique_pred, counts_pred = np.unique(all_predictions, return_counts=True)
ax2.bar(unique_pred, counts_pred, color='salmon', edgecolor='black')
ax2.set_title('Predicted Label Distribution', fontsize=14, fontweight='bold')
ax2.set_xlabel('Class', fontsize=12)
ax2.set_ylabel('Count', fontsize=12)
ax2.grid(axis='y', alpha=0.3)

plt.tight_layout()

# Save BEFORE showing. With the notebook's inline backend plt.show() renders
# and closes the figure, so a later plt.savefig() writes a blank image.
save_path = '/content/drive/MyDrive/comparison_detector_models_v2/class_distribution.png'
fig.savefig(save_path, dpi=150, bbox_inches='tight')
plt.show()
print(f"‚úÖ Saved to: {save_path}")

## 🔟 Per-Class Accuracy

In [None]:
# Accuracy broken down by ground-truth class
banner = "=" * 60
print(banner)
print("üìä PER-CLASS ACCURACY")
print(banner)

# Parallel lists consumed by the bar chart in the next cell; classes without
# any test sample are left out of both.
class_accuracies = []
class_labels = []

for class_id in range(NUM_CLASSES):
    members = all_true_labels == class_id
    total = np.sum(members)
    if total <= 0:
        continue  # no sample of this class in the evaluated batches

    hits = all_predictions[members] == all_true_labels[members]
    class_acc = np.mean(hits)
    correct = np.sum(hits)

    class_accuracies.append(class_acc)
    class_labels.append(class_id)

    print(f"Class {class_id:2d}: {class_acc:.4f} ({class_acc*100:5.2f}%) - {correct}/{total} correct")

print(banner)

In [None]:
# Plot per-class accuracy
# One bar per class that had test samples, with a dashed line marking the
# overall accuracy for comparison.
fig = plt.figure(figsize=(12, 6))
bars = plt.bar(class_labels, class_accuracies, color='teal', edgecolor='black', alpha=0.7)

# Color bars based on accuracy: green >= 0.9, orange >= 0.7, red below that
for bar, acc in zip(bars, class_accuracies):
    if acc >= 0.9:
        bar.set_color('green')
    elif acc >= 0.7:
        bar.set_color('orange')
    else:
        bar.set_color('red')

plt.title('Per-Class Accuracy', fontsize=16, fontweight='bold')
plt.xlabel('Class', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.ylim(0, 1.05)  # small headroom above 1.0 so full-height bars are not clipped
plt.grid(axis='y', alpha=0.3)
plt.axhline(y=accuracy, color='black', linestyle='--', linewidth=2, label=f'Overall: {accuracy:.3f}')
plt.legend(fontsize=12)
plt.tight_layout()

# Save BEFORE showing. With the notebook's inline backend plt.show() renders
# and closes the figure, so a later plt.savefig() writes a blank image.
save_path = '/content/drive/MyDrive/comparison_detector_models_v2/per_class_accuracy.png'
fig.savefig(save_path, dpi=150, bbox_inches='tight')
plt.show()
print(f"‚úÖ Saved to: {save_path}")

## 💾 Save Test Results

In [None]:
import json
from datetime import datetime

# Gather the run's headline numbers into one dict and persist it as JSON.
# NumPy values are cast to built-in int/float/list before serialising.
results_dict = {
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_path': model_path,
    'total_images_tested': len(all_predictions),
    'overall_accuracy': float(accuracy),
    'test_loss': float(test_loss),
    'num_correct': int(np.sum(all_predictions == all_true_labels)),
    'num_incorrect': int(np.sum(all_predictions != all_true_labels)),
    'per_class_accuracy': {},
    'confusion_matrix': cm.tolist()
}

# Fill in one entry per class that has at least one test sample.
per_class = results_dict['per_class_accuracy']
for class_id in range(NUM_CLASSES):
    members = all_true_labels == class_id
    sample_count = int(np.sum(members))
    if sample_count <= 0:
        continue

    hits = all_predictions[members] == all_true_labels[members]
    per_class[f'class_{class_id}'] = {
        'accuracy': float(np.mean(hits)),
        'total_samples': sample_count,
        'correct_predictions': int(np.sum(hits))
    }

# Serialise once; the same text is written to Drive and used for the preview.
serialized = json.dumps(results_dict, indent=2)

save_path = '/content/drive/MyDrive/comparison_detector_models_v2/test_results.json'
with open(save_path, 'w') as f:
    f.write(serialized)

print(f"‚úÖ Test results saved to: {save_path}")
print(f"\nüìÑ Results preview:")
print(serialized[:500] + "...")

## 📊 Final Summary

In [None]:
# Closing recap: headline metrics plus the list of artefacts written to Drive
banner = "=" * 60

print("\n" + banner)
print("‚úÖ TESTING COMPLETED!")
print(banner)

print(f"\nüìä Summary:")
print(f"  Model: {Path(model_path).name}")
print(f"  Images tested: {len(all_predictions)}")
print(f"  Overall accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"  Test loss: {test_loss:.4f}")

# The incorrect-predictions figure only exists when there was at least one miss.
saved_files = ["confusion_matrix.png", "correct_predictions.png"]
if len(incorrect_indices) > 0:
    saved_files.append("incorrect_predictions.png")
saved_files.extend(["class_distribution.png", "per_class_accuracy.png", "test_results.json"])

print(f"\nüìÅ Files saved in Drive:")
for filename in saved_files:
    print(f"  - {filename}")

print("\n" + banner)
print("üéâ All visualizations and results saved to Google Drive!")
print(banner)