In [None]:
# Course: CSC 2611-131 AI Tools
# Fall 2024
# Final Project – Dog Classifier
# Name: Theresa Kettner
# Created: 12/12/2024
# This was adapted from Code generated by Claude 
# (Anthropic, 2024)
# The prompt used was: 
# "How can I improve the accuracy of my KNN model?"



import os
import numpy as np
from PIL import Image
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tqdm import tqdm

def create_feature_extractor():
    """Build an ImageNet-pretrained ResNet50 with its classifier head removed.

    Returns:
        torch.nn.Sequential: every child module of ResNet50 except the last
        one (the fully connected classification layer), switched to
        evaluation mode.
    """
    # torchvision 0.13 replaced the boolean ``pretrained`` flag with a
    # ``weights`` enum; the old flag still works there but emits a
    # deprecation warning. IMAGENET1K_V1 is the checkpoint that
    # ``pretrained=True`` resolved to, so the extracted features are unchanged.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
    else:
        # Older torchvision releases only understand the boolean flag.
        model = models.resnet50(pretrained=True)

    # Drop the final fully connected layer so the network emits pooled
    # features instead of class scores.
    feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

    # Evaluation mode: batch-norm layers use their stored running statistics.
    feature_extractor.eval()
    return feature_extractor

def load_and_preprocess_image(image_path):
    """Load one image from disk and turn it into a normalized tensor.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The result of the preprocessing pipeline applied to the RGB image:
        resize to 256, center-crop to 224, convert to a tensor, then
        normalize each channel with the mean/std values listed below.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file is missing or is
        not a readable image.
    """
    # Same transforms that were used during ResNet training.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

    # Image.open reads lazily and can keep the underlying file open; the
    # context manager closes it deterministically. convert() returns a new,
    # fully loaded image, so it is safe to use after the file is closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')
    return preprocess(image)

def extract_features(data_path, subset='train', device='cuda'):
    """Extract ResNet features for every image of one dataset subset.

    The subset directory is expected to contain one sub-directory per dog
    breed; the sub-directory name is used as the label of every image in it.

    Args:
        data_path: Root directory of the dataset.
        subset: Name of the sub-directory of ``data_path`` to process.
        device: Torch device the model and the image tensors are moved to.

    Returns:
        tuple: ``(features, labels)`` as NumPy arrays. ``features`` holds one
        flattened feature vector per successfully processed image and
        ``labels`` holds the matching breed directory names, in the same
        order.

    Raises:
        OSError: If the subset directory cannot be listed.
    """
    subset_path = os.path.join(data_path, subset)
    features = []
    labels = []

    # Create feature extractor
    feature_extractor = create_feature_extractor().to(device)

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the returned samples reproducible across runs and platforms.
    breeds = sorted(os.listdir(subset_path))

    for breed in tqdm(breeds, desc=f'Processing {subset} data'):
        breed_path = os.path.join(subset_path, breed)
        if not os.path.isdir(breed_path):
            continue

        for img_name in sorted(os.listdir(breed_path)):
            # Bound before the try so the error message below can always
            # refer to it.
            img_path = os.path.join(breed_path, img_name)
            try:
                # Preprocess image and add a batch dimension of size 1
                img_tensor = load_and_preprocess_image(img_path)
                img_tensor = img_tensor.unsqueeze(0).to(device)

                # Extract features without tracking gradients
                with torch.no_grad():
                    features_tensor = feature_extractor(img_tensor)
                    features_numpy = features_tensor.cpu().numpy().flatten()
            except Exception as e:
                # Best effort: report the unreadable file and keep going so
                # one bad image does not abort the whole subset.
                print(f"Error processing {img_path}: {e}")
                continue

            # Appended together, outside the try, so features and labels
            # always stay aligned.
            features.append(features_numpy)
            labels.append(breed)

    return np.array(features), np.array(labels)

# Use the GPU when CUDA is available, otherwise run on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Extract ResNet features for each of the three dataset splits
data_path = 'C:\\00\\70-dog-breedsimage-data-set-updated'
X_train, y_train = extract_features(data_path, subset='train', device=device)
X_valid, y_valid = extract_features(data_path, subset='valid', device=device)
X_test, y_test = extract_features(data_path, subset='test', device=device)

# Report how many samples / features each split produced
print(f"Training set shape: {X_train.shape}")
print(f"Validation set shape: {X_valid.shape}")
print(f"Test set shape: {X_test.shape}")

# Standardize the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to all three splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Sweep over neighbor counts and distance metrics, keeping the combination
# with the highest validation accuracy (the first one seen wins on ties).
k_values = [3, 5, 7, 11, 13, 15]
metrics = ['euclidean', 'cosine']
best_k, best_metric, best_accuracy = None, None, 0

for metric in metrics:
    print(f"\nTesting with distance metric: {metric}")
    for k in k_values:
        # fit() returns the estimator itself, so it can be chained
        knn = KNeighborsClassifier(
            n_neighbors=k,
            metric=metric,
            n_jobs=-1,
        ).fit(X_train_scaled, y_train)

        # Score the candidate on the validation split
        valid_accuracy = knn.score(X_valid_scaled, y_valid)
        print(f"k={k}, Validation accuracy: {valid_accuracy:.3f}")

        # Only a strictly better score replaces the current best
        if valid_accuracy > best_accuracy:
            best_accuracy, best_k, best_metric = valid_accuracy, k, metric

print(f"\nBest parameters: k={best_k}, metric={best_metric}")

# Refit a classifier on the training split using the winning parameters
final_knn = KNeighborsClassifier(n_neighbors=best_k, metric=best_metric, n_jobs=-1)
final_knn.fit(X_train_scaled, y_train)

# Predict the held-out test split and print per-class metrics
test_predictions = final_knn.predict(X_test_scaled)
print("\nTest Set Performance:")
print(classification_report(y_test, test_predictions))

Using device: cpu


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\slkol/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:02<00:00, 47.5MB/s]
Processing train data: 100%|██████████| 40/40 [07:12<00:00, 10.81s/it]
Processing valid data: 100%|██████████| 40/40 [00:36<00:00,  1.10it/s]
Processing test data: 100%|██████████| 40/40 [00:36<00:00,  1.10it/s]


Training set shape: (4600, 2048)
Validation set shape: (400, 2048)
Test set shape: (400, 2048)

Testing with distance metric: euclidean
k=3, Validation accuracy: 0.965
k=5, Validation accuracy: 0.953
k=7, Validation accuracy: 0.958
k=11, Validation accuracy: 0.955
k=13, Validation accuracy: 0.953
k=15, Validation accuracy: 0.955

Testing with distance metric: cosine
k=3, Validation accuracy: 0.963
k=5, Validation accuracy: 0.963
k=7, Validation accuracy: 0.958
k=11, Validation accuracy: 0.950
k=13, Validation accuracy: 0.945
k=15, Validation accuracy: 0.950

Best parameters: k=3, metric=euclidean

Test Set Performance:
                   precision    recall  f1-score   support

           Afghan       1.00      1.00      1.00        10
American Hairless       1.00      0.90      0.95        10
 American Spaniel       1.00      0.90      0.95        10
           Basset       1.00      1.00      1.00        10
           Beagle       1.00      1.00      1.00        10
       Bloodhound 