# PCGS Coin Grade Classification

Train a model to classify coin grades based on images scraped from PCGS.

## Step 1: Import Libraries

In [None]:
from transformers import AutoFeatureExtractor, ResNetForImageClassification
import torch
from torchvision import datasets, transforms
import torchvision
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import numpy as np

## Step 2: Prepare Dataset

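Before running this notebook, run the dataset preparation script. The cells below expect it to have populated `coin_dataset/train`, `coin_dataset/val`, and `coin_dataset/test`: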

```bash
python prepare_dataset.py
```

## Step 3: Define Transforms and Load Dataset

In [None]:
# Preprocessing applied to every image in all three splits.
# Resize is given an explicit (height, width): with a bare int torchvision
# only matches the *shorter* edge, so images with different aspect ratios
# would produce different-sized tensors that the DataLoader's default collate
# cannot stack into a batch. Square inputs are resized exactly as before.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # Standard ImageNet per-channel statistics (RGB order).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Each split lives in its own directory; ImageFolder derives the class list
# from the sub-directory names it finds there.
training_dataset = datasets.ImageFolder('coin_dataset/train', transform=transform)
testing_dataset = datasets.ImageFolder('coin_dataset/test', transform=transform)
validation_dataset = datasets.ImageFolder('coin_dataset/val', transform=transform)

# The training split's class list is used as the label set for the model.
classes = training_dataset.classes
print(f"Classes (grades): {classes}")
print(f"Training: {len(training_dataset)}, Test: {len(testing_dataset)}, Val: {len(validation_dataset)}")

## Step 4: Create Data Loaders

In [None]:
# Number of images per mini-batch, shared by all three loaders.
BATCH_SIZE = 4

# Build the loaders in one pass; only the training split is reshuffled.
training_loader, testing_loader, validation_loader = (
    torch.utils.data.DataLoader(split, batch_size=BATCH_SIZE, shuffle=reshuffle)
    for split, reshuffle in (
        (training_dataset, True),
        (testing_dataset, False),
        (validation_dataset, False),
    )
)

## Step 5: Initialize Model

In [None]:
# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# One output label per grade directory found in the training split.
num_classes = len(classes)

# Load the pretrained ResNet-50 checkpoint with a head sized for num_classes
# (ignore_mismatched_sizes tolerates the head shape differing from the
# checkpoint's), then move the model onto the selected device.
model = ResNetForImageClassification.from_pretrained(
    'microsoft/resnet-50',
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
).to(device)
print(f"Model loaded with {num_classes} classes")

## Step 6: Define Loss and Optimizer

In [None]:
# Classification loss over the model's logits.
criterion = torch.nn.CrossEntropyLoss()

# Adam over every model parameter; StepLR multiplies the learning rate by
# gamma once every step_size calls to scheduler.step().
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

# Give each run its own TensorBoard directory, keyed by start time.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
writer = SummaryWriter(log_dir=f'runs/coin_classifier_{timestamp}')

## Step 7: Training Functions

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module whose call result exposes a ``.logits`` attribute.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(logits, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch losses and the
        percentage of correctly predicted samples.
    """
    model.train()
    num_batches = len(train_loader)
    loss_sum = 0.0
    hits = 0
    seen = 0

    for step, (images, labels) in enumerate(train_loader, start=1):
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images).logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        loss_sum += batch_loss

        # Predicted class is the index of the largest logit in each row.
        predicted = torch.max(logits.detach(), dim=1).indices
        seen += labels.size(0)
        hits += (predicted == labels).sum().item()

        # Progress line on every tenth batch.
        if step % 10 == 0:
            print(f'  Batch [{step}/{num_batches}], Loss: {batch_loss:.4f}')

    return loss_sum / num_batches, 100 * hits / seen

def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: Module whose call result exposes a ``.logits`` attribute.
        val_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(logits, labels)``.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch losses and the
        percentage of correctly predicted samples.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    # Gradient tracking is switched off for the whole evaluation pass.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images).logits
            loss_sum += criterion(logits, labels).item()

            # Predicted class is the index of the largest logit in each row.
            predicted = torch.max(logits, dim=1).indices
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    return loss_sum / len(val_loader), 100 * hits / seen

## Step 8: Train the Model

In [None]:
# Total number of passes over the training split.
NUM_EPOCHS = 20
# Best validation accuracy (percent) seen so far.
best_val_acc = 0.0

try:
    for epoch in range(NUM_EPOCHS):
        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
        train_loss, train_acc = train_epoch(model, training_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, validation_loader, criterion, device)
        # Advance the learning-rate schedule once per epoch, after the
        # optimizer has taken its steps for this epoch.
        scheduler.step()

        # Log the epoch's metrics to TensorBoard, indexed by epoch number.
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/validation', val_loss, epoch)
        writer.add_scalar('Accuracy/train', train_acc, epoch)
        writer.add_scalar('Accuracy/validation', val_acc, epoch)

        print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%\n")

        # Checkpoint whenever validation accuracy improves. The first epoch
        # always saves so that 'coin_classifier_best.pth' exists even if
        # validation accuracy never rises above the initial 0.0.
        if epoch == 0 or val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'coin_classifier_best.pth')
            print(f"  ✓ Saved best model (Val Acc: {val_acc:.2f}%)\n")
finally:
    # Close the TensorBoard writer even when training is interrupted or
    # raises, so events logged so far are flushed to disk.
    writer.close()

print(f"Training complete! Best validation accuracy: {best_val_acc:.2f}%")

## Step 9: Evaluate on Test Set

In [None]:
# Restore the best checkpoint written during training. map_location remaps
# the saved tensors onto the current device, so a checkpoint saved on one
# backend (e.g. CUDA) still loads when the notebook runs on another (CPU/MPS).
model.load_state_dict(torch.load('coin_classifier_best.pth', map_location=device))

# Score the restored model on the held-out test split.
test_loss, test_acc = validate(model, testing_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")