In [14]:
# 1. Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 2. Install PyTorch (if not already)
!pip install torch torchvision --quiet

# 3. Imports
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset, random_split
import random

# 4. Set dataset root
root = "/content/drive/MyDrive/Colab Notebooks/data/naip_patches"

# 5. Image transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((128, 128)),  # Resize to 128x128
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)  # Normalize to [-1, 1]
])

# 6. Load dataset
dataset = datasets.ImageFolder(root=root, transform=transform)
total = len(dataset)
print(f"Total images in dataset: {total}")

# 7. Randomly sample 1.5% of the dataset
subset_size = int(0.075 * total)
indices = list(range(total))
random.shuffle(indices)
subset_indices = indices[:subset_size]
subset = Subset(dataset, subset_indices)
print(f"Using {subset_size} images ({100*subset_size/total:.2f}% of dataset)")

# 8. Split into train/val/test (70/15/15)
train_size = int(0.7 * subset_size)
val_size = int(0.15 * subset_size)
test_size = subset_size - train_size - val_size
train_set, val_set, test_set = random_split(subset, [train_size, val_size, test_size])
print(f"Train: {len(train_set)}, Val: {len(val_set)}, Test: {len(test_set)}")

# 9. DataLoaders
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32)
test_loader = DataLoader(test_set, batch_size=32)

# 10. Print a batch shape for sanity check
images, labels = next(iter(train_loader))
print(f"Batch shape: {images.shape}, Labels: {labels.shape}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Total images in dataset: 19442
Using 1458 images (7.50% of dataset)
Train: 1020, Val: 218, Test: 220
Batch shape: torch.Size([32, 3, 128, 128]), Labels: torch.Size([32])


In [15]:
import torch
import torch.nn as nn
import torchvision.models as models

# Use pretrained ResNet18
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 4)  # 4 ridership classes

model = model.to("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(10):  # adjust as needed
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.9126
Epoch 2, Loss: 0.3017
Epoch 3, Loss: 0.1665
Epoch 4, Loss: 0.0668
Epoch 5, Loss: 0.0368
Epoch 6, Loss: 0.0293
Epoch 7, Loss: 0.0215
Epoch 8, Loss: 0.0133
Epoch 9, Loss: 0.0137
Epoch 10, Loss: 0.0078


In [17]:
import torch

def evaluate_model(model, dataloader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to track gradients
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # Get class with highest score
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Usage example:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # Make sure your model is on the right device

test_accuracy = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Accuracy: 80.45%
