In [3]:
from datasets import load_dataset

dataset = load_dataset("beans")
print(dataset)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 1034/1034 [00:00<00:00, 3150.90 examples/s]
Generating validation split: 100%|██████████| 133/133 [00:00<00:00, 4244.83 examples/s]
Generating test split: 100%|██████████| 128/128 [00:00<00:00, 4821.56 examples/s]


DatasetDict({
    train: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 1034
    })
    validation: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 133
    })
    test: Dataset({
        features: ['image_file_path', 'image', 'labels'],
        num_rows: 128
    })
})


In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

data_path = "SportsImageClassification"
batch_size = 32
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==== Transformations ====
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  
])

train_data = datasets.ImageFolder(f"{data_path}/train", transform=transform)
valid_data = datasets.ImageFolder(f"{data_path}/valid", transform=transform)
test_data  = datasets.ImageFolder(f"{data_path}/test", transform=transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size)
test_loader  = DataLoader(test_data, batch_size=batch_size)

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(train_data.classes))  
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"[{epoch+1}/{num_epochs}] Loss: {total_loss/len(train_loader):.4f}")

model.eval()
correct = total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

print(f"✅ Test Accuracy: {correct/total:.2%}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\dumas/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 57.1MB/s]


[1/5] Loss: 1.8563
[2/5] Loss: 0.4395
[3/5] Loss: 0.1274
[4/5] Loss: 0.0413
[5/5] Loss: 0.0194
✅ Test Accuracy: 98.00%
