In [1]:
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, WeightedRandomSampler
from sklearn.metrics import confusion_matrix, accuracy_score
import numpy as np

# transformations
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(0.75, 1.33)),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])

# dataset
dataset = datasets.ImageFolder(root='archive/', transform=train_transform)


class_counts = np.bincount(dataset.targets)
class_weights = 1. / class_counts
weights = [class_weights[t] for t in dataset.targets]

# Split the dataset
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42))

test_dataset.dataset.transform = test_transform

train_loader = DataLoader(train_dataset, batch_size=32, sampler=WeightedRandomSampler(weights[:train_size], train_size))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load pretrained model and feature extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', ignore_mismatched_sizes=True)

model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, verbose=True)

# Training loop
num_epochs = 25
best_loss = float('inf')
patience = 5
early_stop = False
epochs_no_improve = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = feature_extractor(images=[input.permute(1, 2, 0).numpy() for input in inputs], return_tensors="pt").pixel_values.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    # Validate the model
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = feature_extractor(images=[input.permute(1, 2, 0).numpy() for input in inputs], return_tensors="pt").pixel_values.to(device)
            labels = labels.to(device)
            outputs = model(inputs).logits
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    
    val_loss /= len(test_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {running_loss/len(train_loader)}, Validation Loss: {val_loss}')
    scheduler.step(val_loss)

    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_no_improve = 0
        torch.save(model, 'best_model_V6.pth')  # Save the best model
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping')
            early_stop = True
            break

if early_stop:
    model = torch.load('best_model_V6.pth')




It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


Epoch 1/25, Training Loss: 1.1941048979759217, Validation Loss: 0.6922605037689209
Epoch 2/25, Training Loss: 0.7003136515617371, Validation Loss: 0.6960447430610657
Epoch 3/25, Training Loss: 0.7071355164051056, Validation Loss: 0.6912616491317749
Epoch 4/25, Training Loss: 0.7028756499290466, Validation Loss: 0.7045717835426331
Epoch 5/25, Training Loss: 0.7000247776508332, Validation Loss: 0.708743135134379
Epoch 6/25, Training Loss: 0.6997613430023193, Validation Loss: 0.7745187878608704
Epoch 7/25, Training Loss: 0.7248583555221557, Validation Loss: 0.6917043526967367
Epoch 8/25, Training Loss: 0.6995402336120605, Validation Loss: 0.6911299427350363
Epoch 9/25, Training Loss: 0.6924488782882691, Validation Loss: 0.7143010298411051
Epoch 10/25, Training Loss: 0.6961859047412873, Validation Loss: 0.7103781898816427
Epoch 11/25, Training Loss: 0.6951748073101044, Validation Loss: 0.6904563705126444
Epoch 12/25, Training Loss: 0.6956531882286072, Validation Loss: 0.6976172725359598
Ep

NameError: name 'pytorch' is not defined

In [3]:
# Evaluate the final model
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = feature_extractor(images=[input.permute(1, 2, 0).numpy() for input in inputs], return_tensors="pt").pixel_values.to(device)
        labels = labels.to(device)
        outputs = model(inputs).logits
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute confusion matrix and accuracy
conf_matrix = confusion_matrix(all_labels, all_preds)
accuracy = accuracy_score(all_labels, all_preds)

print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Accuracy: {accuracy * 100:.2f}%')

Confusion Matrix:
[[41  0]
 [39  0]]
Accuracy: 51.25%
