In [8]:
# Import Required Libraries
import torch
from PIL import Image
from torchvision import transforms
# from transformers import ViTFeatureExtractor, ViTForImageClassification

# Load model directly
from transformers import AutoImageProcessor, AutoModelForImageClassification

processor = AutoImageProcessor.from_pretrained("nateraw/food")
model = AutoModelForImageClassification.from_pretrained("nateraw/food")

# Load and Preprocess the Image
image = Image.open('berry.png')
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
image = transform(image)
inputs = feature_extractor(images=image, return_tensors="pt")

# Perform Image Classification
outputs = model(**inputs)
logits = outputs.logits
predicted_class_idx = torch.argmax(logits, dim=1).item()

# Print the Predicted Class
print("Predicted class:", model.config.id2label[predicted_class_idx])

preprocessor_config.json: 100%|██████████| 228/228 [00:00<00:00, 81.4kB/s]
config.json: 100%|██████████| 5.58k/5.58k [00:00<00:00, 6.68MB/s]
pytorch_model.bin: 100%|██████████| 344M/344M [00:07<00:00, 43.3MB/s] 


Predicted class: breakfast_burrito


In [11]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from transformers import ViTFeatureExtractor, ViTForImageClassification
from torch import nn, optim

# Load and preprocess the dataset
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
train_dataset = datasets.ImageFolder('food20dataset/train_set', transform=transform)
test_dataset = datasets.ImageFolder('food20dataset/test_set', transform=transform)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load the pre-trained model
model = AutoModelForImageClassification.from_pretrained("nateraw/food")

# Modify the last layer
num_classes = len(train_dataset.classes)
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

# Define the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train the model
num_epochs = 200  # Replace with the desired number of epochs
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_dataloader:
        outputs = model(images)
        loss = loss_fn(outputs.logits, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            outputs = model(images)
            loss = loss_fn(outputs.logits, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}: Test Loss: {test_loss:.3f}, Test Accuracy: {accuracy:.2f}%")


Epoch 1/200: Test Loss: 36.987, Test Accuracy: 42.37%
Epoch 2/200: Test Loss: 28.253, Test Accuracy: 51.69%
Epoch 3/200: Test Loss: 29.196, Test Accuracy: 55.25%
Epoch 4/200: Test Loss: 24.783, Test Accuracy: 60.51%
Epoch 5/200: Test Loss: 19.183, Test Accuracy: 68.81%
Epoch 6/200: Test Loss: 22.886, Test Accuracy: 66.78%
Epoch 7/200: Test Loss: 22.678, Test Accuracy: 67.63%
Epoch 8/200: Test Loss: 27.606, Test Accuracy: 60.34%
Epoch 9/200: Test Loss: 21.333, Test Accuracy: 68.31%
Epoch 10/200: Test Loss: 21.290, Test Accuracy: 71.19%
Epoch 11/200: Test Loss: 25.448, Test Accuracy: 66.44%
Epoch 12/200: Test Loss: 22.813, Test Accuracy: 67.80%
Epoch 13/200: Test Loss: 26.906, Test Accuracy: 61.69%
Epoch 14/200: Test Loss: 28.244, Test Accuracy: 63.56%
Epoch 15/200: Test Loss: 23.239, Test Accuracy: 71.19%
Epoch 16/200: Test Loss: 23.448, Test Accuracy: 70.34%
Epoch 17/200: Test Loss: 23.489, Test Accuracy: 68.98%
Epoch 18/200: Test Loss: 25.932, Test Accuracy: 66.95%
Epoch 19/200: Test 

KeyboardInterrupt: 

In [10]:
from sklearn.metrics import classification_report
from torch import no_grad

# Assume test_loader is the DataLoader for your test set

# Switch model to evaluation mode
model.eval()

true_labels = []
pred_labels = []

with no_grad():
    for images, labels in test_dataloader:
        outputs = model(images)
        _, preds = torch.max(outputs.logits, 1)
        
        true_labels.extend(labels.numpy())
        pred_labels.extend(preds.numpy())

# Compute metrics
print(classification_report(true_labels, pred_labels, target_names=train_dataset.classes))

                  precision    recall  f1-score   support

        biriyani       1.00      0.53      0.70        30
    bisibelebath       0.75      0.60      0.67        30
      butternaan       0.76      0.43      0.55        30
           chaat       0.73      0.73      0.73        30
        chappati       0.53      0.79      0.64        29
          dhokla       0.75      0.90      0.82        30
            dosa       0.57      0.80      0.67        30
     gulab jamun       1.00      0.69      0.82        26
           halwa       0.74      0.67      0.70        30
            idly       0.48      0.70      0.57        30
      kathi roll       0.61      0.63      0.62        30
       meduvadai       0.83      0.67      0.74        30
         noodles       0.74      0.87      0.80        30
       paniyaram       0.78      0.60      0.68        30
           poori       0.71      0.50      0.59        30
          samosa       1.00      0.37      0.54        27
tandoori chic