In [37]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image

class Caltech200Dataset(Dataset):
    """Image dataset driven by a text file listing relative image paths.

    Every non-blank line of ``txt_file`` is a path relative to ``root_dir``
    whose first component begins with a 1-based integer class id followed by
    a dot (for example ``001.Class_Name/image.jpg``). The id is converted to a
    0-based label.

    Args:
        root_dir: Directory the listed paths are relative to.
        txt_file: Text file containing one relative image path per line.
            Blank / whitespace-only lines are ignored.
        transform: Optional callable applied to the RGB PIL image before it
            is returned from ``__getitem__``.

    Raises:
        ValueError: If a non-blank line does not start with ``<integer>.``.
    """

    def __init__(self, root_dir, txt_file, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        with open(txt_file, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                relative_path = line.strip()
                # A trailing newline or spacer line would otherwise reach
                # int('') below and abort the whole load.
                if not relative_path:
                    continue

                # Class id is the numeric prefix of the first path component.
                class_dir = relative_path.split('/')[0]
                try:
                    label = int(class_dir.split('.')[0]) - 1  # 1-based id -> 0-based label
                except ValueError as exc:
                    raise ValueError(
                        f"{txt_file}:{line_no}: cannot parse class id from {relative_path!r}"
                    ) from exc

                self.image_paths.append(os.path.join(root_dir, relative_path))
                self.labels.append(label)

    def __len__(self):
        """Return the number of listed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform, and return ``(image, label)``."""
        img_path = self.image_paths[idx]
        # convert() returns a new image, so the file handle can be released
        # as soon as the pixel data has been decoded.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Set up environment: use the GPU when one is visible, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Prepare the dataset
# The same deterministic preprocessing is used for both splits: resize to
# 224x224, convert to a tensor, then normalise each channel (presumably with
# the ImageNet statistics the pretrained weights expect -- TODO confirm).
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

root_dir = '/home/feem/Workspace/caltech_birds/images/'
train_list_file = '/home/feem/Workspace/caltech_birds/lists/train.txt'
test_list_file = '/home/feem/Workspace/caltech_birds/lists/test.txt'

train_dataset = Caltech200Dataset(root_dir=root_dir, txt_file=train_list_file, transform=transform)
test_dataset = Caltech200Dataset(root_dir=root_dir, txt_file=test_list_file, transform=transform)

# Both loaders share batch size and worker count; only the training loader shuffles.
loader_options = dict(batch_size=32, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Define the model: pretrained ResNet-50 with its final layer replaced by a
# fresh 200-way linear classifier.
model = torchvision.models.resnet50(pretrained=True)
num_classes = 200
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(device)

# Loss and optimiser; every model parameter is handed to Adam.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# num_epochs = 10
# for epoch in range(num_epochs):
#     model.train()
#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)
        
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
    
#     # Validation
#     model.eval()
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for images, labels in test_loader:
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
    
#     print(f'Epoch [{epoch+1}/{num_epochs}], Accuracy: {100 * correct / total:.2f}%')

# # Save the model
# torch.save(model.state_dict(), 'resnet50_caltech200.pth')

In [38]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
from tqdm import tqdm

# This cell relies on Caltech200Dataset, the data loaders, model, criterion and optimizer defined in the previous cell.

# Imports used for epoch timing and metric history
from time import time
from collections import defaultdict

# Training and evaluation functions
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch value is weighted by batch size, which assumes a
            mean-reduced loss -- confirm if a different reduction is used.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the accuracy in percent, both computed over the samples actually
        processed. ``(0.0, 0.0)`` when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(loader, desc="Training", leave=False)
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        running_loss += batch_loss * batch_size
        # detach() instead of the legacy .data attribute; argmax over the class dimension.
        predicted = outputs.detach().argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        progress_bar.set_postfix({'loss': batch_loss, 'accuracy': 100 * correct / total})

    # Nothing was processed: avoid dividing by zero.
    if total == 0:
        return 0.0, 0.0

    # Normalise by the number of samples seen rather than len(loader.dataset),
    # which differs when the loader drops the last batch or uses a subset sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode (it is not
            switched back afterwards).
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch value is weighted by batch size, which assumes a
            mean-reduced loss -- confirm if a different reduction is used.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the accuracy in percent, both computed over the samples actually
        processed. ``(0.0, 0.0)`` when the loader yields no batches.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        progress_bar = tqdm(loader, desc="Evaluating", leave=False)
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            batch_loss = loss.item()
            running_loss += batch_loss * batch_size
            # Gradients are already disabled here, so no .data / detach() is needed.
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            progress_bar.set_postfix({'loss': batch_loss, 'accuracy': 100 * correct / total})

    # Nothing was processed: avoid dividing by zero.
    if total == 0:
        return 0.0, 0.0

    # Normalise by the number of samples seen rather than len(loader.dataset),
    # which differs when the loader drops the last batch or uses a subset sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

# Training loop: train for a fixed number of epochs, score on test_loader after
# each one, record the metrics, and checkpoint whenever that score improves.
num_epochs = 10
best_acc = 0.0
history = defaultdict(list)

for epoch in range(num_epochs):
    start_time = time()

    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    # Note: the "validation" figures below are measured on test_loader.
    val_loss, val_acc = evaluate(model, test_loader, criterion, device)

    # Append this epoch's numbers to the per-metric history lists.
    epoch_metrics = {
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
    }
    for metric_name, metric_value in epoch_metrics.items():
        history[metric_name].append(metric_value)

    epoch_time = time() - start_time

    epoch_summary = (
        f"Epoch [{epoch+1}/{num_epochs}] "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}% | "
        f"Time: {epoch_time:.2f}s"
    )
    print(epoch_summary)

    # Save the best model (strict improvement only, so ties keep the earlier checkpoint)
    is_new_best = val_acc > best_acc
    if is_new_best:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_resnet50_caltech200.pth')
        print(f"Best model saved with accuracy: {best_acc:.2f}%")

# Save the final model (weights after the last epoch, regardless of score)
torch.save(model.state_dict(), 'final_resnet50_caltech200.pth')

print(f"Training completed. Best accuracy: {best_acc:.2f}%")

                                                                                      

Epoch [1/10] Train Loss: 5.4709, Train Acc: 0.30% | Val Loss: 6.2803, Val Acc: 0.73% | Time: 80.45s
Best model saved with accuracy: 0.73%


                                                                                      

Epoch [2/10] Train Loss: 5.2197, Train Acc: 0.87% | Val Loss: 5.2059, Val Acc: 1.22% | Time: 80.42s
Best model saved with accuracy: 1.22%


                                                                                     

Epoch [3/10] Train Loss: 5.0101, Train Acc: 1.40% | Val Loss: 4.9861, Val Acc: 2.18% | Time: 80.78s
Best model saved with accuracy: 2.18%


                                                                                     

Epoch [4/10] Train Loss: 4.8825, Train Acc: 2.40% | Val Loss: 4.9607, Val Acc: 2.14% | Time: 80.50s


                                                                                      

Epoch [5/10] Train Loss: 4.7329, Train Acc: 2.80% | Val Loss: 4.8631, Val Acc: 3.00% | Time: 81.14s
Best model saved with accuracy: 3.00%


                                                                                     

Epoch [6/10] Train Loss: 4.5824, Train Acc: 4.10% | Val Loss: 4.7115, Val Acc: 3.99% | Time: 81.29s
Best model saved with accuracy: 3.99%


                                                                                     

Epoch [7/10] Train Loss: 4.4416, Train Acc: 4.43% | Val Loss: 4.6749, Val Acc: 3.99% | Time: 65.58s


                                                                                     

Epoch [8/10] Train Loss: 4.3148, Train Acc: 5.43% | Val Loss: 5.0503, Val Acc: 3.92% | Time: 62.73s


                                                                                     

Epoch [9/10] Train Loss: 4.1200, Train Acc: 7.10% | Val Loss: 4.8420, Val Acc: 4.06% | Time: 63.19s
Best model saved with accuracy: 4.06%


                                                                                     

Epoch [10/10] Train Loss: 3.9766, Train Acc: 9.37% | Val Loss: 4.3939, Val Acc: 6.63% | Time: 63.26s
Best model saved with accuracy: 6.63%
Training completed. Best accuracy: 6.63%


In [4]:
# Load the MATLAB file stored next to the train/test list files.
splits_mat_path = '/home/feem/Workspace/caltech_birds/lists/splits.mat'
mat = scipy.io.loadmat(splits_mat_path)

In [34]:
# Take index 0 at four successive levels of the 'splits' entry and show the
# length of what is found there.
splits_entry = mat['splits']
selected = splits_entry[0][0][0][0]
len(selected)

3000