In [1]:
import os
import random
import shutil
from sklearn.model_selection import KFold
import torch
from torch.utils.data import DataLoader, Subset, Dataset
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm
import time
import numpy as np

In [2]:
# Paths for data and separate test set
data_path = 'C:\\Users\\HP\\anaconda3\\envs\\Heather\\signsss'
test_path = 'C:\\Users\\HP\\Desktop\\ane\\Test'

In [3]:
# Create the FrameDataset class (as you have already defined it)
class FrameDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.data = []
        self.label_to_idx = {label: idx for idx, label in enumerate(os.listdir(folder_path))}
        
        for label_folder in os.listdir(folder_path):
            label_path = os.path.join(folder_path, label_folder)
            if os.path.isdir(label_path):
                for frame in os.listdir(label_path):
                    frame_path = os.path.join(label_path, frame)
                    self.data.append((frame_path, self.label_to_idx[label_folder]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        frame_path, label_idx = self.data[idx]
        image = Image.open(frame_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label_idx

In [4]:
# Define transformations
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [5]:
# Load the full dataset
full_dataset = FrameDataset(data_path, transform=transform)

In [6]:
# Define the model and training function
def initialize_model(num_classes):
    model = models.resnet50(weights='DEFAULT')
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

def train_and_validate(model, train_loader, val_loader, epochs, criterion, optimizer, device):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        print(f"Epoch {epoch + 1}/{epochs}, "
              f"Train Loss: {running_loss / len(train_loader):.4f}, "
              f"Val Loss: {val_loss / len(val_loader):.4f}, "
              f"Val Acc: {100 * correct / total:.2f}%")

In [7]:
# Cross-validation setup
kf = KFold(n_splits=2, shuffle=True, random_state=42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs = 10  # Or any number you find suitable
num_classes = len(os.listdir(data_path))
fold_accuracies = []

In [8]:
# 7-Fold Cross Validation
for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset)):
    print(f"\nFold {fold + 1}")
    
    # Create data loaders for the current fold
    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    # Initialize model, criterion, and optimizer for each fold
    model = initialize_model(num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    
    # Train and validate
    train_and_validate(model, train_loader, val_loader, epochs, criterion, optimizer, device)

    # Evaluate fold accuracy
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    fold_accuracy = 100 * correct / total
    fold_accuracies.append(fold_accuracy)
    print(f"Fold {fold + 1} Validation Accuracy: {fold_accuracy:.2f}%")

ValueError: Cannot have number of splits n_splits=2 greater than the number of samples: n_samples=0.

In [None]:
# Average cross-validation accuracy
average_accuracy = np.mean(fold_accuracies)
print(f"\nAverage Cross-Validation Accuracy: {average_accuracy:.2f}%")

In [None]:
# Testing the final model on the separate test set (if applicable)
# Save the model if needed
torch.save(model.state_dict(), 'Tsakatsa_CrossVal.pth')
print("Model Saved Successfully")