In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from transformers import SwinForImageClassification

# Define hyperparameters
BATCH_SIZE = 16
LEARNING_RATE = 0.001
EPOCHS = 3  # Reduced to 3 epochs
NUM_CLASSES = 101  # UCF101 dataset classes
TEST_SPLIT = 0.2  # Proportion of data used for testing (not used below: train/test come from separate folders)
SEED = 42  # Seed for PyTorch's global random number generator

# Dataset locations; change DATA_ROOT when running on another machine
DATA_ROOT = "/Users/rukmini/Documents/Project/newdata"
TRAIN_DIR = f"{DATA_ROOT}/reduced_train"
TEST_DIR = f"{DATA_ROOT}/reduced_test"

# Seed before the model is built and before the loaders are iterated: the
# replacement classifier head is randomly initialised and the training loader
# shuffles, so without a fixed seed two runs of this cell are not comparable.
torch.manual_seed(SEED)

# Define data transformations for pre-processing
# (resize to the 224x224 input of the checkpoint, then normalise with the
# standard ImageNet channel mean/std)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load datasets from separate train and test folders
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
test_dataset = datasets.ImageFolder(root=TEST_DIR, transform=transform)

# ImageFolder derives label indices from each root's own sorted sub-folder
# names. If the two folders do not contain the same classes, the same index
# means different classes in train and test and the test accuracy is meaningless.
if train_dataset.classes != test_dataset.classes:
    raise ValueError(
        "Train and test folders contain different class sub-folders; "
        "their label indices would not line up."
    )
# Labels must fit inside the classifier head created below.
if len(train_dataset.classes) > NUM_CLASSES:
    raise ValueError(
        f"Found {len(train_dataset.classes)} classes on disk but NUM_CLASSES is {NUM_CLASSES}."
    )

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Load Swin Transformer model
# ignore_mismatched_sizes lets the 1000-way checkpoint head be replaced by a
# freshly initialised NUM_CLASSES-way head (see the warning in the cell output).
model = SwinForImageClassification.from_pretrained(
    "microsoft/swin-tiny-patch4-window7-224",
    num_labels=NUM_CLASSES,
    ignore_mismatched_sizes=True
)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# Training loop
def train_model():
    """Fine-tune the module-level ``model`` for ``EPOCHS`` passes over ``train_loader``.

    Relies on the notebook globals ``model``, ``train_loader``, ``device``,
    ``criterion``, ``optimizer`` and ``EPOCHS``. After every epoch one summary
    line with the mean batch loss and the training accuracy is printed.

    Returns:
        list[tuple[float, float]]: ``(mean batch loss, accuracy in percent)``
        for each completed epoch. The list is empty when the loader yields no
        samples; in that case a notice is printed and training stops instead
        of failing with ``ZeroDivisionError``.
    """
    history = []
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

        # Nothing was seen: the averages below are undefined, and further
        # epochs over the same loader would be empty as well.
        if total == 0:
            print("No training samples found; stopping training.")
            break

        epoch_loss = running_loss / num_batches
        epoch_accuracy = 100 * correct / total
        history.append((epoch_loss, epoch_accuracy))
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_accuracy:.2f}%")

    return history

# Testing loop
def test_model():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Real-time updates during training
def real_time_updates(log_every=10):
    """Train like ``train_model`` but also print progress while an epoch runs.

    Relies on the notebook globals ``model``, ``train_loader``, ``device``,
    ``criterion``, ``optimizer`` and ``EPOCHS``.

    Args:
        log_every: print a progress line after every ``log_every`` batches.
            The line shows the loss of the current batch and the accuracy
            accumulated since the start of the epoch. ``0`` disables the
            per-step lines. Defaults to 10.

    At the end of each epoch a summary line with the mean batch loss and the
    epoch accuracy is printed, so short loaders (fewer than ``log_every``
    batches) still produce output. If the loader yields no samples a notice
    is printed and training stops.
    """
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

            # Periodic progress line (every `log_every` batches)
            if log_every and (i + 1) % log_every == 0:
                print(f"Epoch [{epoch + 1}/{EPOCHS}], Step [{i + 1}/{len(train_loader)}], "
                      f"Loss: {loss.item():.4f}, Accuracy: {100 * correct / total:.2f}%")

        # Nothing was seen: the averages below are undefined, and further
        # epochs over the same loader would be empty as well.
        if total == 0:
            print("No training samples found; stopping training.")
            break

        # Epoch summary: this is where the accumulated running_loss is reported
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {running_loss / num_batches:.4f}, "
              f"Accuracy: {100 * correct / total:.2f}%")

# Train and test the model
if __name__ == "__main__":
    # Fine-tune first (progress is printed every 10 batches), then measure
    # accuracy on the separate test folder with the freshly trained weights.
    real_time_updates()
    test_model()


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([101, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([101]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch [1/3], Step [10/395], Loss: 2.0471, Accuracy: 59.38%
Epoch [1/3], Step [20/395], Loss: 1.5452, Accuracy: 68.75%
Epoch [1/3], Step [30/395], Loss: 0.9077, Accuracy: 69.79%
Epoch [1/3], Step [40/395], Loss: 0.5226, Accuracy: 73.28%
Epoch [1/3], Step [50/395], Loss: 1.2386, Accuracy: 74.12%
Epoch [1/3], Step [60/395], Loss: 0.5527, Accuracy: 74.90%
Epoch [1/3], Step [70/395], Loss: 1.3703, Accuracy: 74.55%
Epoch [1/3], Step [80/395], Loss: 1.0352, Accuracy: 74.38%
Epoch [1/3], Step [90/395], Loss: 1.2353, Accuracy: 74.51%
Epoch [1/3], Step [100/395], Loss: 0.9099, Accuracy: 74.81%
Epoch [1/3], Step [110/395], Loss: 1.4807, Accuracy: 75.45%
Epoch [1/3], Step [120/395], Loss: 0.9040, Accuracy: 75.89%
Epoch [1/3], Step [130/395], Loss: 0.8414, Accuracy: 75.77%
Epoch [1/3], Step [140/395], Loss: 1.1673, Accuracy: 75.67%
Epoch [1/3], Step [150/395], Loss: 1.6101, Accuracy: 75.29%
Epoch [1/3], Step [160/395], Loss: 0.5200, Accuracy: 75.20%
Epoch [1/3], Step [170/395], Loss: 1.4906, Accura

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from transformers import SwinForImageClassification



# Define hyperparameters
BATCH_SIZE = 16
LEARNING_RATE = 0.001
EPOCHS = 20
NUM_CLASSES = 101  # UCF101 dataset classes
TEST_SPLIT = 0.2  # Proportion of data used for testing (not used below: train/test come from separate folders)
SEED = 42  # Seed for PyTorch's global random number generator

# Dataset locations; change DATA_ROOT when running on another machine
DATA_ROOT = "/Users/rukmini/Documents/Project/newdata"
TRAIN_DIR = f"{DATA_ROOT}/reduced_train"
TEST_DIR = f"{DATA_ROOT}/reduced_test"

# Seed before the model is built and before the loaders are iterated: the
# replacement classifier head is randomly initialised and the training loader
# shuffles, so without a fixed seed two runs of this cell are not comparable.
torch.manual_seed(SEED)


# Define data transformations for pre-processing
# (resize to the 224x224 input of the checkpoint, then normalise with the
# standard ImageNet channel mean/std)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Load datasets from separate train and test folders
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
test_dataset = datasets.ImageFolder(root=TEST_DIR, transform=transform)

# ImageFolder derives label indices from each root's own sorted sub-folder
# names. If the two folders do not contain the same classes, the same index
# means different classes in train and test and the test accuracy is meaningless.
if train_dataset.classes != test_dataset.classes:
    raise ValueError(
        "Train and test folders contain different class sub-folders; "
        "their label indices would not line up."
    )
# Labels must fit inside the classifier head created below.
if len(train_dataset.classes) > NUM_CLASSES:
    raise ValueError(
        f"Found {len(train_dataset.classes)} classes on disk but NUM_CLASSES is {NUM_CLASSES}."
    )

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


# Load Swin Transformer model
# ignore_mismatched_sizes lets the 1000-way checkpoint head be replaced by a
# freshly initialised NUM_CLASSES-way head (see the warning in the cell output).
model = SwinForImageClassification.from_pretrained(
    "microsoft/swin-tiny-patch4-window7-224",
    num_labels=NUM_CLASSES,
    ignore_mismatched_sizes=True
)


# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)



# Training loop
def train_model():
    """Fine-tune the module-level ``model`` for ``EPOCHS`` passes over ``train_loader``.

    Relies on the notebook globals ``model``, ``train_loader``, ``device``,
    ``criterion``, ``optimizer`` and ``EPOCHS``. After every epoch one summary
    line with the mean batch loss and the training accuracy is printed.

    Returns:
        list[tuple[float, float]]: ``(mean batch loss, accuracy in percent)``
        for each completed epoch. The list is empty when the loader yields no
        samples; in that case a notice is printed and training stops instead
        of failing with ``ZeroDivisionError``.
    """
    history = []
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

        # Nothing was seen: the averages below are undefined, and further
        # epochs over the same loader would be empty as well.
        if total == 0:
            print("No training samples found; stopping training.")
            break

        epoch_loss = running_loss / num_batches
        epoch_accuracy = 100 * correct / total
        history.append((epoch_loss, epoch_accuracy))
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_accuracy:.2f}%")

    return history




# Testing loop
def test_model():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")



# Real-time updates during training
def real_time_updates(log_every=10):
    """Train like ``train_model`` but also print progress while an epoch runs.

    Relies on the notebook globals ``model``, ``train_loader``, ``device``,
    ``criterion``, ``optimizer`` and ``EPOCHS``.

    Args:
        log_every: print a progress line after every ``log_every`` batches.
            The line shows the loss of the current batch and the accuracy
            accumulated since the start of the epoch. ``0`` disables the
            per-step lines. Defaults to 10.

    At the end of each epoch a summary line with the mean batch loss and the
    epoch accuracy is printed, so short loaders (fewer than ``log_every``
    batches) still produce output. If the loader yields no samples a notice
    is printed and training stops.
    """
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images).logits
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

            # Periodic progress line (every `log_every` batches)
            if log_every and (i + 1) % log_every == 0:
                print(f"Epoch [{epoch + 1}/{EPOCHS}], Step [{i + 1}/{len(train_loader)}], "
                      f"Loss: {loss.item():.4f}, Accuracy: {100 * correct / total:.2f}%")

        # Nothing was seen: the averages below are undefined, and further
        # epochs over the same loader would be empty as well.
        if total == 0:
            print("No training samples found; stopping training.")
            break

        # Epoch summary: this is where the accumulated running_loss is reported
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {running_loss / num_batches:.4f}, "
              f"Accuracy: {100 * correct / total:.2f}%")



# Train and test the model
if __name__ == "__main__":
    # Fine-tune first (progress is printed every 10 batches), then measure
    # accuracy on the separate test folder with the freshly trained weights.
    real_time_updates()
    test_model()

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([101, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([101]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
