<h1> ECE4179 - Semi-Supervised Learning Project</h1>
<h2>Data</h2>

We will be using a dataset that can be obtained directly from the torchvision package. There are 10 classes and we will be training a CNN for the image classification task. We have training, validation and test sets that are labelled with the class, and a large unlabeled set.

We will be simulating a low training data scenario by sampling only a small percentage of the labelled data (10%) as training data. The remaining examples will be used as the validation set.

To get the labelled data, change the dataset_dir to something suitable for your machine, and execute the following (you will then probably want to wrap the dataset objects in a PyTorch DataLoader):

In [1]:
import torch
import torch.nn as nn
from torchvision.datasets import STL10 as STL10
import torchvision.transforms as transforms
from torch.utils.data import random_split
import torchvision
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import Subset
from copy import deepcopy
from torch.optim import Adam
import torch.optim as optim
from torchvision import models
from sklearn.metrics import f1_score, classification_report
import torch.nn.functional as F
import csv
import os
import random

# --- Dataset location: edit to suit your machine -------------------------
dataset_dir = "../../CNN-VAE/data"
# MonARCH alternative:
# dataset_dir = "/mnt/lustre/projects/ds19/SHARED"

# --- Data and training hyper-parameters ----------------------------------
# STL10 images are 3x96x96; batch_size is only an example value.
image_size, batch_size = 96, 16
num_classes, num_epochs, learning_rate = 10, 10, 0.001

# --- Compute device -------------------------------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
print(torch.cuda.is_available())  # True when a CUDA device is usable

Using device: cuda
True


<h3>Create the appropriate transforms</h3>

In [2]:
# Labelled training images: padded random crop plus random mirroring as
# augmentation, then conversion to a tensor normalised to roughly [-1, 1].
transform_train = transforms.Compose([
    transforms.RandomCrop(image_size, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# Unlabelled images: random mirroring only (no crop).
transform_unlabelled = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# Evaluation images: no random operations, only a centre crop.
transform_test = transforms.Compose([
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])


<h3>Create training and validation split</h3>

In [3]:
# Load the labelled 'train' split twice so that training and validation can
# use different transforms: the augmented view feeds training, while the
# deterministic view (transform_test) feeds validation. Splitting a single
# augmented dataset would apply random crops/flips to validation images and
# make validation metrics noisy.
trainval_set = STL10(dataset_dir, split='train', transform=transform_train, download=True)
_trainval_eval_set = STL10(dataset_dir, split='train', transform=transform_test, download=False)

#Use 10% of data for training - simulating low data scenario
num_train = int(len(trainval_set)*0.1)

#Split data into train/val sets
torch.manual_seed(0) #Set torch's random seed so that random split of data is reproducible
train_set, _val_split = random_split(trainval_set, [num_train, len(trainval_set)-num_train])

# Same validation indices as the split above, but read through the
# non-augmented dataset.
val_set = Subset(_trainval_eval_set, _val_split.indices)

#Load test set
test_set = STL10(dataset_dir, split='test', transform=transform_test, download=False)

Files already downloaded and verified


<h3>Get the unlabelled data</h3>

In [4]:
# Load the 'unlabeled' STL10 split, downloading it if it is not already in
# dataset_dir; images go through transform_unlabelled.
unlabelled_set = STL10(dataset_dir, split='unlabeled', transform=transform_unlabelled, download=True)

Files already downloaded and verified


### Print the length of unlabelled data

In [5]:
# Number of samples in the unlabelled split (shown as the cell output).
len(unlabelled_set)

100000

### Use only 1/1000 of the unlabelled data

In [6]:
# Determine the size of the subset (1/1000 of the full dataset)
subset_size = len(unlabelled_set) // 1000  # This will be 100 samples

# Randomly select indices for the subset. A dedicated, seeded generator keeps
# the selection reproducible across runs (matching the seeded train/val
# split) without touching the global `random` state.
random_indices = random.Random(0).sample(range(len(unlabelled_set)), subset_size)

# Create a subset of the unlabelled dataset
unlabelled_subset = Subset(unlabelled_set, random_indices)

# Now, create the DataLoader using the subset
unlabelled_loader = DataLoader(unlabelled_subset, shuffle=True, batch_size=batch_size, num_workers=2)

You may find later that you want to make changes to how the unlabelled data is loaded. This might require you sub-classing the STL10 class used above or to create your own dataloader similar to the Pytorch one.
https://pytorch.org/docs/stable/_modules/torchvision/datasets/stl10.html#STL10

<h3>Create the four dataloaders</h3>

In [7]:
# Only the training loader is shuffled; validation and test loaders iterate
# their datasets in order.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

valid_loader = DataLoader(val_set, num_workers=2, batch_size=batch_size)
test_loader = DataLoader(test_set, num_workers=2, batch_size=batch_size)

<h3>Accuracy</h3>

In [8]:
def test_model(model, test_loader):
    """Measure top-1 accuracy of `model` over `test_loader`.

    The model is moved to CUDA when available (CPU otherwise) and switched to
    evaluation mode. Each batch yielded by `test_loader` must unpack as
    `(inputs, labels)`.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable of `(inputs, labels)` batches.

    Returns:
        Accuracy as a percentage (0-100), or NaN when the loader yields no
        samples. The result is also printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():  # inference only, no gradients needed
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Avoid a ZeroDivisionError and make the empty case explicit.
        print("Test Accuracy: n/a (the loader yielded no samples)")
        return float("nan")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy}%")
    return accuracy


# Evaluation helper, not a pytest test: keep test runners from collecting it
# because of its `test_` prefix.
test_model.__test__ = False


<h3>Macro F1 Score</h3>

In [9]:
def test_model_with_f1(model, test_loader, num_classes=10):
    """Print the macro F1 score and a per-class report for `model`.

    The model is moved to CUDA when available (CPU otherwise) and switched to
    evaluation mode. Predictions are the arg-max of the model outputs.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable of `(inputs, labels)` batches.
        num_classes: Number of classes listed in the report; labels are
            expected to be the integers `0 .. num_classes - 1`.

    Returns:
        The macro-averaged F1 score (also printed).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.eval()

    all_labels = []
    all_preds = []

    with torch.no_grad():  # inference only, no gradients needed
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            # Collect all predictions and labels for the score calculation
            all_labels.extend(labels.cpu().tolist())
            all_preds.extend(predicted.cpu().tolist())

    # Macro F1: unweighted mean of the per-class F1 scores
    f1 = f1_score(all_labels, all_preds, average='macro')

    # Passing `labels` explicitly keeps `target_names` aligned even when some
    # class never appears in the labels or predictions; without it the name
    # count would not match the number of classes found in the data.
    class_ids = list(range(num_classes))
    report = classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[f"Class {i}" for i in class_ids],
    )

    print(f"Macro F1-score: {f1}")
    print("Classification Report:\n", report)
    return f1


# Evaluation helper, not a pytest test: keep test runners from collecting it
# because of its `test_` prefix.
test_model_with_f1.__test__ = False

## Network

Let's use a ResNet18 architecture for our CNN...

### Define the training function

In [None]:
class TensorLabelDataset(Dataset):
    """Wrap a dataset of `(image, label)` pairs so labels come back as tensors.

    Datasets that return plain-int labels cannot be concatenated and batched
    together with datasets that return tensor labels; wrapping the former
    gives every sample a `torch.long` label tensor.
    """

    def __init__(self, dataset):
        # Any indexable dataset whose items unpack as (image, label).
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, label = self.dataset[idx]
        # as_tensor accepts ints and existing tensors alike; unlike
        # torch.tensor it neither warns nor copies when the label is already
        # a long tensor.
        label = torch.as_tensor(label, dtype=torch.long)
        return img, label

In [10]:
def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader` and return the mean batch loss."""
    model.train()
    running_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    return running_loss / max(len(loader), 1)


def _macro_f1_on_loader(model, loader, device):
    """Return the macro-averaged F1 score of `model`'s arg-max predictions."""
    model.eval()
    all_labels, all_preds = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(device))
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
    return f1_score(all_labels, all_preds, average='macro')


def _replace_classifier_head(model, model_name, num_classes):
    """Swap the final linear layer of `model` for a fresh `num_classes` head."""
    if model_name == 'resnet':
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif model_name == 'efficientnet':
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    elif model_name == 'vit':
        model.heads.head = nn.Linear(model.heads.head.in_features, num_classes)
    else:
        raise ValueError("Model name not recognized.")


def train_model_with_pseudo_labeling_and_grid_search(
    model,
    train_loader,
    valid_loader,
    unlabelled_loader,
    num_classes,
    num_epochs=10,
    learning_rate=0.001,
    log_filename='training_log.csv',
    model_name='resnet',
    batch_size=64
):
    """Pseudo-label the unlabelled data, retrain, then grid-search fine-tuning.

    Steps (matching the progress messages that are printed):
      1. Train a copy of `model` on `train_loader`.
      2. Predict a pseudo-label for every sample of `unlabelled_loader`.
      3. Retrain a copy of that model on labelled + pseudo-labelled data.
      4. For each layer-unfreezing configuration of `model_name`: copy the
         retrained model, replace its classifier head with a fresh
         `num_classes`-way linear layer, train only the unfrozen layers on
         `train_loader`, and score macro F1 on `valid_loader` after each
         epoch.

    Per-epoch results are written to `logs/<log_filename>`. The state dict of
    the configuration with the highest final-epoch validation macro F1 is
    saved to `logs/best_model_<model_name>.pth` (tensors stored on the CPU).
    The passed-in `model` is never modified; only copies are trained.

    Args:
        model: Network to start from.
        train_loader: Loader of labelled `(inputs, labels)` batches; its
            `.dataset` is reused to build the combined dataset.
        valid_loader: Loader of labelled batches used for validation.
        unlabelled_loader: Loader of `(inputs, _)` batches; the second
            element is ignored.
        num_classes: Number of target classes.
        num_epochs: Epochs used in each training stage.
        learning_rate: Adam learning rate used in each stage.
        log_filename: CSV file name created inside `logs/`.
        model_name: One of 'resnet', 'efficientnet', 'vit'.
        batch_size: Batch size of the combined (labelled + pseudo) loader.

    Raises:
        ValueError: If `model_name` is not recognised. This is raised before
            any training starts.
    """
    from torch.utils.data import TensorDataset, ConcatDataset

    # Parameter-name prefixes to unfreeze, per architecture and configuration.
    unfreeze_configs_by_model = {
        'resnet': {
            'fc': ['fc'],
            'fc+layer4': ['layer4', 'fc'],
            'fc+layer3+layer4': ['layer3', 'layer4', 'fc'],
        },
        'efficientnet': {
            'fc': ['classifier.1'],
            'fc+features8': ['features.8', 'classifier.1'],
            'fc+features7+features8': ['features.7', 'features.8', 'classifier.1'],
        },
        'vit': {
            'fc': ['heads.head'],
            'fc+encoder11': ['encoder.layers.encoder_layer_11', 'heads.head'],
            'fc+encoder10+encoder11': ['encoder.layers.encoder_layer_10', 'encoder.layers.encoder_layer_11', 'heads.head'],
        },
    }
    # Fail fast: reject an unknown architecture before spending time training.
    if model_name not in unfreeze_configs_by_model:
        raise ValueError("Model name not recognized.")
    unfreeze_configs = unfreeze_configs_by_model[model_name]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Logs and the best checkpoint are both written under 'logs'.
    os.makedirs('logs', exist_ok=True)

    criterion = nn.CrossEntropyLoss()

    ### Phase 1: Initial Training on Labeled Data ###

    print("Starting Phase 1: Initial Training on Labeled Data...")
    initial_model = deepcopy(model).to(device)
    optimizer = Adam(initial_model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        avg_loss = _train_one_epoch(initial_model, train_loader, criterion, optimizer, device)
        print(f"Initial Training Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    ### Phase 2: Generate Pseudo-Labels for Unlabeled Data ###

    print("Starting Phase 2: Generating Pseudo-Labels for Unlabeled Data...")
    initial_model.eval()
    pseudo_inputs = []
    pseudo_labels = []

    with torch.no_grad():
        for inputs, _ in unlabelled_loader:
            outputs = initial_model(inputs.to(device))
            # The model may expose more outputs than there are target classes
            # (e.g. an unchanged pretrained head); only the first num_classes
            # scores correspond to valid labels, so restrict the arg-max to
            # them.
            predicted = outputs[:, :num_classes].argmax(dim=1)
            pseudo_inputs.append(inputs.cpu())
            pseudo_labels.append(predicted.cpu())

    ### Combine Labeled and Pseudo-Labeled Data ###

    print("Combining Labeled and Pseudo-Labeled Data...")
    # Wrap train_loader.dataset so its labels are tensors, like the
    # pseudo-labels; mixing int and tensor labels breaks default collation.
    tensor_train_dataset = TensorLabelDataset(train_loader.dataset)

    if pseudo_inputs:
        pseudo_dataset = TensorDataset(torch.cat(pseudo_inputs), torch.cat(pseudo_labels))
        combined_dataset = ConcatDataset([tensor_train_dataset, pseudo_dataset])
    else:
        # No unlabelled samples: retrain on the labelled data alone.
        combined_dataset = tensor_train_dataset

    # NOTE(review): with num_workers > 0 the dataset objects are sent to
    # worker processes; confirm this works on platforms that spawn workers
    # when the wrapper class is defined interactively.
    combined_loader = torch.utils.data.DataLoader(
        combined_dataset, batch_size=batch_size, shuffle=True, num_workers=2
    )

    ### Phase 3: Retrain Model on Combined Data ###

    print("Starting Phase 3: Retraining Model on Combined Data...")
    retrain_model = deepcopy(initial_model).to(device)
    # Fresh optimizer so no Adam state carries over from phase 1.
    optimizer = Adam(retrain_model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        avg_loss = _train_one_epoch(retrain_model, combined_loader, criterion, optimizer, device)
        print(f"Retraining Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    ### Phase 4: Transfer Learning with Grid Search ###

    print("Starting Phase 4: Transfer Learning with Grid Search...")

    # -inf so that a configuration scoring exactly 0.0 is still recorded.
    best_f1 = float("-inf")
    best_config = ''
    best_model_state = None

    # Open log file for recording training progress
    with open(os.path.join('logs', log_filename), mode='w', newline='') as log_file:
        log_writer = csv.writer(log_file)
        log_writer.writerow(['Configuration', 'Epoch', 'Training Loss', 'Validation Macro F1'])

        for config_name, layers_to_unfreeze in unfreeze_configs.items():
            print(f"\nStarting Transfer Learning Phase with configuration: {config_name}")

            # Every configuration starts from the same retrained weights.
            finetune_model = deepcopy(retrain_model)
            _replace_classifier_head(finetune_model, model_name, num_classes)

            # Freeze everything, then re-enable gradients for parameters whose
            # name starts with one of the configured prefixes.
            for param in finetune_model.parameters():
                param.requires_grad = False

            prefixes = tuple(layers_to_unfreeze)
            for name, param in finetune_model.named_parameters():
                if name.startswith(prefixes):
                    param.requires_grad = True
                    print(f"Unfreezing layer: {name}")

            finetune_model = finetune_model.to(device)

            # Optimise only the unfrozen parameters.
            trainable_params = [p for p in finetune_model.parameters() if p.requires_grad]
            optimizer = Adam(trainable_params, lr=learning_rate)

            f1 = None  # stays None when num_epochs == 0
            for epoch in range(num_epochs):
                avg_loss = _train_one_epoch(finetune_model, train_loader, criterion, optimizer, device)
                f1 = _macro_f1_on_loader(finetune_model, valid_loader, device)

                # Log results
                log_writer.writerow([config_name, epoch + 1, avg_loss, f1])
                print(f"Config: {config_name}, Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Validation Macro F1: {f1:.4f}")

            # Configurations are compared on their final-epoch validation score.
            if f1 is not None and f1 > best_f1:
                best_f1 = f1
                best_config = config_name
                # Snapshot on the CPU so the checkpoint also loads on machines
                # without the device it was trained on.
                best_model_state = {
                    key: value.detach().cpu().clone()
                    for key, value in finetune_model.state_dict().items()
                }

    if best_model_state is None:
        print("No configuration was evaluated; no model was saved.")
        return

    print(f"\nBest Configuration: {best_config} with Macro F1 Score: {best_f1}")
    # Save the best model
    torch.save(best_model_state, os.path.join('logs', f"best_model_{model_name}.pth"))

## ResNet18

In [11]:
# We will keep this for later
# Fetch a pretrained ResNet18 through torch.hub, pinned to the pytorch/vision
# v0.10.0 tag. Later cells deep-copy model0 rather than loading it again.
model0 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)


# Print every parameter name and shape. The name prefixes (e.g. 'layer4',
# 'fc') are what the 'resnet' unfreeze configurations in the training
# function match against.
for name, param in model0.named_parameters():
    print(f"Name: {name}, Shape: {param.shape}")

Name: conv1.weight, Shape: torch.Size([64, 3, 7, 7])
Name: bn1.weight, Shape: torch.Size([64])
Name: bn1.bias, Shape: torch.Size([64])
Name: layer1.0.conv1.weight, Shape: torch.Size([64, 64, 3, 3])
Name: layer1.0.bn1.weight, Shape: torch.Size([64])
Name: layer1.0.bn1.bias, Shape: torch.Size([64])
Name: layer1.0.conv2.weight, Shape: torch.Size([64, 64, 3, 3])
Name: layer1.0.bn2.weight, Shape: torch.Size([64])
Name: layer1.0.bn2.bias, Shape: torch.Size([64])
Name: layer1.1.conv1.weight, Shape: torch.Size([64, 64, 3, 3])
Name: layer1.1.bn1.weight, Shape: torch.Size([64])
Name: layer1.1.bn1.bias, Shape: torch.Size([64])
Name: layer1.1.conv2.weight, Shape: torch.Size([64, 64, 3, 3])
Name: layer1.1.bn2.weight, Shape: torch.Size([64])
Name: layer1.1.bn2.bias, Shape: torch.Size([64])
Name: layer2.0.conv1.weight, Shape: torch.Size([128, 64, 3, 3])
Name: layer2.0.bn1.weight, Shape: torch.Size([128])
Name: layer2.0.bn1.bias, Shape: torch.Size([128])
Name: layer2.0.conv2.weight, Shape: torch.Size(

Using cache found in C:\Users\weita/.cache\torch\hub\pytorch_vision_v0.10.0


In [12]:
# Example usage with ResNet18
# Work on a copy so model0 keeps its original pretrained weights.
model_resnet18 = deepcopy(model0)  # assuming model0 is a pretrained resnet18
model_resnet18 = model_resnet18.to(device)
# Runs the full pipeline; the log goes to logs/resnet18_training_log.csv and
# the best weights to logs/best_model_resnet.pth.
train_model_with_pseudo_labeling_and_grid_search(
    model=model_resnet18,
    train_loader=train_loader,
    valid_loader=valid_loader,
    unlabelled_loader=unlabelled_loader,
    num_classes=num_classes,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    log_filename='resnet18_training_log.csv',
    model_name='resnet',
    batch_size=batch_size
)


Using device: cuda
Starting Phase 1: Initial Training on Labeled Data...
Initial Training Epoch [1/10], Loss: 3.4981
Initial Training Epoch [2/10], Loss: 1.5099
Initial Training Epoch [3/10], Loss: 0.9794
Initial Training Epoch [4/10], Loss: 0.9971
Initial Training Epoch [5/10], Loss: 0.9152
Initial Training Epoch [6/10], Loss: 0.7804
Initial Training Epoch [7/10], Loss: 0.6554
Initial Training Epoch [8/10], Loss: 0.5819
Initial Training Epoch [9/10], Loss: 0.6586
Initial Training Epoch [10/10], Loss: 0.7637
Starting Phase 2: Generating Pseudo-Labels for Unlabeled Data...
Combining Labeled and Pseudo-Labeled Data...
Starting Phase 3: Retraining Model on Combined Data...


AttributeError: Caught AttributeError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 55, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 317, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 174, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 174, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 142, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 211, in collate_tensor_fn
    numel = sum(x.numel() for x in batch)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\weita\anaconda3\envs\ECE4179_CV\Lib\site-packages\torch\utils\data\_utils\collate.py", line 211, in <genexpr>
    numel = sum(x.numel() for x in batch)
                ^^^^^^^
AttributeError: 'int' object has no attribute 'numel'


In [None]:
# Rebuild the architecture the checkpoint was saved from: a ResNet18 whose
# final layer is a num_classes-way linear head.
best_model_resnet = models.resnet18(pretrained=False)
best_model_resnet.fc = nn.Linear(best_model_resnet.fc.in_features, num_classes)
best_model_resnet = best_model_resnet.to(device)

# Load the best model weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only
# machine.
best_model_resnet.load_state_dict(
    torch.load('logs/best_model_resnet.pth', map_location=device)
)

# Set the model to evaluation mode
best_model_resnet.eval()

In [None]:
# Report top-1 accuracy of the best ResNet18 on the test set
test_model(best_model_resnet, test_loader)

In [None]:
# Report macro F1 and the per-class report of the best ResNet18 on the test set
test_model_with_f1(best_model_resnet, test_loader)

## EfficientNet

In [None]:
# Load pretrained EfficientNet-B0 model from torchvision hub
# (weights are selected by name: EfficientNet_B0_Weights.IMAGENET1K_V1)
model1 = torch.hub.load('pytorch/vision', 'efficientnet_b0', weights="EfficientNet_B0_Weights.IMAGENET1K_V1")

# Print every parameter name and shape. The name prefixes (e.g. 'features.8',
# 'classifier.1') are what the 'efficientnet' unfreeze configurations in the
# training function match against.
for name, param in model1.named_parameters():
    print(f"Name: {name}, Shape: {param.shape}")

In [None]:
# Example usage with EfficientNetB0
# Work on a copy so model1 keeps its original pretrained weights.
model_efficientnetb0 = deepcopy(model1)  # assuming model1 is a pretrained efficientnetb0
model_efficientnetb0 = model_efficientnetb0.to(device)
# Call the training function
# The log goes to logs/efficientnetb0_training_log.csv and the best weights to
# logs/best_model_efficientnet.pth.
train_model_with_pseudo_labeling_and_grid_search(
    model=model_efficientnetb0,
    train_loader=train_loader,
    valid_loader=valid_loader,
    unlabelled_loader=unlabelled_loader,
    num_classes=num_classes,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    log_filename='efficientnetb0_training_log.csv',
    model_name='efficientnet',
    batch_size=batch_size
)

In [None]:
# Rebuild the architecture the checkpoint was saved from: an EfficientNet-B0
# whose classifier ends in a num_classes-way linear layer.
best_model_efficientnet = models.efficientnet_b0(pretrained=False)
best_model_efficientnet.classifier[1] = nn.Linear(best_model_efficientnet.classifier[1].in_features, num_classes)
best_model_efficientnet = best_model_efficientnet.to(device)

# Informational only: the checkpoint path below does not depend on it.
best_config = 'fc+features8'  # Replace with your best configuration name

# Load the best model weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only
# machine.
best_model_efficientnet.load_state_dict(
    torch.load('logs/best_model_efficientnet.pth', map_location=device)
)

# Set the model to evaluation mode
best_model_efficientnet.eval()

In [None]:
# Report top-1 accuracy of the best EfficientNet-B0 on the test set
test_model(best_model_efficientnet, test_loader)

In [None]:
# Report macro F1 and the per-class report of the best EfficientNet-B0 on the
# test set
test_model_with_f1(best_model_efficientnet, test_loader)

## Vision Transformer (ViT)

In [None]:
# Set image size to 224x224 to match the input size of ViT
# NOTE: these assignments overwrite the 96x96 transforms defined earlier in
# the notebook; datasets must be re-created to pick them up.

# Training: upscale, then padded random crop and random mirroring.
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.RandomCrop(224, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Unlabelled: upscale and random mirroring only.
transform_unlabelled = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation: deterministic. The image is already 224x224 after Resize, so
# the CenterCrop(224) that follows leaves it unchanged.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [None]:
# Load the labelled 'train' split twice (no re-download) so that training and
# validation can use different transforms: the augmented view feeds training,
# while the deterministic view (transform_test) feeds validation. Splitting a
# single augmented dataset would apply random crops/flips to validation
# images and make validation metrics noisy.
trainval_set = STL10(dataset_dir, split='train', transform=transform_train, download=False)
_trainval_eval_set = STL10(dataset_dir, split='train', transform=transform_test, download=False)

# Use 10% of the data for training (simulating a low data scenario)
num_train = int(len(trainval_set) * 0.1)

# Split data into train/validation sets with a fixed random seed
torch.manual_seed(0)  # Ensure reproducibility
train_set, _val_split = random_split(trainval_set, [num_train, len(trainval_set) - num_train])

# Same validation indices as the split above, but read through the
# non-augmented dataset.
val_set = Subset(_trainval_eval_set, _val_split.indices)

# Load test set without redownloading data
test_set = STL10(dataset_dir, split='test', transform=transform_test, download=False)

In [None]:
# Re-create the unlabelled dataset so it picks up the 224x224 transform.
unlabelled_set = STL10(dataset_dir, split='unlabeled', transform=transform_unlabelled, download=False)

# Determine the size of the subset (1/1000 of the full dataset)
subset_size = len(unlabelled_set) // 1000  # This will be 100 samples

# Randomly select indices for the subset. A dedicated, seeded generator keeps
# the selection reproducible across runs (matching the seeded train/val
# split) without touching the global `random` state.
random_indices = random.Random(0).sample(range(len(unlabelled_set)), subset_size)

# Create a subset of the unlabelled dataset
unlabelled_subset = Subset(unlabelled_set, random_indices)

# Now, create the DataLoader using the subset
unlabelled_loader = DataLoader(unlabelled_subset, shuffle=True, batch_size=batch_size, num_workers=2)

In [None]:
# Rebuild the three labelled loaders on top of the 224x224 datasets. Only the
# training loader is shuffled; validation and test keep dataset order.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

valid_loader = DataLoader(val_set, num_workers=2, batch_size=batch_size)
test_loader = DataLoader(test_set, num_workers=2, batch_size=batch_size)

In [None]:
# Load pretrained Vision Transformer (ViT) model from torchvision models
model2 = models.vit_b_16(pretrained=True)

# Print every parameter name and shape. The name prefixes (e.g.
# 'encoder.layers.encoder_layer_11', 'heads.head') are what the 'vit'
# unfreeze configurations in the training function match against.
for name, param in model2.named_parameters():
    print(f"Name: {name}, Shape: {param.shape}")

In [None]:
# Example usage with Vision Transformer (ViT)
# Work on a copy so model2 keeps its original pretrained weights.
model_vit = deepcopy(model2)  # assuming model2 is a pretrained Vision Transformer (ViT)
model_vit = model_vit.to(device)

# Call the training function
# The log goes to logs/vit_training_log.csv and the best weights to
# logs/best_model_vit.pth.
train_model_with_pseudo_labeling_and_grid_search(
    model=model_vit,
    train_loader=train_loader,
    valid_loader=valid_loader,
    unlabelled_loader=unlabelled_loader,
    num_classes=num_classes,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    log_filename='vit_training_log.csv',
    model_name='vit',
    batch_size=batch_size
)


In [None]:
# Rebuild the architecture the checkpoint was saved from: a ViT-B/16 whose
# head is a num_classes-way linear layer.
best_model_vit = models.vit_b_16(pretrained=False)
best_model_vit.heads.head = nn.Linear(best_model_vit.heads.head.in_features, num_classes)
best_model_vit = best_model_vit.to(device)

# Informational only: the checkpoint path below does not depend on it.
best_config = 'fc+encoder11'  # Replace with your best configuration name

# Load the best model weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only
# machine.
best_model_vit.load_state_dict(
    torch.load('logs/best_model_vit.pth', map_location=device)
)

# Set the model to evaluation mode
best_model_vit.eval()

In [None]:
# Report top-1 accuracy of the best ViT on the test set
test_model(best_model_vit, test_loader)

In [None]:
# Report macro F1 and the per-class report of the best ViT on the test set
test_model_with_f1(best_model_vit, test_loader)