In [3]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from transformers import AutoFeatureExtractor, SwinModel
from PIL import Image
# from timm import create_model
from tqdm import tqdm

def create_featureset(dataset_name: str):
	"""Extract pooled Swin-Tiny features for a CIFAR training split and save them.

	Every image of the selected training set is resized, normalized and passed
	through the pretrained ``microsoft/swin-tiny-patch4-window7-224`` backbone.
	The model's ``pooler_output`` for each image and the matching labels are
	concatenated and written to ``"{dataset_name}_swin_features.pt"`` as a dict
	with the keys ``'features'`` and ``'labels'``.

	Args:
		dataset_name: Either ``'cifar10'`` or ``'cifar100'``.

	Raises:
		ValueError: If ``dataset_name`` is not one of the supported names.
	"""
	# Validate before any expensive work (checkpoint loading, dataset download).
	# Previously an unknown name fell through the if/elif below and surfaced
	# later as an UnboundLocalError on `dataset`.
	supported_names = ('cifar10', 'cifar100')
	if dataset_name not in supported_names:
		raise ValueError(
			f"Unsupported dataset_name {dataset_name!r}; expected one of {supported_names}"
		)

	# Parameters
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	batch_size = 64
	save_path = f"{dataset_name}_swin_features.pt"

	# Load HuggingFace Swin backbone. Preprocessing is handled by the torchvision
	# transform below, so no Hugging Face feature extractor is instantiated here.
	model_name = "microsoft/swin-tiny-patch4-window7-224"
	model = SwinModel.from_pretrained(model_name)
	model.eval().to(device)

	# Resize to the 224-pixel input size named in the checkpoint and normalize
	# with the widely used ImageNet mean/std.
	# NOTE(review): presumably these match the checkpoint's own preprocessor
	# config — confirm against the model card.
	transform = transforms.Compose([
		transforms.Resize(224),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
	])

	# Load the CIFAR training split (downloaded to ./data on first use).
	if dataset_name == 'cifar10':
		dataset_cls = datasets.CIFAR10
	else:
		dataset_cls = datasets.CIFAR100
	dataset = dataset_cls(root='./data', train=True, download=True, transform=transform)

	# shuffle=False keeps features and labels in dataset order.
	dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

	# Store features + labels
	all_features = []
	all_labels = []

	with torch.no_grad():
		for imgs, labels in tqdm(dataloader):
			imgs = imgs.to(device)
			features = model(imgs).pooler_output
			# Move each batch back to the CPU so the accumulated list does not
			# hold device memory.
			all_features.append(features.cpu())
			all_labels.append(labels)

	# Save to file
	features_tensor = torch.cat(all_features)
	labels_tensor = torch.cat(all_labels)
	torch.save({'features': features_tensor, 'labels': labels_tensor}, save_path)

	print(f"Saved features to {save_path}")


# Build the cached feature files for both CIFAR variants, in this order.
for cifar_variant in ('cifar10', 'cifar100'):
	create_featureset(cifar_variant)


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 782/782 [00:57<00:00, 13.59it/s]


Saved features to cifar10_swin_features.pt


Some weights of the model checkpoint at microsoft/swin-tiny-patch4-window7-224 were not used when initializing SwinModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SwinModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SwinModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 782/782 [00:57<00:00, 13.60it/s]


Saved features to cifar100_swin_features.pt


In [2]:
import torch
# print(torch.cuda.is_available())  # This should return True if CUDA is available
# print(torch.cuda.current_device())  # This prints the current GPU device index
# print(torch.cuda.get_device_name(0))  # This should print your GPU model name


In [39]:
# from sklearn.decomposition import PCA
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
# Load the cached Swin features.
# NOTE: the variable is called `cifar10`, but the file loaded here holds the
# CIFAR-100 features. The name is kept so that any other cell referring to
# `cifar10` / `cifar10_model` keeps working.
# torch.load unpickles the file, so only load feature files you produced yourself.
cifar10 = torch.load('cifar100_swin_features.pt')

# Config
batch_size = 64
epochs = 300
lr = 0.01
seed = 0
# The original cell selected CUDA when available and then immediately overrode
# the choice with the CPU; only the effective assignment is kept.
device = torch.device("cpu")
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across Restart & Run All.
torch.manual_seed(seed)

# Label range, computed with tensor reductions instead of Python's built-in
# min()/max(), which iterate over the tensor one element at a time.
print(cifar10['labels'].min(), cifar10['labels'].max())

# Load data
X = cifar10['features']
y = cifar10['labels'].long()

# Derive the number of output classes from the labels instead of hard-coding
# 100, so the same cell also works for a feature file with fewer classes.
# Assumes labels are 0-based contiguous class indices.
num_classes = int(y.max().item()) + 1

# Model: a single linear layer on top of the frozen features.
cifar10_model = torch.nn.Sequential(
	torch.nn.Linear(X.shape[1], num_classes),
).to(device)

optimizer = torch.optim.SGD(cifar10_model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

# Dataset
full_dataset = TensorDataset(X, y)

# Split sizes (80% / 10% / remainder)
total = len(full_dataset)
train_size = int(0.8 * total)
val_size = int(0.1 * total)
test_size = total - train_size - val_size

# A dedicated, seeded generator makes the split itself reproducible.
split_generator = torch.Generator().manual_seed(seed)
train_set, val_set, test_set = random_split(
	full_dataset, [train_size, val_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# Training loop: each epoch runs one optimisation pass over train_loader and
# then a gradient-free pass over val_loader, printing sample-weighted metrics.
for epoch in range(epochs):
	# ---- training pass ----
	cifar10_model.train()
	total_loss = 0
	correct = 0
	total = 0

	for xb, yb in train_loader:
		xb = xb.to(device)
		yb = yb.to(device)

		outputs = cifar10_model(xb)
		loss = criterion(outputs, yb)

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		# The loss is a per-batch mean; scale by the batch size so the epoch
		# figure is an average over samples rather than over batches.
		preds = outputs.argmax(dim=1)
		total_loss += loss.item() * yb.shape[0]
		correct += (preds == yb).sum().item()
		total += yb.shape[0]

	train_loss = total_loss / total
	train_acc = correct / total

	# ---- validation pass ----
	cifar10_model.eval()
	val_loss = 0
	val_correct = 0
	val_total = 0

	with torch.no_grad():
		for xb, yb in val_loader:
			xb = xb.to(device)
			yb = yb.to(device)

			outputs = cifar10_model(xb)
			loss = criterion(outputs, yb)
			preds = outputs.argmax(dim=1)

			val_loss += loss.item() * yb.shape[0]
			val_correct += (preds == yb).sum().item()
			val_total += yb.shape[0]

	val_loss = val_loss / val_total
	val_acc = val_correct / val_total

	print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.2%} | Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.2%}")

# Accuracy on the held-out test split, measured once after training finishes.
cifar10_model.eval()
test_correct, test_total = 0, 0

with torch.no_grad():
	for xb, yb in test_loader:
		xb = xb.to(device)
		yb = yb.to(device)
		outputs = cifar10_model(xb)
		# Predicted class = index of the largest logit in each row.
		preds = outputs.argmax(dim=1)
		test_correct += (preds == yb).sum().item()
		test_total += yb.shape[0]

test_acc = test_correct / test_total
print(f"\n✅ Test Accuracy: {test_acc:.2%}")


tensor(0) tensor(99)
Epoch 1: Train Loss = 3.3888, Train Acc = 40.52% | Val Loss = 2.4643, Val Acc = 59.14%
Epoch 2: Train Loss = 1.9920, Train Acc = 63.81% | Val Loss = 1.7087, Val Acc = 65.22%
Epoch 3: Train Loss = 1.5147, Train Acc = 67.80% | Val Loss = 1.4208, Val Acc = 67.16%
Epoch 4: Train Loss = 1.3054, Train Acc = 69.80% | Val Loss = 1.2755, Val Acc = 68.26%
Epoch 5: Train Loss = 1.1867, Train Acc = 71.15% | Val Loss = 1.1863, Val Acc = 69.78%
Epoch 6: Train Loss = 1.1081, Train Acc = 72.33% | Val Loss = 1.1258, Val Acc = 70.84%
Epoch 7: Train Loss = 1.0508, Train Acc = 73.06% | Val Loss = 1.0807, Val Acc = 71.18%
Epoch 8: Train Loss = 1.0061, Train Acc = 73.78% | Val Loss = 1.0464, Val Acc = 71.92%
Epoch 9: Train Loss = 0.9702, Train Acc = 74.47% | Val Loss = 1.0191, Val Acc = 71.92%
Epoch 10: Train Loss = 0.9402, Train Acc = 75.06% | Val Loss = 0.9962, Val Acc = 72.44%
Epoch 11: Train Loss = 0.9143, Train Acc = 75.49% | Val Loss = 0.9785, Val Acc = 72.78%
Epoch 12: Train Loss

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image -> tensor, then single-channel normalisation.
# NOTE(review): 0.1307 / 0.3081 look like the commonly quoted MNIST mean/std — confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Three-layer MLP classifier: 784 -> 256 -> 128 -> 10.
mlp_layers = [
    nn.Flatten(),          # 28x28 image -> 784-vector
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10),    # one logit per MNIST digit class
]
model = nn.Sequential(*mlp_layers).to(device)

# MNIST train/test splits; only the training split triggers a download.
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

# Batch iterators: shuffled mini-batches for training, large fixed-order
# batches for evaluation.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Objective and optimiser.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


def train(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean training loss.

    The returned value is averaged over samples: each batch loss is weighted by
    the batch size. The previous version averaged the per-batch losses without
    weights, which over-weights a smaller final batch and was inconsistent with
    the sample-weighted averaging used by the linear-probe training loop in
    this notebook.

    Args:
        model: Module to train; switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable ``criterion(output, target)``. Assumed to
            return a per-batch mean (the default for ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sample-weighted average loss over the pass, as a Python float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    num_samples = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        # Undo the per-batch mean so batches of different size contribute
        # proportionally to the epoch average.
        batch_size = y.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size
    return total_loss / num_samples


def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Both metrics are computed over the samples actually yielded by ``loader``.
    The loss is weighted by batch size, and accuracy is divided by the number
    of samples seen. The previous version divided by ``len(loader.dataset)``,
    which is wrong when the loader skips samples (sampler, ``drop_last``) and
    fails for iterables without a ``dataset`` attribute.

    Args:
        model: Module to evaluate; switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(output, target)``. Assumed to
            return a per-batch mean (the default for ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(average_loss, accuracy)`` of Python floats.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    num_samples = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            batch_size = y.size(0)
            # Weight the per-batch mean loss by the batch size.
            total_loss += criterion(output, y).item() * batch_size
            pred = output.argmax(dim=1)
            correct += pred.eq(y).sum().item()
            num_samples += batch_size
    accuracy = correct / num_samples
    return total_loss / num_samples, accuracy


# Ten epochs, numbered from 1; after each one the model is scored on the test loader.
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss = train(
        model=model,
        loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
    )
    test_loss, test_acc = evaluate(
        model=model,
        loader=test_loader,
        criterion=criterion,
        device=device,
    )
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Test Loss = {test_loss:.4f}, Accuracy = {test_acc:.4f}")


Epoch 1: Train Loss = 0.2294, Test Loss = 0.1165, Accuracy = 0.9635
Epoch 2: Train Loss = 0.0925, Test Loss = 0.1266, Accuracy = 0.9593
Epoch 3: Train Loss = 0.0637, Test Loss = 0.0829, Accuracy = 0.9755
Epoch 4: Train Loss = 0.0498, Test Loss = 0.0843, Accuracy = 0.9734
Epoch 5: Train Loss = 0.0411, Test Loss = 0.0675, Accuracy = 0.9800
Epoch 6: Train Loss = 0.0321, Test Loss = 0.0831, Accuracy = 0.9785
Epoch 7: Train Loss = 0.0290, Test Loss = 0.0757, Accuracy = 0.9789
Epoch 8: Train Loss = 0.0254, Test Loss = 0.0925, Accuracy = 0.9764
Epoch 9: Train Loss = 0.0225, Test Loss = 0.0897, Accuracy = 0.9778
Epoch 10: Train Loss = 0.0176, Test Loss = 0.0950, Accuracy = 0.9783
