In [2]:
import sys
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import random_split, DataLoader

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = torchvision.datasets.ImageFolder(
    root='../images',
    transform=preprocess
)
train_dataset, val_dataset = random_split(dataset, [0.8, 0.2])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)

# Load a pretrained ResNet-50 model with ImageNet weights.
resnet50_model = torchvision.models.resnet50(
    weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1
)

# Replace the final fully connected (fc) layer with nn.Identity().
resnet50_model.fc = nn.Identity()

# Set the model to evaluation mode.
resnet50_model.eval()

# Take one batch from train_dataloader and pass it through the model.
#       Print the output shape and the raw embeddings, then stop after the first batch.
for X, y in train_dataloader:
    out = resnet50_model(X)
    print(out.shape)
    print(out)
    break

torch.Size([32, 2048])
tensor([[1.0059e-01, 4.1759e-01, 1.6776e-01,  ..., 5.2081e-01, 1.1498e-01,
         2.2940e-01],
        [3.9117e-01, 2.3366e-01, 1.7963e-04,  ..., 9.8132e-02, 1.4791e-01,
         5.7737e-01],
        [2.7805e-01, 1.3923e+00, 1.1339e-01,  ..., 4.5049e-01, 7.4545e-01,
         1.6186e-02],
        ...,
        [4.5541e-01, 1.6756e-01, 0.0000e+00,  ..., 1.0186e-01, 4.2693e-01,
         7.3713e-01],
        [1.3626e-01, 2.6164e-01, 1.0709e-01,  ..., 1.7943e-02, 5.5905e-02,
         9.0061e-01],
        [1.0109e-01, 1.2666e+00, 9.1233e-02,  ..., 5.3629e-01, 8.3181e-02,
         7.5868e-01]], grad_fn=<ViewBackward0>)
