In [1]:
import os
from PIL import Image
from torchvision import transforms
import torch
import torchvision.models as models
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder


In [2]:
# Pick the compute device once for the whole notebook: the first CUDA GPU
# when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Device: {device}')

Device: cuda:0


In [10]:
def load_model():
    """Build the pretrained VGG16 ``features`` module used for extraction.

    Loads torchvision's VGG16 with the ``IMAGENET1K_V1`` weights, keeps only
    its ``features`` sub-module, switches that sub-module to eval mode and
    moves it to the module-level ``device``.

    Returns:
        The ``features`` sub-module of VGG16, in eval mode, on ``device``.
    """
    pretrained_weights = models.VGG16_Weights.IMAGENET1K_V1
    vgg16 = models.vgg16(weights=pretrained_weights)

    # Only the ``features`` part is kept; the rest of the network is dropped.
    feature_extractor = vgg16.features
    feature_extractor.eval()
    feature_extractor.to(device)
    return feature_extractor

def extract_features(model, loader):
    """Run ``model`` over every batch in ``loader`` and collect the outputs.

    Args:
        model: The callable applied to each batch (normally a
            ``torch.nn.Module``). It is called once per batch.
        loader: An iterable yielding ``(batch_images, labels)`` pairs; the
            labels are ignored.

    Returns:
        A list holding one NumPy array per sample (the model output for that
        sample, copied to the CPU), in the order the loader produced them.
        An empty loader yields an empty list.
    """
    # Send inputs to wherever the model's weights actually live so the two can
    # never end up on different devices. A callable without parameters falls
    # back to the notebook-level ``device``.
    parameters = getattr(model, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    target_device = first_param.device if first_param is not None else device

    all_features = []
    # Inference only: without no_grad() every forward pass records an autograd
    # graph, which costs memory for outputs that are detached right away.
    with torch.no_grad():
        for batch_images, _ in loader:
            batch_images = batch_images.to(target_device)
            features = model(batch_images)
            # extend() iterates over the first (batch) axis, so each sample
            # becomes its own list entry.
            all_features.extend(features.detach().cpu().numpy())
    return all_features

In [11]:
########################

In [14]:
def main():
    """Extract features for every image under ``samples`` and save them.

    Builds an ``ImageFolder`` dataset rooted at ``samples``, runs the model
    returned by ``load_model()`` over it in batches of 8, and writes one
    comma-separated text file per image into the ``features`` directory
    (created if missing). Each file is named ``<image stem>_feature.txt`` and
    contains the flattened feature values for that image.

    Raises:
        RuntimeError: If the number of extracted feature arrays does not match
            the number of images in the dataset.
    """
    # Resize/crop to 224x224 and normalise per channel. The mean/std values
    # are the ones conventionally paired with ImageNet-pretrained torchvision
    # models.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Set up the directory containing the images
    dataset = ImageFolder(root='samples', transform=preprocess)
    # shuffle=False keeps the loader order aligned with dataset.imgs, which the
    # save loop below relies on to pair each feature with its source path.
    loader = DataLoader(dataset, batch_size=8, shuffle=False, num_workers=4)

    model = load_model()
    features_batched = extract_features(model, loader)

    if len(features_batched) != len(dataset.imgs):
        raise RuntimeError(
            f'Expected {len(dataset.imgs)} feature arrays, got {len(features_batched)}'
        )

    output_directory = 'features'
    # open(..., 'w') does not create missing parent directories.
    os.makedirs(output_directory, exist_ok=True)

    # Save the feature vectors to .txt files
    for (path, _), features in zip(dataset.imgs, features_batched):
        feature_vector = features.flatten()
        # splitext strips only the final extension, so "a.1.jpg" and "a.2.jpg"
        # no longer collapse onto the same output name.
        # NOTE(review): images with the same stem in different class folders
        # still map to the same output file and overwrite each other.
        stem = os.path.splitext(os.path.basename(path))[0]
        output_file_path = os.path.join(output_directory, stem + '_feature.txt')
        with open(output_file_path, 'w') as f:
            f.write(','.join(map(str, feature_vector)))

# Entry point: main() runs when this code executes as the top-level module
# (running the notebook cell or the exported script), not when it is imported.
if __name__ == "__main__":
    main()