In [1]:
import torch
from torchvision import datasets
import numpy as np
from scipy import io
import os
import clip

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Cell output: the model names reported by clip.available_models().
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [3]:
# Load the "RN101" CLIP model onto `device`. clip.load returns a pair:
# the model itself and the image preprocessing transform that goes with it.
model, preprocess = clip.load("RN101", device=device)

In [4]:
# Display the preprocessing transform returned by the clip.load call above.
preprocess

Compose(
    Resize(size=224, interpolation=bicubic, max_size=None, antialias=None)
    CenterCrop(size=(224, 224))
    <function _convert_image_to_rgb at 0x000001FD9A1A60D0>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)

In [5]:
# Display the module structure of the currently loaded CLIP model.
model

CLIP(
  (visual): ModifiedResNet(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
     

In [6]:
# Load the "RN50x4" CLIP model onto `device`. This rebinds both `model` and
# `preprocess`, so cells executed after this one use RN50x4 and its transform.
model, preprocess = clip.load("RN50x4", device=device)

In [7]:
# Display the preprocessing transform that came with the RN50x4 model.
preprocess

Compose(
    Resize(size=288, interpolation=bicubic, max_size=None, antialias=None)
    CenterCrop(size=(288, 288))
    <function _convert_image_to_rgb at 0x000001FD9A1A60D0>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)

In [8]:
# Display the module structure of the RN50x4 model.
model

CLIP(
  (visual): ModifiedResNet(
    (conv1): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv2): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(40, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn3): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
     

In [9]:
from torchvision import datasets
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image's file path.

    Every item is exactly what ``torchvision.datasets.ImageFolder`` yields
    for that index, with the file path appended as one extra trailing element.
    """

    def __getitem__(self, index):
        """Return the parent's item for ``index`` followed by its file path.

        This is the method a DataLoader calls to fetch each sample.
        """
        # What ImageFolder itself would return for this index.
        base_item = super().__getitem__(index)
        # The first field of each ``self.imgs`` entry is the image file path.
        file_path = self.imgs[index][0]
        return (*base_item, file_path)

In [None]:
# Root folder of the training images. ImageFolder treats every sub-directory
# of this folder as one class.
# Raw string: the Windows backslashes are kept literally instead of relying on
# unrecognised escapes such as "\D" or "\I", which newer Python versions warn
# about. The resulting path is character-for-character the same as before.
train_path = r"E:\Datasets\ILSVRC2016\ILSVRC\Data\CLS-LOC\train"
# Each image is run through the CLIP `preprocess` transform when it is loaded;
# items come back as (image, label, path).
train_set = ImageFolderWithPaths(train_path, transform=preprocess)
print('train_set = ', train_set)

In [None]:
import numpy as np
# One image per batch, visited in dataset order. These are the DataLoader
# defaults, written out because the extraction loop reads paths[0] and
# detects folder changes between consecutive samples.
loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=False)
loader

In [None]:
# Start with empty accumulators: no folder seen yet, no labels, no features.
# current_path_head  -> directory of the images currently being accumulated
# last_labels_array  -> label array of the previously processed image
# current_array      -> features collected so far for the current directory
current_path_head = list()
last_labels_array, current_array = np.array([]), np.array([])

In [None]:
from scipy import io
import os

def _save_folder_features(folder_path, feature_chunks, label_array):
    """Stack one folder's image features and write them to ``<folder_path>.mat``.

    Parameters
    ----------
    folder_path : str
        Directory whose images produced the features. ``'.mat'`` is appended
        to it to form the output file name, so the file lands next to the
        folder itself.
    feature_chunks : list of numpy.ndarray
        Outputs of ``model.encode_image``, one array per processed batch,
        stacked row-wise into a single array stored under the ``'feature'`` key.
    label_array : numpy.ndarray
        Label array of the most recent batch from this folder, stored under
        the ``'label'`` key.
    """
    # Stack once per folder instead of re-copying the whole array per image.
    feature_matrix = np.vstack(feature_chunks)
    print('outputs_matrix.shape = ', feature_matrix.shape)
    print('last_labels_array = ', label_array)
    mat_path = folder_path + '.mat'
    io.savemat(mat_path, {'feature': feature_matrix, 'label': label_array})
    print('Save mat file to:', mat_path)


# Reset the accumulation state here so that re-running this cell on its own
# always starts clean rather than continuing from leftovers of an earlier run.
current_path_head = None   # folder whose features are being collected
last_labels_array = None   # label array of the previously processed image
feature_chunks = []        # per-image feature arrays of the current folder

with torch.no_grad():
    for images, labels, paths in loader:
        labels_array = labels.cpu().numpy()

        # The loader yields one image per batch, so paths[0] is that image's
        # file path; its parent directory identifies the class folder.
        folder = os.path.dirname(paths[0])

        if folder != current_path_head:
            # A new folder has started: flush what was collected for the
            # previous one (nothing to flush on the very first image).
            if feature_chunks:
                _save_folder_features(current_path_head, feature_chunks, last_labels_array)
            current_path_head = folder
            feature_chunks = []
        last_labels_array = labels_array

        outputs = model.encode_image(images.to(device))
        outputs_array = outputs.cpu().numpy()
        feature_chunks.append(outputs_array)

    # Save for the last folder (skipped when the loader produced nothing).
    if feature_chunks:
        _save_folder_features(current_path_head, feature_chunks, last_labels_array)