In [1]:
import torch
from torchvision import datasets
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
import numpy as np
from scipy import io
import os
import clip

In [2]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Last expression of the cell: displays the model names this clip build offers.
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [3]:
# Load the "RN50x64" CLIP checkpoint onto `device`; clip.load also returns the
# preprocessing transform that ships with the checkpoint.
model, preprocess = clip.load(name="RN50x64", device=device)

In [4]:
import clip
# Resolve the bicubic-interpolation constant handed to Resize in _transform.
# torchvision releases that provide InterpolationMode use that enum; releases
# without it fall back to Pillow's constant.
try:
    from torchvision.transforms import InterpolationMode
    BICUBIC = InterpolationMode.BICUBIC
except ImportError:
    # `Image` is not imported anywhere else in this notebook, so without this
    # local import the fallback would die with NameError instead of working.
    # Pillow is already a requirement of torchvision itself.
    from PIL import Image
    BICUBIC = Image.BICUBIC
    

def _convert_image_to_rgb(image):
    return image.convert("RGB")
    
def _transform(n_px):
    """Build the image preprocessing pipeline for an ``n_px`` x ``n_px`` input.

    The steps, in order: bicubic resize to ``n_px``, centre crop to ``n_px``,
    conversion to RGB, conversion to a tensor, and per-channel normalisation
    with the fixed mean / std constants below.

    Args:
        n_px: Target size passed to both ``Resize`` and ``CenterCrop``.

    Returns:
        A ``Compose`` object applying the steps above.
    """
    channel_mean = (0.48145466, 0.4578275, 0.40821073)
    channel_std = (0.26862954, 0.26130258, 0.27577711)
    steps = [
        Resize(n_px, interpolation=BICUBIC),
        CenterCrop(n_px),
        _convert_image_to_rgb,
        ToTensor(),
        Normalize(channel_mean, channel_std),
    ]
    return Compose(steps)

def features32(self, x):
    """Run ``x`` through the convolutional backbone ``self`` and average-pool it.

    This is a free function: ``self`` is simply the backbone module passed as
    the first argument. Unlike ``features`` it does not call ``attnpool``; the
    ``layer4`` feature map is reduced with a plain spatial average instead.

    Args:
        self: Module exposing ``conv1``-``conv3``, ``bn1``-``bn3``,
            ``relu1``-``relu3``, ``avgpool`` and ``layer1``-``layer4``.
        x: Input batch; it is cast to the dtype of ``self.conv1.weight`` first.

    Returns:
        The ``layer4`` output averaged over its two spatial axes. Those axes
        are kept with size 1 (shape ``(N, C, 1, 1)`` for a 4-D feature map).
    """
    def stem(x):
        # Three conv -> batch-norm -> ReLU stages followed by the stem pooling.
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        return self.avgpool(x)

    x = x.type(self.conv1.weight.dtype)
    x = stem(x)
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
        x = stage(x)

    # Pool over the full (H, W) extent. The kernel used to be the height
    # alone, which is only a global average when the map is square: wider maps
    # were averaged over their first H columns and taller maps raised. For
    # square maps the kernel - and therefore the result - is unchanged.
    x = F.avg_pool2d(x, kernel_size=tuple(x.shape[2:]))

    return x

# def features32(net, x):
#     x = x.type(net.conv1.weight.dtype)
#     for conv, bn in [(net.conv1, net.bn1), (net.conv2, net.bn2), (net.conv3, net.bn3)]:
#         x = net.relu(bn(conv(x)))
#     #print(1,x.shape)
#     x = net.avgpool(x)
#     x = net.layer1(x)
#     x = net.layer2(x)
#     x = net.layer3(x)
#     x = net.layer4(x)
#     #print(2,x.shape)
#     #x = net.attnpool(x)
#     x=F.avg_pool2d(x,x.shape[2])
#     return x

def features(net, x):
    """Run ``x`` through the backbone ``net`` including its attention pooling.

    Args:
        net: Module exposing ``conv1``-``conv3``, ``bn1``-``bn3``, ``avgpool``,
            ``layer1``-``layer4`` and ``attnpool``, plus either per-stage
            activations ``relu1``-``relu3`` or a single shared ``relu``.
        x: Input batch; it is cast to the dtype of ``net.conv1.weight`` first.

    Returns:
        Whatever ``net.attnpool`` returns for the ``layer4`` feature map.

    Raises:
        AttributeError: If a stage has neither ``relu<N>`` nor a shared
            ``relu`` on ``net``.
    """
    x = x.type(net.conv1.weight.dtype)

    stem = (
        (net.conv1, net.bn1, "relu1"),
        (net.conv2, net.bn2, "relu2"),
        (net.conv3, net.bn3, "relu3"),
    )
    for conv, bn, relu_name in stem:
        # features32 in this notebook reads relu1..relu3 from the same kind of
        # backbone, so prefer those; only fall back to the single shared
        # `relu` attribute when the per-stage ones are absent.
        relu = getattr(net, relu_name, None)
        if relu is None:
            relu = net.relu
        x = relu(bn(conv(x)))

    x = net.avgpool(x)
    for stage in (net.layer1, net.layer2, net.layer3, net.layer4):
        x = stage(x)
    return net.attnpool(x)

In [5]:
from torchvision import datasets
class ImageFolderWithPaths(datasets.ImageFolder):
    """``ImageFolder`` variant whose items also carry the image's file path.

    Each item is the tuple produced by ``ImageFolder.__getitem__`` with the
    path of the underlying image file appended as the final element.
    """

    def __getitem__(self, index):
        """Return the parent's item for ``index`` followed by its file path.

        This is the method a ``DataLoader`` calls to fetch each sample.
        """
        base_item = super().__getitem__(index)
        # self.imgs holds one entry per image; element 0 of an entry is its path.
        image_path = self.imgs[index][0]
        return base_item + (image_path,)

In [6]:
# Root directory of the training images, handed to the ImageFolder-based dataset.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
train_path ="C:\\Users\\Yijia Zhou\\Desktop\\cifar-10-python\\cifar10\\train"
# Each image is resized and centre-cropped to 144 px by the _transform pipeline.
train_transform = _transform(144)
train_set = ImageFolderWithPaths(train_path, transform=train_transform)
print('train_set = ', train_set)

train_set =  Dataset ImageFolderWithPaths
    Number of datapoints: 50000
    Root location: C:\Users\Yijia Zhou\Desktop\cifar-10-python\cifar10\train
    StandardTransform
Transform: Compose(
               Resize(size=144, interpolation=bicubic, max_size=None, antialias=None)
               CenterCrop(size=(144, 144))
               <function _convert_image_to_rgb at 0x000002388D612B80>
               ToTensor()
               Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
           )


In [7]:
import numpy as np
# batch_size=1 and shuffle=False are DataLoader's defaults; they are spelled
# out because the extraction loop reads paths[0] and expects images to arrive
# in dataset order.
loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=False)
loader

<torch.utils.data.dataloader.DataLoader at 0x238e41c84c0>

In [8]:
# Start-of-run state for the per-folder feature extraction.
# Features gathered so far; a zero-sized array means "nothing gathered yet".
current_array = np.array([])
# Label of the most recently seen image; zero-sized until the first image.
last_labels_array = np.array([])
# Folder currently being processed; an empty (falsy) list means "none yet".
current_path_head = list()

In [None]:
from scipy import io
import os

def _save_folder_features(folder_path, feature_chunks, label_array):
    """Write one image folder's features and label to ``<folder_path>.mat``.

    Args:
        folder_path: Directory the images came from; ``'.mat'`` is appended to
            it to form the output file name.
        feature_chunks: Non-empty list of per-batch feature arrays whose first
            axis indexes images.
        label_array: Label array stored under the ``'label'`` key.
    """
    stacked = np.vstack(feature_chunks)
    # One row per image: every trailing axis (the pooled 1x1 spatial axes) is
    # folded into the feature axis so the saved variable is a 2-D matrix.
    outputs_matrix = stacked.reshape(stacked.shape[0], -1)
    print('outputs_matrix.shape = ', outputs_matrix.shape)
    print('last_labels_array = ', label_array)
    io.savemat(folder_path + '.mat', {'feature': outputs_matrix, 'label': label_array})
    print('Save mat file to:', folder_path + '.mat')


# clip.load() returns the full CLIP model; the conv/bn/layer attributes that
# features32 reads belong to its image encoder, `model.visual`. Fall back to
# `model` itself in case it already is the bare backbone.
visual_backbone = getattr(model, "visual", model)

# (Re)initialise the run state here so re-running this cell never picks up
# leftovers from a previous (possibly interrupted) run.
current_path_head = None             # folder whose features are being gathered
feature_chunks = []                  # per-batch feature arrays for that folder
last_labels_array = np.array([])     # label of the most recent batch

with torch.no_grad():
    for images, labels, paths in loader:
        labels_array = labels.cpu().numpy()

        # Folder of the current image. Only paths[0] is inspected, so this
        # relies on the loader yielding one image per batch, in dataset order.
        folder = os.path.split(paths[0])[0]

        if current_path_head is None:
            current_path_head = folder
        elif folder != current_path_head:
            # Entering a new folder: flush what was gathered for the previous
            # one, labelled with the last label seen in that folder.
            _save_folder_features(current_path_head, feature_chunks, last_labels_array)
            current_path_head = folder
            feature_chunks = []
        last_labels_array = labels_array

        outputs = features32(visual_backbone, images.to(device))
        # Collect chunks in a list and stack once per folder; stacking onto a
        # growing array for every image copies the whole array each time.
        feature_chunks.append(outputs.cpu().numpy())

# Save for the last folder (skipped when the loader yielded nothing).
if feature_chunks:
    _save_folder_features(current_path_head, feature_chunks, last_labels_array)