In [None]:
#Loading Libraries
import numpy as np
import torch

import os
from os import listdir
from os.path import isfile, join
from PIL import Image

import time

import torch.utils.data as data_utils
from torch.nn import CrossEntropyLoss
from torch import nn
from torch.optim import Adam, lr_scheduler

from torchvision.datasets import CIFAR100
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from torch.utils.data import TensorDataset,DataLoader

from scipy.io import loadmat
from scipy.io import savemat

# Prefer torchvision's InterpolationMode when it can be imported; otherwise
# fall back to the equivalent constant exposed by PIL's Image module.
try:
    from torchvision.transforms import InterpolationMode
except ImportError:
    BICUBIC = Image.BICUBIC
else:
    BICUBIC = InterpolationMode.BICUBIC

# Use the GPU whenever PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

In [None]:
import xml.etree.ElementTree as ET
def ParseAnoXml(path):
    """Group image file names by the class named in their XML annotation.

    Every regular file directly inside ``path`` is parsed as XML. The text of
    the first ``./object/name`` element is used as the class key, and the
    annotation's file name with its extension replaced by ``.jpeg`` is
    recorded under that key.

    Args:
        path: Directory containing the annotation files. A trailing path
            separator is optional.

    Returns:
        dict mapping class name -> list of ``.jpeg`` file names, in the order
        the annotation files were listed.

    Raises:
        IndexError: If an annotation contains no ``./object/name`` element.
    """
    files = [f for f in listdir(path) if isfile(join(path, f))]
    classes = {}
    for i, f in enumerate(files):
        # join() (not string concatenation) so the lookup agrees with the
        # listing above even when `path` has no trailing separator.
        root = ET.parse(join(path, f)).getroot()
        child = root.findall("./object/name")[0]
        # splitext() drops whatever extension is present instead of assuming
        # it is exactly four characters long.
        fj = os.path.splitext(f)[0] + '.jpeg'
        classes.setdefault(child.text, []).append(fj)
        # Lightweight progress report every 1000 annotations.
        if i % 1000 == 0:
            print(i, fj)
    return classes
# Index the validation annotations by class name and store the resulting
# dict on disk with torch.save.
# NOTE(review): a later cell loads 'classes_val0.pth' from the d: drive, not
# the file written here -- confirm which file is the intended one.
path='C:/Datasets/ILSVRC2016/ILSVRC/Annotations/CLS-LOC/val/'
classes=ParseAnoXml(path)
torch.save(classes,'C:/Datasets/ILSVRC2016/classes_val.pth')
print('done')

In [None]:
# The CLIP model
import clip

def _transform(n_px):
    """Build the image preprocessing pipeline for size ``n_px``.

    The pipeline applies, in order: a bicubic ``Resize(n_px)``, a
    ``CenterCrop(n_px)``, conversion to RGB, conversion to a tensor, and a
    per-channel ``Normalize`` with the constants below.
    """
    channel_mean = (0.48145466, 0.4578275, 0.40821073)
    channel_std = (0.26862954, 0.26130258, 0.27577711)
    steps = [
        Resize(n_px, interpolation=BICUBIC),
        CenterCrop(n_px),
        _convert_image_to_rgb,
        ToTensor(),
        Normalize(channel_mean, channel_std),
    ]
    return Compose(steps)

def features(net, x):
    """Run ``x`` through the stem, the four stages and the attention pool of ``net``.

    Args:
        net: Object exposing ``conv1..3``, ``bn1..3``, ``relu``, ``avgpool``,
            ``layer1..4`` and ``attnpool`` callables (presumably the visual
            tower of a CLIP ResNet -- confirm against the caller).
        x: Input tensor; it is cast to the dtype of ``net.conv1.weight``.

    Returns:
        Whatever ``net.attnpool`` returns for the final stage output.
    """
    x = x.type(net.conv1.weight.dtype)

    # Three conv -> bn -> relu stages form the stem.
    stem = ((net.conv1, net.bn1), (net.conv2, net.bn2), (net.conv3, net.bn3))
    for conv, bn in stem:
        x = net.relu(bn(conv(x)))

    # The remaining modules are applied strictly one after another.
    for stage_name in ("avgpool", "layer1", "layer2", "layer3", "layer4", "attnpool"):
        x = getattr(net, stage_name)(x)
    return x

# Show the model names the installed `clip` package knows about, then load
# 'RN50x4' onto `device`. clip.load returns two values, bound here to
# `model` and `preprocess`.
print(clip.available_models())
model, preprocess = clip.load('RN50x4', device)

In [None]:
# The SWSL Model from 
# Billion-scale semi-supervised learning for image classification, https://arxiv.org/abs/1905.00546

def features(net, x):
    """Compute flattened, average-pooled features from a ResNet-style ``net``.

    Applies ``conv1 -> bn1 -> relu -> maxpool`` followed by ``layer1..4`` and
    then average-pools the result instead of calling ``net.avgpool``.

    Args:
        net: Object exposing ``conv1``, ``bn1``, ``relu``, ``maxpool`` and
            ``layer1..4`` callables.
        x: Input batch accepted by ``net.conv1``.

    Returns:
        Tensor flattened from dimension 1 onward (one row per input sample).
    """
    x = net.conv1(x)
    x = net.bn1(x)
    x = net.relu(x)
    x = net.maxpool(x)

    x = net.layer1(x)
    x = net.layer2(x)
    x = net.layer3(x)
    x = net.layer4(x)

    # The pooling window is as large as the feature map's height
    # (x.shape[2]), stride 1. Called through torch.nn.functional explicitly
    # because no `F` alias is imported in this notebook.
    x = torch.nn.functional.avg_pool2d(x, x.shape[2], 1)
    x = torch.flatten(x, 1)
    return x

def _transform(n_px):
    """Return the preprocessing pipeline for size ``n_px``.

    Bicubic ``Resize(n_px)`` and ``CenterCrop(n_px)`` are followed by RGB
    conversion, tensor conversion and normalisation with the per-channel
    mean/std given below.
    """
    normalize = Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    pipeline = [
        Resize(n_px, interpolation=BICUBIC),
        CenterCrop(n_px),
        _convert_image_to_rgb,
        ToTensor(),
    ]
    pipeline.append(normalize)
    return Compose(pipeline)

# Query the entry points published by the hub repository (the returned list
# is neither stored nor printed), then load the 'resnet50_swsl' entry point.
torch.hub.list('facebookresearch/semi-supervised-ImageNet1K-models')
model = torch.hub.load('facebookresearch/semi-supervised-ImageNet1K-models', 'resnet50_swsl')
# Move the network to `device` and switch it to evaluation mode.
model=model.to(device)
model.eval()

In [None]:
# Load the 'dinov2_vitl14_reg' entry point of the DINOv2 hub repository and
# bind it to `model`, replacing whatever `model` referred to before.
# The network is put in evaluation mode but is not moved to `device` here.
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_reg')
model.eval()

In [None]:
# Read the class list: one entry per line of train.txt under the dataset root.
# `names` and `nc` (the number of entries) are used by the feature-extraction
# cells further down.
#path='d:/datasets/Imagenet/'
path = 'd:/datasets/ILSVRC2016/'
name = path+'train.txt'
with open(name, 'r') as f:
    names = f.read().splitlines()
nc=len(names)
print(nc)

In [None]:
# compute features Imagenet and save them
from os import listdir
from os.path import isfile, join
from PIL import Image
from torch.utils.data import Dataset,TensorDataset, DataLoader
from torchvision import transforms
from typing import Sequence

# class GaussianBlur(transforms.RandomApply):
#     """
#     Apply Gaussian Blur to the PIL image.
#     """

#     def __init__(self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0):
#         # NOTE: torchvision is applying 1 - probability to return the original image
#         keep_p = 1 - p
#         transform = transforms.GaussianBlur(kernel_size=9, sigma=(radius_min, radius_max))
#         super().__init__(transforms=[transform], p=keep_p)

class MaybeToTensor(transforms.ToTensor):
    """
    ``ToTensor`` variant that leaves inputs which already are tensors untouched
    and converts everything else through the parent class.
    """

    def __call__(self, pic):
        """
        Args:
            pic (PIL Image, numpy.ndarray or torch.Tensor): Image to convert.
        Returns:
            Tensor: ``pic`` itself when it is a ``torch.Tensor``, otherwise
            the result of ``ToTensor.__call__(pic)``.
        """
        already_tensor = isinstance(pic, torch.Tensor)
        return pic if already_tensor else super().__call__(pic)


# Default per-channel mean / std, named the way timm names them.
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)

def make_normalize_transform(
    mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
    std: Sequence[float] = IMAGENET_DEFAULT_STD,
) -> transforms.Normalize:
    """Return a ``transforms.Normalize`` configured with ``mean`` and ``std``."""
    normalize = transforms.Normalize(mean=mean, std=std)
    return normalize

# This roughly matches torchvision's preset for classification training:
#   https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L6-L44
def make_classification_train_transform(
    *,
    crop_size: int = 224,
    interpolation=transforms.InterpolationMode.BICUBIC,
    hflip_prob: float = 0.5,
    mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
    std: Sequence[float] = IMAGENET_DEFAULT_STD,
):
    """Compose the training-time preprocessing pipeline.

    Args:
        crop_size: Output size passed to ``RandomResizedCrop``.
        interpolation: Interpolation mode passed to ``RandomResizedCrop``.
        hflip_prob: Probability passed to ``RandomHorizontalFlip``; the flip
            step is left out entirely when this is not greater than 0.
        mean: Per-channel mean used for normalisation.
        std: Per-channel standard deviation used for normalisation.

    Returns:
        ``transforms.Compose`` of: random resized crop, optional horizontal
        flip, tensor conversion, normalisation.
    """
    augmentations = [transforms.RandomResizedCrop(crop_size, interpolation=interpolation)]
    if hflip_prob > 0.0:
        augmentations.append(transforms.RandomHorizontalFlip(hflip_prob))

    # Tensor conversion and normalisation always close the pipeline.
    tensor_steps = [MaybeToTensor(), make_normalize_transform(mean=mean, std=std)]
    return transforms.Compose(augmentations + tensor_steps)


# This matches (roughly) torchvision's preset for classification evaluation:
#   https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L47-L69
def make_classification_eval_transform(
    *,
    resize_size: int = 256,
    interpolation=transforms.InterpolationMode.BICUBIC,
    crop_size: int = 224,
    mean: Sequence[float] = IMAGENET_DEFAULT_MEAN,
    std: Sequence[float] = IMAGENET_DEFAULT_STD,
) -> transforms.Compose:
    """Compose the deterministic evaluation-time preprocessing pipeline.

    Args:
        resize_size: Size passed to ``Resize``.
        interpolation: Interpolation mode passed to ``Resize``.
        crop_size: Size passed to ``CenterCrop``.
        mean: Per-channel mean used for normalisation.
        std: Per-channel standard deviation used for normalisation.

    Returns:
        ``transforms.Compose`` of: resize, centre crop, tensor conversion,
        normalisation.
    """
    resize = transforms.Resize(resize_size, interpolation=interpolation)
    crop = transforms.CenterCrop(crop_size)
    to_tensor = MaybeToTensor()
    normalize = make_normalize_transform(mean=mean, std=std)
    return transforms.Compose([resize, crop, to_tensor, normalize])
    
def LoadImages(transform,path,files):
    """Load the given image files, transform each one and stack the results.

    Args:
        transform: Callable applied to each image after it has been converted
            to RGB; it must return tensors of identical shape so that they
            can be stacked.
        path: Directory the file names in ``files`` are relative to.
        files: Sequence of image file names.

    Returns:
        Tensor with one leading entry per file, in the order of ``files``.
        Its shape is printed before returning.

    Raises:
        RuntimeError: If ``files`` is empty (there is nothing to stack).
    """
    x = []
    for fname in files:
        # The context manager releases the file handle as soon as the image
        # has been converted, instead of leaving it to garbage collection
        # while thousands of files are being read.
        with Image.open(join(path, fname)) as im:
            x.append(transform(im.convert("RGB")))
    if not x:
        # Same exception type torch.stack would raise, with a usable message.
        raise RuntimeError(f"no images to load from {path!r}")
    x = torch.stack(x)
    print(x.shape)
    return x

def ComputeFeatures(model,x,batch_size=16):
    """Run ``model`` over ``x`` in mini-batches and collect the outputs on the CPU.

    Each batch is moved to the module-level ``device`` before the forward
    pass, which runs with gradient tracking disabled.

    Args:
        model: Callable applied to each batch; its output must support
            ``.cpu()``.
        x: Tensor whose first dimension indexes the samples.
        batch_size: Number of samples per forward pass.

    Returns:
        The per-batch outputs concatenated along dimension 0, with size-1
        dimensions squeezed out. The sample dimension is kept even when
        ``x`` holds a single sample, so the result always has one leading
        entry per sample.
    """
    loader = DataLoader(TensorDataset(x), batch_size=batch_size, shuffle=False)
    chunks = []
    with torch.no_grad():
        # TensorDataset yields one-element batches: unpack the image tensor.
        for (images,) in loader:
            chunks.append(model(images.to(device)).cpu())
    feats = torch.cat(chunks, dim=0)
    squeezed = feats.squeeze()
    if feats.shape[0] == 1:
        # A bare squeeze() would also drop the sample axis for a single
        # sample; put it back so callers always get one row per sample.
        squeezed = squeezed.unsqueeze(0)
    return squeezed

# Extract features for the training images, one .mat file per class.
# `model` is the network bound by the most recent model-loading cell
# (the DINOv2 cell when the notebook is run top to bottom); no other name
# for it is defined in this notebook.
model = model.to(device)
preprocess = make_classification_eval_transform()
# NOTE(review): the loop starts at class index 426 -- presumably resuming an
# interrupted run; confirm, and start from 0 for a full pass.
for i in range(426, nc):
    path = 'd:/Datasets/ILSVRC2016/ILSVRC/Data/CLS-LOC/train/' + names[i]
    files = [f for f in listdir(path) if isfile(join(path, f))]
    x = LoadImages(preprocess, path, files)
    # ComputeFeatures moves each batch to `device` itself, so the stacked
    # class tensor can stay on the CPU.
    x = ComputeFeatures(model, x, 256)
    name = 'd:/Datasets/ILSVRC2016/dinov2_vitl14_reg/' + names[i] + '.mat'
    print(i, name)
    savemat(name, {'feature': x.float().numpy()})

In [None]:
# Extract features for the validation images, one .mat file per class.
# Relies on `names`, `nc`, `preprocess` and `model` from earlier cells.
# NOTE(review): this loads 'classes_val0.pth', whereas the annotation cell
# writes 'classes_val.pth' on the C: drive -- confirm the intended file.
classes = torch.load('d:/Datasets/ILSVRC2016/classes_val0.pth')
path = 'd:/Datasets/ILSVRC2016/ILSVRC/Data/CLS-LOC/val/'
for i in range(nc):
    files = classes[names[i]]
    x = LoadImages(preprocess, path, files)
    # `model` is the network bound by the most recent model-loading cell;
    # ComputeFeatures moves each batch to `device` itself, so the stacked
    # tensor stays on the CPU.
    x = ComputeFeatures(model, x, 256)
    name = 'd:/Datasets/ILSVRC2016/dinov2_vitl14_reg_val/' + names[i] + '.mat'
    print(i, name)
    savemat(name, {'feature': x.float().numpy()})

In [None]:
def _convert_image_to_rgb(image):
    return image.convert("RGB")

def _transform(n_px):
    """Assemble the preprocessing pipeline for size ``n_px``.

    Order of operations: bicubic ``Resize(n_px)``, ``CenterCrop(n_px)``, RGB
    conversion, ``ToTensor`` and finally ``Normalize`` with the per-channel
    statistics listed below.
    """
    mean = (0.48145466, 0.4578275, 0.40821073)
    std = (0.26862954, 0.26130258, 0.27577711)
    resize = Resize(n_px, interpolation=BICUBIC)
    crop = CenterCrop(n_px)
    to_tensor = ToTensor()
    normalize = Normalize(mean, std)
    return Compose([resize, crop, _convert_image_to_rgb, to_tensor, normalize])
# Size passed to _transform when the CIFAR-100 datasets are created below.
nx=144

# Show the model names the installed `clip` package knows about, then load
# 'RN50x4' onto `device`; this rebinds `model` and `preprocess`.
import clip
print(clip.available_models())
model, preprocess = clip.load('RN50x4', device)

# Functional ReLU used by features() below in place of an attribute on the
# network.
relu = torch.nn.functional.relu
def features(net, x):
    """Run ``x`` through the stem and four stages of ``net``, then pool twice.

    Args:
        net: Object exposing ``conv1..3``, ``bn1..3``, ``avgpool`` and
            ``layer1..4`` callables (presumably the visual tower of a CLIP
            ResNet -- confirm against the caller).
        x: Input tensor; it is cast to the dtype of ``net.conv1.weight``.

    Returns:
        Output of ``layer4`` after two further applications of
        ``net.avgpool``.
    """
    x = x.type(net.conv1.weight.dtype)

    # Stem: three conv -> bn -> relu stages, using the module-level `relu`.
    stem = ((net.conv1, net.bn1), (net.conv2, net.bn2), (net.conv3, net.bn3))
    for conv, bn in stem:
        x = relu(bn(conv(x)))
    x = net.avgpool(x)

    for stage_name in ("layer1", "layer2", "layer3", "layer4"):
        x = getattr(net, stage_name)(x)

    # net.avgpool is applied twice in a row to the final stage output.
    for _ in range(2):
        x = net.avgpool(x)
    return x

# Download the dataset
# Both splits are stored under ~/.cache and use the _transform(nx) pipeline;
# train=True / train=False select the training and test split respectively.
cifar100_train = CIFAR100(root=os.path.expanduser("~/.cache"), download=True, train=True, transform=_transform(nx))
cifar100_test = CIFAR100(root=os.path.expanduser("~/.cache"), download=True, train=False, transform=_transform(nx))

In [None]:
from tqdm import tqdm
def generate_features(dataset,model):
    """Extract features and labels for every sample of ``dataset``.

    Batches of 100 samples are moved to the module-level ``device`` and
    passed through the module-level ``features`` function applied to
    ``model.visual``; dimensions 2 and 3 of its output are then squeezed.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs that a
            ``DataLoader`` can batch.
        model: Object exposing the network as ``model.visual``.

    Returns:
        Tuple ``(feat, labels)``: ``feat`` is a float32 CPU tensor with one
        row per sample, and ``labels`` is a CPU tensor of the labels in the
        dtype the loader produced them in, both in dataset order.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    feat_chunks = []
    label_chunks = []
    with torch.no_grad():
        for images, labs in tqdm(DataLoader(dataset, batch_size=100)):
            f = features(model.visual, images.to(device))
            # Widen to float32 on the CPU so half-precision outputs are
            # stored the same way as full-precision ones.
            feat_chunks.append(f.squeeze((2, 3)).cpu().float())
            label_chunks.append(labs.cpu())
    if not feat_chunks:
        raise ValueError("dataset produced no batches")
    # Concatenate once at the end: re-concatenating inside the loop copies
    # everything gathered so far on every iteration.
    feat = torch.cat(feat_chunks, dim=0)
    labels = torch.cat(label_chunks, dim=0)
    return feat, labels

features_train, labels_train = generate_features(cifar100_train, model)
features_test, labels_test = generate_features(cifar100_test, model)

# For each of the 100 class indices, write that class's rows of the train
# and test features to their own .mat file under the key 'feature'.
for i in range(100):
    x = features_train[labels_train == i, :]
    name = r'D:\datasets\Cifar100\Clip\train' + str(i) + '.mat'
    savemat(name, {'feature': x.float().numpy()})

    x = features_test[labels_test == i, :]
    name = r'D:\datasets\Cifar100\Clip\val' + str(i) + '.mat'
    savemat(name, {'feature': x.float().numpy()})

    # Progress report every ten classes.
    if not i % 10:
        print(i)
print('All features saved')