In [1]:
import torch
import PIL.Image
from datasets import load_dataset
import torch.nn as nn
import os
from os import listdir
import pandas as pd
import numpy
from torchvision import datasets, models, transforms
from CustomDataset_org import CustomDataset
import requests
import csv
from transformers import AutoImageProcessor, SwinModel
from transformers import ViTFeatureExtractor, ViTModel
from transformers import AutoProcessor, CLIPModel
from transformers import AutoImageProcessor, BeitModel

In [2]:
import transformers
print(transformers.__version__)

4.29.2


In [4]:
def create_dataloader(folder_name,main_path):
    image_dir = main_path+"/Generated_Images/"+folder_name
    csv_dir = main_path +"/CSV/org_csv/"
#     csv_dir = main_path +"/GVC_CSV/org_csv/"
    data_transforms = {
    'transform': transforms.Compose([
        transforms.ToTensor()
    ])}
    
    dataset = CustomDataset(csv_dir + folder_name+"_sent.csv", root_dir = image_dir, transform = data_transforms['transform'])
    print(folder_name + " set size: ",len(dataset))
    dataloader_dict = {'data': torch.utils.data.DataLoader(dataset, batch_size=50, shuffle=False)}
    
    return dataloader_dict


In [5]:
def generate_embeddings(model_name,dataloader,folder_name):
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
    
    if model_name == 'ViT':
        feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
        model = ViTModel.from_pretrained('google/vit-base-patch16-224')
        
    elif model_name == 'BEiT': 
        image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
        model = BeitModel.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
        
    elif model_name == 'CLIP':
        model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
        
    elif model_name == 'SWIN': 
        image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
        model = SwinModel.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
    
    else:
         print("Incorrect Model Name.")
            
    model.to(device)
    model.eval()
    embeddings = {}
    print("Generating Embeddings for "+folder_name+" set ...")
    for i,(names,images) in enumerate(dataloader['data']):

            print(i)
            if model_name == 'ViT':
                inputs = feature_extractor(images=images.to(device), return_tensors="pt")

            elif model_name == 'BEiT':
                inputs = image_processor(images, return_tensors="pt")

            elif model_name == 'CLIP':
                inputs = processor(text=sent , images=images, return_tensors="pt", padding=True)

            elif model_name == 'SWIN':
                inputs =image_processor(images, return_tensors="pt")

            else:
                print("Incorrect Model Name.")


            inputs = inputs.to(device)
            if model_name == 'SWIN':
                with torch.no_grad():
                    outputs = model(**inputs)
            else:
                outputs = model(**inputs)

            if model_name =='CLIP':
                pooled = outputs.vision_model_output.pooler_output
            else:
                pooled= outputs.pooler_output

            for j,(n,p) in enumerate(zip(names,pooled.cpu().detach().numpy())):
                
                embeddings[n] = p 
    print("Done")       
    torch.cuda.empty_cache()
    return embeddings

In [6]:
main_path = '/Users/emergencyaccount/Downloads/ECBImages'
dataloader_train = create_dataloader("train",main_path)
dataloader_test = create_dataloader("test",main_path)
dataloader_dev = create_dataloader("dev",main_path)

traincopy set size:  97
test set size:  982
dev set size:  982


In [7]:
model_names = ['ViT','CLIP','BEiT','SWIN']
embeddings_train = generate_embeddings(model_names[0],dataloader_train,'train')
embeddings_test = generate_embeddings(model_names[0],dataloader_test,'test')
embeddings_dev = generate_embeddings(model_names[0],dataloader_dev,'dev')


Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Generating Embeddings for traincopy set ...


FileNotFoundError: [Errno 2] No such file or directory: '/Users/emergencyaccount/Downloads/ECBImages/Generated_Images/traincopy/1_4ecbplus_xml.jpg'

In [None]:
#saving the models' embeddings for train, test and dev set
torch.save(embeddings_train,"GVC_Embeddings/"+model_names[0]+"/train")
torch.save(embeddings_test,"GVC_Embeddings/"+model_names[0]+"/test")
torch.save(embeddings_dev,"GVC_Embeddings/"+model_names[0]+"/dev")