In [None]:
#from PIL import Image
#import requests
#requests.__version__
#!pip install requests==2.27.1
!pip install transformers
!pip install wget
!pip install lmdb

In [None]:
# Mount Google Drive at /content/drive (this import is only available on Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import sys
import os

# Add the implementation directory on Drive to the module search path
# (presumably where the local helper modules imported below live).
# The membership test keeps this cell idempotent: re-running it does not
# append the same directory a second time.
_impl_dir = os.path.abspath('/content/drive/MyDrive/teses/tese_MECD/implementation')
if _impl_dir not in sys.path:
    sys.path.append(_impl_dir)

In [None]:
%cd '/content/drive/MyDrive/teses/tese_MECD/implementation'

In [None]:
#url = "./data/flickr30k_images/flickr30k_images/5897297135.jpg"
#os.path.isfile(url)
#os.path.isfile('./data/flickr30k_images/flickr30k_images/4852389235.jpg')
#image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

In [8]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import LxmertTokenizer, LxmertConfig, LxmertModel
from modeling_frcnn import GeneralizedRCNN
import utils
from processing_image import Preprocess
from torch.optim import AdamW
from torch.utils.data import DataLoader
import lmdb
import pickle
import time

In [2]:
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset backed by two read-only LMDB environments.

    The database at ``path`` stores one pickled text record per example under
    the key ``str(index)``. Each record names its image record through the
    ``'img'`` field, which is looked up in the database at ``image_path``.

    The instance can be used as a context manager (``with MyDataset(...) as ds``)
    or closed explicitly with :meth:`close`.

    Args:
        path: Location of the LMDB environment holding the text records.
        image_path: Location of the LMDB environment holding the image records.
        device: Device on which every returned tensor is created.
    """

    def __init__(self,path,image_path,device):
        self.device = device
        self.path = path
        self.image_path = image_path
        self.env = lmdb.open(
            path, readonly=True, create=False, readahead=True
        )
        try:
            self.txn = self.env.begin(buffers=True)
            # Count entries on the environment that is already open instead of
            # opening and closing the same database a second time.
            self.size = self.getSize()
            self.img_env = lmdb.open(
                self.image_path, readonly=True, create=False, readahead=True
            )
        except Exception:
            # Do not leak the first environment when later setup fails.
            self.env.close()
            raise
        self.img_txn = self.img_env.begin(buffers=True)

    def getSize(self):
        """Return the number of entries stored in the text database."""
        env = getattr(self, "env", None)
        if env is not None:
            return env.stat()['entries']
        # Fallback for callers that invoke this before the environment is open.
        env = lmdb.open(self.path, readonly=True)
        try:
            return env.stat()['entries']
        finally:
            env.close()

    def deserializeItem(self,item):
        """Unpickle a raw text record and convert its fields to tensors.

        Args:
            item: Pickled record as returned by the text transaction.

        Returns:
            The unpickled record with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` (int32), ``normalized_boxes`` and ``features``
            (float32) and ``label`` replaced by tensors on ``self.device``.
            Only the first element of each stored field is used.

        Raises:
            KeyError: If the image record referenced by ``item['img']`` is
                missing from the image database.
        """
        # NOTE: pickle can execute arbitrary code while loading; only open
        # databases that were produced by trusted code.
        item = pickle.loads(item)
        item['input_ids']=torch.tensor(item['input_ids'][0], dtype = torch.int32, device = self.device)
        item['attention_mask']=torch.tensor(item['attention_mask'][0], dtype = torch.int32, device = self.device)
        item['token_type_ids']=torch.tensor(item['token_type_ids'][0], dtype = torch.int32, device = self.device)
        raw_img = self.img_txn.get(item['img'].encode())
        if raw_img is None:
            raise KeyError("image record %r not found in image database" % (item['img'],))
        item_img = pickle.loads(raw_img)
        item['normalized_boxes']=torch.tensor(item_img['normalized_boxes'][0], dtype = torch.float32, device = self.device)
        item['features']=torch.tensor(item_img['features'][0], dtype = torch.float32, device = self.device)
        item['label']=torch.tensor(item['label'], device = self.device)
        return item

    def __getitem__(self, idx):
        """Return the deserialized example stored under key ``str(idx)``.

        Raises:
            IndexError: If no record exists for ``idx``.
        """
        raw = self.txn.get(str(idx).encode())
        if raw is None:
            # Without this check a missing key reaches pickle.loads(None) and
            # fails with an unrelated TypeError.
            raise IndexError("no record stored for index %r" % (idx,))
        return self.deserializeItem(raw)

    def __len__(self):
        return self.size

    def close(self):
        """Close both LMDB environments."""
        try:
            self.img_env.close()
        finally:
            self.env.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        # The defaults keep a bare ``dataset.__exit__()`` call working, while
        # the three-argument form is what the ``with`` statement passes.
        self.close()

In [3]:
class MyTrainer():
    """Minimal training loop that checkpoints and evaluates after every epoch.

    Args:
        model: Model to optimise. It is called with one batch and must return
            an object exposing ``logits``; it must also provide
            ``save_model(path)``.
        train: Dataset handed to the training ``DataLoader``; every batch must
            contain a ``'label'`` entry.
        eval_test: Object exposing ``evaluate(batch_size=...)``; its return
            value is recorded after each epoch in ``test_acc_list``.
        numb_labels: Number of output labels.
        problem_type: One of ``"single_label_classification"`` (default),
            ``"regression"`` or ``"multi_label_classification"``; selects the
            loss function.

    Raises:
        ValueError: If ``problem_type`` is not one of the supported values.
    """

    def __init__(self,model,train,eval_test, numb_labels = 3,
                 problem_type = "single_label_classification"):
        self.model = model
        self.train = train
        self.eval_test = eval_test
        self.test_acc_list = []#init
        self.model_path = "my_model_epoch_"
        self.num_labels = numb_labels
        self.config_problem_type = problem_type
        if self.config_problem_type == "single_label_classification":
            self.loss_fct = torch.nn.CrossEntropyLoss()
            self.output_loss = self._single_label_loss
        elif self.config_problem_type == "regression":
            self.loss_fct = torch.nn.MSELoss()
            self.output_loss = self._regression_loss
        elif self.config_problem_type == "multi_label_classification":
            self.loss_fct = torch.nn.BCEWithLogitsLoss()
            self.output_loss = self._multi_label_loss
        else:
            raise ValueError("unsupported problem_type: %r" % (problem_type,))

    def _single_label_loss(self, output, labels):
        # Cross-entropy over logits flattened to (-1, num_labels).
        return self.loss_fct(output.logits.view(-1, self.num_labels), labels.view(-1))

    def _regression_loss(self, output, labels):
        # MSE; a single output is squeezed so logits and labels line up.
        if self.num_labels == 1:
            return self.loss_fct(output.logits.squeeze(), labels.squeeze())
        return self.loss_fct(output.logits, labels)

    def _multi_label_loss(self, output, labels):
        # Binary cross-entropy with logits.
        return self.loss_fct(output.logits, labels)

    def train_model(self,batch_size = None, lr= None, epochs=None):
        """Train for ``epochs`` epochs with AdamW.

        After each epoch the model is saved to ``self.model_path + str(epoch)``
        and the evaluator's result is appended to ``self.test_acc_list``.
        The wall-clock duration of every optimisation step is printed.

        Args:
            batch_size: Batch size for the training loader and the evaluation.
            lr: Learning rate passed to AdamW.
            epochs: Number of passes over the training data.
        """
        optimizer = AdamW(self.model.parameters(), lr=lr)
        train_loader = DataLoader(self.train, batch_size=batch_size, shuffle=True)
        for epoch in range(epochs):
            for item in train_loader:
                start = time.perf_counter()
                optimizer.zero_grad()
                outputs = self.model(item)
                label = item['label']
                loss = self.output_loss(outputs, label)
                loss.backward()
                optimizer.step()
                end = time.perf_counter()
                print(end - start)
            print("Saving model ....")
            # Save the model owned by this trainer, not whatever object the
            # notebook-level name `model` happens to be bound to.
            self.model.save_model(self.model_path+str(epoch))
            print("Model Saved!")
            test_acc = self.eval_test.evaluate(batch_size = batch_size)
            self.test_acc_list.append(test_acc)
            print('--- Epoch ',epoch,' Acc: ',test_acc)
        return

In [4]:
class MyEvaluator():
    """Computes classification accuracy of a model over a dataset.

    Args:
        model: Model exposing ``eval()``, ``train(mode)`` and ``predict(batch)``.
        test: Dataset to evaluate on; every batch must contain ``'label'``.
    """

    def __init__(self,model,test):
        self.test_dataset = test
        self.model = model

    def evaluate(self, batch_size = 8):
        """Return the fraction of examples whose prediction equals the label.

        The model is switched to evaluation mode and gradients are disabled
        for the duration of the pass; afterwards the model's previous
        train/eval mode is restored.

        Args:
            batch_size: Batch size used to iterate over the dataset.

        Returns:
            Accuracy in ``[0, 1]``, or ``0.0`` when the dataset is empty.
        """
        loader = DataLoader(self.test_dataset, batch_size=batch_size)
        was_training = getattr(self.model, "training", True)
        n_correct = 0
        n_possible = 0
        self.model.eval()
        try:
            # No gradients are needed for scoring; skipping them avoids
            # building the autograd graph for every batch.
            with torch.no_grad():
                for item in loader:
                    y_hat = self.model.predict(item)
                    y = item['label']
                    n_correct += (y == y_hat).sum().item()
                    n_possible += y.shape[0]
        finally:
            # Restore whichever mode the caller had, even if prediction fails.
            self.model.train(was_training)
        if n_possible == 0:
            return 0.0
        return n_correct / n_possible

In [5]:
class Lxmert(LxmertModel):
    """LXMERT encoder with a linear classification head on the pooled output.

    Args:
        numb_labels: Number of classes produced by the classification head.
    """

    def __init__(self,numb_labels=3):
        # NOTE(review): only the *configuration* of "unc-nlp/lxmert-base-uncased"
        # is loaded here; the encoder is built from that config and then
        # initialised by init_weights(). Confirm whether the pretrained
        # weights were meant to be loaded as well.
        super().__init__(LxmertConfig.from_pretrained("unc-nlp/lxmert-base-uncased"))
        self.num_labels = numb_labels
        self.classification = torch.nn.Linear(self.config.hidden_size, self.num_labels)
        # don't forget to init the weights for the new layers
        self.init_weights()

    def forward(self,item):
        """Run the encoder on one batch and attach classification logits.

        Args:
            item: Mapping with the keys ``'input_ids'``, ``'attention_mask'``,
                ``'token_type_ids'``, ``'features'`` (passed as ``visual_feats``)
                and ``'normalized_boxes'`` (passed as ``visual_pos``).

        Returns:
            The encoder output with an extra ``logits`` attribute holding the
            classification head applied to ``pooled_output``.
        """
        input_ids = item['input_ids']
        attention_mask=item['attention_mask']
        token_type_ids=item['token_type_ids']
        features = item['features']
        normalized_boxes = item['normalized_boxes']

        output = super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            visual_feats=features,
            visual_pos=normalized_boxes,
            token_type_ids=token_type_ids,
            return_dict=True,
            output_attentions=False,
        )

        output.logits = self.classification(output.pooled_output)
        return output

    def predict(self,item):
        """Return the arg-max class index for every example in ``item``.

        Args:
            item: Batch mapping accepted by :meth:`forward`.

        Returns:
            Tensor of predicted label indices, one per example.
        """
        # Score with *this* instance rather than the notebook-level `model`
        # variable; gradients are not needed because argmax is not differentiable.
        with torch.no_grad():
            scores = self(item)  # (n_examples x n_classes)
        predicted_labels = scores.logits.argmax(dim=-1)  # (n_examples)
        return predicted_labels

    def save_model(self,path):
        """Write this model's state dict to ``path``."""
        torch.save(self.state_dict(), path)

    def load_model(self,path):
        """Load a state dict previously written by :meth:`save_model`.

        The checkpoint is mapped to CPU while loading so that a file saved on a
        GPU can be restored on a CPU-only machine; ``load_state_dict`` then
        copies the values into the parameters wherever they currently live.
        """
        self.load_state_dict(torch.load(path, map_location="cpu"))

In [6]:
# Device passed to the datasets and the model; pinned to the CPU here.
device = "cpu"
# Alternative kept for reference: use CUDA automatically when it is available.
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Selects which branch of the driver cell runs: 'train' or 'test'.
task = 'train'
device

'cpu'

In [9]:
# Open the three split databases; all of them share the same image database.
train, test, dev = (
    MyDataset(db_name, "my_image_db", device)
    for db_name in ("my_train_db", "my_test_db", "my_dev_db")
)

if task == 'train':
    # Train with per-epoch evaluation on the test split, then report the
    # accuracy on the dev split once training has finished.
    model = Lxmert().to(device)
    test_evaluator = MyEvaluator(model, test)
    trainer = MyTrainer(model, train, test_evaluator)
    print("-----Training Model-----")
    trainer.train_model(batch_size=10, lr=1e-3, epochs=15)
    print('----Training finished-----')
    dev_evaluator = MyEvaluator(model, dev)
    dev_acc = dev_evaluator.evaluate(batch_size=10)
    print("---- Dev Acc: ", dev_acc)
elif task == 'test':
    # Restore a saved model and score it on the dev split.
    # NOTE(review): the trainer writes checkpoints named "my_model_epoch_<n>",
    # while this branch loads "my_model" — confirm the intended file name.
    model = Lxmert()
    model.load_model("my_model")
    model = model.to(device)
    evaluator = MyEvaluator(model, dev)
    acc = evaluator.evaluate(batch_size=10)
    print(acc)
    #output = run_example(model,train)

-----Training Model-----
20.82525873184204
22.463051080703735
15.623872995376587
15.226142883300781


KeyboardInterrupt: 

In [None]:
#%reset
# Free cached GPU memory and reset the CUDA context, then run the garbage
# collector. The GPU part is skipped on CPU-only runtimes, where asking for
# the current CUDA device would fail.
import gc

if torch.cuda.is_available():
    from numba import cuda
    torch.cuda.empty_cache()
    # Use a dedicated name: binding the CUDA device to `dev` would overwrite
    # the dev-split dataset created by the driver cell.
    cuda_device = cuda.get_current_device()
    cuda_device.reset()
gc.collect()

In [None]:
def run_example(model, example_idx=50):
    """Run the model end to end on one row of the training CSV and print the result.

    The row's image is passed through the Faster R-CNN feature extractor, its
    hypothesis is tokenized, and both are fed to ``model`` as a single batch
    dictionary (the format ``model`` receives from the data loaders).

    Args:
        model: Model called with one ``item`` dictionary and returning an
            object exposing ``logits``.
        example_idx: Index label of the row in ``esnlive_train.csv`` to use.

    Returns:
        The model output for the selected example.
    """
    data_path = '/content/drive/MyDrive/teses/tese_MECD/implementation/data/'
    examples = pd.read_csv(data_path+'esnlive_train.csv')
    labels_encoding = {'contradiction':0,'neutral': 1,
                       'entailment':2}
    row = examples.loc[example_idx]
    img_path = data_path+'flickr30k_images/flickr30k_images/'+ row['Flickr30kID']
    question = row['hypothesis']
    # Only the selected row needs its label encoded.
    label = labels_encoding[row['gold_label']]

    lxmert_tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
    rcnn_cfg = utils.Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
    rcnn = GeneralizedRCNN.from_pretrained("unc-nlp/frcnn-vg-finetuned", config=rcnn_cfg)
    image_preprocess = Preprocess(rcnn_cfg)

    images, sizes, scales_yx = image_preprocess(img_path)

    #preprocess image
    output_dict = rcnn(
        images,
        sizes,
        scales_yx=scales_yx,
        padding="max_detections",
        max_detections=rcnn_cfg.max_detections,
        return_tensors="pt"
    )

    #preprocess text
    inputs = lxmert_tokenizer(
        question,
        padding="max_length",
        max_length=20,
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt"
    )

    #Very important that the boxes are normalized
    normalized_boxes = output_dict.get("normalized_boxes")
    features = output_dict.get("roi_features")
    item = {'input_ids': inputs['input_ids'],
            'attention_mask': inputs['attention_mask'],
            'token_type_ids': inputs['token_type_ids'],
            'features':features,
            'normalized_boxes':normalized_boxes,
            'label':torch.LongTensor([label])}
    # Place the inputs on the same device as the model's parameters.
    target_device = next(model.parameters()).device
    item = {name: tensor.to(target_device) for name, tensor in item.items()}
    # The model's forward takes the whole batch dictionary, not separate
    # positional tensors; this is inference only, so gradients are disabled.
    with torch.no_grad():
        output = model(item)
    m = torch.nn.Softmax(dim=1)
    probs = m(output.logits)
    print(img_path)
    print(question)
    print(label)
    print(probs)
    return output

run_example(model)

In [None]:
# Scratch inspection: open the training database read-only and fetch the raw
# (still pickled) record stored under the key "10".
env = lmdb.open(
            "/content/drive/MyDrive/teses/tese_MECD/implementation/data/my_train_db", readonly=True
        )
txn = env.begin(buffers=True)
item = txn.get(str(10).encode())

In [None]:
# Unpickle the raw record fetched in the previous cell and display it.
# `item` is rebound from the raw buffer to the decoded object, so the fetch
# cell must be re-run before this cell can be executed again.
item=pickle.loads(item)
item

In [None]:
# Scratch inspection: open the image database read-only, look up the image
# record referenced by the decoded text record's 'img' field, unpickle it and
# display it. Requires `item` to be the decoded record from the previous cell.
img_env = lmdb.open(
            "/content/drive/MyDrive/teses/tese_MECD/implementation/data/my_image_db", readonly=True, create=False, readahead=not False
        )
img_txn = img_env.begin(buffers=True)
item_img = item['img']
image = img_txn.get(item_img.encode())
image = pickle.loads(image)
image


In [None]:
%ls

In [None]:
# Scratch check: show the checkpoint file name produced for a given epoch by
# appending the epoch number to the shared prefix.
epoch = 1
path = "/content/drive/MyDrive/teses/tese_MECD/implementation/my_model_epoch_"
f"{path}{epoch}"