In [None]:
import torch
import pandas as pd
import numpy as np
from transformers import LxmertTokenizer, LxmertConfig, LxmertModel, get_scheduler
from modeling_frcnn import GeneralizedRCNN
import utils
from processing_image import Preprocess
from torch.optim import AdamW
from torch.utils.data import DataLoader
import lmdb
import pickle
import time
import io
from tqdm import tqdm
import math

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Hypothesis / image-feature pairs from SNLI-VE and/or VSR, backed by LMDB stores.

    Each item is a dict holding the tokenized hypothesis (``input_ids``,
    ``attention_mask``, ``token_type_ids``), the image's ``normalized_boxes``
    and ``features`` read from LMDB, and the integer ``label``.

    The ``img_txn`` column of ``self.dataset`` selects which entry of
    ``self.img_txns`` holds a row's image: 0 for SNLI-VE, 1 for VSR.

    The instance can be used as a context manager (or ``close()`` called
    explicitly) to release the LMDB environments.

    NOTE(review): the LMDB handles are opened in ``__init__`` and are therefore
    shared with any ``DataLoader`` worker processes; LMDB documents restrictions
    on using an environment across ``fork()`` -- consider opening lazily per worker.
    """

    # Row labels removed from each VSR split file.
    # NOTE(review): presumably samples whose image features are missing -- confirm.
    _VSR_ROWS_TO_DROP = {
        'train.json': [1786,3569,4553,4912],
        'test.json': [135,614,1071,1621,1850],
        'dev.json': [807],
    }

    def __init__(self,dataset_path,image_path, only_vsr = False, vsr = None, vsr_image_path = './data/vsr-images', max_length = 77):
        """Load the requested split(s) and open the matching LMDB image stores.

        Args:
            dataset_path: CSV file of the SNLI-VE split (only read when ``only_vsr`` is False).
            image_path: LMDB directory with the SNLI-VE image features.
            only_vsr: when True, use only the VSR split named by ``vsr``.
            vsr: VSR split file name (``only_vsr=True``), or any truthy value to
                append the VSR data to the SNLI-VE split (``only_vsr=False``).
            vsr_image_path: LMDB directory with the VSR image features.
            max_length: token length every hypothesis is padded / truncated to.

        Raises:
            ValueError: if ``only_vsr`` is True but no ``vsr`` split name is given.
        """
        self.lxmert_tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
        self.max_length = max_length
        self.dataset_path = dataset_path
        self.image_path = image_path
        # Environments are kept next to their transactions so they can be closed later.
        self.img_envs = [None, None]
        self.img_txns = [None, None]
        if only_vsr:#assess only for the vsr dataset
            if not vsr:
                raise ValueError("only_vsr=True requires `vsr` to name a split file, e.g. 'train.json'")
            self.dataset = self.read_vsr_dataset(vsr)
            self._open_image_db(vsr_image_path, 1)
        else:#assess for the SNLI-VE dataset
            self.dataset = self.read_dataset(dataset_path)
            self._open_image_db(self.image_path, 0)
            if vsr:#add extra data to make model more reliable
                self._open_image_db(vsr_image_path, 1)
                # VSR labels are binary (0/1) while SNLI-VE uses 0/1/2 with 1 == neutral,
                # so VSR "true" has to be re-encoded to 2 (entailment) before mixing.
                # NOTE(review): every VSR split (train, test and dev) is appended no matter
                # which SNLI-VE split `dataset_path` points at -- check for evaluation leakage.
                vsr_parts = [self.read_vsr_dataset(name, encode_labels=True)
                             for name in ('train.json', 'test.json', 'dev.json')]
                self.dataset = pd.concat([self.dataset] + vsr_parts, ignore_index=True)

    def _open_image_db(self, path, slot):
        """Open the LMDB at ``path`` read-only and store its env / transaction in ``slot``."""
        env = lmdb.open(path, readonly=True, create=False, readahead=True)
        self.img_envs[slot] = env
        self.img_txns[slot] = env.begin(buffers=True)

    def read_vsr_dataset(self,dataset_name, dataset_path = '../visual-spatial-reasoning/',splits_path='splits/', 
                         image_path = 'images/',sort = False, encode_labels = False):
        """Read a VSR JSON-lines split into the common column layout.

        Args:
            dataset_name: split file name; known splits get their listed rows dropped.
            dataset_path: root directory of the VSR checkout (string-concatenated).
            splits_path: sub-directory containing the split files.
            image_path: unused; kept for interface compatibility.
            sort: order the rows by hypothesis length (ascending).
            encode_labels: map label 1 to 2 so it lines up with SNLI-VE "entailment".

        Returns:
            DataFrame with columns ``hypothesis``, ``Flickr30kID``, ``gold_label``
            and ``img_txn`` (always 1).
        """
        dataset = pd.read_json(dataset_path+splits_path+dataset_name, lines =True)
        # rename() returns a new frame, so later assignments never hit a view of the raw data.
        dataset = dataset[['caption','image','label']].rename(
            columns = {'caption':'hypothesis', 'image':'Flickr30kID', 'label' : 'gold_label'})
        if encode_labels:
            labels_encoding = {0:0,1:2}#leave the label 0 the same and convert 1 to 2 to mean entailment
            dataset['gold_label'] = dataset['gold_label'].apply(lambda label: labels_encoding[label])
        dataset = dataset.drop(labels=self._VSR_ROWS_TO_DROP.get(dataset_name, []), axis=0)
        dataset['img_txn'] = 1
        dataset = dataset.reset_index(drop=True)
        if sort:
            dataset = dataset.sort_values(by="hypothesis", key=lambda x: x.str.len())
        return dataset
    
    def read_dataset(self, url,sort = False):
        """Read an SNLI-VE CSV split and encode its string labels as integers.

        Args:
            url: path, URL or file-like object accepted by ``pandas.read_csv``.
            sort: order the rows by hypothesis length (ascending).

        Returns:
            DataFrame with columns ``hypothesis``, ``Flickr30kID``, ``gold_label``
            (contradiction=0, neutral=1, entailment=2) and ``img_txn`` (always 0).

        Raises:
            KeyError: if a row carries a label outside the three known classes.
        """
        labels_encoding = {'contradiction':0,'neutral': 1,
                           'entailment':2}
        # copy() detaches the column subset so the assignments below are well-defined.
        dataset = pd.read_csv(url)[['hypothesis','Flickr30kID','gold_label']].copy()
        dataset['gold_label'] = dataset['gold_label'].apply(lambda label: labels_encoding[label])
        dataset['img_txn'] = 0
        if sort:
            dataset = dataset.sort_values(by="hypothesis", key=lambda x: x.str.len())
        return dataset
    
    def get_text_features(self,text): 
        """Tokenize ``text``, padded / truncated to ``self.max_length``, as PyTorch tensors."""
        inputs = self.lxmert_tokenizer(
            text,
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
            return_token_type_ids=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt"
        )
        return inputs
    
    def __getitem__(self, idx):
        """Build the model input dict for the row labelled ``idx``.

        Raises:
            KeyError: if the row's image id is not present in its LMDB store.
        """
        sample = self.dataset.loc[idx]
        img_name = sample['Flickr30kID']
        text = sample['hypothesis']
        label = sample['gold_label']
        inputs = self.get_text_features(text)
        raw = self.img_txns[int(sample['img_txn'])].get(img_name.encode())
        if raw is None:
            raise KeyError("no image features stored for %r" % (img_name,))
        # NOTE(review): pickle executes arbitrary code on load; only point this at trusted LMDB files.
        item_img = pickle.loads(raw)
        
        # The tokenizer returns a batch of one, hence the [0]; the stored image
        # arrays are indexed with [0] in the same way.
        item = {'input_ids': inputs['input_ids'][0].to(torch.int32),
                'attention_mask': inputs['attention_mask'][0].to(torch.bool),
                'token_type_ids': inputs['token_type_ids'][0].to(torch.int32),
                'normalized_boxes': torch.tensor(item_img['normalized_boxes'][0], dtype = torch.float32),
                'features': torch.tensor(item_img['features'][0], dtype = torch.float32),
                'label': torch.tensor(label,dtype = torch.long)}
        return item

    def __len__(self):
        """Number of rows in the loaded split(s)."""
        return len(self.dataset.index)

    def close(self):
        """Close every opened LMDB environment; safe to call more than once."""
        for slot, env in enumerate(getattr(self, 'img_envs', None) or []):
            if env is not None:
                env.close()
            self.img_envs[slot] = None
            self.img_txns[slot] = None

    def __enter__(self):
        return self
    
    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        # Arguments default to None so a bare ``dataset.__exit__()`` call keeps working.
        self.close()

In [None]:
class MyTrainer():
    """Fine-tunes a model on a dataset and evaluates it after every epoch.

    The model is expected to be callable on a batch dict, to return an object
    with a ``logits`` attribute, and to provide ``save_model(path)``.
    """

    # Batch entries that are moved to ``self.device`` before the forward pass.
    _BATCH_KEYS = ('input_ids', 'attention_mask', 'token_type_ids',
                   'normalized_boxes', 'features', 'label')

    def __init__(self,model,train,eval_test, device = None, num_labels = 3,
                 problem_type = "single_label_classification"):
        """
        Args:
            model: network to train.
            train: training dataset handed to ``DataLoader``.
            eval_test: evaluator whose ``evaluate(batch_size=...)`` is called after each epoch.
            device: device the batches are moved to.
            num_labels: number of output classes / regression targets.
            problem_type: ``"single_label_classification"`` (default),
                ``"regression"`` or ``"multi_label_classification"``; selects the loss.

        Raises:
            ValueError: if ``problem_type`` is not one of the three supported values.
        """
        self.device = device
        self.model = model
        self.train = train
        self.eval_test = eval_test
        self.test_acc_list = []#init
        self.model_path = "./models/new_my_model_epoch_"
        self.num_labels = num_labels
        self.config_problem_type = problem_type
        if self.config_problem_type == "single_label_classification":
            self.loss_fct = torch.nn.CrossEntropyLoss()
            self.output_loss = lambda output,labels : self.loss_fct(output.logits.view(-1, self.num_labels), labels.view(-1))
        elif self.config_problem_type == "regression":
            self.loss_fct = torch.nn.MSELoss()
            if self.num_labels == 1:
                self.output_loss = lambda output,labels : self.loss_fct(output.logits.squeeze(), labels.squeeze())
            else:
                self.output_loss = lambda output,labels : self.loss_fct(output.logits, labels)
        elif self.config_problem_type == "multi_label_classification":
            self.loss_fct = torch.nn.BCEWithLogitsLoss()
            self.output_loss = lambda output,labels : self.loss_fct(output.logits, labels)
        else:
            raise ValueError("unsupported problem_type: %r" % (problem_type,))

    def _to_device(self, item):
        """Move the known tensor entries of a batch dict to ``self.device`` (in place)."""
        for key in self._BATCH_KEYS:
            item[key] = item[key].to(self.device)
        return item

    def train_model(self,batch_size = None, lr= None, epochs=None, num_workers = 4):
        """Train for ``epochs`` epochs with AdamW and a linearly decaying learning rate.

        After every epoch the weights are saved to ``self.model_path + str(epoch)``,
        the evaluator is run and its accuracy appended to ``self.test_acc_list``.

        Args:
            batch_size: mini-batch size (required).
            lr: initial learning rate (required).
            epochs: number of passes over the training set (required).
            num_workers: ``DataLoader`` worker processes.

        Raises:
            ValueError: if ``batch_size``, ``lr`` or ``epochs`` is left as None.
        """
        import os  # local import: only needed to prepare the checkpoint directory

        if batch_size is None or lr is None or epochs is None:
            raise ValueError("batch_size, lr and epochs must all be provided")

        optimizer = AdamW(self.model.parameters(), lr=lr)
        train_loader = DataLoader(self.train, batch_size=batch_size, shuffle=True, num_workers = num_workers)
        lr_scheduler = get_scheduler(
            name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps= epochs * len(train_loader)
        )
        # Create the checkpoint directory up front so a missing folder does not
        # throw away a full epoch of training when the first save is attempted.
        checkpoint_dir = os.path.dirname(self.model_path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)

        for epoch in range(epochs):
            self.model.train()
            train_losses = []
            with tqdm(total=len(train_loader)) as progress_bar:
                for item in train_loader:
                    item = self._to_device(item)
                    optimizer.zero_grad()
                    # Call the module itself (not .forward) so registered hooks run.
                    outputs = self.model(item)
                    loss = self.output_loss(outputs, item['label'])
                    # Keep a plain float: storing the tensor would keep device
                    # memory and autograd references alive for the whole epoch.
                    train_losses.append(loss.item())
                    loss.backward()
                    optimizer.step()
                    lr_scheduler.step()
                    progress_bar.update(1)
            print("Saving model ....")
            # Save the model this trainer owns rather than a notebook-level global.
            self.model.save_model(self.model_path+str(epoch))
            print("Model Saved!")
            test_acc = self.eval_test.evaluate(batch_size = batch_size)
            self.test_acc_list.append(test_acc)
            print('--- Epoch ',epoch,' Acc: ',test_acc)
            mean_loss = torch.tensor(train_losses).mean().item()
            print('Training loss: %.4f' % (mean_loss))
        return

In [None]:
class MyEvaluator():
    """Computes classification accuracy of a model over a held-out dataset."""

    # Batch entries that are moved to ``self.device`` before prediction.
    _BATCH_KEYS = ('input_ids', 'attention_mask', 'token_type_ids',
                   'normalized_boxes', 'features', 'label')

    def __init__(self,model,test, device = None):
        """
        Args:
            model: object providing ``eval()``, ``train(mode)`` and ``predict(batch)``.
            test: dataset handed to ``DataLoader``.
            device: device the batches are moved to.
        """
        self.test_dataset = test
        self.model = model
        self.device = device

    def evaluate(self, batch_size = 8, num_workers = 4):
        """Return the fraction of samples whose predicted label equals the gold label.

        The model is switched to eval mode for the duration of the call and put
        back into whatever mode it was in before, even if prediction raises.

        Args:
            batch_size: mini-batch size used for prediction.
            num_workers: ``DataLoader`` worker processes.

        Returns:
            Accuracy in [0, 1], or ``nan`` when the dataset yields no samples.
        """
        # Default to True so a model without a `training` flag ends up in train
        # mode afterwards, as it always did before.
        was_training = getattr(self.model, 'training', True)
        self.model.eval()
        loader = DataLoader(self.test_dataset, batch_size=batch_size, shuffle = False, num_workers = num_workers)
        n_correct = 0
        n_possible = 0
        try:
            # No gradients are needed for scoring; skipping autograd saves memory.
            with torch.no_grad():
                for item in loader:
                    for key in self._BATCH_KEYS:
                        item[key] = item[key].to(self.device)
                    y_hat = self.model.predict(item)
                    y = item['label']
                    n_correct += (y == y_hat).sum().item()
                    n_possible += y.shape[0]
        finally:
            self.model.train(was_training)
        if n_possible == 0:
            return float('nan')
        return n_correct / n_possible

In [None]:
class Lxmert(LxmertModel):
    """LXMERT encoder with a linear classification head on the pooled output."""

    def __init__(self,num_labels=3):
        """Build the encoder from the ``unc-nlp/lxmert-base-uncased`` config plus a linear head.

        NOTE(review): only the *config* is fetched here; presumably the encoder
        weights are freshly initialised rather than pretrained -- confirm this
        is intended.

        Args:
            num_labels: size of the classification head's output.
        """
        super().__init__(LxmertConfig.from_pretrained("unc-nlp/lxmert-base-uncased"))
        self.num_labels = num_labels
        # Created after the parent constructor, so the layer keeps torch's default Linear init.
        self.classification = torch.nn.Linear(self.config.hidden_size, self.num_labels)
    
    def forward(self,item):       
        """Run the encoder on a batch dict and attach classification logits.

        Args:
            item: dict with ``input_ids``, ``attention_mask``, ``token_type_ids``,
                ``features`` and ``normalized_boxes``.

        Returns:
            The encoder's output object with an extra ``logits`` attribute
            computed from ``pooled_output``.
        """
        output = super().forward(
            input_ids=item['input_ids'],
            attention_mask=item['attention_mask'],
            visual_feats=item['features'],
            visual_pos=item['normalized_boxes'],
            token_type_ids=item['token_type_ids'],
            return_dict=True,
            output_attentions=False,
        )
        
        output.logits = self.classification(output.pooled_output)
        return output
    
    def predict(self,item):
        """Return the arg-max class index for every example in the batch.

        Runs without gradient tracking and on this instance (not on a
        notebook-level ``model`` variable).
        """
        with torch.no_grad():
            scores = self(item)
        predicted_labels = scores.logits.argmax(dim=-1)
        return predicted_labels

    def save_model(self,path):
        """Write this module's ``state_dict`` to ``path``."""
        torch.save(self.state_dict(), path)
        
    def load_model(self,path):
        """Load a ``state_dict`` saved by ``save_model`` into this module.

        The checkpoint is deserialised onto the CPU first so that weights saved
        from a GPU can be loaded on a CPU-only machine; ``load_state_dict`` then
        copies them into the parameters wherever they currently live.
        """
        self.load_state_dict(torch.load(path, map_location="cpu"))

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
#device = "cpu"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# get_device_name() only accepts CUDA devices, so the CPU fallback is reported by hand.
torch.cuda.get_device_name(device) if device.type == "cuda" else "cpu"

In [None]:
# Instantiate the LXMERT-based classifier with its default of three output labels.
# NOTE(review): the constructor passes only a config object to the parent class;
# presumably no pretrained weights are loaded at this point -- confirm.
model = Lxmert()

In [None]:
# Print the module's repr to inspect the encoder and the classification head.
print(model)

In [None]:
# Build the three splits in one pass: they share the image DB, the token
# budget and VSR-only mode, and differ only in the file names passed in.
# With only_vsr=True the CSV path is stored on the dataset but not read.
train, test, dev = (
    MyDataset(
        csv_path,
        "./data/my_image_db",
        max_length = 77,
        only_vsr = True,
        vsr = vsr_split,
    )
    for csv_path, vsr_split in (
        ("../e-ViL/data/esnlive_train.csv", 'train.json'),
        ("../e-ViL/data/esnlive_test.csv", 'test.json'),
        ("../e-ViL/data/esnlive_dev.csv", 'dev.json'),
    )
)

In [None]:
# Number of samples in the train, test and dev splits, one per line.
print(len(train), len(test), len(dev), sep='\n')

In [None]:
# Run configuration: `task` selects between fine-tuning ('train') and scoring a
# saved checkpoint ('test'); the remaining values are the training hyper-parameters.
task = 'train'
batch_size = 32
epochs = 10
lr = 1e-5/math.sqrt(2)
if task =='train':
    # The test split is scored after every epoch by the trainer; the dev split
    # is scored once, after training has finished.
    test_evaluator = MyEvaluator(model,test, device = device)
    dev_evaluator = MyEvaluator(model,dev, device = device)
    trainer = MyTrainer(model,train,test_evaluator, device = device)
    model = model.to(device)
    print("-----Training Model-----")
    trainer.train_model(epochs=epochs ,batch_size = batch_size, lr = lr)
    print('----Training finished-----')
    dev_acc = dev_evaluator.evaluate(batch_size = batch_size)
    print("---- Dev Acc: ",dev_acc)
elif task =='test':
    model = model.to(device)
    # NOTE(review): the trainer writes checkpoints to "./models/new_my_model_epoch_<n>",
    # whereas this loads "my_model_epoch_9" from the working directory -- confirm the path.
    model.load_model("my_model_epoch_9")
    evaluator = MyEvaluator(model,dev, device = device)
    acc = evaluator.evaluate(batch_size = batch_size)
    print(acc)
    #output = run_example(model,train)

In [None]:
# Write the current weights (the module's state_dict) to 'my_model' in the working directory.
model.save_model('my_model')

In [None]:
# Host diagnostics: CPU load and RAM usage.
import psutil

_GIB = 1024 ** 3

# A short sampling interval is required: the first cpu_percent() call made
# without one has nothing to compare against and reports a meaningless 0.0.
print(psutil.cpu_percent(interval=0.1))

# Take a single snapshot so that every figure below describes the same instant.
vm = psutil.virtual_memory()
# the full object with all of its fields
print(vm)
# the same data as a plain dictionary
print(dict(vm._asdict()))
# percentage of RAM in use
print(vm.percent)
# percentage of RAM still available
print(vm.available * 100 / vm.total)
# total and available RAM, scaled by 1024**3
print(vm.total / _GIB)
print(vm.available / _GIB)

In [None]:
# One torch.cuda.device handle per visible CUDA device (empty when there is none).
available_gpus = list(map(torch.cuda.device, range(torch.cuda.device_count())))
available_gpus

In [None]:
# Per-device CUDA memory report; every figure is the raw value divided by 1024 three times.
for i in range(torch.cuda.device_count()):
    name = torch.cuda.get_device_name(i)
    t = torch.cuda.get_device_properties(i).total_memory  # total on the device
    r = torch.cuda.memory_reserved(i)                      # reserved by torch
    a = torch.cuda.memory_allocated(i)                     # allocated to tensors
    f = r - a                                              # free inside reserved
    print('------- Device ',i,' name:',name,' ------')
    for tag, amount in (('t ', t), ('r ', r), ('a ', a), ('f ', f)):
        print(tag, amount/1024/1024/1024)

In [None]:
%reset
import gc
import torch
gc.collect()
torch.cuda.empty_cache()
gc.collect()
del(variables)