# CoRaX

In [1]:
cd C-Tran/

/home/cougarnet.uh.edu/aawasth3/C-Tran


In [2]:
import torch
import random
import numpy as np
import os
seed=0
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
random.seed(0)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

In [3]:
args={
    'dataroot':'/home/cougarnet.uh.edu/aawasth3/Eye_Gaze_Research_Data_Set/images/jpg/',
    'dataset':'coco',
    'workers':0,
    'results_dir':'results2/',
    'test_known':0,
    
    
    

    # Optimization
    'optim':'adam',
    'lr':0.0002,
   'batch_size':32,
    'test_batch_size':-1,
    'grad_ac_steps':1,
    'scheduler_step':1000,
    'scheduler_gamma':0.1,
    'epochs' :100,
    'int_loss':0.0,
    'aux_loss' :0.0,
    'loss_type' :'bce',
    'scheduler_type' :'plateau',
    'loss_labels':'all',
   'lr_decay':0,
    'weight_decay':1e-4,
    'max_samples':-1,
    'max_batches':-1,
    'warmup_scheduler':'',

    # Model
    'layers':3,
   'heads':4,
    'dropout':0.1,
    'pos_emb':False, 
    'use_lmt':True,
    'freeze_backbone':False,
    'no_x_features':False,

    
    
    # Image Sizes
    'scale_size':640,
    'crop_size':576,

    # Testing Models
    'inference':True,
    'resume':False,
    'saved_model_name':'',
    
    'overwrite':False,
    'name':'',
    'num_labels':14,
    'epoch':1,
    'train_known_labels':0,
    'test_known_labels':0,
    'attr_group_dict':'',
    
    'n_groups':10
    
}




import pandas as pd
args=pd.Series(args)

In [4]:


from torch.utils.data import Dataset, DataLoader
import pickle
from pdb import set_trace as stop
from dataloaders.data_utils import get_unk_mask_indices,image_loader

class Coco80Dataset(Dataset):

    def __init__(self, split,num_labels,data_file,img_root,annotation_dir,max_samples=-1,transform=None,known_labels=0,testing=False,analyze=False):
        self.split=split


        with open(data_file[:-1], "rb") as f:
                 self.split_data=pickle.load(f)
        #self.split_data = pickle.load(open(data_file,'rb'))
        
        if max_samples != -1:
            self.split_data = self.split_data[0:max_samples]

        self.img_root = img_root
        self.transform = transform
        self.num_labels = num_labels
        self.known_labels = known_labels
        self.testing=testing
        self.epoch = 1

    def __len__(self):
        return len(self.split_data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_ID = self.split_data[idx]['file_name']

        img_name = os.path.join(self.img_root,image_ID)
        image = image_loader(img_name,self.transform)

        labels = self.split_data[idx]['object']
        labels = torch.Tensor(labels)

        unk_mask_indices = get_unk_mask_indices(image,self.testing,self.num_labels,self.known_labels)
        
        mask = labels.clone()
        mask.scatter_(0,torch.Tensor(unk_mask_indices).long() , -1)

        sample = {}
        sample['image'] = image
        sample['labels'] = labels
        sample['mask'] = mask
        sample['imageIDs'] = image_ID
        return sample




In [5]:

from skimage import io, transform

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from pdb import set_trace as stop


import warnings
warnings.filterwarnings("ignore")


def get_data(args):
    dataset = args.dataset
    data_root=args.dataroot
    batch_size=args.batch_size

    rescale=args.scale_size
    random_crop=args.crop_size
    attr_group_dict=args.attr_group_dict
    workers=args.workers
    n_groups=args.n_groups

    normTransform = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    scale_size = rescale
    crop_size = random_crop
    if args.test_batch_size == -1:
        args.test_batch_size = batch_size
    
    trainTransform = transforms.Compose([transforms.Resize((scale_size, scale_size)),
                                        transforms.RandomChoice([
                                        transforms.RandomCrop(640),
                                        transforms.RandomCrop(576),
                                        transforms.RandomCrop(512),
                                        transforms.RandomCrop(384),
                                        transforms.RandomCrop(320)
                                        ]),
                                        transforms.Resize((crop_size, crop_size)),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        normTransform])

    testTransform = transforms.Compose([transforms.Resize((scale_size, scale_size)),
                                        transforms.CenterCrop(crop_size),
                                        transforms.ToTensor(),
                                        normTransform])

    test_dataset = None
    test_loader = None
    drop_last = False
    if dataset == 'coco':
        coco_root = os.path.join(data_root,'coco')
        ann_dir = data_root
        train_img_root = data_root
        test_img_root = data_root
        
        val_data_name = 'val_test.data'
        
        
        valid_dataset = Coco80Dataset(split='val',
            num_labels=args.num_labels,
            data_file=os.path.join(data_root,val_data_name),
            img_root=test_img_root,
            annotation_dir=ann_dir,
            max_samples=args.max_samples,
            transform=testTransform,
            known_labels=args.test_known_labels,
            testing=True)

   

    
    if valid_dataset is not None:
        valid_loader = DataLoader(valid_dataset, batch_size=args.test_batch_size,shuffle=False, num_workers=workers)
   

    return valid_loader

valid_loader=get_data(args)

In [6]:
import torch
import torch.nn as nn
import argparse,math,numpy as np

from models import CTranModel

from config_args import get_args
import utils.evaluate as evaluate
import utils.logger as logger
from pdb import set_trace as stop
from optim_schedule import WarmupLinearSchedule
from run_epoch import run_epoch



print('Labels: {}'.format(args.num_labels))
print('Train Known: {}'.format(args.train_known_labels))
print('Test Known:  {}'.format(args.test_known_labels))




model = CTranModel(args.num_labels,args.use_lmt,args.pos_emb,args.layers,args.heads,args.dropout,args.no_x_features)
print(model.self_attn_layers)


def load_saved_model(saved_model_name,model):
    checkpoint = torch.load('/home/cougarnet.uh.edu/aawasth3/C-Tran/results/coco.3layer.bsz_16.adam1e-05.lmt.unk_loss/best_model.pt')
    model.load_state_dict(checkpoint['state_dict'])
    return model
#device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
#print(args.model_name)
if torch.cuda.device_count() > 1:
    #print("Using", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model,device_ids=[0])

model = model.cuda()



Labels: 14
Train Known: 0
Test Known:  0
ModuleList(
  (0): SelfAttnLayer(
    (transformer_layer): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=2048, out_features=2048, bias=True)
      )
      (linear1): Linear(in_features=2048, out_features=2048, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (linear2): Linear(in_features=2048, out_features=2048, bias=True)
      (norm1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0.1, inplace=False)
      (dropout2): Dropout(p=0.1, inplace=False)
    )
  )
  (1): SelfAttnLayer(
    (transformer_layer): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=2048, out_features=2048, bias=True)
      )
      (linear1): Linear(in_features=2048, out_features=2048, bias=True)


In [7]:

model = load_saved_model(args.saved_model_name,model)
   
data_loader =valid_loader
   
    
all_preds,all_targs,all_masks,all_ids,test_loss,test_loss_unk = run_epoch(args,model,data_loader,None,1,'Testing')
    #test_metrics = evaluate.compute_metrics(args,all_preds,all_targs,all_masks,test_loss,test_loss_unk,0,args.test_known_labels)

 

                                                  

In [8]:
x=all_preds

In [9]:
for o in range(len(x)):
          for n in range(14):
                if x[o][n]<0:
                    x[o][n]=0


In [10]:
for o in range(len(x)):
          for n in range(14):
                if x[o][n]>1:
                    x[o][n]=1

In [11]:
import pickle
with open('/home/cougarnet.uh.edu/aawasth3/Eye_Gaze_Research_Data_Set/images/jpg/train.dat', "rb") as f:
                 split_data=pickle.load(f)

In [12]:
import pickle 
# laod a pickle file
with open("/home/cougarnet.uh.edu/aawasth3/VidChapters/YouCook2/2summary_masked_one_disease_youcook2_asr_align_proc.pkl", "rb") as file:
    loaded_dictm = pickle.load(file)

# display the dictionary
print(loaded_dictm)

{'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d': {'text': ['Cardiomegaly', 'Pleural Effusion', 'Pneumothorax'], 'start': 2.7, 'end': 27.0}, 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0': {'text': ['normal heart and mediastinum.'], 'start': 1.3, 'end': 4.9}, '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd': {'text': ['normal heart and mediastinum.'], 'start': 3.0, 'end': 7.5}, '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0': {'text': ['right costophrenic angle is not completely included. normal heart', 'and mediastinum.'], 'start': 1.2, 'end': 8.4}, '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652': {'text': ['minimal haziness at the left lung base could be related to epicardial fat.', 'tortuous aorta.', 'moderate scoliosis.'], 'start': 7.7, 'end': 20.0}, '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29': {'text': ['Cardiomegaly', 'Pleural Effusion', 'thoracic spinal degeneration.', 'right ac joint arthritis.'], 'start': 1.7000000000000002, 'end': 16.5}, 'b4f219eb-0bdbf37a-c33e70a8-385ac112-2f4

In [13]:
for m in split_data:
    item = 1
    print(m['object'])
 
    if m['object'][0]==1:
        loaded_dictm[m['image_id']]['text']=['No Finding.']

[0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0]
[0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0]
[0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1]
[0, 0, 0, 0

In [14]:
dise=['No Finding.', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
               'Lung Lesion', 'Lung Opacity', 'Edema', 'Consolidation', 'Pneumonia',
               'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
               'Fracture', 'Support Devices']
for m in range(len(split_data)):
    item = 1
    print(x[m])
    indices = [i for i in range(len(x[m])) if x[m][i] == item]
    print(indices)
    s=[]
    for b in indices:
        s.append(dise[b])
    split_data[m]['object']=s
    

tensor([0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.])
[2, 5, 7]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[0]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[0]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[0]
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[]
tensor([0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.])
[2, 5, 10]
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[]
tensor([0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.])
[2, 4, 8]
tensor([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0.])
[4, 11]
tensor([0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])
[2, 5]
tensor([0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0.])
[2, 4, 5, 7, 8, 10]
tensor([0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0.])
[2, 4, 5, 7, 10]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
[0]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [15]:
split_data

[{'image_id': 'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d',
  'object': ['Cardiomegaly', 'Edema', 'Pneumonia'],
  'file_name': 'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d.jpg'},
 {'image_id': 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0',
  'object': ['No Finding.'],
  'file_name': 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0.jpg'},
 {'image_id': '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd',
  'object': ['No Finding.'],
  'file_name': '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd.jpg'},
 {'image_id': '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0',
  'object': ['No Finding.'],
  'file_name': '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0.jpg'},
 {'image_id': '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652',
  'object': [],
  'file_name': '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652.jpg'},
 {'image_id': '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29',
  'object': ['Cardiomegaly', 'Edema', 'Pleural Effusion'],
  'file_name': '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29.jpg'},
 {'ima

In [16]:
loaded_dictm

{'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d': {'text': ['Cardiomegaly',
   'Pleural Effusion',
   'Pneumothorax'],
  'start': 2.7,
  'end': 27.0},
 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0': {'text': ['No Finding.'],
  'start': 1.3,
  'end': 4.9},
 '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd': {'text': ['No Finding.'],
  'start': 3.0,
  'end': 7.5},
 '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0': {'text': ['No Finding.'],
  'start': 1.2,
  'end': 8.4},
 '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652': {'text': ['minimal haziness at the left lung base could be related to epicardial fat.',
   'tortuous aorta.',
   'moderate scoliosis.'],
  'start': 7.7,
  'end': 20.0},
 '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29': {'text': ['Cardiomegaly',
   'Pleural Effusion',
   'thoracic spinal degeneration.',
   'right ac joint arthritis.'],
  'start': 1.7000000000000002,
  'end': 16.5},
 'b4f219eb-0bdbf37a-c33e70a8-385ac112-2f486ffc': {'text': ['prominent heart.',
   'small amount of f

In [17]:

for f in split_data:
    loaded_dictm[f['image_id']]['text']=list(set(f['object']).union(set(loaded_dictm[f['image_id']]['text'])))
    

In [18]:

for f in split_data:
    #loaded_dictm[f['image_id']]['text']=list(set(f['object']).union(set(loaded_dictm[f['image_id']]['text'])))
    
    common_list = [x for x in f['object'] if x in loaded_dictm[f['image_id']]['text']]
    
    uncommon_list=[i for i in f['object'] if i not in loaded_dictm[f['image_id']]['text']] + [i for i in loaded_dictm[f['image_id']]['text'] if i not in f['object']]
    loaded_dictm[f['image_id']]['text']=uncommon_list+common_list

In [19]:
loaded_dictm['4c52c619-3d060093-a32dbfea-64bd3d30-081d8712']

{'text': ['aortic calcifications prominent pulmonary vessels.',
  'sternotomy wires.',
  'Cardiomegaly',
  'Lung Opacity',
  'Edema',
  'Pleural Effusion'],
 'start': 0.8,
 'end': 13.8}

In [20]:
loaded_dictm

{'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d': {'text': ['Pneumothorax',
   'Pleural Effusion',
   'Cardiomegaly',
   'Edema',
   'Pneumonia'],
  'start': 2.7,
  'end': 27.0},
 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0': {'text': ['No Finding.'],
  'start': 1.3,
  'end': 4.9},
 '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd': {'text': ['No Finding.'],
  'start': 3.0,
  'end': 7.5},
 '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0': {'text': ['No Finding.'],
  'start': 1.2,
  'end': 8.4},
 '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652': {'text': ['moderate scoliosis.',
   'tortuous aorta.',
   'minimal haziness at the left lung base could be related to epicardial fat.'],
  'start': 7.7,
  'end': 20.0},
 '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29': {'text': ['right ac joint arthritis.',
   'thoracic spinal degeneration.',
   'Cardiomegaly',
   'Edema',
   'Pleural Effusion'],
  'start': 1.7000000000000002,
  'end': 16.5},
 'b4f219eb-0bdbf37a-c33e70a8-385ac112-2f486ffc': {'text': ['

In [21]:


for p in loaded_dictm.items():
    print(p)
    for u in range(len(p[1]['text'])):
        
                
                if p[1]['text'][u][-1]=='.':
                      continue
                else:
                      loaded_dictm[p[0]]['text'][u]=p[1]['text'][u]+'.'
    loaded_dictm[p[0]]['text']=''.join(loaded_dictm[p[0]]['text'])



('ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d', {'text': ['Pneumothorax', 'Pleural Effusion', 'Cardiomegaly', 'Edema', 'Pneumonia'], 'start': 2.7, 'end': 27.0})
('eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0', {'text': ['No Finding.'], 'start': 1.3, 'end': 4.9})
('475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd', {'text': ['No Finding.'], 'start': 3.0, 'end': 7.5})
('2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0', {'text': ['No Finding.'], 'start': 1.2, 'end': 8.4})
('12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652', {'text': ['moderate scoliosis.', 'tortuous aorta.', 'minimal haziness at the left lung base could be related to epicardial fat.'], 'start': 7.7, 'end': 20.0})
('75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29', {'text': ['right ac joint arthritis.', 'thoracic spinal degeneration.', 'Cardiomegaly', 'Edema', 'Pleural Effusion'], 'start': 1.7000000000000002, 'end': 16.5})
('b4f219eb-0bdbf37a-c33e70a8-385ac112-2f486ffc', {'text': ['prominent heart.', 'small amount of fluid in the right

In [22]:
loaded_dictm

{'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d': {'text': 'Pneumothorax.Pleural Effusion.Cardiomegaly.Edema.Pneumonia.',
  'start': 2.7,
  'end': 27.0},
 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0': {'text': 'No Finding.',
  'start': 1.3,
  'end': 4.9},
 '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd': {'text': 'No Finding.',
  'start': 3.0,
  'end': 7.5},
 '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0': {'text': 'No Finding.',
  'start': 1.2,
  'end': 8.4},
 '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652': {'text': 'moderate scoliosis.tortuous aorta.minimal haziness at the left lung base could be related to epicardial fat.',
  'start': 7.7,
  'end': 20.0},
 '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29': {'text': 'right ac joint arthritis.thoracic spinal degeneration.Cardiomegaly.Edema.Pleural Effusion.',
  'start': 1.7000000000000002,
  'end': 16.5},
 'b4f219eb-0bdbf37a-c33e70a8-385ac112-2f486ffc': {'text': 'prominent heart.small amount of fluid in the right minor fissure.subtle per

In [23]:
cd /home/cougarnet.uh.edu/aawasth3/VidChapters

/home/cougarnet.uh.edu/aawasth3/VidChapters


In [24]:
import argparse
import os

PRESAVE_DIR = "/home/cougarnet.uh.edu/aawasth3/VidChapters/"
MODEL_DIR = "/home/cougarnet.uh.edu/aawasth3/VidChapters/"
DATA_DIR = "/home/cougarnet.uh.edu/aawasth3/VidChapters/"
SSD_DIR = "/home/cougarnet.uh.edu/aawasth3/VidChapters/"
NLTK_FOLDER = "/home/cougarnet.uh.edu/aawasth3/VidChapters/"
name2folder = {
    "youcook": "YouCook2",
    "htm": "howto100m",
    "chapters": "AllChapters",
    "vitt": "ViTT"
}


args={
    # Dataset specific
    "save_dir":"CoRaX_Somechecks3",
  "combine_datasets":['youcook'],
      
   
    "combine_datasets_val":['youcook'],
        

    
       
   
    "youcook_features_path":os.path.join(DATA_DIR, name2folder["youcook"], "clipvitl14.pth"),
    
  
    "youcook_train_json_path":os.path.join(DATA_DIR, name2folder["youcook"], "val.json"),
   
    
    "youcook_val_json_path":os.path.join(DATA_DIR, name2folder["youcook"], "val.json"),
    
    
    "youcook_subtitles_path":os.path.join(DATA_DIR, name2folder["youcook"], "newyoucook2_asr_align_proc.pkl"),
    

    
   

    
    "denoising":1.,
   "generative":1.,
   "genasr":False,
    "random":False,
    "mask_prob":0.25,
       
    'mask_len':5,
    "lr":3e-4,
    "beta1":0.9,
    "beta2":0.999,
    
    "batch_size":1,
    
    "batch_size_val":1,
    "weight_decay":0, 
    "epochs":20, 
    "optimizer":"adam",
    
        "label_smoothing":0.1,
        "clip_max_norm":1., 
   
        "schedule":'cosine_with_warmup',
       
    
        "fraction_warmup_steps":0.1,
        
        "eval_skip":1,
       
        "print_freq":100,
        

    # Run specific
    
        
   
        "presave_dir":PRESAVE_DIR,
        "device":"cuda",
    "seed":0, 
    
        "load":'/home/cougarnet.uh.edu/aawasth3/VidChapters/youcook_exp/best_model.pth' ,
        
    
        "resume":False,
        
    
        "start-epoch":0,
    
    "eval":True,
    "num_workers":0, 
    
    
        "world-size":1,
    
        "dist-url":"env://",
    
        "model_name":"t5-base",
        
    
    
        "bert_name":"bert-base-uncased",
        
        "text_encoder_dropout":0.1, 
    
        "text_decoder_dropout":0.1,
    
        "visual_encoder_dropout":0.1, 
        "max_feats":100,
        
        "features_dim":768,
        
   
        "embedding_dim":768,
        
   
        "mlp_dim":2048,
        
    
        "depth":12,
        
    
        "heads":12,
        
        "num_bins":100,
        
    
        "use_video":True,
        
    
        "use_speech":True,
       
   
        "max_input_tokens":1000,
        
        
   
        "max_output_tokens":256,
        
        
    
        "num_beams":4,
        
        
    
        "length_penalty":1.0,
        
       
        "repetition_penalty":1.0,
        
        
        "top_p":0.9,
        
        "blip2_model_name":"pretrain_flant5xl_vitL",
        
        "resolution":224,
        
        "video_example":'',
        
  
        "asr_example":''

}
import pandas as pd
args=pd.Series(args)

In [25]:
for y in loaded_dictm.items():
    loaded_dictm[y[0]]['start']=[loaded_dictm[y[0]]['start']]
    loaded_dictm[y[0]]['end']=[loaded_dictm[y[0]]['end']]
    loaded_dictm[y[0]]['text']=[loaded_dictm[y[0]]['text']]

In [26]:
for y in loaded_dictm.items():
    try:
        if loaded_dictm[y[0]]['text'][0][-1]!='.':
              print(loaded_dictm[y[0]]['text'][0])
    except:
        print(y[0])


fdec7e65-a00757e3-4872655f-ee15457b-8da7f362


In [27]:
loaded_dictm['fdec7e65-a00757e3-4872655f-ee15457b-8da7f362']['text']=['No Finding.']

In [28]:
loaded_dictm['fdec7e65-a00757e3-4872655f-ee15457b-8da7f362']

{'text': ['No Finding.'], 'start': [4.2], 'end': [5.0]}

In [29]:
loaded_dictm

{'ab2e5ae7-0c8d8980-ddbe5385-6330431b-3981574d': {'text': ['Pneumothorax.Pleural Effusion.Cardiomegaly.Edema.Pneumonia.'],
  'start': [2.7],
  'end': [27.0]},
 'eb9c34a4-dcffac67-d7c42490-bd8699b6-d00422e0': {'text': ['No Finding.'],
  'start': [1.3],
  'end': [4.9]},
 '475c058c-33113f5b-59278e0c-9d7a622c-35b1a3dd': {'text': ['No Finding.'],
  'start': [3.0],
  'end': [7.5]},
 '2bb86823-45f8d22b-81f8d920-9a1be431-aa5e4be0': {'text': ['No Finding.'],
  'start': [1.2],
  'end': [8.4]},
 '12de11b6-1c0e25cb-8e8f55ed-01017e20-0df81652': {'text': ['moderate scoliosis.tortuous aorta.minimal haziness at the left lung base could be related to epicardial fat.'],
  'start': [7.7],
  'end': [20.0]},
 '75e3199d-ed90d1f2-5816b72a-06302006-b7e57d29': {'text': ['right ac joint arthritis.thoracic spinal degeneration.Cardiomegaly.Edema.Pleural Effusion.'],
  'start': [1.7000000000000002],
  'end': [16.5]},
 'b4f219eb-0bdbf37a-c33e70a8-385ac112-2f486ffc': {'text': ['prominent heart.small amount of fluid 

In [30]:
cd /home/cougarnet.uh.edu/aawasth3/VidChapters/

/home/cougarnet.uh.edu/aawasth3/VidChapters


In [31]:
import os
import torch as th
from torch.utils.data import Dataset
import json
import pickle
import numpy as np
from util.t5 import create_sentinel_ids, filter_input_ids, random_spans_noise_mask


class DenseVideoCaptioning_Dataset(Dataset):
    def __init__(
        self,
        json_path,
        features_path,
        max_feats=100,
        features_dim=768,
        tokenizer=None,
        subtitles_path=None,
        num_bins=100,
        max_input_tokens=1000,
        max_output_tokens=256,
        noise_density=0.25,
        mean_noise_span_length=5,
    ):
        self.data = json.load(open(json_path, 'r'))
        self.vids = list(self.data.keys())
        self.features = None
        self.features_path = None
        if os.path.isdir(features_path):
            self.features_path = features_path
        else:
            print(features_path)
            self.features = th.load(features_path)
        self.max_feats = max_feats
        self.features_dim = features_dim
        self.tokenizer = tokenizer
        
        
        self.subs = loaded_dictm
        
        self.num_bins = num_bins
        self.max_input_tokens = max_input_tokens
        self.max_output_tokens = max_output_tokens
        self.num_text_tokens = len(tokenizer) - num_bins
        self.noise_density = noise_density
        self.mean_noise_span_length = mean_noise_span_length

    def __len__(self):
        return len(self.data)

    def _get_text(self, text):
        text = text.strip()
        text = text.capitalize()
        #print(text)
        if text[-1] != '.':
                text = text + '.'
        
        return text

    def _get_video(self, video_id):
        if self.features is not None:
            assert video_id in self.features, video_id
            video = self.features[video_id].float()
        else:
            features_path = os.path.join(self.features_path, video_id + '.mp4.npy')
            if not os.path.exists(features_path):
                features_path = os.path.join(self.features_path, video_id + '.npy')
            assert os.path.exists(features_path), features_path
            video = th.from_numpy(np.load(features_path)).float()

        if len(video) > self.max_feats:
            sampled = []
            for j in range(self.max_feats):
                sampled.append(video[(j * len(video)) // self.max_feats])
            video = th.stack(sampled)
            video_len = self.max_feats
        elif len(video) < self.max_feats:
            video_len = len(video)
            video = th.cat(
                [video, th.zeros(self.max_feats - video_len, self.features_dim)], 0
            )
        else:
            video_len = self.max_feats

        return video

    def time_tokenize(self, x, duration, num_bins):

        time_token = int(float((num_bins - 1) * x) / float(duration))
        
        #print(time_token,self.num_bins)
        if time_token >= self.num_bins:
             print("error")
             print(self.num_bins,x,duration)
             assert time_token <= self.num_bins,duration
        return time_token + self.num_text_tokens

    def __getitem__(self, idx):
        video_id = self.vids[idx]
        annotations = self.data[video_id]
        video = self._get_video(video_id)
        duration = annotations["duration"]

        # get subtitles
        
        if (self.subs is not None and video_id in self.subs):
            sub = self.subs[video_id]
        else:
            print(video_id)

        to_keep = [(x >= 0 and y <= duration) for x, y in zip(sub["start"], sub["end"])]
        if not any(to_keep):  # no subtitles
            input_tokens = (th.ones(1) * self.tokenizer.eos_token_id).long()
        else:
            sub["start"] = [x for i, x in enumerate(sub["start"]) if to_keep[i]]
            sub["end"] = [x for i, x in enumerate(sub["end"]) if to_keep[i]]
            sub['text'] = [self._get_text(x) for i, x in enumerate(sub['text']) if to_keep[i]]
            time_input_tokens = [th.LongTensor([self.time_tokenize(st, duration, self.num_bins),
                                                self.time_tokenize(ed, duration, self.num_bins)])
                                 for st, ed in zip(sub['start'], sub['end'])]
            text_input_tokens = [self.tokenizer(x, add_special_tokens=False, max_length=self.max_input_tokens,
                                                padding="do_not_pad", truncation=True, return_tensors="pt",)['input_ids'][0]
                                 for x in sub['text']]
            input_tokens = [th.cat([ti, te], 0) for ti, te in zip(time_input_tokens, text_input_tokens)]
            input_tokens = th.cat(input_tokens, 0)
            input_tokens = input_tokens[:self.max_input_tokens - 1]
            input_tokens = th.cat([input_tokens, th.LongTensor([self.tokenizer.eos_token_id])], 0)
        
        # denoising sequence
        if len(input_tokens) > 1:
            mask_indices = np.asarray(
                [random_spans_noise_mask(len(input_tokens), self.noise_density, self.mean_noise_span_length)])
            labels_mask = ~mask_indices

            input_ids_sentinel = create_sentinel_ids(mask_indices.astype(np.int8), self.tokenizer, self.num_bins)
            labels_sentinel = create_sentinel_ids(labels_mask.astype(np.int8), self.tokenizer, self.num_bins)

            denoising_output_tokens = th.from_numpy(
                filter_input_ids(input_tokens.unsqueeze(0).numpy(), labels_sentinel, self.tokenizer)).squeeze(0)
            denoising_input_tokens = th.from_numpy(
                filter_input_ids(input_tokens.unsqueeze(0).numpy(), input_ids_sentinel, self.tokenizer)).squeeze(0)
        else:
            input_tokens = th.LongTensor([self.tokenizer.eos_token_id])
            denoising_input_tokens = th.LongTensor([0])
            denoising_output_tokens = input_tokens

        # dvc/vcg sequence
        captions = [self._get_text(x) for x in annotations['sentences']]
        time_output_tokens = [th.LongTensor([self.time_tokenize(st, duration, self.num_bins),
                                             self.time_tokenize(ed, duration, self.num_bins)])
                              for st, ed in annotations['timestamps']]
        text_output_tokens = [self.tokenizer(x, add_special_tokens=False, max_length=self.max_output_tokens,
                                             padding="do_not_pad", truncation=True, return_tensors="pt",)['input_ids'][0]
                              for x in captions]
        output_tokens = [th.cat([ti, te], 0) for ti, te in zip(time_output_tokens, text_output_tokens)]
        
                 
              
        output_tokens = th.cat(output_tokens, 0)
        output_tokens = output_tokens[:self.max_output_tokens - 1]
        output_tokens = th.cat([output_tokens, th.LongTensor([self.tokenizer.eos_token_id])], 0)

        return {
            "video_id": video_id,
            "duration": duration,
            "video": video,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "denoising_input_tokens": denoising_input_tokens,
            "denoising_output_tokens": denoising_output_tokens,
        }


def densevideocaptioning_collate_fn(batch):
    bs = len(batch)
    video_id = [batch[i]["video_id"] for i in range(bs)]
    duration = [batch[i]["duration"] for i in range(bs)]
    video = th.stack([batch[i]["video"] for i in range(bs)])
    input_tokens = [batch[i]["input_tokens"] for i in range(bs)]
    max_input_len = max(len(x) for x in input_tokens)
    for i in range(bs):
        if len(input_tokens[i]) < max_input_len:
            input_tokens[i] = th.cat([input_tokens[i], th.zeros(max_input_len - len(input_tokens[i])).long()], 0)
    input_tokens = th.stack(input_tokens)
    output_tokens = [batch[i]["output_tokens"] for i in range(bs)]
    max_output_len = max(len(x) for x in output_tokens)
    for i in range(bs):
        if len(output_tokens[i]) < max_output_len:
            output_tokens[i] = th.cat([output_tokens[i], th.zeros(max_output_len - len(output_tokens[i])).long()], 0)
    output_tokens = th.stack(output_tokens)
    denoising_input_tokens = [batch[i]["denoising_input_tokens"] for i in range(bs)]
    max_input_len = max(len(x) for x in denoising_input_tokens)
    for i in range(bs):
        if len(denoising_input_tokens[i]) < max_input_len:
            denoising_input_tokens[i] = th.cat(
                [denoising_input_tokens[i], th.zeros(max_input_len - len(denoising_input_tokens[i])).long()], 0)
    denoising_input_tokens = th.stack(denoising_input_tokens)
    denoising_output_tokens = [batch[i]["denoising_output_tokens"] for i in range(bs)]
    max_denoising_output_len = max(len(x) for x in denoising_output_tokens)
    for i in range(bs):
        if len(denoising_output_tokens[i]) < max_denoising_output_len:
            denoising_output_tokens[i] = th.cat([denoising_output_tokens[i], th.zeros(
                max_denoising_output_len - len(denoising_output_tokens[i])).long()], 0)
    denoising_output_tokens = th.stack(denoising_output_tokens)
    out = {
        "video_id": video_id,
        "duration": duration,
        "video": video,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "denoising_input_tokens": denoising_input_tokens,
        "denoising_output_tokens": denoising_output_tokens,
    }
    return out


def build_densevideocaptioning_dataset(dataset_name, split, args, tokenizer):
    if dataset_name == "youcook":
        if split == "train":
            json_path = args.youcook_train_json_path
        elif split == "val":
            json_path = args.youcook_train_json_path
        else:
            raise NotImplementedError
        features_path = args.youcook_features_path
        subtitles_path = args.youcook_subtitles_path
    elif dataset_name == "vitt":
        if split == "train":
            json_path = args.vitt_train_json_path
        elif split == "val":
            json_path = args.vitt_val_json_path
        elif split == "test":
            json_path = args.vitt_test_json_path
        else:
            raise NotImplementedError
        features_path = args.vitt_features_path
        subtitles_path = args.vitt_subtitles_path
    elif dataset_name == "chapters":
        if split == "train":
            json_path = args.chapters_train_json_path
        elif split == "val":
            json_path = args.chapters_val_json_path
        elif split == "test":
            json_path = args.chapters_test_json_path
        else:
            raise NotImplementedError
        features_path = args.chapters_features_path
        subtitles_path = args.chapters_subtitles_path
    else:
        raise NotImplementedError
    return DenseVideoCaptioning_Dataset(json_path=json_path,
                                        features_path=features_path,
                                        max_feats=args.max_feats,
                                        features_dim=args.features_dim,
                                        tokenizer=tokenizer,
                                        subtitles_path=subtitles_path,
                                        num_bins=args.num_bins,
                                        max_input_tokens=args.max_input_tokens,
                                        max_output_tokens=args.max_output_tokens)


In [32]:
import os
import torch
import numpy as np
import random
import json
import math
import sys
from typing import Iterable
import argparse
import time
import datetime
import re
from util import dist
from torch.utils.data import DataLoader, DistributedSampler
from collections import namedtuple
from functools import reduce

#from dataset import densevideocaptioning_collate_fn, build_densevideocaptioning_dataset, build_yt_dataset, yt_collate_fn
from model import build_vid2seq_model, _get_tokenizer

from util.misc import adjust_learning_rate
from util.metrics import MetricLogger
from dvc_eval import eval_dvc, eval_soda




@torch.no_grad()
def evaluate(
    model: torch.nn.Module,
    data_loader,
    device: torch.device,
    args,
    split="test",
    dataset_name="chapters"
):
    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = f"{split}:"

    res = {}

    for i_batch, batch_dict in enumerate(
        metric_logger.log_every(data_loader, args.print_freq, header)
    ):
        duration = batch_dict["duration"]
        video = batch_dict["video"].to(device)
        if "input_tokens" not in batch_dict and args.use_speech:
            input_tokens = torch.ones((video.shape[0], 1)).long().to(device)
            input_tokenized = {'input_ids': input_tokens,
                               'attention_mask': input_tokens != 0}
        elif "input_tokens" in batch_dict:
            input_tokens = batch_dict["input_tokens"].to(device)
            input_tokenized = {'input_ids': input_tokens,
                               'attention_mask': input_tokens != 0}
        else:
            input_tokenized = {'input_ids': None,
                               'attention_mask': None}

        output = model.generate(video=video,
                                input_tokenized=input_tokenized,
                                use_nucleus_sampling=args.num_beams == 0,
                                num_beams=args.num_beams,
                                max_length=args.max_output_tokens,
                                min_length=1,
                                top_p=args.top_p,
                                repetition_penalty=args.repetition_penalty,
                                length_penalty=args.length_penalty,
                                num_captions=1,
                                temperature=1)
        for i, vid in enumerate(batch_dict["video_id"]):
            sequences = re.split(r'(?<!<)\s+(?!>)', output[i]) # "<time=5> <time=7> Blablabla <time=7> <time=9> Blobloblo <time=2>" -> ['<time=5>', '<time=7>', 'Blablabla', '<time=7>', '<time=9>', 'Blobloblo', '<time=2>']
            indexes = [j for j in range(len(sequences) - 1) if sequences[j][:6] == '<time=' and sequences[j + 1][:6] == '<time=']
            last_processed = -2
            res[vid] = []
            for j, idx in enumerate(indexes):  # iterate on predicted events
                if idx == last_processed + 1:  # avoid processing 3 time tokens in a row as 2 separate events
                    continue
                seq = [sequences[k] for k in range(idx + 2, indexes[j + 1] if j < len(indexes) - 1 else len(sequences)) if sequences[k] != '<time=']
                if seq:
                    text = ' '.join(seq)
                else:  # no text
                    continue
                start_re = re.search(r'\<time\=(\d+)\>', sequences[idx])
                assert start_re, sequences[idx]
                start_token = int(start_re.group(1))
                start = float(start_token) * float(duration[i]) / float(args.num_bins - 1)
                end_re = re.search(r'\<time\=(\d+)\>', sequences[idx + 1])
                assert end_re, sequences[idx + 1]
                end_token = int(end_re.group(1))
                end = float(end_token) * float(duration[i]) / float(args.num_bins - 1)
                if end <= start:  # invalid time
                    continue
                res[vid].append({'sentence': text,
                                 'timestamp': [start,
                                               end]})
                last_processed = idx

    all_res = dist.all_gather(res)
    results = reduce(lambda a, b: a.update(b) or a, all_res, {})
    assert len(results) == len(data_loader.dataset)
    metrics = {}
    if dist.is_main_process():
        if args.save_dir:
            pred_path = os.path.join(args.save_dir, dataset_name + f"_{split}_preds.json",)
            json.dump({'results': results}, open(pred_path, "w",))
        else:
            pred_path = {'results': results}
        if dataset_name == "youcook":
            references = [args.youcook_val_json_path]
        elif dataset_name == "vitt":
            references = [args.vitt_val_json_path if split == "val" else args.vitt_test_json_path]
        elif dataset_name == "chapters":
            references = [args.chapters_val_json_path if split == "val" else args.chapters_test_json_path]
        else:
            raise NotImplementedError
        metrics.update(eval_dvc(pred_path, references, tious=[0.3, 0.5, 0.7, 0.9], max_proposals_per_video=1000, verbose=False, no_lang_eval=True))
        metrics.update(eval_soda(pred_path, references, verbose=False))
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")

    metrics = dist.all_gather(metrics)
    metrics = reduce(lambda a, b: a.update(b) or a, metrics, {})

    return metrics

    
  


def main(args):
    # Init distributed mode
    dist.init_distributed_mode(args)

    if dist.is_main_process():
        if args.save_dir and not (os.path.isdir(args.save_dir)):
            os.makedirs(os.path.join(args.save_dir), exist_ok=True)
        print(args)

    device = torch.device(args.device)

    # Fix seeds
    #seed = args.seed + dist.get_rank()
    #torch.manual_seed(seed)
    #np.random.seed(seed)
    #random.seed(seed)

    # Build model
    tokenizer = _get_tokenizer(args.model_name, args.num_bins)

    nt = namedtuple(
        typename="data",
        field_names=[
            "dataset_name",
            "dataloader_val",
            "dataloader_train",
            "dataloader_test",
        ],
    )

    tuples = []
    for dset_name in args.combine_datasets:
        dataloader_val = None
        dataloader_test = None
        print(dset_name)
        if dset_name in args.combine_datasets_val:
            dataset_val = build_densevideocaptioning_dataset(dset_name, "val", args, tokenizer)
            sampler_val = (
                DistributedSampler(dataset_val, shuffle=False)
                if args.distributed
                else torch.utils.data.SequentialSampler(dataset_val)
            )
            dataloader_val = DataLoader(
                dataset_val,
                batch_size=args.batch_size_val,
                sampler=sampler_val,
                collate_fn=densevideocaptioning_collate_fn,
                num_workers=args.num_workers,
            )
            
            dataloader_test = dataloader_val

       
        dataloader_train = None

        tuples.append(
            nt(
                dataset_name=dset_name,
                dataloader_test=dataloader_test,
                dataloader_val=dataloader_val,
                dataloader_train=dataloader_train,
            )
        )

    model = build_vid2seq_model(args, tokenizer)
    model.to(device)
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    if dist.is_main_process():
        print("number of params:", n_parameters)
    # print(model)

    # Set up optimizer
    params_for_optimization = list(p for p in model.parameters() if p.requires_grad)
    optimizer = torch.optim.Adam(
        params_for_optimization,
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        weight_decay=args.weight_decay,
    )

    # Load pretrained checkpoint
    if args.load:
        if dist.is_main_process():
            print("loading from", args.load)
        checkpoint = torch.load(args.load, map_location="cpu")
        model.load_state_dict(checkpoint["model"], strict=False)
        if args.resume and not args.eval:
            optimizer.load_state_dict(checkpoint["optimizer"])
            args.start_epoch = checkpoint["epoch"] + 1

    for i, item in enumerate(tuples):
        print(item.dataloader_test)
        out = evaluate(
            model=model,
            data_loader=item.dataloader_test,
            device=device,
            dataset_name=item.dataset_name,
            args=args,
            split="test",
        )

        return out



        args.save_dir = os.path.join(args.presave_dir, args.save_dir)
#args.model_name = os.path.join(os.environ["TRANSFORMERS_CACHE"], args.model_name)
x=main(args)


ERROR:root:No token file found. Also make sure that a [prod] section with a 'token = value' assignment exists.


Not using distributed mode
save_dir                                                   CoRaX_Somechecks3
combine_datasets                                                   [youcook]
combine_datasets_val                                               [youcook]
youcook_features_path      /home/cougarnet.uh.edu/aawasth3/VidChapters/Yo...
youcook_train_json_path    /home/cougarnet.uh.edu/aawasth3/VidChapters/Yo...
                                                 ...                        
top_p                                                                    0.9
blip2_model_name                                      pretrain_flant5xl_vitL
resolution                                                               224
video_example                                                               
asr_example                                                                 
Length: 61, dtype: object
youcook
/home/cougarnet.uh.edu/aawasth3/VidChapters/YouCook2/clipvitl14.pth
number of params: 289204

PTBTokenizer tokenized 5168 tokens at 78243.88 tokens per second.
PTBTokenizer tokenized 4013 tokens at 59763.41 tokens per second.


avg. outputs: 3.7121771217712176
Recall@0.3: 0.6811
Precision@0.3: 0.5632
F1@0.3: 0.6166
Recall@0.5: 0.4039
Precision@0.5: 0.3414
F1@0.5: 0.3701
Recall@0.7: 0.2231
Precision@0.7: 0.2036
F1@0.7: 0.2129
Recall@0.9: 0.1161
Precision@0.9: 0.1163
F1@0.9: 0.1162
Recall: 0.3561
Precision: 0.3061
F1: 0.3289
Recall@1s: 0.8085
Precision@1s: 0.5582
F1@1s: 0.6604
Recall@3s: 0.9463
Precision@3s: 0.7768
F1@3s: 0.8532
Recall@5s: 0.9815
Precision@5s: 0.8681
F1@5s: 0.9213
Recall@10s: 0.9951
Precision@10s: 0.9590
F1@10s: 0.9767
Recall@30s: 0.9963
Precision@30s: 0.9963
F1@30s: 0.9963
Recall@60s: 0.9963
Precision@60s: 0.9963
F1@60s: 0.9963
soda_c: 0.2400


Bleu_1: 0.5858
Bleu_2: 0.5807
Bleu_3: 0.0406
Bleu_4: 0.0329

In [33]:
with open('/home/cougarnet.uh.edu/aawasth3/VidChapters/'+args.save_dir+"/youcook2_asr_align_proc.pkl", "wb") as file:
    pickle.dump(loaded_dictm, file, pickle.HIGHEST_PROTOCOL)

In [36]:
            

pathp='/home/cougarnet.uh.edu/aawasth3/VidChapters/'+args.save_dir+'/youcook_test_preds.json'
import json

def js_r(filename: str):
    with open(filename) as f_in:
        return json.load(f_in)
    
pred=js_r(pathp)


In [37]:
post=[]
for i in pred['results'].items():
    predcheck=[]
    for j in i[1]:
        predcheck.append(j['sentence'][:-1].lower())
    inputcheck=list(map(lambda x: x.lower(),loaded_dictm[i[0]]['text'][0].split('.')[:-1]))
    dele=list(set(predcheck)-set(inputcheck))
    if len(dele)>0:
            if 'finding' not in dele[0]:
                print(dele,i[0])
                if 'enlarged heart' not in dele[0]:
                        post.append((dele,i[0]))

['normal heart'] 2f9f452c-bd8170e0-d4cfe79e-fbf20077-0df42d35
['normal heart'] abb11c17-b8d815cf-fba388b3-0ee5a930-b53ad1a2
['pneumothorax'] e5f5c090-09b7a708-82fcdee4-9f869d74-ca3d40a8
['normal heart'] a7719240-9ae3f703-c18562b2-e3bb20c6-1ab91a3c
['left costophrenic angle is not completely included'] 19fc9e56-b2cd2486-cbfd7926-ae2d4343-2228e959
['there may not be a right atrium lead'] 717ec7fd-02518aa0-87b618ec-e6c7683a-7d98b001
['pneumothorax'] 39d186d6-db28156b-c5b88dad-502fee98-d8e9726a
['lungs opacity'] 3886caca-be633d03-fc93f41c-7aea5d72-8097ccbf
['pneumothorax'] 4f69aa4e-728ec4e3-2638f625-f79c6003-b3fc8e94
['normal heart'] 21869e09-d73eea62-035063f1-2f271005-4eb55eba
['normal heart'] 8a17db34-6bcbb153-3e5bc2f7-6a343716-06c14275
['lungs'] e8d07bf0-e54bc1fa-29ec1625-720e906d-c627bfdb
['pneumothorax'] c77dd740-15e4a184-a44d8d70-71c71aff-d135609e
['lungs are clear'] 849b8c1b-882763ca-3031630a-5f84111c-bd517dad
['there is cephalization vessels with prominence of the small pulmonary v

In [38]:
count=0
for y in post:
    if len(y[0])==1:
        for l in range(len(pred['results'][y[1]])):
            if y[0][0].lower() in pred['results'][y[1]][l]['sentence'].lower():
                print(y[1],y[0][0])
                count=count+1
                pred['results'][y[1]].pop(l)
                break
                
    else:
        for yt in y[0]:
                for l in range(len(pred['results'][y[1]])):
                    if yt.lower() in pred['results'][y[1]][l]['sentence'].lower():
                        print(y[1])
                        count=count+1
                        pred['results'][y[1]].pop(l)
                        break

2f9f452c-bd8170e0-d4cfe79e-fbf20077-0df42d35 normal heart
abb11c17-b8d815cf-fba388b3-0ee5a930-b53ad1a2 normal heart
e5f5c090-09b7a708-82fcdee4-9f869d74-ca3d40a8 pneumothorax
a7719240-9ae3f703-c18562b2-e3bb20c6-1ab91a3c normal heart
19fc9e56-b2cd2486-cbfd7926-ae2d4343-2228e959 left costophrenic angle is not completely included
717ec7fd-02518aa0-87b618ec-e6c7683a-7d98b001 there may not be a right atrium lead
39d186d6-db28156b-c5b88dad-502fee98-d8e9726a pneumothorax
3886caca-be633d03-fc93f41c-7aea5d72-8097ccbf lungs opacity
4f69aa4e-728ec4e3-2638f625-f79c6003-b3fc8e94 pneumothorax
21869e09-d73eea62-035063f1-2f271005-4eb55eba normal heart
8a17db34-6bcbb153-3e5bc2f7-6a343716-06c14275 normal heart
e8d07bf0-e54bc1fa-29ec1625-720e906d-c627bfdb lungs
c77dd740-15e4a184-a44d8d70-71c71aff-d135609e pneumothorax
849b8c1b-882763ca-3031630a-5f84111c-bd517dad lungs are clear
48777fa2-80d72d3c-22727ca9-2678db6b-02fa73ff there is cephalization vessels with prominence of the small pulmonary vessels
a9cf9e

for i in pred['results'].items():
    predcheck=[]
    for j in i[1]:
        predcheck.append(j['sentence'][:-1].lower())
    inputcheck=list(map(lambda x: x.lower(),loaded_dictm[i[0]]['text'][0].split('.')[:-1]))
    dele=list(set(predcheck)-set(inputcheck))
    if len(dele)>0:
            if 'finding' not in dele[0]:
                for p in range(len(i[1])):
                    if dele[0] in i[1][p]['sentence'].lower():
                        #i[1].pop(p)
                        print(dele)
                    

In [39]:
args.save_dir

'CoRaX_Somechecks3'

In [40]:
import json

js = json.dumps(pred)
pat='/home/cougarnet.uh.edu/aawasth3/VidChapters/'+args.save_dir+'/youcook_test_pred_corr1.json'
# Open new json file if not exist it will create
fp = open(pat, 'a')

# write to json file
fp.write(js)

# close the connection
fp.close()

In [None]:
import pickle 
# laod a pickle file
with open("/home/cougarnet.uh.edu/aawasth3/VidChapters/YouCook2/2summary_masked_one_disease_youcook2_asr_align_proc.pkl", "rb") as file:
    loaded_dictm = pickle.load(file)

# display the dictionary
#print(loaded_dictmasked)

In [None]:
            
pathr='/home/cougarnet.uh.edu/aawasth3/VidChapters/YouCook2/val.json'
pathp='/home/cougarnet.uh.edu/aawasth3/VidChapters/CoRaX_uncomm_comm2check/youcook_test_pred_corr1.json'
import json

def js_r(filename: str):
    with open(filename) as f_in:
        return json.load(f_in)
    
pred=js_r(pathp)
real=js_r(pathr)
#pred=pred['results']
diseaselist=['cardiomegaly.','pleural effusion.','lung opacity.','edema.','atelectasis.','pneumonia.',
         'pneumothorax.']


for id,input in loaded_dictm.items():
        input=input['text']
        #print(input)
        for m in range(len(input)):
            if input[m][-1]!='.':
                input[m]=input[m]+'.'
            input[m]=input[m].lower()
        print(input)
        loaded_dictm[id]['text']=input
for u in real.items():
    final=[]
    for j in u[1]['sentences']:
        final.append(j.lower())
    real[u[0]]['sentences']=final
        
        

In [None]:
result={}
ght={}

for o in pred['results'].items():
        output=[]
        ght={}
        for k in o[1]:
            
             output.append(k['sentence'].lower())
             ght[k['sentence'].lower()]=k['timestamp']
        input=loaded_dictm[o[0]]['text']
        
        cp=list(set(output)-set(input))
        #print(input)
        #print(output)
        #print(set(cp))
        cp=list(set(cp).intersection(set(['cardiomegaly.','pleural effusion.','lung opacity.','atelectasis.','pneumonia.','pneumothorax.','edema.'])))
        print(cp)
        if len(cp)!=0:
                result[o[0]]={'diff':cp}
                for g in cp:
                       result[o[0]][g]=ght[g]
        else:
            result[o[0]]={'diff':'no suggetion'}



In [None]:
import json

js = json.dumps(pred)
pat='/home/cougarnet.uh.edu/aawasth3/VidChapters/'+args.save_dir+'/results_corax.json'
# Open new json file if not exist it will create
fp = open(pat, 'a')

# write to json file
fp.write(js)

# close the connection
fp.close()

train=js_r(path)

merge=train | val 

for i in loaded_dictm.items():
    try:
            if 'No Finding' in i[1]['text'][0]:
                print(i[1]['text'],i[0],val[i[0]]['sentences'])
    except:
            pass
        

merge

loaded_dictm

import pickle 
# laod a pickle file
with open("/home/cougarnet.uh.edu/aawasth3/VidChapters/YouCook2/deleted_some_cases_youcook2_asr_align_proc.pkl", "rb") as file:
    loaded_dictm = pickle.load(file)

# display the dictionary
print(loaded_dictm)

for m in split_data:
    item = 1
    print(m['object'])
 
    if m['object'][0]==1:
        loaded_dictm[m['image_id']]['text']=['No Finding.']
        loaded_dictm[m['image_id']]['start']=[loaded_dictm[m['image_id']]['start'][0]]
        loaded_dictm[m['image_id']]['end']=[loaded_dictm[m['image_id']]['end'][-1]]

dise=['No Finding.', 'Enlarged Cardiomediastinum.', 'Cardiomegaly.',
               'Lung Lesion.', 'Lung Opacity.', 'Edema.', 'Consolidation.', 'Pneumonia.',
               'Atelectasis.', 'Pneumothorax.', 'Pleural Effusion.', 'Pleural Other.',
               'Fracture.', 'Support Devices.']

split_data