In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import shutil
from tqdm import tqdm

In [2]:
# Dataset locations. Images live directly under the dataset root; the masks
# live in its `mask/` sub-directory. The generated list files are written to
# `data_list_dir`.
covidx_dir = '/media/ubuntu/MyHDataStor2/datasets/COVID-19/ICCV-MIA/'
covidx_img_dir = covidx_dir
covidx_mask_dir = '{}mask/'.format(covidx_dir)
data_list_dir = '/media/ubuntu/MyHDataStor2/products/COVID-19/ICCV-MAI/3D-CNN-BERT/BERT/datasets/settings/covid/'

# Echo the configured directories, one per line, so the cell output records them.
print(covidx_dir, covidx_img_dir, covidx_mask_dir, data_list_dir, sep='\n')

/media/ubuntu/MyHDataStor2/datasets/COVID-19/ICCV-MIA/
/media/ubuntu/MyHDataStor2/datasets/COVID-19/ICCV-MIA/
/media/ubuntu/MyHDataStor2/datasets/COVID-19/ICCV-MIA/mask/
/media/ubuntu/MyHDataStor2/products/COVID-19/ICCV-MAI/3D-CNN-BERT/BERT/datasets/settings/covid/


In [3]:
def load_labels_covidx(label_file):
    """Parse a label file into parallel per-slice lists.

    Every non-blank line of the file must hold exactly seven
    whitespace-separated fields::

        fname cls xmin ymin xmax ymax ratio

    Blank lines are ignored.

    Args:
        label_file: Path of the text file to read.

    Returns:
        A 5-tuple ``(fnames, classes, bboxes, ratios, lines)`` where

        * ``fnames``  -- the first field of every line (str),
        * ``classes`` -- the second field of every line (str),
        * ``bboxes``  -- ``(xmin, ymin, xmax, ymax)`` tuples of ints,
        * ``ratios``  -- the last field of every line as float,
        * ``lines``   -- the stripped, non-blank raw lines themselves.

        All five lists have the same length and the same order as the file.

    Raises:
        ValueError: If a non-blank line does not have exactly seven fields,
            or if a numeric field cannot be converted.
    """
    fnames, classes, bboxes, ratios = [], [], [], []

    # `with` guarantees the handle is released even if reading fails.
    with open(label_file, 'r') as fp:
        stripped = [x.strip() for x in fp]

    # Drop blank lines (e.g. a trailing empty line) instead of crashing on them.
    lines = [x for x in stripped if x]

    for line in lines:
        fields = line.split()
        if len(fields) != 7:
            raise ValueError(
                "expected 7 fields (fname cls xmin ymin xmax ymax ratio), "
                "got {}: {!r}".format(len(fields), line))
        fname, cls, xmin, ymin, xmax, ymax, ratio = fields
        fnames.append(fname)
        classes.append(cls)
        bboxes.append((int(xmin), int(ymin), int(xmax), int(ymax)))
        ratios.append(float(ratio))

    return fnames, classes, bboxes,ratios,lines

In [6]:
subsets = ['val','train']
split = 4

# Per-subset statistics collected while the list files are written
# (one entry per scan):
#   areas[subset]      -> area of the scan's merged bounding box divided by 512*512
#   slice_lens[subset] -> last selected slice position minus first selected position
areas = dict()
slice_lens = dict()

for subset in subsets:

    areas[subset] = []
    slice_lens[subset] = []

    label_file = covidx_dir+'{}_ICCV_MAI.txt'.format(subset)
    fnames, classes, bboxes,ratios,lines = load_labels_covidx(label_file)

    list_file =  data_list_dir + '{}_rgb_split{}.txt'.format(subset,split)
    print(list_file)

    # A label line looks like:
    #   train/covid/ct_scan_0/0.jpg covid 0 18 512 344 0.004721
    # i.e. <path> <class> <xmin> <ymin> <xmax> <ymax> <ratio>, where the third
    # path component is the scan id.
    #
    # Index the lines once by (class, scan id) so that each scan does not have
    # to rescan and re-split the whole label file.
    files_by_scan = dict()
    for x in lines:
        fields = x.split()
        scan_key = (fields[1], fields[0].split('/')[2])
        files_by_scan.setdefault(scan_key, []).append(x)

    # Unique scan ids per class, decided by whether the path mentions 'non-covid'.
    covid_scan_ids = set()
    non_covid_scan_ids = set()
    for f in fnames:
        if 'non-covid' in f:
            non_covid_scan_ids.add(f.split('/')[2])
        else:
            covid_scan_ids.add(f.split('/')[2])

    # Order scans by the integer after the last underscore (ct_scan_<N>).
    covid_scan_ids = sorted(covid_scan_ids, key = lambda x: int(x.split('_')[-1]))
    non_covid_scan_ids = sorted(non_covid_scan_ids, key = lambda x: int(x.split('_')[-1]))

    all_scan_ids = {"covid": covid_scan_ids, "non-covid": non_covid_scan_ids}

    # `with` closes the list file even if the loop raises or is interrupted.
    with open(list_file,'w') as fp:

        for c in all_scan_ids:

            # Wrapping the iterable lets tqdm close the bar when the loop ends,
            # so bars of successive classes/subsets do not stack up.
            for s in tqdm(all_scan_ids[c], desc='{} {}'.format(subset, c)):

                # Slices of this scan, ordered by the integer file stem (<N>.jpg).
                s_files = sorted(files_by_scan.get((c, s), []),
                                 key = lambda x: int(x.split()[0].split('/')[-1].split('.')[0]))

                if not s_files:
                    # The path says one class but the class column says another:
                    # there is nothing to select, so skip instead of crashing on max().
                    tqdm.write("skipping {}/{}/{}: no label lines".format(subset, c, s))
                    continue

                # Explicit array: the comparison below is element-wise.
                s_ratios = np.array([float(x.split()[-1]) for x in s_files])
                s_ratio_max = s_ratios.max()

                # Lower the relative threshold from 0.7 to 0.1 of the maximum ratio
                # until the selected slices span at least 8 positions. If no
                # threshold qualifies, the selection at 0.1 is used.
                for thresh_ind in range(7,0,-1):
                    thresh = thresh_ind/10
                    s_ind = np.where(s_ratios >= s_ratio_max*thresh)[0]

                    if len(s_ind)>=2 and s_ind[-1]-s_ind[0] >=8:
                        break

                # First/last selected positions in the sorted slice list
                # (positions, not the numbers in the file names).
                first, last = s_ind[0], s_ind[-1]

                # Merge the boxes of every slice in [first, last] into one box.
                selected = [x.split() for x in s_files[first:last+1]]
                xmin = int(min(float(p[2]) for p in selected))
                ymin = int(min(float(p[3]) for p in selected))
                xmax = int(max(float(p[4]) for p in selected))
                ymax = int(max(float(p[5]) for p in selected))

                slice_lens[subset].append(last-first)
                areas[subset].append((xmax-xmin)*(ymax-ymin)/512/512)

                # Output line: <scan dir> <class> <first> <last> <xmin> <ymin> <xmax> <ymax>
                s_dir = "{}/{}/{}".format(subset,c,s)
                line = "{} {} {} {} {} {} {} {}\n".format(s_dir,c,first,last,xmin,ymin,xmax,ymax)

                fp.write(line)
    
    





  0%|          | 0/165 [00:00<?, ?it/s][A[A[A


  2%|▏         | 4/165 [00:00<00:05, 32.00it/s][A[A[A

/media/ubuntu/MyHDataStor2/products/COVID-19/ICCV-MAI/3D-CNN-BERT/BERT/datasets/settings/covid/val_rgb_split4.txt





  4%|▍         | 7/165 [00:00<00:05, 28.69it/s][A[A[A


  6%|▌         | 10/165 [00:00<00:05, 27.01it/s][A[A[A


  8%|▊         | 13/165 [00:00<00:05, 25.81it/s][A[A[A


  9%|▉         | 15/165 [00:00<00:06, 22.89it/s][A[A[A


 10%|█         | 17/165 [00:00<00:06, 21.64it/s][A[A[A


 12%|█▏        | 20/165 [00:00<00:06, 22.33it/s][A[A[A


 14%|█▍        | 23/165 [00:00<00:06, 21.71it/s][A[A[A


 16%|█▌        | 26/165 [00:01<00:06, 22.08it/s][A[A[A


 18%|█▊        | 29/165 [00:01<00:06, 21.28it/s][A[A[A


 19%|█▉        | 32/165 [00:01<00:06, 21.70it/s][A[A[A


 21%|██        | 35/165 [00:01<00:05, 22.02it/s][A[A[A


 23%|██▎       | 38/165 [00:01<00:05, 21.39it/s][A[A[A


 25%|██▍       | 41/165 [00:01<00:05, 20.91it/s][A[A[A


 27%|██▋       | 44/165 [00:01<00:05, 21.48it/s][A[A[A


 28%|██▊       | 47/165 [00:02<00:05, 22.01it/s][A[A[A


 30%|███       | 50/165 [00:02<00:05, 21.15it/s][A[A[A


 32%|███▏      | 53/165 [00:02<00:05, 

/media/ubuntu/MyHDataStor2/products/COVID-19/ICCV-MAI/3D-CNN-BERT/BERT/datasets/settings/covid/train_rgb_split4.txt






  0%|          | 2/687 [00:00<00:59, 11.54it/s][A[A[A[A



  0%|          | 3/687 [00:00<01:18,  8.66it/s][A[A[A[A



  1%|          | 4/687 [00:00<01:34,  7.22it/s][A[A[A[A



  1%|          | 5/687 [00:00<01:41,  6.70it/s][A[A[A[A



  1%|          | 6/687 [00:00<01:47,  6.34it/s][A[A[A[A



  1%|          | 7/687 [00:01<01:52,  6.05it/s][A[A[A[A



  1%|          | 8/687 [00:01<01:54,  5.95it/s][A[A[A[A



  1%|▏         | 9/687 [00:01<01:55,  5.88it/s][A[A[A[A



  1%|▏         | 10/687 [00:01<01:56,  5.82it/s][A[A[A[A



  2%|▏         | 11/687 [00:01<01:56,  5.79it/s][A[A[A[A



  2%|▏         | 12/687 [00:01<01:58,  5.70it/s][A[A[A[A



  2%|▏         | 13/687 [00:02<01:59,  5.65it/s][A[A[A[A



  2%|▏         | 14/687 [00:02<02:02,  5.50it/s][A[A[A[A



  2%|▏         | 15/687 [00:02<02:02,  5.49it/s][A[A[A[A



  2%|▏         | 16/687 [00:02<02:01,  5.51it/s][A[A[A[A



  2%|▏         | 17/687 [00:02<02:04,  5.36i

 19%|█▊        | 128/687 [00:24<01:39,  5.61it/s][A[A[A[A



 19%|█▉        | 129/687 [00:24<01:40,  5.56it/s][A[A[A[A



 19%|█▉        | 130/687 [00:25<01:46,  5.23it/s][A[A[A[A



 19%|█▉        | 131/687 [00:25<01:50,  5.02it/s][A[A[A[A



 19%|█▉        | 132/687 [00:25<01:53,  4.89it/s][A[A[A[A



 19%|█▉        | 133/687 [00:25<01:55,  4.79it/s][A[A[A[A



 20%|█▉        | 134/687 [00:26<01:53,  4.89it/s][A[A[A[A



 20%|█▉        | 135/687 [00:26<01:50,  5.01it/s][A[A[A[A



 20%|█▉        | 136/687 [00:26<01:47,  5.14it/s][A[A[A[A



 20%|█▉        | 137/687 [00:26<01:47,  5.14it/s][A[A[A[A



 20%|██        | 138/687 [00:26<01:49,  5.01it/s][A[A[A[A



 20%|██        | 139/687 [00:27<01:47,  5.10it/s][A[A[A[A



 20%|██        | 140/687 [00:27<01:46,  5.16it/s][A[A[A[A



 21%|██        | 141/687 [00:27<01:43,  5.29it/s][A[A[A[A



 21%|██        | 142/687 [00:27<01:41,  5.39it/s][A[A[A[A



 21%|██        | 143/687 

 37%|███▋      | 254/687 [00:48<01:17,  5.57it/s][A[A[A[A



 37%|███▋      | 255/687 [00:49<01:17,  5.57it/s][A[A[A[A



 37%|███▋      | 256/687 [00:49<01:24,  5.11it/s][A[A[A[A



 37%|███▋      | 257/687 [00:49<01:21,  5.27it/s][A[A[A[A



 38%|███▊      | 258/687 [00:49<01:19,  5.36it/s][A[A[A[A



 38%|███▊      | 259/687 [00:49<01:18,  5.47it/s][A[A[A[A



 38%|███▊      | 260/687 [00:50<01:17,  5.54it/s][A[A[A[A



 38%|███▊      | 261/687 [00:50<01:16,  5.60it/s][A[A[A[A



 38%|███▊      | 262/687 [00:50<01:15,  5.64it/s][A[A[A[A



 38%|███▊      | 263/687 [00:50<01:14,  5.65it/s][A[A[A[A



 38%|███▊      | 264/687 [00:50<01:15,  5.64it/s][A[A[A[A



 39%|███▊      | 265/687 [00:50<01:14,  5.64it/s][A[A[A[A



 39%|███▊      | 266/687 [00:51<01:14,  5.66it/s][A[A[A[A



 39%|███▉      | 267/687 [00:51<01:14,  5.68it/s][A[A[A[A



 39%|███▉      | 268/687 [00:51<01:13,  5.66it/s][A[A[A[A



 39%|███▉      | 269/687 

 55%|█████▌    | 380/687 [01:12<00:55,  5.53it/s][A[A[A[A



 55%|█████▌    | 381/687 [01:13<00:54,  5.57it/s][A[A[A[A



 56%|█████▌    | 382/687 [01:13<00:54,  5.60it/s][A[A[A[A



 56%|█████▌    | 383/687 [01:13<00:53,  5.63it/s][A[A[A[A



 56%|█████▌    | 384/687 [01:13<00:53,  5.67it/s][A[A[A[A



 56%|█████▌    | 385/687 [01:13<00:53,  5.68it/s][A[A[A[A



 56%|█████▌    | 386/687 [01:13<00:52,  5.68it/s][A[A[A[A



 56%|█████▋    | 387/687 [01:14<00:52,  5.69it/s][A[A[A[A



 56%|█████▋    | 388/687 [01:14<00:52,  5.70it/s][A[A[A[A



 57%|█████▋    | 389/687 [01:14<00:52,  5.70it/s][A[A[A[A



 57%|█████▋    | 390/687 [01:14<00:52,  5.69it/s][A[A[A[A



 57%|█████▋    | 391/687 [01:14<00:52,  5.69it/s][A[A[A[A



 57%|█████▋    | 392/687 [01:14<00:51,  5.69it/s][A[A[A[A



 57%|█████▋    | 393/687 [01:15<00:51,  5.69it/s][A[A[A[A



 57%|█████▋    | 394/687 [01:15<00:51,  5.70it/s][A[A[A[A



 57%|█████▋    | 395/687 

 74%|███████▎  | 506/687 [01:36<00:33,  5.38it/s][A[A[A[A



 74%|███████▍  | 507/687 [01:36<00:34,  5.26it/s][A[A[A[A



 74%|███████▍  | 508/687 [01:36<00:34,  5.26it/s][A[A[A[A



 74%|███████▍  | 509/687 [01:36<00:33,  5.28it/s][A[A[A[A



 74%|███████▍  | 510/687 [01:36<00:33,  5.31it/s][A[A[A[A



 74%|███████▍  | 511/687 [01:37<00:32,  5.37it/s][A[A[A[A



 75%|███████▍  | 512/687 [01:37<00:32,  5.31it/s][A[A[A[A



 75%|███████▍  | 513/687 [01:37<00:33,  5.25it/s][A[A[A[A



 75%|███████▍  | 514/687 [01:37<00:34,  5.02it/s][A[A[A[A



 75%|███████▍  | 515/687 [01:37<00:33,  5.19it/s][A[A[A[A



 75%|███████▌  | 516/687 [01:38<00:32,  5.33it/s][A[A[A[A



 75%|███████▌  | 517/687 [01:38<00:31,  5.45it/s][A[A[A[A



 75%|███████▌  | 518/687 [01:38<00:30,  5.54it/s][A[A[A[A



 76%|███████▌  | 519/687 [01:38<00:29,  5.60it/s][A[A[A[A



 76%|███████▌  | 520/687 [01:38<00:32,  5.17it/s][A[A[A[A



 76%|███████▌  | 521/687 

 92%|█████████▏| 632/687 [02:00<00:10,  5.26it/s][A[A[A[A



 92%|█████████▏| 633/687 [02:00<00:10,  5.26it/s][A[A[A[A



 92%|█████████▏| 634/687 [02:00<00:10,  5.28it/s][A[A[A[A



 92%|█████████▏| 635/687 [02:01<00:09,  5.29it/s][A[A[A[A



 93%|█████████▎| 636/687 [02:01<00:09,  5.23it/s][A[A[A[A



 93%|█████████▎| 637/687 [02:01<00:09,  5.19it/s][A[A[A[A



 93%|█████████▎| 638/687 [02:01<00:09,  5.07it/s][A[A[A[A



 93%|█████████▎| 639/687 [02:01<00:09,  5.25it/s][A[A[A[A



 93%|█████████▎| 640/687 [02:02<00:09,  5.14it/s][A[A[A[A



 93%|█████████▎| 641/687 [02:02<00:08,  5.17it/s][A[A[A[A



 93%|█████████▎| 642/687 [02:02<00:08,  5.33it/s][A[A[A[A



 94%|█████████▎| 643/687 [02:02<00:08,  5.45it/s][A[A[A[A



 94%|█████████▎| 644/687 [02:02<00:07,  5.52it/s][A[A[A[A



 94%|█████████▍| 645/687 [02:02<00:07,  5.55it/s][A[A[A[A



 94%|█████████▍| 646/687 [02:03<00:08,  4.63it/s][A[A[A[A



 94%|█████████▍| 647/687 

  8%|▊         | 68/865 [00:13<02:39,  5.01it/s][A[A[A[A[A




  8%|▊         | 69/865 [00:14<02:38,  5.02it/s][A[A[A[A[A




  8%|▊         | 70/865 [00:14<02:35,  5.10it/s][A[A[A[A[A




  8%|▊         | 71/865 [00:14<02:49,  4.69it/s][A[A[A[A[A




  8%|▊         | 72/865 [00:14<02:46,  4.76it/s][A[A[A[A[A




  8%|▊         | 73/865 [00:14<02:42,  4.86it/s][A[A[A[A[A




  9%|▊         | 74/865 [00:15<02:42,  4.87it/s][A[A[A[A[A




  9%|▊         | 75/865 [00:15<02:43,  4.84it/s][A[A[A[A[A




  9%|▉         | 76/865 [00:15<02:41,  4.89it/s][A[A[A[A[A




  9%|▉         | 77/865 [00:15<02:38,  4.96it/s][A[A[A[A[A




  9%|▉         | 78/865 [00:16<02:50,  4.61it/s][A[A[A[A[A




  9%|▉         | 79/865 [00:16<02:58,  4.40it/s][A[A[A[A[A




  9%|▉         | 80/865 [00:16<02:51,  4.58it/s][A[A[A[A[A




  9%|▉         | 81/865 [00:16<02:44,  4.76it/s][A[A[A[A[A




  9%|▉         | 82/865 [00:16<02:41,  4.86it/s]

 22%|██▏       | 187/865 [00:39<02:20,  4.81it/s][A[A[A[A[A




 22%|██▏       | 188/865 [00:39<02:17,  4.93it/s][A[A[A[A[A




 22%|██▏       | 189/865 [00:39<02:17,  4.91it/s][A[A[A[A[A




 22%|██▏       | 190/865 [00:39<02:16,  4.96it/s][A[A[A[A[A




 22%|██▏       | 191/865 [00:39<02:14,  5.01it/s][A[A[A[A[A




 22%|██▏       | 192/865 [00:40<02:11,  5.10it/s][A[A[A[A[A




 22%|██▏       | 193/865 [00:40<02:10,  5.14it/s][A[A[A[A[A




 22%|██▏       | 194/865 [00:40<02:09,  5.18it/s][A[A[A[A[A




 23%|██▎       | 195/865 [00:40<02:08,  5.22it/s][A[A[A[A[A




 23%|██▎       | 196/865 [00:40<02:07,  5.24it/s][A[A[A[A[A




 23%|██▎       | 197/865 [00:40<02:13,  5.00it/s][A[A[A[A[A




 23%|██▎       | 198/865 [00:41<02:16,  4.89it/s][A[A[A[A[A




 23%|██▎       | 199/865 [00:41<02:13,  4.99it/s][A[A[A[A[A




 23%|██▎       | 200/865 [00:41<02:10,  5.08it/s][A[A[A[A[A




 23%|██▎       | 201/865 [00:41<02

 35%|███▌      | 305/865 [01:03<02:33,  3.65it/s][A[A[A[A[A




 35%|███▌      | 306/865 [01:03<02:50,  3.28it/s][A[A[A[A[A




 35%|███▌      | 307/865 [01:03<03:00,  3.10it/s][A[A[A[A[A




 36%|███▌      | 308/865 [01:04<03:04,  3.02it/s][A[A[A[A[A




 36%|███▌      | 309/865 [01:04<03:16,  2.82it/s][A[A[A[A[A




 36%|███▌      | 310/865 [01:05<03:32,  2.61it/s][A[A[A[A[A




 36%|███▌      | 311/865 [01:05<03:37,  2.54it/s][A[A[A[A[A




 36%|███▌      | 312/865 [01:05<03:34,  2.58it/s][A[A[A[A[A




 36%|███▌      | 313/865 [01:06<03:20,  2.75it/s][A[A[A[A[A




 36%|███▋      | 314/865 [01:06<02:52,  3.19it/s][A[A[A[A[A




 36%|███▋      | 315/865 [01:06<02:32,  3.60it/s][A[A[A[A[A




 37%|███▋      | 316/865 [01:06<02:22,  3.86it/s][A[A[A[A[A




 37%|███▋      | 317/865 [01:07<02:20,  3.91it/s][A[A[A[A[A




 37%|███▋      | 318/865 [01:07<02:21,  3.88it/s][A[A[A[A[A




 37%|███▋      | 319/865 [01:07<02

 49%|████▉     | 423/865 [01:33<01:44,  4.24it/s][A[A[A[A[A




 49%|████▉     | 424/865 [01:34<01:45,  4.20it/s][A[A[A[A[A




 49%|████▉     | 425/865 [01:34<01:46,  4.12it/s][A[A[A[A[A




 49%|████▉     | 426/865 [01:34<02:00,  3.66it/s][A[A[A[A[A




 49%|████▉     | 427/865 [01:35<01:58,  3.71it/s][A[A[A[A[A




 49%|████▉     | 428/865 [01:35<01:47,  4.05it/s][A[A[A[A[A




 50%|████▉     | 429/865 [01:35<01:41,  4.31it/s][A[A[A[A[A




 50%|████▉     | 430/865 [01:35<01:35,  4.54it/s][A[A[A[A[A




 50%|████▉     | 431/865 [01:35<01:31,  4.72it/s][A[A[A[A[A




 50%|████▉     | 432/865 [01:35<01:29,  4.83it/s][A[A[A[A[A




 50%|█████     | 433/865 [01:36<01:28,  4.89it/s][A[A[A[A[A




 50%|█████     | 434/865 [01:36<01:27,  4.92it/s][A[A[A[A[A




 50%|█████     | 435/865 [01:36<01:27,  4.91it/s][A[A[A[A[A




 50%|█████     | 436/865 [01:36<01:36,  4.45it/s][A[A[A[A[A




 51%|█████     | 437/865 [01:37<01

 63%|██████▎   | 541/865 [02:03<01:03,  5.11it/s][A[A[A[A[A




 63%|██████▎   | 542/865 [02:03<01:03,  5.12it/s][A[A[A[A[A




 63%|██████▎   | 543/865 [02:03<01:02,  5.16it/s][A[A[A[A[A




 63%|██████▎   | 544/865 [02:03<01:02,  5.15it/s][A[A[A[A[A




 63%|██████▎   | 545/865 [02:03<01:02,  5.15it/s][A[A[A[A[A




 63%|██████▎   | 546/865 [02:04<01:03,  5.03it/s][A[A[A[A[A




 63%|██████▎   | 547/865 [02:04<01:14,  4.28it/s][A[A[A[A[A




 63%|██████▎   | 548/865 [02:04<01:12,  4.37it/s][A[A[A[A[A




 63%|██████▎   | 549/865 [02:04<01:10,  4.48it/s][A[A[A[A[A




 64%|██████▎   | 550/865 [02:04<01:08,  4.59it/s][A[A[A[A[A




 64%|██████▎   | 551/865 [02:05<01:06,  4.71it/s][A[A[A[A[A




 64%|██████▍   | 552/865 [02:05<01:04,  4.82it/s][A[A[A[A[A




 64%|██████▍   | 553/865 [02:05<01:03,  4.89it/s][A[A[A[A[A




 64%|██████▍   | 554/865 [02:05<01:02,  5.01it/s][A[A[A[A[A




 64%|██████▍   | 555/865 [02:05<01

 76%|███████▌  | 659/865 [02:27<00:42,  4.87it/s][A[A[A[A[A




 76%|███████▋  | 660/865 [02:28<00:41,  4.94it/s][A[A[A[A[A




 76%|███████▋  | 661/865 [02:28<00:40,  5.02it/s][A[A[A[A[A




 77%|███████▋  | 662/865 [02:28<00:41,  4.95it/s][A[A[A[A[A




 77%|███████▋  | 663/865 [02:28<00:40,  5.04it/s][A[A[A[A[A




 77%|███████▋  | 664/865 [02:28<00:43,  4.67it/s][A[A[A[A[A




 77%|███████▋  | 665/865 [02:29<00:41,  4.85it/s][A[A[A[A[A




 77%|███████▋  | 666/865 [02:29<00:40,  4.96it/s][A[A[A[A[A




 77%|███████▋  | 667/865 [02:29<00:40,  4.92it/s][A[A[A[A[A




 77%|███████▋  | 668/865 [02:29<00:39,  5.02it/s][A[A[A[A[A




 77%|███████▋  | 669/865 [02:29<00:38,  5.09it/s][A[A[A[A[A




 77%|███████▋  | 670/865 [02:30<00:37,  5.15it/s][A[A[A[A[A




 78%|███████▊  | 671/865 [02:30<00:39,  4.85it/s][A[A[A[A[A




 78%|███████▊  | 672/865 [02:30<00:38,  4.96it/s][A[A[A[A[A




 78%|███████▊  | 673/865 [02:30<00

 90%|████████▉ | 777/865 [02:52<00:19,  4.61it/s][A[A[A[A[A




 90%|████████▉ | 778/865 [02:52<00:18,  4.79it/s][A[A[A[A[A




 90%|█████████ | 779/865 [02:52<00:17,  4.84it/s][A[A[A[A[A




 90%|█████████ | 780/865 [02:52<00:17,  4.92it/s][A[A[A[A[A




 90%|█████████ | 781/865 [02:53<00:18,  4.63it/s][A[A[A[A[A




 90%|█████████ | 782/865 [02:53<00:18,  4.59it/s][A[A[A[A[A




 91%|█████████ | 783/865 [02:53<00:17,  4.76it/s][A[A[A[A[A




 91%|█████████ | 784/865 [02:53<00:16,  4.84it/s][A[A[A[A[A




 91%|█████████ | 785/865 [02:53<00:16,  4.92it/s][A[A[A[A[A




 91%|█████████ | 786/865 [02:54<00:18,  4.28it/s][A[A[A[A[A




 91%|█████████ | 787/865 [02:54<00:18,  4.23it/s][A[A[A[A[A




 91%|█████████ | 788/865 [02:54<00:17,  4.34it/s][A[A[A[A[A




 91%|█████████ | 789/865 [02:54<00:16,  4.56it/s][A[A[A[A[A




 91%|█████████▏| 790/865 [02:55<00:16,  4.64it/s][A[A[A[A[A




 91%|█████████▏| 791/865 [02:55<00

In [7]:
#06/24 added for test dataset
subsets = ['test']

# Writes, for every scan directory of the test dataset, one list-file line with
# the selected slice range and the merged bounding box of those slices.
debug = False

c = 'covid' #all unknown for test dataset

for subset in subsets:

    slice_lens = dict()

    label_file = covidx_dir+'{}_ICCV_MAI.txt'.format(subset)
    fnames, classes, bboxes,ratios,lines = load_labels_covidx(label_file)

    # NOTE: `split` is not set in this cell; it is the value assigned in the
    # train/val cell, so that cell must have been run first.
    list_file =  data_list_dir + '{}_rgb_split{}.txt'.format(subset,split)
    print(list_file)

    #use the following code on test dataset
    testsubsets = ["subset{}".format(x) for x in range(1,9)]
    print(testsubsets)

    annots = dict()

    # Index the label lines once by scan id (third path component) so that each
    # scan does not have to rescan the whole label file. Lines whose path has
    # fewer than three components can never match a scan and are left out.
    files_by_scan = dict()
    for x in lines:
        parts = x.split()[0].split('/')
        if len(parts) > 2:
            files_by_scan.setdefault(parts[2], []).append(x)

    # `with` closes the list file even if the loop raises or is interrupted.
    with open(list_file,'w') as fp:

        for testset in testsubsets:

            print(testset)

            slice_lens[testset] = []

            annots[testset] = []

            subset_dir = 'test/' + testset

            # os.listdir returns entries in arbitrary order; sort them so the
            # generated list file is reproducible.
            scan_dirs = sorted(os.listdir(covidx_img_dir + subset_dir))

            print(subset_dir)

            # Wrapping the iterable lets tqdm close the bar when the loop ends,
            # so bars of successive subsets do not stack up.
            for s in tqdm(scan_dirs, desc=testset):

                # Slices of this scan that belong to this subset, ordered by the
                # integer file stem (<N>.jpg).
                s_files = [x for x in files_by_scan.get(s, []) if testset in x]
                s_files.sort(key = lambda x: int(x.split()[0].split('/')[-1].split('.')[0]) )

                if not s_files:
                    # Directory entry without label lines: nothing to select,
                    # so skip instead of crashing on max() of an empty list.
                    tqdm.write("skipping {}/{}: no label lines".format(subset_dir, s))
                    continue

                # Explicit array: the comparison below is element-wise.
                s_ratios = np.array([float(x.split()[-1]) for x in s_files])
                s_ratio_max = s_ratios.max()

                # Lower the relative threshold from 0.7 to 0.1 of the maximum ratio
                # until at least two slices are selected (the span test `>= 0`
                # always holds because the positions are ascending). If no
                # threshold qualifies, the selection at 0.1 is used.
                for thresh_ind in range(7,0,-1):
                    thresh = thresh_ind/10
                    s_ind = np.where(s_ratios >= s_ratio_max*thresh)[0]

                    if len(s_ind)>=2 and s_ind[-1]-s_ind[0] >=0:
                        break

                # First/last selected positions in the sorted slice list
                # (positions, not the numbers in the file names).
                first, last = s_ind[0], s_ind[-1]

                slice_lens[testset].append(last-first)

                # Merge the boxes of every slice in [first, last] into one box.
                selected = [x.split() for x in s_files[first:last+1]]
                xmin = int(min(float(p[2]) for p in selected))
                ymin = int(min(float(p[3]) for p in selected))
                xmax = int(max(float(p[4]) for p in selected))
                ymax = int(max(float(p[5]) for p in selected))

                # Output line: <scan dir> <class> <first> <last> <xmin> <ymin> <xmax> <ymax>
                s_dir = "{}/{}/{}".format(subset,testset,s)
                line = "{} {} {} {} {} {} {} {}\n".format(s_dir,c,first,last,xmin,ymin,xmax,ymax)
                fp.write(line)

            # Number of scans written and the largest/smallest selected span;
            # `default=None` keeps an empty subset from raising.
            print(len(slice_lens[testset]),
                  max(slice_lens[testset], default=None),
                  min(slice_lens[testset], default=None))




  0%|          | 0/450 [00:00<?, ?it/s][A[A

  1%|          | 3/450 [00:00<00:24, 18.33it/s][A[A

/media/ubuntu/MyHDataStor2/products/LateTemporalModeling3DCNN/datasets/settings/covid/test_rgb_split6.txt
['subset1', 'subset2', 'subset3', 'subset4', 'subset5', 'subset6', 'subset7', 'subset8']
subset1
test/subset1




  1%|          | 5/450 [00:00<00:28, 15.80it/s][A[A

  2%|▏         | 7/450 [00:00<00:30, 14.47it/s][A[A

  2%|▏         | 9/450 [00:00<00:32, 13.67it/s][A[A

  2%|▏         | 11/450 [00:00<00:33, 13.20it/s][A[A

  3%|▎         | 13/450 [00:00<00:33, 12.90it/s][A[A

  3%|▎         | 15/450 [00:01<00:34, 12.71it/s][A[A

  4%|▍         | 17/450 [00:01<00:34, 12.57it/s][A[A

  4%|▍         | 19/450 [00:01<00:34, 12.43it/s][A[A

  5%|▍         | 21/450 [00:01<00:34, 12.32it/s][A[A

  5%|▌         | 23/450 [00:01<00:34, 12.30it/s][A[A

  6%|▌         | 25/450 [00:01<00:34, 12.25it/s][A[A

  6%|▌         | 27/450 [00:02<00:34, 12.21it/s][A[A

  6%|▋         | 29/450 [00:02<00:34, 12.19it/s][A[A

  7%|▋         | 31/450 [00:02<00:34, 12.19it/s][A[A

  7%|▋         | 33/450 [00:02<00:34, 12.17it/s][A[A

  8%|▊         | 35/450 [00:02<00:34, 12.15it/s][A[A

  8%|▊         | 37/450 [00:02<00:34, 12.13it/s][A[A

  9%|▊         | 39/450 [00:03<00:33, 12.10it/s]

KeyboardInterrupt: 