[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jayhansuh/COLAB-FILES/blob/main/12lbs/adCLIP-gritbench.ipynb)

# Adapting-CLIP

In [14]:
######## Mount the drive ########
# Everything below reads from / writes to Google Drive, so mount it first.
from google.colab import drive
drive.mount('/content/drive/')

######## DIRPATH ########
# GRIT directory on Drive.
DIRPATH_GRIT = '/content/drive/MyDrive/grit_official/'
# Downloaded GRIT data inside DIRPATH_GRIT (the sample JSON files are read from here).
DIRPATH_DATA = f'{DIRPATH_GRIT}data/downloaded_logs/default_job/data/downloaded/GRIT/'
# Directory on the Colab VM; images are looked up under DIRPATH_LOCAL + 'images/'.
DIRPATH_LOCAL = '/content/GRIT-LOCAL/'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
######## Install the dependencies ########
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

In [16]:
import os
import json
from PIL import Image

## Load images from Google Drive 

In [17]:
# Load GRIT data
if(not os.path.exists(DIRPATH_LOCAL)):
    os.makedirs(DIRPATH_LOCAL)
    %cd /content/
    !cp /content/drive/MyDrive/COLAB-FILES/12lbs/grit_local.tar.gz /content/
    !tar -xzf grit_local.tar.gz    

In [18]:
# Switch the working directory to the adapting-CLIP folder on Drive.
# Presumably the `models` and `utils` packages imported below live in that
# folder and are resolved relative to it - TODO confirm.
%cd /content/drive/MyDrive/adapting-CLIP/

#import argparse
#import os.path as osp
from tqdm import tqdm
import numpy as np
import torch
from models.slic_vit import SLICViT
from models.ss_baseline import SSBaseline
from models.resnet_high_res import ResNetHighRes
from utils.zsg_data import FlickrDataset, VGDataset
from utils.grounding_evaluator import GroundingEvaluator

/content/drive/MyDrive/adapting-CLIP


In [19]:
# Reference command line:
# ! python eval.py --model vit14 --dataset flickr_s1_val --iou_thr 0.5 --num_samples 500

# Keyword arguments passed to the SLICViT constructor.
args = dict(
    model='vit14',
    alpha=0.75,
    aggregation='mean',
    n_segments=list(range(100, 601, 50)),  # 100, 150, ..., 600
    temperature=0.02,
    upsample=2,
    start_block=0,
    compactness=50,
    sigma=0,
)
dataset_full = FlickrDataset(data_type='flickr30k_c1/val')

# Instantiate the model and move it to the GPU.
model = SLICViT(**args).cuda()
# NOTE(review): model.eval() is left disabled here - confirm whether it is needed.
#model.eval()



## Load the JSON file

In [20]:
# Read the list of localization samples from the GRIT ablation JSON file
localization_json_path = os.path.join(DIRPATH_DATA, 'samples/ablation/localization.json')
with open(localization_json_path) as json_file:
    data = json.load(json_file)

# `data` is a list of dicts; example of the JSON content:
# [
#     {
#         "example_id": "coco_loc_test-reserve_spoon_527067",
#         "image_id": "coco/test2015/COCO_test2015_000000527067.jpg",
#         "output_options": null,
#         "task_bbox": null,
#         "task_name": "localization",
#         "task_query": "spoon"
#     },
#     ...
# ]

In [21]:
# Report how many samples the JSON file contains
num_samples = len(data)
print(num_samples)

# Check if the images are in the folder using vectorization
def checkfile(di):
  """Return True if the image named by di['image_id'] exists under DIRPATH_LOCAL/images."""
  local_image_path = os.path.join(DIRPATH_LOCAL, 'images', di['image_id'])
  return os.path.exists(local_image_path)
# Count the samples whose image file is present locally
cnt = sum(1 for sample in data if checkfile(sample))
print("ALL THE IMAGES EXISTING - ",cnt==len(data),cnt,len(data))

# Collect the distinct parent folders of all referenced images
# (everything before the last '/' of each image_id).
folderset = {sample['image_id'].rpartition('/')[0] for sample in data}
folderset

21078
ALL THE IMAGES EXISTING -  True 21078 21078


{'coco/test2015',
 'distorted/localization/coco/test2015',
 'nyuv2',
 'open_images/test'}

In [22]:
# Directory that holds the intermediate prediction files of this session.
OUTPUT_DIR = '/content/drive/MyDrive/COLAB-FILES/12lbs/jsonfiles/'
# For the second session (runs over the samples in reverse order), use instead:
# data = data[::-1]
# OUTPUT_DIR = '/content/drive/MyDrive/COLAB-FILES/12lbs/jsonfiles2/'

# Accumulated predictions, one dict per sample. Format:
# [
#    {
#        "example_id" : str
#        "confidence" : float in [0,1]
#        "bboxes"     : 2d list of int [[x1,y1,x2,y2],...] # box coordinates, per instance
#    },
#    ...
#]
output = list()
# Load previous results if exists
def readLatestJSON(OUTPUT_DIR):
  """Load the most recent checkpoint file found in OUTPUT_DIR.

  Checkpoints are named f'ablation_localization_{len(output)//500}.json'; the
  "latest" one is the file with the largest integer after the last underscore.

  Entries that are not regular ``*.json`` files with such a numeric suffix
  (for example a ``.ipynb_checkpoints`` folder) are ignored instead of making
  the sort key raise ValueError.

  Args:
    OUTPUT_DIR: directory containing the checkpoint files.

  Returns:
    The decoded JSON content of the latest checkpoint, or an empty list when
    the directory holds no checkpoint yet (fresh run).

  Raises:
    FileNotFoundError: if OUTPUT_DIR itself does not exist.
  """
  def checkpoint_index(filename):
    # Same parsing as the naming scheme: text before the first '.', then the
    # part after the last '_'. Returns None for names that do not fit.
    suffix = filename.split('.')[0].split('_')[-1]
    try:
      return int(suffix)
    except ValueError:
      return None

  candidates = []
  for filename in os.listdir(OUTPUT_DIR):
    if not filename.endswith('.json'):
      continue
    if not os.path.isfile(os.path.join(OUTPUT_DIR, filename)):
      continue
    index = checkpoint_index(filename)
    if index is not None:
      candidates.append((index, filename))

  # Nothing saved yet: behave like an empty result list so a first run can start.
  if not candidates:
    return []

  # Compare indices numerically so that e.g. 10 ranks above 2.
  _, latest_filename = max(candidates)
  with open(os.path.join(OUTPUT_DIR, latest_filename), 'r') as f:
    result = json.load(f)
  return result

# Resume from the latest checkpoint of this session
output = readLatestJSON(OUTPUT_DIR)
print(len(output))

# Check if the output is complete
if len(output) == len(data):
  print("LOADED OUTPUT IS COMPLETE")

# Check that the loaded predictions line up with `data`, position by position.
# A checkpoint longer than `data` cannot belong to this sample list, so it is
# reported as a mismatch instead of raising IndexError while indexing `data`.
ids_match = len(output) <= len(data) and all(
    prediction['example_id'] == sample['example_id']
    for prediction, sample in zip(output, data)
)
if not ids_match:
  print("LOADED OUTPUT HAS A MISMATCH")

11500


In [None]:
## Predict
# Number of predictions between two checkpoints; also the divisor used in the
# checkpoint file name (ablation_localization_{len(output)//SAVE_EVERY}.json).
SAVE_EVERY = 500

def _save_checkpoint(results):
    """Write all predictions gathered so far to the checkpoint file for their count."""
    checkpoint_path = os.path.join(OUTPUT_DIR, f'ablation_localization_{len(results)//SAVE_EVERY}.json')
    with open(checkpoint_path, 'w') as f:
        json.dump(results, f)

# Only the samples after the already-loaded predictions are processed.
num_loaded = len(output)
for json_chunk in tqdm(data[num_loaded:]):
    image_path = os.path.join(DIRPATH_LOCAL,'images',json_chunk['image_id'])
    if(not os.path.exists(image_path)):
        # Stop at the first missing image so `output` stays aligned with `data`.
        print("Image not found - ",image_path)
        break

    # Load the image as an RGB array
    image = np.array(Image.open(image_path).convert("RGB"))
    # Load the text query
    text = json_chunk['task_query']

    # Predict (the second return value is not used)
    pred_boxes, _ = model(image, text)

    # Append the result
    output_chunk = {
        "example_id" : json_chunk['example_id'],
        "confidence" : 0.5, #float(pred_scores.mean()),  # constant placeholder confidence
        "bboxes"     : pred_boxes.tolist()
    }
    output.append(output_chunk)

    # Save the intermediate results
    if len(output) % SAVE_EVERY == 0:
        _save_checkpoint(output)

# Flush the tail: without this, predictions made after the last multiple of
# SAVE_EVERY (or before an early `break`) would never reach disk. The file index
# is len(output)//SAVE_EVERY, so this overwrites the last checkpoint with a
# superset of its content and readLatestJSON still picks it up.
if len(output) > num_loaded and len(output) % SAVE_EVERY != 0:
    _save_checkpoint(output)

# Submission file for GRIT-Bench

In [23]:
# Make the directory
DIRPATH_SUBMIT = '/content/drive/MyDrive/COLAB-FILES/12lbs/'
%cd $DIRPATH_SUBMIT
!mkdir ablation

/content/drive/MyDrive/COLAB-FILES/12lbs
mkdir: cannot create directory ‘ablation’: File exists


In [24]:
# Construct the prediction file from the checkpoints of the two sessions
OUTPUT_DIR_1 = '/content/drive/MyDrive/COLAB-FILES/12lbs/jsonfiles/'
OUTPUT_DIR_2 = '/content/drive/MyDrive/COLAB-FILES/12lbs/jsonfiles2/'

# result1 follows the order of `data`; result2 is reversed below before it is
# appended, i.e. it is expected to follow the order of data[::-1].
result1 = readLatestJSON(OUTPUT_DIR_1)
result2 = readLatestJSON(OUTPUT_DIR_2)

# Check if the outputs are complete
if len(result1) + len(result2) >= len(data):
    print("LOADED OUTPUTS ARE COMPLETE")

    # Merge the two outputs.
    # Cap result1 at len(data) so the count taken from result2 never goes negative.
    result1 = result1[:len(data)]
    num_from_result2 = len(data) - len(result1)
    result2 = result2[:num_from_result2]  # Remove the extra (overlapping) elements
    result2 = result2[::-1]               # Restore the order of `data`
    output = result1 + result2

    # Check example ids in the merged output against `data`
    ids_match = all(
        prediction['example_id'] == sample['example_id']
        for prediction, sample in zip(output, data)
    )

    if ids_match:
        # Save the final results
        OUTPUT_FILENAME = os.path.join(DIRPATH_SUBMIT,'ablation/localization.json')
        with open(OUTPUT_FILENAME, 'w') as f:
            json.dump(output, f)
    else:
        # Do not write a submission file that is known to be misaligned.
        print("LOADED OUTPUT HAS A MISMATCH")
        print("SUBMISSION FILE NOT WRITTEN")

else:
    print("LOADED OUTPUTS ARE INCOMPLETE - ",len(result1)+len(result2),len(data))

LOADED OUTPUTS ARE COMPLETE


In [30]:
# Check the output: pretty-print the beginning of the saved submission file.
# The text is bound to its own name; reusing `data` here would replace the
# sample list that the earlier cells iterate over with a string.
with open(os.path.join(DIRPATH_SUBMIT,'ablation/localization.json')) as f:
    submission_preview = json.dumps(json.load(f), indent=2)
print(submission_preview[:500])

[
  {
    "example_id": "coco_loc_test-reserve_spoon_527067",
    "confidence": 0.5,
    "bboxes": [
      [
        234.2857208251953,
        73.00892639160156,
        571.4285888671875,
        478.8526916503906
      ]
    ]
  },
  {
    "example_id": "coco_loc_test-reserve_airplane_415398",
    "confidence": 0.5,
    "bboxes": [
      [
        31.428571701049805,
        117.35713958740234,
        545.7142944335938,
        363.4285583496094
      ]
    ]
  },
  {
    "example_id": "coco


In [25]:
# Parameter counts of the model (needed for the GRIT submission)

# Single pass over the parameters: every tensor adds to the total, and those
# with requires_grad set also add to the trainable count.
total_params = 0
trainable_params = 0
for param in model.parameters():
    num_elements = param.numel()
    total_params += num_elements
    if param.requires_grad:
        trainable_params += num_elements

print(f"Total Parameters: {total_params}")
print(f"Trainable Parameters: {trainable_params}")

# int() truncates, so the count is reported in whole millions.
params_dict = dict(params_in_millions=int(total_params/1e6))

OUTPUT_FILENAME = os.path.join(DIRPATH_SUBMIT,'ablation/params.json')
with open(OUTPUT_FILENAME, 'w') as f:
    json.dump(params_dict, f)

Total Parameters: 428402945
Trainable Parameters: 428402945


In [26]:
# Zip the directory
# Run from DIRPATH_SUBMIT so that the archive entries use the relative path `ablation/`.
%cd $DIRPATH_SUBMIT
# -r: recurse into the `ablation` directory.
!zip -r ablation.zip ablation

/content/drive/MyDrive/COLAB-FILES/12lbs
  adding: ablation/ (stored 0%)
  adding: ablation/localization.json (deflated 84%)
  adding: ablation/params.json (stored 0%)
