In [1]:
import json
import os
import pickle
import shutil

import open_clip
import pandas as pd
import torch
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
from transformers.pipelines.pt_utils import KeyDataset

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
def jsonConvert(json_list, key_field):
    """Re-key a list of JSON objects by the value each one holds under ``key_field``.

    Args:
        json_list: iterable of dict-like objects.
        key_field: name of the entry whose value becomes the key in the result.

    Returns:
        dict mapping each object's ``key_field`` value to a copy of that object
        without the ``key_field`` entry. When two objects share a key value the
        later one replaces the earlier one.

    Raises:
        KeyError: if an object has no ``key_field`` entry or its value is ``None``.
    """
    keyed = {}
    for entry in json_list:
        key_value = entry.get(key_field)
        # Guard clause: a missing (or None) key makes the object unusable.
        if key_value is None:
            raise KeyError(f"Key '{key_field}' not found in JSON object: {entry}")
        remainder = {}
        for name, value in entry.items():
            if name != key_field:
                remainder[name] = value
        keyed[key_value] = remainder
    return keyed

In [2]:
# Seed handed as random_state to the pandas .sample() calls below so the drawn subset is repeatable.
randomseed = 420

In [3]:
# Load the tab-separated NWPU-Captions test split.
test_data = pd.read_csv('../datasets/NWPU-Captions/test.csv', sep='\t')

# Keep exactly one seeded-random row per distinct 'filepath' value, then renumber the rows.
test_data_onedesc = (
    test_data
    .groupby('filepath', as_index=False)
    .sample(n=1, random_state=randomseed)
    .reset_index(drop=True)
)

In [4]:
# Number of rows drawn for each evaluated class.
SAMPLES_PER_CLASS = 25


def _sample_class(frame, class_name, class_id, n=SAMPLES_PER_CLASS, seed=randomseed):
    """Draw ``n`` seeded-random rows of one class and tag them with a numeric id.

    Args:
        frame: DataFrame with a 'class' column to filter on.
        class_name: value of the 'class' column to keep.
        class_id: integer stored in the new 'class_id' column of every drawn row.
        n: number of rows to draw (pandas raises ValueError if fewer are available).
        seed: random_state forwarded to ``DataFrame.sample``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and a 'class_id' column.
    """
    return (
        frame[frame['class'] == class_name]
        .sample(n, random_state=seed)
        .reset_index(drop=True)
        .assign(class_id=class_id)
    )


# One frame per class; class_id is the integer label attached to that class.
airplane_df = _sample_class(test_data_onedesc, 'airplane', 0)
ship_df = _sample_class(test_data_onedesc, 'ship', 1)
basketball_df = _sample_class(test_data_onedesc, 'basketball_court', 2)
bridge_df = _sample_class(test_data_onedesc, 'bridge', 3)

In [5]:
# Stack the four per-class samples under a fresh 0..N-1 index, then shuffle all rows
# (frac=1) with the shared seed; the shuffled index labels are kept as they are.
class_frames = [airplane_df, ship_df, basketball_df, bridge_df]
stacked_df = pd.concat(class_frames, ignore_index=True)
four_class_df = stacked_df.sample(frac=1, random_state=randomseed)

In [6]:
dest = '../datasets/PrioEval'

# Create the target folder up front; shutil.copy fails if it does not exist.
os.makedirs(dest, exist_ok=True)

# Copy every referenced file into `dest` and remember where each copy landed.
copied_paths = []
for file_path in four_class_df['filepath']:
    # Keep the original file name, only the directory changes.
    destination_path = os.path.join(dest, os.path.basename(file_path))

    # On a re-run the frame already points into `dest`; copying a file onto
    # itself would raise shutil.SameFileError, so skip it.
    if os.path.abspath(file_path) != os.path.abspath(destination_path):
        shutil.copy(file_path, destination_path)
    copied_paths.append(destination_path)

# Rows yielded by DataFrame.iterrows() are copies, so assigning to them never
# reaches the frame. Replace the whole column instead (the list is in row order).
four_class_df['filepath'] = copied_paths


In [7]:
# Persist the shuffled evaluation set as a tab-separated file. No index=False is passed,
# so the DataFrame index is written too, as an extra unnamed first column.
four_class_df.to_csv('./PrioEval.csv', sep='\t')

In [10]:
# Display the caption column of the shuffled four-class frame.
four_class_df['caption']

12    An aerial photograph with description: A plane...
48    An aerial photograph with description: The car...
41    An aerial photograph with description: An oran...
21    An aerial photograph with description: There a...
45    An aerial photograph with description: A light...
                            ...                        
31    An aerial photograph with description: A red a...
63    An aerial photograph with description: There a...
6     An aerial photograph with description: Two pla...
72    An aerial photograph with description: Two bas...
49    An aerial photograph with description: There a...
Name: caption, Length: 100, dtype: object

In [3]:
# Text prompts that are tokenized and encoded with the CLIP text encoder, one per evaluated class.
# NOTE(review): the order looks aligned with the class_id values assigned to the per-class
# frames (0 airplane, 1 ship, 2 basketball_court, 3 bridge) — confirm the consumer relies on it.
clip_eval_captions = [
    "An aerial photograph with description: A plane on the ground.",
    "An aerial photograph with description: A ship or boat in the water.",
    "An aerial photograph with description: A basketball court is present.",
    "An aerial photograph with description: A bridge is present."
]

In [4]:
# Pick the GPU when one is visible; the model and the tokens are moved there below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Torch device: ', device)

# Architecture name; also used to locate the local checkpoint ./CLIP/<name>.pt.
base_model = 'ViT-B-16'
model, _, preprocess = open_clip.create_model_and_transforms(base_model, pretrained=f'./CLIP/{base_model}.pt')
# Without this the model stays on the CPU even though 'cuda' was printed above.
model = model.to(device)
model.eval()  # model in train mode by default, impacts some models with BatchNorm or stochastic depth active

tokenizer = open_clip.get_tokenizer(base_model)
# Tokens must live on the same device as the model.
text_tokens = tokenizer(clip_eval_captions).to(device)

with torch.no_grad():
    text_features = model.encode_text(text_tokens).float()
    # L2-normalize each caption embedding along the feature dimension.
    text_features /= text_features.norm(dim=-1, keepdim=True)

# Bring the result back to the CPU so that pickling it later yields a file that
# can be loaded on machines without a GPU.
text_features = text_features.cpu()

Torch device:  cuda


  checkpoint = torch.load(checkpoint_path, map_location=map_location)


In [5]:
# Store the normalized caption embeddings in ./CLIP, in a file named after the model.
desc_path = f'./CLIP/desc_{base_model}.pkl'
with open(desc_path, 'wb') as handle:
    pickle.dump(text_features, handle)

In [7]:
# Descriptions appended to the base prompt of the bounding-box-property generation loop, one per class.
# NOTE(review): "backetball" is spelled this way on purpose or by accident — the recorded model
# output uses the key 'backetball-court', so confirm what downstream code expects before fixing it.
obb_eval_captions = [
    'A remote sensing image containing more than 2 planes with any average distance.',
    'A remote sensing image containing more than 2 ships with any average distance.',
    'A remote sensing image containing more than 2 backetball courts with any average distance.',
    'A remote sensing image containing more than 2 bridges with any average distance.'
]

In [3]:
# NOTE: this cell rebinds `base_model`, `model` and `tokenizer`, which the CLIP cell
# also defines; after it runs they refer to the causal language model instead.
# Local checkpoint directory (presumably a Llama-2 fine-tune, judging by the path — confirm).
base_model = '../OBB/llama2_dota'

# dtype used for computation inside the 4-bit quantized layers.
compute_dtype = torch.float16

# 4-bit NF4 quantization without nested (double) quantization.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    # The empty key addresses the whole model: everything is placed on device 0.
    device_map={"": 0}
)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.31s/it]


In [20]:
# NOTE(review): the spelling "Genereate" is kept verbatim because this text is sent to the
# model at runtime — confirm against the prompt the model was tuned with before correcting it.
base_prompt = "Genereate the object bounding box properties for a remote sensing image with the following description as JSON only: "
# Back-ticks, spaces and newlines are stripped from the model answer before JSON parsing.
characters_to_remove = '` \n'
translation_table = str.maketrans('', '', characters_to_remove)


def _parse_generated_json(generated_text):
    """Turn one generated text into a dict keyed by the 'class' field.

    Takes what follows the first '[/INST]' marker, removes the characters in
    ``characters_to_remove``, swaps single for double quotes and the text
    'None' for '0', cuts the text after the first ']' and parses the rest as
    a JSON list, which is then re-keyed with ``jsonConvert``.

    Raises:
        IndexError: if the text contains no '[/INST]' marker.
        ValueError: if the remaining text is not valid JSON.
        KeyError: if a parsed object lacks the 'class' field.
    """
    answer = generated_text.split('[/INST]')[1]
    answer = answer.translate(translation_table).replace("'", '"').replace("None", "0")
    list_end = answer.find(']')
    return jsonConvert(json.loads(answer[:list_end + 1]), 'class')


errors = 0   # number of generations that could not be parsed
y_pred = []  # one dict per caption, {} where parsing failed
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=546, truncation=True)

# Wrap every caption in the instruction template used for generation.
prompts = [{'text': f"<s>[INST] {base_prompt + desc} [/INST]"} for desc in obb_eval_captions]

for result in tqdm(pipe(KeyDataset(prompts, 'text'))):
    try:
        y_pred.append(_parse_generated_json(result[0]['generated_text']))
    except Exception as exc:
        # Best effort: keep y_pred aligned with the captions and carry on, but say
        # what went wrong. Unlike a bare `except:`, this lets KeyboardInterrupt through.
        y_pred.append({})
        errors += 1
        tqdm.write(f"Could not parse generation #{len(y_pred) - 1}: {exc!r}")

100%|██████████| 4/4 [00:02<00:00,  1.90it/s]


In [21]:
# Display the parsed predictions, one dict per caption ({} marks a generation that failed to parse).
y_pred

[{'plane': {'count': 3, 'avg_dist': 0}},
 {'ship': {'count': 2, 'avg_dist': 0}},
 {'backetball-court': {'count': 3, 'avg_dist': 0}},
 {'bridge': {'count': 3, 'avg_dist': 0}}]

In [22]:
# Store the list of parsed per-caption predictions under ./YOLO.
yolo_desc_path = './YOLO/desc.pkl'
with open(yolo_desc_path, 'wb') as handle:
    pickle.dump(y_pred, handle)

In [7]:
# Rewrite PrioEval.csv without the index column that the earlier to_csv call stored
# (pandas reads it back as 'Unnamed: 0').
prio_eval = (
    pd.read_csv('./PrioEval.csv', sep='\t')
    # errors='ignore' keeps the cell re-runnable: once the column is gone, a second
    # run would otherwise stop with a KeyError.
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
prio_eval.to_csv('./PrioEval.csv', sep='\t', index=False)