In [1]:
from torch.utils.data import DataLoader
import os


import torch
from peft import PeftModel, PeftModelForCausalLM, prepare_model_for_int8_training, LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, AutoFeatureExtractor

from general_dataset import GeneralDataset
from agi_utils import *
from tqdm import tqdm
from undecorated import undecorated
from types import MethodType

import numpy as np
from IPython.utils import io
import random
from evaluate import load
from torchvision import transforms
from torchmetrics.multimodal import CLIPScore
from combine_model_seq import SeqCombine

import time


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda-11.2/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 112
CUDA SETUP: Loading binary /research/cbim/vast/zl502/anaconda3/envs/peft_agi/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda112.so...


  warn(msg)
  warn(f"Failed to load image Python extension: {e}")


In [2]:
"""
assign openagi data path 
"""
# Root of the OpenAGI data; replace the placeholder with a real path before running.
data_path = "YOUR_DATA_PATH"

# Dataset indices of the tasks evaluated in this notebook.
# NOTE(review): index 70 appears twice, so that task is run and counted twice
# in every average below -- confirm whether the duplicate is intentional.
test_task_idx = [2, 3, 10, 15, 20, 35, 45, 55, 65, 70, 70, 90, 106, 107]

# Task descriptions loaded from disk; looked up by task index below.
task_discriptions = txt_loader("./task_description.txt")
test_tasks = [task_discriptions[task_id].strip() for task_id in test_task_idx]

# One DataLoader (batches of 20) per test task, in the same order as test_task_idx.
test_dataloaders = [
    DataLoader(GeneralDataset(task_id, data_path), batch_size=20)
    for task_id in tqdm(test_task_idx)
]

100%|███████████████████████████████████████████| 14/14 [00:06<00:00,  2.21it/s]


In [3]:
# Base model onto which the LoRA weights are loaded. Alternatives tried earlier:
# base_model = "eachadea/vicuna-7b-1.1"
# base_model = "TheBloke/Llama-2-13B-chat-GGML"
# base_model = "chainyo/alpaca-lora-7b"
base_model = "meta-llama/Llama-2-13b-chat-hf"

# NOTE(review): load_8bit is defined but never passed to from_pretrained below,
# so the base model is not loaded in 8-bit -- confirm whether that is intended.
load_8bit = True

# Read the Hugging Face token from the environment so that the secret never has
# to be pasted into (and saved with) the notebook. The placeholder is kept as a
# fallback so an in-place edit of this line still works.
hf_token = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_KEY")

# Per-GPU memory budget handed to from_pretrained as max_memory.
# Presumably the "0GB" entries keep the model off GPUs 4-6 so they stay free
# for the task modules and metrics -- confirm against the device setup.
max_memory_mapping = {
    0: "48GB",
    1: "48GB",
    2: "48GB",
    3: "48GB",
    4: "0GB",
    5: "0GB",
    6: "0GB",
    # 7: "0GB",
}

# Alternative budget for a smaller machine:
# max_memory_mapping = {
#     0: "0GB",
#     1: "0GB",
#     2: "24GB",
#     3: "24GB",
# }

tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    use_auth_token=hf_token
    # padding_side='left'
)
# Pad with token id 0 rather than registering a dedicated pad token.
# tokenizer.add_special_tokens({'pad_token': '<pad>'})
tokenizer.pad_token_id = 0

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    max_memory=max_memory_mapping,
    use_auth_token=hf_token
)

# Path or hub id of the fine-tuned LoRA adapter; replace the placeholder.
lora_weights = "YOUR_LORA_WEIGHTS"

# Wrap the base model with the adapter for inference only (is_trainable=False).
# NOTE(review): torch_dtype=torch.float16 is given only here; the base model
# load above sets no dtype -- confirm that is intended.
model = PeftModelForCausalLM.from_pretrained(
    model,
    lora_weights,
    torch_dtype=torch.float16,
    is_trainable=False,
    device_map="auto",
    max_memory=max_memory_mapping,
)

# Sanity check: an inference-only adapter should report 0 trainable parameters.
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

trainable params: 0 || all params: 13022417920 || trainable%: 0.0


In [5]:
import openai

# Take the key from the OPENAI_API_KEY environment variable so the secret is not
# stored in the notebook; the placeholder remains as a fallback for in-place edits.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_KEY")

def _parse_module_sequence(content):
    """Extract module names from a 'module: A, module: B' style reply.

    The marker is matched case-insensitively and with flexible spacing, names
    may be separated by commas or placed on separate lines, and any text
    before the first marker or on lines after a marker's own line is ignored.

    Returns the names joined as "A, B, C", or "" when no marker is present.
    """
    import re  # local import keeps this cell independent of the import cell

    names = []
    # The piece before the first marker is preamble, hence the [1:].
    for chunk in re.split(r"module\s*:\s*", content or "", flags=re.IGNORECASE)[1:]:
        chunk = chunk.strip()
        if not chunk:
            continue
        # Names sit on the marker's own line; later lines are commentary.
        for name in chunk.splitlines()[0].split(","):
            # Drop separators, closing punctuation and echoed quote marks.
            name = name.strip(" \t.;'\"")
            if name:
                names.append(name)
    return ", ".join(names)


def generate_module_list_with_gpt(generated_module_seq, model="gpt-3.5-turbo", request_delay=2):
    """Ask an OpenAI chat model to reduce free text to a list of known modules.

    Args:
        generated_module_seq: Text (the planner's answer) to extract module
            names from.
        model: Name of the OpenAI chat model to query.
        request_delay: Seconds to sleep after the request, as crude
            client-side rate limiting between consecutive calls.

    Returns:
        A comma-separated string of module names ("A, B, C"), or "" when the
        reply contains no 'module:' entries. Callers split it on ",".
    """
    todo_prompt = "You are a key phrase extractor who is able to extract potential module names from the given context. You have already known all the module names in the full module list. The full module list is: [Image Classification, Colorization, Object Detection, Image Deblurring, Image Denoising, Image Super Resolution, Image Captioning, Text to Image Generation, Visual Question Answering, Sentiment Analysis, Question Answering, Text Summarization, Machine Translation]. Given the following context: '{}'. Please extract a module sequence from this context and remove module names which do not exist in the full module list from this sequence. Output the module sequence after filtering as the format of 'module: module1, module: module2, module: module3, etc...'. "
    prompt = todo_prompt.format(generated_module_seq)

    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    time.sleep(request_delay)

    content = completion.choices[0].message["content"]
    return _parse_module_sequence(content)

# generated_module_list = generate_module_list_with_gpt(response[prompt_length:])
# print(generated_module_list)

In [6]:
"""
Loading Evaluation Metrics
"""

# CLIP-based metric object; the evaluation loop calls it as
# clip_score(predictions, inputs).
clip_score = CLIPScore(model_name_or_path="openai/clip-vit-base-patch16")


# Load a pre-trained Vision Transformer model and its feature extractor.
# Both are passed to image_similarity() in the evaluation loop.
vit_ckpt = "nateraw/vit-base-beans"
vit = AutoModel.from_pretrained(vit_ckpt)
vit.eval()  # switch to evaluation mode; the model is only used for scoring here
vit_extractor = AutoFeatureExtractor.from_pretrained(vit_ckpt)

# Tensor -> PIL image converter.
# NOTE(review): `f` is not used in the cells visible here -- confirm it is still needed.
f = transforms.ToPILImage()
# BERTScore metric object; passed to txt_eval() in the evaluation loop.
bertscore = load("bertscore")

# Devices handed to SeqCombine; the evaluation loop uses the resulting object to
# construct, run and close module sequences.
# device_list = ["cuda:1","cuda:2","cuda:3","cuda:4","cuda:5","cuda:7","cpu"]
device_list = ["cuda:5", "cpu"]
seqCombination = SeqCombine(device_list)

Some weights of the model checkpoint at nateraw/vit-base-beans were not used when initializing ViTModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at nateraw/vit-base-beans and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fetching 21 files:   0%|          | 0/21 [00:00<?, ?it/s]

The config attributes {'scaling_factor': 0.18215} were passed to AutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.


In [9]:
from sentence_transformers import SentenceTransformer, util

# Generation-related settings for the evaluation run.
module_length, num_beams, num_return_sequences = 10, 1, 1

# Device that receives the prompt tensors and is passed to txt_eval.
eval_device = "cuda:4"

# One full evaluation pass is made per seed.
random_seeds = list(range(5))

# Sentence encoder (kept on CPU) passed to match_module_seq in the evaluation loop.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2', device="cpu")

# Per-seed averages, appended to once per seed by the evaluation loop.
total_avg_clips, total_avg_berts, total_avg_similarities, total_avg_rewards = [], [], [], []

def _metric_for_task(task_idx):
    """Name the metric family used to score the task with this dataset index.

    Returns "similarity" for indices 0-14, "bert" for 15-104 and for 107 and
    above, and "clip" for every other index (105 and 106 among the
    non-negative ones).
    """
    if 0 <= task_idx <= 14:
        return "similarity"
    if 15 <= task_idx <= 104 or 107 <= task_idx:
        return "bert"
    return "clip"


for seed in tqdm(random_seeds):
    # Seed every RNG this notebook imports, not only torch's, so that a pass
    # is as repeatable as the (unseeded) remote GPT call allows.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Rewards of this seed: all tasks together, and split by metric family.
    rewards = []
    clips = []
    berts = []
    similarities = []
    rewards_by_metric = {"clip": clips, "bert": berts, "similarity": similarities}

    for i, task_description in enumerate(tqdm(test_tasks)):
        print(task_description)
        task_idx = test_task_idx[i]
        metric = _metric_for_task(task_idx)
        task_rewards = []

        with torch.no_grad():
            input_s = ["### Human: "+task_description+"\n### Assistant: "]
            input_ids = tokenizer.batch_encode_plus(
                input_s, padding="longest", return_tensors="pt"
            )["input_ids"].to(eval_device)
            # Only the generated token ids are read below, so per-step scores
            # and hidden states are not requested (they would be kept in
            # memory for up to 2048 positions without ever being used).
            output = model.generate(
                input_ids=input_ids,
                max_length=2048,
                return_dict_in_generate=True,
                num_beams=num_beams,
                repetition_penalty=1.25,
            )

        # Decode only the continuation. Slicing by token count is exact,
        # whereas slicing the decoded text by the prompt's character length
        # goes wrong whenever decoding does not reproduce the prompt verbatim.
        # Sampling options are generate() arguments and have no effect on
        # decode(), so none are passed here.
        prompt_token_count = input_ids.shape[1]
        generated_reply = tokenizer.decode(
            output["sequences"][0][prompt_token_count:], skip_special_tokens=True
        )

        # Reduce the free-text plan to known module names, then align them
        # with the module vocabulary.
        vicuna_steps = generate_module_list_with_gpt(generated_reply).split(",")
        module_list = match_module_seq(vicuna_steps, sentence_model)
        print(module_list)

        if len(module_list) >= 1 and whole_module_seq_filter(module_list, task_idx):
            seqCombination.construct_module_seq(module_list)
            try:
                for batch in tqdm(test_dataloaders[i]):
                    inputs = list(batch['input'][0])
                    predictions = seqCombination.run_module_seq(inputs)

                    if metric == "similarity":
                        outputs = list(batch['output'][0])
                        dist = image_similarity(predictions, outputs, vit, vit_extractor)
                        task_rewards.append(dist / 100)
                    elif metric == "bert":
                        outputs = list(batch['output'][0])
                        f1 = np.mean(txt_eval(predictions, outputs, bertscore, device=eval_device))
                        task_rewards.append(f1)
                    else:
                        score = clip_score(predictions, inputs)
                        # Store a plain float so np.mean never has to convert tensors.
                        task_rewards.append(float(score.detach() / 100))
            finally:
                # Close the constructed sequence even when a batch raises, so a
                # failed task does not leave its modules open for the next one.
                seqCombination.close_module_seq()

            # A task without any batch scores 0, like a rejected plan, instead
            # of turning every downstream average into nan.
            ave_task_reward = np.mean(task_rewards) if task_rewards else 0
        else:
            # Empty or rejected module sequence: the task earns no reward.
            ave_task_reward = 0

        print(ave_task_reward)

        rewards_by_metric[metric].append(ave_task_reward)
        rewards.append(ave_task_reward)

    # NOTE: np.mean of an empty list is nan, so each metric family needs at
    # least one task in test_task_idx for these averages to be meaningful.
    avg_clips = np.mean(clips)
    avg_berts = np.mean(berts)
    avg_similarities = np.mean(similarities)
    # Overall reward weights the three metric families equally, regardless of
    # how many tasks each one has.
    avg_rewards = (avg_clips + avg_berts + avg_similarities) / 3

    print([avg_clips, avg_berts, avg_similarities, avg_rewards])

    total_avg_clips.append(avg_clips)
    total_avg_berts.append(avg_berts)
    total_avg_similarities.append(avg_similarities)
    total_avg_rewards.append(avg_rewards)

print([total_avg_clips, total_avg_berts, total_avg_similarities, total_avg_rewards])

print([np.mean(total_avg_clips), np.mean(total_avg_berts), np.mean(total_avg_similarities), np.mean(total_avg_rewards)])

print("Finished testing!")

  0%|                                                     | 0/1 [00:00<?, ?it/s]
  0%|                                                    | 0/14 [00:00<?, ?it/s][A

Given low-resolutioned blurry grayscale image, how to return the regular image step by step?
Image Denoising, Image Deblurring, Colorization




0it [00:00, ?it/s][A[A

1it [00:12, 12.12s/it][A[A

2it [00:24, 12.34s/it][A[A

3it [00:35, 11.79s/it][A[A

4it [00:47, 11.61s/it][A[A

5it [00:58, 11.71s/it][A[A

  7%|███                                        | 1/14 [02:04<26:59, 124.57s/it][A

0.805744842529297
Given blurry grayscale image, how to return the regular image step by step?
Image Denoising, Colorization




0it [00:00, ?it/s][A[A

1it [00:10, 10.13s/it][A[A

2it [00:18,  9.22s/it][A[A

3it [00:28,  9.32s/it][A[A

4it [00:37,  9.45s/it][A[A

5it [00:46,  9.25s/it][A[A

 14%|██████▏                                    | 2/14 [04:19<26:06, 130.54s/it][A

0.7587722473144531
Given low-resolutioned blurry image, how to return the regular image step by step?
Image Deblurring, Image Super Resolution




0it [00:00, ?it/s][A[A

1it [00:17, 17.50s/it][A[A

2it [00:33, 16.45s/it][A[A

3it [00:49, 16.25s/it][A[A

4it [01:04, 15.85s/it][A[A

5it [01:21, 16.39s/it][A[A

 21%|█████████▏                                 | 3/14 [06:59<26:26, 144.24s/it][A

0.6047112579345704
Given low-resolutioned noisy blurry grayscale image, how to return the caption in German step by step?
Image Denoising, Colorization, Object Detection, Machine Translation




0it [00:00, ?it/s][A[A

1it [00:09,  9.06s/it][A[A

2it [00:17,  8.50s/it][A[A

3it [00:25,  8.40s/it][A[A

4it [00:33,  8.32s/it][A[A

5it [00:41,  8.35s/it][A[A

 29%|████████████▎                              | 4/14 [09:45<25:26, 152.69s/it][A

0.46778397977352143
Given low-resolutioned noisy blurry grayscale image, how to return the object names in English step by step?
Image Denoising, Image Deblurring, Colorization, Object Detection, Machine Translation




0it [00:00, ?it/s][A[A

1it [00:09,  9.01s/it][A[A

2it [00:18,  9.36s/it][A[A

3it [00:30, 10.35s/it][A[A

4it [00:41, 10.77s/it][A[A

5it [00:53, 10.61s/it][A[A

 36%|███████████████▎                           | 5/14 [11:42<20:58, 139.87s/it][A

0.4233705446124077
Given blurry grayscale image, how to return the object names in German step by step?
Image Denoising, Colorization, Object Detection, Machine Translation




0it [00:00, ?it/s][A[A

1it [00:08,  8.48s/it][A[A

2it [00:15,  7.52s/it][A[A

3it [00:24,  8.32s/it][A[A

4it [00:33,  8.61s/it][A[A

5it [00:42,  8.57s/it][A[A

 43%|██████████████████▍                        | 6/14 [14:49<20:45, 155.70s/it][A

0.5442356157302857
Given noisy grayscale image, how to return the caption in German step by step?



 50%|█████████████████████▌                     | 7/14 [16:56<17:05, 146.51s/it][A

Image Denoising, Object Detection, Image Classification, Text to Image Generation, Machine Translation
0
Given low-resolutioned grayscale image, how to return the class label in English step by step?



 57%|████████████████████████▌                  | 8/14 [18:29<12:57, 129.56s/it][A

Colorization, Object Detection, Image Classification
0
Given low-resolutioned noisy blurry image, how to return the object names in German step by step?
Image Denoising, Image Deblurring, Colorization, Object Detection














5it [00:51, 10.31s/it][A[A

 64%|███████████████████████████▋               | 9/14 [21:05<11:28, 137.79s/it][A

0.24602972485125063
Given noisy blurry image, how to return the class label in German step by step?



 71%|██████████████████████████████            | 10/14 [21:35<06:57, 104.34s/it][A

Image Denoising, Image Deblurring, Colorization
0
Given noisy blurry image, how to return the class label in German step by step?



 79%|█████████████████████████████████         | 11/14 [23:40<05:32, 110.68s/it][A

Image Denoising, Colorization, Object Detection, Image Classification, Machine Translation
0
Given low-resolutioned noisy image, how to return the caption in English step by step?
Image Denoising, Image Deblurring, Colorization, Object Detection, Machine Translation




0it [00:00, ?it/s][A[A

1it [00:11, 11.00s/it][A[A

2it [00:22, 11.31s/it][A[A

3it [00:31, 10.28s/it][A[A

4it [00:41, 10.00s/it][A[A

5it [00:52, 10.41s/it][A[A

 86%|████████████████████████████████████      | 12/14 [26:00<03:59, 119.80s/it][A

0.48116223186254503
Given English text, how to generate a image step by step?



 93%|███████████████████████████████████████   | 13/14 [27:28<01:50, 110.01s/it][A

Text to Image Generation, Sentiment Analysis, Object Detection
0
Given clozed English text, how to return the summarization in German step by step?
Machine Translation, Text Summarization




0it [00:00, ?it/s][A[A

1it [00:10, 10.42s/it][A[A

2it [00:20, 10.40s/it][A[A

3it [00:29,  9.64s/it][A[A

4it [00:41, 10.67s/it][A[A

5it [00:55, 11.18s/it][A[A

100%|██████████████████████████████████████████| 14/14 [29:00<00:00, 124.30s/it][A
100%|███████████████████████████████████████████| 1/1 [29:00<00:00, 1740.15s/it]

0.1968381667137146
[0.0, 0.23594202635437247, 0.7230761159261068, 0.31967271409349307]
[[0.0], [0.23594202635437247], [0.7230761159261068], [0.31967271409349307]]
[0.0, 0.23594202635437247, 0.7230761159261068, 0.31967271409349307]
Finished testing!





In [3]:
# Per-seed results copied from the evaluation runs, each ordered as
# [CLIP, BERTScore, image similarity, overall].
s1, s2, s3, s4, s5 = (
    np.array(run)
    for run in (
        [0.0, 0.18858468844741583, 0.801707077026367, 0.3300972551579276],
        [0.0, 0.2142740244194865, 0.7530156758626302, 0.3224299000940389],
        [0.3124078, 0.31503928893804545, 0.8067062479654948, 0.47805110950602425],
        [0.29803005, 0.24689940550178288, 0.7077802251180013, 0.417569893076188],
        [0.0, 0.23594202635437247, 0.7230761159261068, 0.31967271409349307],
    )
)

# Element-wise mean over the five seeds (summed left to right, then divided).
avg_s = sum((s1, s2, s3, s4, s5)) / 5

print(avg_s)

[0.12208757 0.24014789 0.75845707 0.37356417]
