# Keyword Bias Generation
### Install packages

In [1]:
%%capture
import sys

!{sys.executable} -m pip install spacy
!{sys.executable} -m pip install openai
!{sys.executable} -m spacy download en
!{sys.executable} -m pip install --upgrade diffusers[torch] -q
!{sys.executable} -m pip install -U autotrain-advanced

### Get prompt keywords

In [2]:
import spacy
nlp = spacy.load('en_core_web_sm')

def retrieve_keywords(doc):
    keywords = []
    for token in doc:
        if token.pos_ in {'NOUN', 'VERB', 'PROPN', 'ADJ'}:
            keywords.append(token.text)
        elif token.ent_type_ in {'NORP'}:
            keywords.append(token.text)
    return keywords

In [3]:
prompt = "A female Malaysian is eating."

In [4]:
doc = nlp(prompt)
keywords = retrieve_keywords(doc)
keywords

['female', 'Malaysian', 'eating']

### Get biases related to prompt

In [5]:
from openai import OpenAI

client = OpenAI(
    api_key="sk-jJJVpWQAtoKpzNCOUr4F83gSgGUFX9lH5a4UjoItYnI3Ylv5",
    base_url="https://api.chatanywhere.tech/v1"
)

In [6]:
def gpt_35_api(messages: list):
    completion = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    return completion.choices[0].message.content

In [7]:
# Few shot prompting
bias_inst = """
### Instructions ###
Your task is to provide common biases related to the given keywords and say nothing else.

Output only the keyword and its associated biases where each bias is separated with commas, as shown in the format below.

### Format ###
keyword 1: bias 1, bias 2, bias 3 ...
keyword 2: bias 1, bias 2, bias 3 ...
keyword 3: bias 1, bias 2, bias 3 ...

### Keywords ###
{keywords}
""".format(keywords=', '.join(keywords))
bias_inst

'\n### Instructions ###\nYour task is to provide common biases related to the given keywords and say nothing else.\n\nOutput only the keyword and its associated biases where each bias is separated with commas, as shown in the format below.\n\n### Format ###\nkeyword 1: bias 1, bias 2, bias 3 ...\nkeyword 2: bias 1, bias 2, bias 3 ...\nkeyword 3: bias 1, bias 2, bias 3 ...\n\n### Keywords ###\nfemale, Malaysian, eating\n'

In [8]:
messages = [{'role': 'user','content': bias_inst}]
result = gpt_35_api(messages)
result

'female: gender bias, stereotype threat, confirmation bias  \nMalaysian: ethnic bias, cultural bias, stereotype threat  \neating: food bias, dietary bias, confirmation bias'

In [9]:
def convert2Dict(inp):
    entries = inp.split('\n')
    result_dict = {}
    for entry in entries:
        key, values = entry.split(': ')
        values_list = [value.strip() for value in values.split(',')]
        result_dict[key] = values_list
    return result_dict

key_bias = convert2Dict(result)
key_bias

{'female': ['gender bias', 'stereotype threat', 'confirmation bias'],
 'Malaysian': ['ethnic bias', 'cultural bias', 'stereotype threat'],
 'eating': ['food bias', 'dietary bias', 'confirmation bias']}

# Input Prompt Images
### Set up SDXL

In [10]:
import torch

# Check if GPU is available
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")

NVIDIA A100 80GB PCIe MIG 3g.40gb


In [11]:
from diffusers import DiffusionPipeline, AutoencoderKL

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
)
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
pipe.to("cuda");



### Generate 10 images from the prompt

In [None]:
image = pipe(prompt=prompt, num_inference_steps=25, num_images_per_prompt = 10)

In [None]:
save_path = "./gen_img"

In [None]:
from PIL import Image
import os


def image_grid(imgs, rows, cols, resize=256):
    assert len(imgs) == rows * cols

    if resize is not None:
        imgs = [img.resize((resize, resize)) for img in imgs]

    w, h = imgs[0].size
    grid_w, grid_h = cols * w, rows * h
    grid = Image.new("RGB", size=(grid_w, grid_h))

    # Check if save_path exists
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    for i, img in enumerate(imgs):
        img.save(f"{save_path}/{i}.png")
        x = i % cols * w
        y = i // cols * h
        grid.paste(img, box=(x, y))

    return grid

In [None]:
image_grid(image.images, 2, 5)

# Generated Images VQA
### Set up MiniGPT-v2

In [None]:
%%capture

import os
import random
import re
import torch
import html
import numpy as np
from PIL import Image
import cv2
import torchvision.transforms as T
from collections import defaultdict
from minigpt4.common.config import Config
from minigpt4.common.registry import registry
from minigpt4.conversation.conversation import Conversation, SeparatorStyle, Chat
import torch.backends.cudnn as cudnn
import argparse

In [None]:
#Initialize random seeds for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
cudnn.benchmark = False
cudnn.deterministic = True

In [None]:
# Configuration and model loading
def initialize_model(config_path, gpu_id):
    args = argparse.Namespace(cfg_path=config_path, gpu_id=gpu_id, options=None)
    print(args)
    cfg = Config(args)
    device = f'cuda:{args.gpu_id}'
    model_config = cfg.model_cfg
    model_config.device_8bit = args.gpu_id
    model_cls = registry.get_model_class(model_config.arch)
    model = model_cls.from_config(model_config).to(device)
    model = model.eval()
    vis_processor_cfg = cfg.datasets_cfg.cc_sbu_align.vis_processor.train
    vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
    return model, vis_processor, device

# Upload and process image
def process_image(image_path, vis_processor):
    image = Image.open(image_path).convert("RGB")
    image_tensor = vis_processor(image).unsqueeze(0).to(device)
    return image_tensor

# Provide a prompt to return output
def ask_question(model, vis_processor, device, image_tensor, prompt):
    chat = Chat(model, vis_processor, device=device)
    chat_state = Conversation(
        system="",
        roles=(r"<s>[INST] ", r" [/INST]"),
        messages=[],
        offset=2,
        sep_style=SeparatorStyle.SINGLE,
        sep="",
    )
    img_list = [image_tensor]

    chat.upload_img(image_tensor, chat_state, img_list)
    chat.ask(prompt, chat_state)

    llm_message = chat.answer(conv=chat_state, img_list=img_list, temperature=0.6, max_new_tokens=500, max_length=2000)[0]
    return llm_message

# Main function to use the above utilities
def main(image_path, prompt, config_path='./eval_configs/minigptv2_eval.yaml', gpu_id=0):
    model, vis_processor, device = initialize_model(config_path, gpu_id)
    image_tensor = process_image(image_path, vis_processor)
    answer = ask_question(model, vis_processor, device, image_tensor, prompt)
    return answer

In [None]:
gen_image_path = "./gen_img/0.png"
prompt = "Describe this image in detail."
output = main(gen_image_path, prompt)
output

### Set up MiniGPT 4

In [None]:
import argparse
import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image

from transformers import StoppingCriteriaList

from minigpt4.common.config import Config
from minigpt4.common.dist_utils import get_rank
from minigpt4.common.registry import registry
from minigpt4.conversation.conversation import Chat, CONV_VISION_Vicuna0, CONV_VISION_LLama2, StoppingCriteriaSub

# imports modules for registration
from minigpt4.datasets.builders import *
from minigpt4.models import *
from minigpt4.processors import *
from minigpt4.runners import *
from minigpt4.tasks import *

In [None]:
# Function to parse arguments
def parse_args(args_list):
    parser = argparse.ArgumentParser(description="Demo")
    parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
    parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )
    args = parser.parse_args(args_list)
    return args

# Function to set up seeds
def setup_seeds(config):
    seed = config.run_cfg.seed + get_rank()

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    cudnn.benchmark = False
    cudnn.deterministic = True

# Function to initialize the model
def initialize_model(cfg_path, gpu_id):
    args = parse_args(['--cfg-path', cfg_path, '--gpu-id', str(gpu_id)])
    cfg = Config(args)
    
    setup_seeds(cfg)
    
    model_config = cfg.model_cfg
    model_config.device_8bit = args.gpu_id
    model_cls = registry.get_model_class(model_config.arch)
    model = model_cls.from_config(model_config).to('cuda:{}'.format(args.gpu_id))
    
    conv_dict = {'pretrain_vicuna0': CONV_VISION_Vicuna0, 'pretrain_llama2': CONV_VISION_LLama2}
    CONV_VISION = conv_dict[model_config.model_type]
    
    vis_processor_cfg = cfg.datasets_cfg.cc_sbu_align.vis_processor.train
    vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
    
    stop_words_ids = [[835], [2277, 29937]]
    stop_words_ids = [torch.tensor(ids).to(device='cuda:{}'.format(args.gpu_id)) for ids in stop_words_ids]
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
    
    chat = Chat(model, vis_processor, device='cuda:{}'.format(args.gpu_id), stopping_criteria=stopping_criteria)
    
    return chat, CONV_VISION, vis_processor

# Function to process image and prompt
def process_image_and_prompt(chat, CONV_VISION, vis_processor, image_path, prompt):
    img = Image.open(image_path).convert('RGB')
    chat_state = CONV_VISION.copy()
    img_list = []
    llm_message = chat.upload_img(img, chat_state, img_list)
    chat.encode_img(img_list)
    chat.ask(prompt, chat_state)
    response = chat.answer(conv=chat_state,
                           img_list=img_list,
                           num_beams=1,
                           temperature=1.0,
                           max_new_tokens=300,
                           max_length=2000)[0]
    return response

In [None]:
# Initialize the model (provide your config path and GPU ID here)
cfg_path = "/ibm/gpfs/home/lchu0039/eval_configs/minigpt4_eval.yaml"
gpu_id = 0  # change if necessary
chat, CONV_VISION, vis_processor = initialize_model(cfg_path, gpu_id)

# Process an image and prompt (provide your image path and prompt here)
gen_image_path = "./gen_img/0.png"
prompt = "Describe this image in detail."
response = process_image_and_prompt(chat, CONV_VISION, vis_processor, image_path, prompt)
response

### Set up LLaMA 3 with vision capabilities utilising SIGLIP

In [None]:
%%capture
!{sys.executable} -m pip install --upgrade torch transformers pillow

In [None]:
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_skip_modules=["mm_projector", "vision_model"],
)

model_path = "./llama-3-vision-alpha-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_cfg,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    use_fast=True,
)


In [None]:
# Few shot prompting
phrase_inst = """
### Instructions ###
Your task is to provide the phrases from the given sentence related to the given topic and say nothing else.

Output only the phrases where each phrase is separated with slashes, as shown in the format below.

### Format ###
phrase 1 / phrase 2 / phrase 3 ...

### Sentence ###
{sentence}

### Topic ###
{topic}
"""

def generate_captions(prompt, topic): 
  phrase_col = []
  for i in range(10):
    gen_image_path = "./gen_img/" + str(i) + ".png"
    image = Image.open(gen_image_path)

    desc_output = tokenizer.decode(model.answer_question(image, prompt, tokenizer), skip_special_tokens=True)
    # print(str(i) + ": " + desc_output)
    
    mod_inst = phrase_inst.format(sentence=desc_output, topic=topic)
    
    messages = [{'role': 'user','content': mod_inst}]
    result = gpt_35_api(messages)
    phrase_col.append((i, result))
  return phrase_col

def show_phrases(arr):
  for i in range(len(arr)):
    print("Image " + str(i) + ": ")
    print(output[i][1])

In [None]:
prompt = "Describe the image"
topic = 'gender'
output = generate_captions(prompt, topic)
show_phrases(output)

In [None]:
topic = 'gender bias'
output = generate_captions(prompt, topic)
show_phrases(output)

In [None]:
topic = 'food preference'
output = generate_captions(prompt, topic)
show_phrases(output)

In [None]:
topic = 'food preference bias'
output = generate_captions(prompt, topic)
show_phrases(output)