### Installing lib

In [1]:
!pip install gradio
!pip install git+https://github.com/huggingface/transformers
!pip install torch

Collecting gradio
  Downloading gradio-4.43.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<0.113.0 (from gradio)
  Downloading fastapi-0.112.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m303.2 kB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (

### Getting model

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import gradio as gr
import time

In [8]:
torch.set_default_device('cuda')

In [9]:
model = AutoModelForCausalLM.from_pretrained(
    "MILVLG/imp-v1-3b",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    "MILVLG/imp-v1-3b",
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

KeyboardInterrupt: 

### Testing model inference

In [14]:
USER_PROMPT = ""
SYSTEM_PROMPT = f"""
    A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed testcase related to image uploaded. You are tasked with generating detailed, step-by-step test cases for software functionality based on uploaded images. The user will provide one or more images of a software or website interface. For each image, generate a separate set of test cases following the format below:

    '''Description: Provide a brief explanation of the functionality being tested, as inferred from the image.
    Pre-conditions: Identify any setup requirements, dependencies, or conditions that must be met before testing can begin (e.g., user logged in, specific data pre-populated, etc.).
    Testing Steps: Outline a clear, numbered sequence of actions that a user would take to test the functionality in the image.
    Expected Result: Specify the expected outcome if the functionality is working as intended.'''

    Ensure that:
    Testcases should be related to validation of data, component interactions, navigation, etc.
    Each testcase should have it's own Description, Pre-conidtions, Testing Steps, Expected Result.

    USER: <image>\n{USER_PROMPT}
    ASSISTANT:
    """
image = Image.open('EXAMPLE.png')
input_ids = tokenizer(SYSTEM_PROMPT, return_tensors="pt").input_ids
image_tensor = model.image_preprocess(image)

print(f"input : {input_ids.shape}")
print(f"image : {image_tensor.shape}")

output_ids = model.generate(
    input_ids,
    max_new_tokens=400,
    images=image_tensor,
    use_cache=False,
    )[0]

RES  = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()

print(f"LLM OUTPUT : {RES}")

input : torch.Size([1, 256])
image : torch.Size([1, 3, 384, 384])
LLM OUTPUT : Description: The image shows a computer screen with a chatbot interface displayed. The chatbot has a button labeled "Undo" on the left side, and a button labeled "Clear" on the right side.
    Pre-conditions: The user is logged in to the chatbot system and has already interacted with the chatbot.
    Testing Steps:
    1. Click on the "Undo" button to revert any previous actions performed by the user.
    2. Click on the "Clear" button to clear the chatbot's history and reset the conversation.
    Expected Result: After clicking the "Undo" button, the chatbot should display the previous interaction or message that was undone. After clicking the "Clear" button, the chatbot's history should be cleared, and the conversation should start from the beginning.


### Using gradio

In [None]:
def generate_answer(IMAGES: list, SYSTEM_PROMPT) -> str:
    print(len(IMAGES))

    INPUT_IDS = tokenizer(SYSTEM_PROMPT, return_tensors="pt").input_ids

    RESULT = ""
    for EACH_IMG in IMAGES:
      image_path = EACH_IMG['path']
      image = Image.open(image_path)
      image_tensor = model.image_preprocess(image)

      output_ids = model.generate(
          inputs=INPUT_IDS,
          max_new_tokens=500,
          images=image_tensor,
          use_cache=False,
      )[0]
      CUR_RESULT = tokenizer.decode(
          output_ids[INPUT_IDS.shape[1] :], skip_special_tokens=True
      ).strip()

      RESULT = f"{RESULT} /n/n {CUR_RESULT}"

    return RESULT

In [None]:
def response(USER_DATA, TOKEN) -> str:
    print(USER_DATA)
    MESSAGE = USER_DATA["text"]
    NUM_FILES = len(USER_DATA["files"])
    FILES = USER_DATA["files"]

    SYSTEM_PROMPT = f"""
    A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed testcase related to image uploaded. You are tasked with generating detailed, step-by-step test cases for software functionality based on uploaded images. The user will provide one or more images of a software or website interface. For each image, generate a separate set of test cases following the format below:

    '''Description: Provide a brief explanation of the functionality being tested, as inferred from the image.
    Pre-conditions: Identify any setup requirements, dependencies, or conditions that must be met before testing can begin (e.g., user logged in, specific data pre-populated, etc.).
    Testing Steps: Outline a clear, numbered sequence of actions that a user would take to test the functionality in the image.
    Expected Result: Specify the expected outcome if the functionality is working as intended.'''

    Ensure that:
    Testcases should be related to validation of data, component interactions, navigation, etc.
    Each testcase should have it's own Description, Pre-conidtions, Testing Steps, Expected Result.

    USER: <image>\n{MESSAGE}
    ASSISTANT:
    """

    RES = generate_answer(FILES, SYSTEM_PROMPT)

    response = f"{RES}."
    return response
    # for i in range(len(response)):
    #     time.sleep(0.025)
    #     yield response[: i + 1]

In [None]:
with gr.Blocks() as llm:
    slider = gr.Slider(10, 100, render=False)
    chatbot = gr.ChatInterface(
        fn=response,
        multimodal=True,
        title="MultiModel LLM for Testcase generation",
        cache_examples=True,
    )


llm.launch(debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://5d8b86553975ef71c9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


{'text': '', 'files': [{'path': '/tmp/gradio/6d59aea67fdb80547fe932ab7b7c1abc10ff9066dbab894061d271467c2c6fc3/Screenshot 2024-09-07 230747.png', 'url': 'https://5d8b86553975ef71c9.gradio.live/file=/tmp/gradio/6d59aea67fdb80547fe932ab7b7c1abc10ff9066dbab894061d271467c2c6fc3/Screenshot 2024-09-07 230747.png', 'size': 49675, 'orig_name': 'Screenshot 2024-09-07 230747.png', 'mime_type': 'image/png', 'is_stream': False, 'meta': {'_type': 'gradio.FileData'}}]}
1
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5d8b86553975ef71c9.gradio.live


