# AI test task

## Installing dependencies

In [1]:
!pip install transformers einops
!pip install -q gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
import requests
from io import BytesIO
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

class ModelLoader:
    """Load a vision-language model once and answer questions about images.

    The model and tokenizer are fetched with ``from_pretrained`` at a pinned
    revision. ``trust_remote_code=True`` lets transformers run Python code
    shipped in the model repository, which is why the revision is pinned
    rather than left floating.

    Args:
        model_name: Hub id of the model to load.
        revision: Revision (tag/branch/commit) of the model repository.
        device: Device string passed to ``model.to``. When ``None`` the model
            goes to ``"cuda"`` if torch reports a GPU, otherwise to ``"cpu"``.
    """

    def __init__(self, model_name="vikhyatk/moondream2", revision="2024-08-26", device=None):
        if device is None:
            # Imported here because the notebook has no module-level torch
            # import; only needed when the device has to be auto-detected.
            import torch

            device = "cuda" if torch.cuda.is_available() else "cpu"

        self._device = device
        self._model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True, revision=revision
        ).to(device)

        self._tokenizer = AutoTokenizer.from_pretrained(
            model_name, revision=revision
        )
        # Inference only: switch off training-time behaviour such as dropout.
        self._model.eval()

    def get_image_by_url(self, url, timeout=30):
        """Download ``url`` and return it as a PIL image.

        Args:
            url: HTTP(S) address of the image.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller forever.

        Raises:
            requests.HTTPError: If the server answers with an error status.
                Raised before decoding so an HTML error page is never handed
                to PIL as if it were image data.
        """
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))

    def image_ask(self, url, question):
        """Fetch the image at ``url`` and return the model's answer to ``question``."""
        enc_image = self._model.encode_image(self.get_image_by_url(url))
        return self._model.answer_question(
            enc_image, question,
            self._tokenizer
        )


# Single loader instance used by the Gradio callbacks in the interface cell.
# Constructing it loads the model and tokenizer via from_pretrained, so this
# line is the slow part of the cell.
model_loader = ModelLoader()

## Interface implementation

In [13]:
import gradio as gr


# HTML header shown at the top of the page (passed to gr.HTML when the app is built).
# NOTE(review): the heading reads "CLIP Interrogator" although the model loaded
# by this notebook is moondream2 - confirm the title is intentional.
TITLE = """
    <div style="text-align: center; max-width: 650px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px;">
            CLIP Interrogator
        </h1>
        </div>
    </div>
"""

# Image URLs offered for selection; used as the initial value of the gallery
# created in analyze_tab.
IMAGES = [
    "https://raw.githubusercontent.com/andreyklumchyk/pyt-snd/main/img/cat.jpg",
    "https://raw.githubusercontent.com/andreyklumchyk/pyt-snd/main/img/dog.jpg",
    "https://raw.githubusercontent.com/andreyklumchyk/pyt-snd/main/img/turtle.jpg",
    "https://raw.githubusercontent.com/andreyklumchyk/pyt-snd/main/img/human.jpg",
    "https://raw.githubusercontent.com/andreyklumchyk/pyt-snd/main/img/woman.jpg"
]


# Location of the image picked in the gallery; None until select_image runs.
# It is written by select_image and read by the describe/test callbacks.
# Being a module-level variable, there is exactly one value per Python process.
CURRENT_IMAGE_URL = None


def select_image(selection: gr.SelectData):
    """Remember which gallery image was clicked and confirm it to the user.

    The ``path`` entry of the selected item's ``image`` record is stored in
    the module-level ``CURRENT_IMAGE_URL``, where the describe/test callbacks
    pick it up. Because that variable is global, the selection is a single
    value for the whole process rather than one per browser session.
    """
    global CURRENT_IMAGE_URL
    image_record = selection.value['image']
    CURRENT_IMAGE_URL = image_record['path']
    gr.Info("Image selected")


def selected_image_describe():
    """Return the model's description of the selected image.

    Shows a warning and returns ``None`` when no image has been selected yet.
    """
    image_url = CURRENT_IMAGE_URL
    if image_url:
        return model_loader.image_ask(image_url, "Describe this image.")

    gr.Warning("No image selected!")
    return None


def _is_yes(answer):
    """Return True when the first word of ``answer`` is "yes", ignoring case, spaces and punctuation."""
    if not isinstance(answer, str):
        return False
    words = answer.strip().lower().split(None, 1)
    if not words:
        return False
    return words[0].strip(".,!?;:\"'") == "yes"


def selected_image_test():
    """Ask the model a fixed list of yes/no questions about the selected image.

    Returns:
        A list of booleans, one per question and in question order, suitable
        for the five checkbox outputs this callback is wired to. When no image
        is selected a warning is shown and every entry is ``False``; the list
        still has one value per output, because a single ``None`` would not
        supply enough values for a callback with several output components.
    """
    questions = [
        "Is it a photo? Answer only \"Yes\" or \"No\".",
        "Is it a human? Answer only \"Yes\" or \"No\".",
        "Is it alone? Answer only \"Yes\" or \"No\".",
        "Is it a woman? Answer only \"Yes\" or \"No\".",
        "Is it happy? Answer only \"Yes\" or \"No\"."
    ]

    if not CURRENT_IMAGE_URL:
        gr.Warning("No image selected!")
        return [False] * len(questions)

    # The model does not always reply with the bare token "Yes" (it may add
    # punctuation, change case or append a phrase), so normalise the answer
    # instead of comparing it to the exact string.
    return [
        _is_yes(model_loader.image_ask(CURRENT_IMAGE_URL, question))
        for question in questions
    ]


def analyze_tab():
    """Lay out the "Analyze" tab and wire its callbacks.

    Left column: the image gallery, whose select event calls ``select_image``.
    Right column: a description textbox with its "Describe" button, followed
    by five checkboxes and a "Test" button. The buttons call
    ``selected_image_describe`` and ``selected_image_test``; the latter fills
    the checkboxes in the order they are created here.
    """
    checkbox_labels = (
        "Is a photo?",
        "Is a human?",
        "Alone?",
        "Is it a woman?",
        "Is it happy?",
    )
    gallery_options = dict(
        label="Pickup any image",
        preview=True,
        value=IMAGES,
        show_label=False,
        elem_id="gallery",
        columns=[2], rows=[2],
        object_fit="contain",
        height=800,
        allow_preview=False,
        selected_index=None,
    )

    with gr.Row():
        with gr.Column():
            image_gallery = gr.Gallery(**gallery_options)
            image_gallery.select(select_image, inputs=None, outputs=None)

        with gr.Column():
            description_box = gr.Textbox(label="Description", elem_id="output-txt")
            describe_btn = gr.Button("Describe")

            # Created in the same order as the answers returned by the test callback.
            answer_boxes = [gr.Checkbox(label=text) for text in checkbox_labels]
            test_btn = gr.Button("Test")

    describe_btn.click(selected_image_describe, inputs=[], outputs=[description_box])
    test_btn.click(selected_image_test, inputs=[], outputs=answer_boxes)

# Assemble the page: the HTML title followed by a single "Analyze" tab whose
# contents are built by analyze_tab(). Components must be created inside these
# context managers, so the nesting below is the layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(TITLE)

        with gr.Tab("Analyze"):
            analyze_tab()


# share=True publishes the app on a temporary public gradio.live URL (see the
# cell output), so anyone with the link can trigger model inference.
# NOTE(review): the cell output says debug=True is needed to show errors when
# running inside Colab - confirm whether show_error=True alone is enough here.
demo.launch(show_error=True, share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://dc46273161dcd629a2.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


