# Import Libraries

In [None]:
# !pip install torch transformers gradio re
# !git clone https://github.com/deepseek-ai/DeepSeek-VL
# %cd DeepSeek-VL
# !pip install -e .

In [1]:
import re
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
from deepseek_vl.utils.io import load_pil_images



# Load Models & Tokenizers

In [2]:
# ---- Code Generation Model ----
code_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
code_model = AutoModelForCausalLM.from_pretrained(code_model_id)
code_tokenizer = AutoTokenizer.from_pretrained(code_model_id)
code_model.eval()
if torch.cuda.is_available():
    code_model.to("cuda")

In [3]:
# ---- Image Description Model ----
vl_model_path = "deepseek-ai/deepseek-vl-1.3b-chat"
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(vl_model_path)
vl_tokenizer = vl_chat_processor.tokenizer
vl_model: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
    vl_model_path,
    trust_remote_code=True
)
vl_model.eval()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


MultiModalityCausalLM(
  (vision_model): CLIPVisionTower(
    (vision_tower): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Identity()
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in

In [4]:
# Move to bfloat16 and GPU if available
vl_model = vl_model.to(torch.bfloat16)
if torch.cuda.is_available():
    vl_model = vl_model.to("cuda")

# Functions
## (Code Generation & Image Description)

In [12]:
def generate_code_snippet(
    user_prompt: str,
    max_new_tokens: int = 1500,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
) -> str:
    """Enhanced code generation with adjustable parameters"""
    # 1. System message
    system_message = {
        "role": "system",
        "content": (
            "You are a code-generation AI. When given a prompt, "
            "you must return only the Python code snippet that fulfills the request. "
            "Do NOT include any explanations, JSON schemas, commentary, or special tokens."
        )
    }

    # 2. Examples user-assistant interaction (to prime the model)
    user_assistant_message = {
        "role": "user",
        "content": (
            "Generate a Python code snippet that fulfills this request:\n\n"
            "\"\"\"\n"
            "Create a Python function to calculate Fibonacci sequence with O(n) time complexity\n"
            "\"\"\"\n"
            "Include any necessary imports and ensure the snippet is functional."
        )
    }

    assistant_message = {
        "role": "assistant",
        "content": (
            """
            def fibonacci(n):
                if n == 0:
                    return 0
                elif n == 1:
                    return 1
                elif n == 2:
                    return 1
                else:
                    a, b = 0, 1
                    for i in range(2, n):
                        a, b = b, a + b
                    return b
            
            n = int(input("Enter the number of the Fibonacci sequence: "))
            print(fibonacci(n))
            """
        )
    }

    # user_assistant_message2 = {
    #     "role": "user",
    #     "content": (
    #         "Generate a Python code snippet that fulfills this request:\n\n"
    #         "\"\"\"\n"
    #         "Create a Python function to calculate bubble sort\n"
    #         "\"\"\"\n"
    #         "Include any necessary imports and ensure the snippet is functional."
    #     )
    # }

    # assistant_message2 = {
    #     "role": "assistant",
    #     "content": (
    #         """
    #         def bubble_sort(arr):
    #             n = len(arr)
    #             for i in range(n):
    #                 for j in range(0, n - i - 1):
    #                     if arr[i] > arr[j + 1]:
    #                         arr[i], arr[j + 1] = arr[j + 1], arr[i]
    #             return arr
            
    #         n = int(input("Enter the number of elements: "))
    #         arr = list(map(int, input("Enter the array elements: ").split()))
    #         print("Sorted array:", bubble_sort(arr))
    #         """
    #     )
    # }

    # 3. Actual user prompt
    user_message = {
        "role": "user",
        "content": (
            f"Generate a Python code snippet that fulfills this request:\n\n"
            f"\"\"\"\n{user_prompt}\n\"\"\"\n"
        )
    }

    # 4. Aggregate messages
    messages = [system_message, user_assistant_message, assistant_message, user_message]
    text_input = code_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        continue_final_message=True
    )

    # Tokenize and generate
    model_inputs = code_tokenizer([text_input], return_tensors="pt")
    if torch.cuda.is_available():
        model_inputs = {k: v.to("cuda") for k, v in model_inputs.items()}

    generated_output = code_model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        do_sample=True,
        # pad_token_id=code_tokenizer.eos_token_id
    )

    batch_input_ids = model_inputs["input_ids"]
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(batch_input_ids, generated_output)
    ]
    

    # # Decode and clean
    # generated_snippet = code_tokenizer.decode(
    #     generated_output[0], 
    #     skip_special_tokens=True
    # )

    generated_snippet = code_tokenizer.batch_decode(
        generated_ids, 
        skip_special_tokens=True)[0]

    return clean_generated_output(generated_snippet)

In [8]:
def generate_image_description(
    image,
    max_new_tokens: int = 512,
    temperature: float = 0.5,
    top_p: float = 0.9,
    top_k: int = 50,
    detail_level: str = "Detailed"
) -> str:
    """Enhanced image description with adjustable parameters"""
    # Configure detail level
    detail_prompt = {
        "Concise": "Describe an image concisely.",
        "Detailed": "Describe an image in detail.",
        "Extreme": "Describe every visible detail exhaustively of this image."
    }.get(detail_level, "Describe in detail.")
    
    # Build conversation
    conversation = [
        {"role": "User", "content": f"<image_placeholder> {detail_prompt}"},
        {"role": "Assistant", "content": ""}
    ]
    
    # Prepare inputs
    pil_images = [image]
    prepared = vl_chat_processor(
        conversations=conversation,
        images=pil_images,
        force_batchify=True
    ).to(vl_model.device)
    
    # Generate description
    inputs_embeds = vl_model.prepare_inputs_embeds(**prepared)
    output_ids = vl_model.language_model.generate(
        inputs_embeds=inputs_embeds,
        attention_mask=prepared.attention_mask,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        do_sample=True,
        pad_token_id=vl_tokenizer.eos_token_id,
        eos_token_id=vl_tokenizer.eos_token_id
    )
    
    # Decode and clean
    description = vl_tokenizer.decode(
        output_ids[0].cpu().tolist(), 
        skip_special_tokens=True
    )
    return description.strip()

In [5]:
def clean_generated_output(code: str) -> str:
    """Comprehensive cleaning of generated code output"""
    # Remove the specific unwanted instruction text
    code = re.sub(
        r'Include any necessary imports and ensure the snippet is functional\.?\s*',
        '', 
        code,
        flags=re.IGNORECASE
    )
    
    # Remove special tokens and thinking blocks
    code = re.sub(r'<\|.*?\|>', '', code, flags=re.DOTALL)
    code = re.sub(r'</?think>', '', code, flags=re.IGNORECASE)
    
    # Remove code fences and the word "python" in any context
    code = re.sub(r'```\s*(python)?\s*|```', '', code, flags=re.IGNORECASE)
    code = re.sub(r'\bpython\b', '', code, flags=re.IGNORECASE)
    
    # Remove empty lines at start and end
    return re.sub(r'^\s*\n+|\n+\s*$', '', code).strip()

# Gradio Interface

In [9]:
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
)

In [10]:
with gr.Blocks(title="DeepSeek Multi-Task App", theme=theme) as app:
    gr.Markdown(
        """
        # 🚀 DeepSeek Multi-Model Application
        *Code Generation with DeepSeek-R1-Distill-Qwen-1.5B & Image Description with deepseek-vl-1.3b-chat*
        """
    )
    

    with gr.Tabs():
        # === Tab 1: Code Generation ===
        with gr.TabItem("🧑‍💻 Code Generation (DeepSeek-R1-Distill-Qwen-1.5B)"):
            gr.Markdown("### Python Code Generation using DeepSeek-R1-Distill-Qwen-1.5B\n"
                "Transform natural language requests into functional Python code snippet.")
            
            with gr.Row():
                with gr.Column(scale=3):
                    prompt_input = gr.Textbox(
                        label="Code Request",
                        placeholder="e.g., Create a Python function to calculate factorial of a number",
                        value="Create a Python function to calculate Fibonacci sequence with O(n) time complexity",  
                        lines=4
                    )
                    with gr.Accordion("⚙️ Generation Parameters", open=False):
                        with gr.Row():
                            max_tokens = gr.Slider(100, 2048, value=512, label="Max Tokens")
                            temperature = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
                        with gr.Row():
                            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top-p")
                            top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
                    
                    generate_btn = gr.Button("Generate Code", variant="primary")
                
                with gr.Column(scale=2):
                    code_output = gr.Textbox(
                        label="Generated Python Code",
                        lines=20,
                        # interactive=True
                    )
            
            generate_btn.click(
                fn=generate_code_snippet,
                inputs=[prompt_input, max_tokens, temperature, top_p, top_k],
                outputs=code_output
            )

        # === Tab 2: Image Description ===
        with gr.TabItem("🖼️ Image Description (deepseek-vl-1.3b-chat)"):
            gr.Markdown("### Image Description using deepseek-vl-1.3b-chat\n"
                "Generate detailed descriptions from uploaded images.")
            
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        type="pil",
                        label="Upload Image",
                        sources=["upload", "clipboard"]
                    )
                    with gr.Accordion("⚙️ Description Parameters", open=False):
                        detail_level = gr.Radio(
                            ["Concise", "Detailed", "Extreme"],
                            value="Detailed",
                            label="Detail Level"
                        )
                        with gr.Row():
                            img_max_tokens = gr.Slider(100, 2048, value=512, label="Max Tokens")
                            img_temp = gr.Slider(0.1, 1.0, value=0.6, step=0.1, label="Temperature")
                        with gr.Row():
                            img_top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top-p")
                            img_top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
                    
                    describe_btn = gr.Button("Describe Image", variant="primary")
                
                with gr.Column(scale=2):
                    description_output = gr.Textbox(
                        label="Image Description",
                        lines=20,
                        # interactive=True
                    )
            
            describe_btn.click(
                fn=generate_image_description,
                inputs=[image_input, img_max_tokens, img_temp, img_top_p, img_top_k, detail_level],
                outputs=description_output
            )

In [13]:
# Launch the app
if __name__ == "__main__":
    app.launch(share=True)

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
Running on public URL: https://f488dcb67641e253b4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
