# Gradio Interface:


In [5]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
#!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

!pip install -q bitsandbytes
!pip install -q evaluate jsonlines rouge_score bert-score
!pip install transformers peft accelerate bitsandbytes jsonlines
!pip install evaluate gradio
import evaluate
2

## 1. Choosing the model:

In [6]:
# Unsloth is imported before transformers: it patches the model code it
# accelerates, and recommends being imported first so the patches take effect.
from unsloth import FastLanguageModel
import torch
from transformers import AutoModel, AutoTokenizer

max_seq_length = 2048  # Choose any! We auto support ROPE scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, bFloat16 for Ampere+

# Fine-tuned checkpoints available on the Hugging Face Hub.
Developed_model = [
    "davnas/Italian_Cousine",                  # unsloth/Llama-3.2-1B-Instruct with 3 epochs
    "davnas/Italian_Cousine_1.2",              # unsloth/Llama-3.2-1B-Instruct with 5 epochs
    "davnas/Italian_Cousine_1.3",              # unsloth/Llama-3.2-1B-Instruct with 7 epochs
    "davnas/Italian_Cousine_2",                # Llama-3.2-1B-Instruct-bnb-4bit with 3 epochs
    "davnas/Italian_Cousine_2.0",              # Llama-3.2-1B-Instruct-bnb-4bit with 5 epochs
    "davnas/Italian_Cousine_2.1",              # Llama-3.2-1B-Instruct-bnb-4bit with 7 epochs
]

# Select the checkpoint from the list above (change the index to try another
# one) so the model name is written in exactly one place.
model_name_or_path = Developed_model[0]

# Load the checkpoint and its tokenizer with 4-bit weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    # token = "hf_...", # needed if our model is not public
    # Use one if using gated models like meta-llama/Llama-2-7b-hf
)


==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


## 2. Interface:

In [7]:
from unsloth import FastLanguageModel
import gradio as gr
import re

# Enable faster inference: switch the already-loaded `model` into Unsloth's
# inference mode once, before any request handler calls `model.generate`.
FastLanguageModel.for_inference(model)

def _format_recipe_html(text):
    """Turn plain recipe text into the HTML fragment displayed by the UI.

    Args:
        text: Plain-text answer (may contain ``**bold**`` markers, ``- `` bullet
            lines and ``1. `` numbered lines).

    Returns:
        An HTML string in which section labels and ``**...**`` spans are
        emphasised, bullet/numbered lines are indented, and newlines are
        converted to ``<br>``.
    """
    # Local import keeps this block self-contained; the cell's import list
    # does not include `html`.
    import html

    emphasis = r'<span style="font-size:1.2em; font-weight:bold;">\1</span>'
    indented = '<div style="margin-left: 20px;">'

    # Escape before adding our own markup: the text comes from the model (and
    # indirectly from the user) and is rendered as raw HTML by the interface.
    # quote=False is enough because the text only ever lands in element content.
    text = html.escape(text, quote=False)

    # Emphasise the well-known section labels (preparation time, cooking time, ...).
    text = re.sub(
        r"(preparation time:|cooking time:|portions:|ingredients:|procedure:|nutrients:)",
        emphasis,
        text,
        flags=re.IGNORECASE,
    )

    # Markdown-style **bold** -> emphasised span.
    text = re.sub(r"\*\*(.*?)\*\*", emphasis, text)

    # Indent bullet lines ("- item") and numbered steps ("1. step").
    text = re.sub(r"^- (.*)", indented + r"- \1</div>", text, flags=re.MULTILINE)
    text = re.sub(r"^\d+\.\s(.*)", indented + r"\g<0></div>", text, flags=re.MULTILINE)

    text = text.replace("\n", "<br>")

    # Drop any unmatched "**" left over from a truncated generation.
    return text.replace("**", "")


def chatbot(user_input):
    """Answer a cooking question with the fine-tuned model, formatted as HTML.

    Relies on the module-level ``model`` and ``tokenizer`` created in the
    model-loading cell.

    Args:
        user_input: The question or ingredient typed by the user.

    Returns:
        An HTML string for the Gradio ``html`` output. A short hint is returned
        (and the model is not called) when the input is empty or blank.
    """
    # Nothing to answer: skip the expensive generation call.
    if not user_input or not user_input.strip():
        return "Please enter a question or an ingredient."

    # Add a system role for better context
    messages = [
        {"role": "system", "content": "You are a professional chef assistant who provides accurate and detailed recipes."},
        {"role": "user", "content": user_input},
    ]

    # Tokenize with the chat template and move the ids to wherever the model
    # lives instead of assuming "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # do_sample=True is required for `temperature` to have any effect.
    outputs = model.generate(
        inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )

    # `generate` returns prompt + completion; decode only the newly generated
    # tokens so the system prompt and the user's text never leak into the answer.
    prompt_length = inputs.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Skip any preamble before the recipe itself. Searching the original string
    # (not a lower-cased copy) keeps the offset valid for every Unicode input.
    recipe_start = re.search(r"preparation time", response, flags=re.IGNORECASE)
    if recipe_start:
        response = response[recipe_start.start():]

    return _format_recipe_html(response)

# Input widget: a single textbox with a friendly label and an example prompt.
question_box = gr.Textbox(
    label="Ask a question about Italian cuisine or share an ingredient!",
    placeholder="e.g., How do I make pizza milkshake?",
)

# Wire the textbox to `chatbot`; the answer is rendered as HTML so the
# formatting produced by `chatbot` is displayed, and the flag button is disabled.
interface = gr.Interface(
    fn=chatbot,
    inputs=question_box,
    outputs="html",
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!",
    allow_flagging="never",
)

# Start the app; share=True also exposes it through a temporary public URL.
interface.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://13f2d29407afd5b4c6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


