In [1]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
#!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

!pip install -q bitsandbytes
!pip install -q evaluate jsonlines rouge_score bert-score
!pip install transformers peft accelerate bitsandbytes jsonlines
!pip install evaluate gradio
import evaluate


In [2]:
from transformers import AutoModel, AutoTokenizer

max_seq_length = 2048  # Choose any! We auto support ROPE scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, bFloat16 for Ampere+

model_name_or_path = "jacopoda/lora_model"

from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    # token = "hf_...", #se il nostro modello non è public
    # Use one if using gated models like meta-llama/Llama-2-7b-hf
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

In [None]:
from unsloth import FastLanguageModel
import gradio as gr

# Enable faster inference
FastLanguageModel.for_inference(model)

# Global variable to store the correct answer
current_question = {"question": "", "answer": ""}

def generate_question(argument):
    """Generates a question based on the provided argument."""
    messages = [
        {"role": "system", "content": "You are a knowledgeable assistant creating a quiz. Generate a question based on the given argument and provide the correct answer."},
        {"role": "user", "content": f"Generate a question about: {argument}"}
    ]

    # Tokenize inputs
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # Generate question and answer
    outputs = model.generate(inputs, max_new_tokens=256, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Split into question and answer (assuming the model outputs them clearly)
    if "Answer:" in response:
        question, answer = response.split("Answer:", 1)
    else:
        # Fallback if model didn't clearly separate
        question = response.strip()
        answer = "Not provided"

    current_question["question"] = question.strip()
    current_question["answer"] = answer.strip()

    return question.strip()

def evaluate_answer(user_answer):
    """Evaluates the user's answer and provides a score."""
    correct_answer = current_question["answer"]
    user_answer = user_answer.strip().lower()
    correct_answer = correct_answer.lower()

    # Simple scoring based on word overlap similarity
    correct_words = set(correct_answer.split())
    user_words = set(user_answer.split())
    if len(correct_words) > 0:
        score = (len(user_words & correct_words) / len(correct_words)) * 100
    else:
        score = 0.0

    feedback = (
        f"<b>Correct Answer:</b> {current_question['answer']}<br>"
        f"<b>Your Score:</b> {round(score, 2)}%"
    )
    return feedback

def reset_fields():
    """Resets the question, answer, and feedback fields for a new topic."""
    current_question["question"] = ""
    current_question["answer"] = ""
    return "", "", ""

# Set up the Gradio interface
with gr.Blocks() as interactive_quiz:
    gr.Markdown("## 🧠 Interactive Quiz with FineTome100k Dataset")
    gr.Markdown("Provide a topic, answer a generated question, and see how well you did!")

    with gr.Row():
        argument_input = gr.Textbox(label="Topic or Argument", placeholder="E.g., Machine Learning Trends")
        generate_btn = gr.Button("Generate Question")
        change_topic_btn = gr.Button("Change Topic")

    question_display = gr.Textbox(label="Generated Question", interactive=False)
    user_answer_input = gr.Textbox(label="Your Answer", placeholder="Type your answer here...")
    evaluate_btn = gr.Button("Submit Answer")
    feedback_output = gr.HTML(label="Feedback")

    # Define interactions
    generate_btn.click(
        generate_question,
        inputs=argument_input,
        outputs=question_display
    )

    evaluate_btn.click(
        evaluate_answer,
        inputs=user_answer_input,
        outputs=feedback_output
    )

    # Reset fields when changing the topic
    change_topic_btn.click(
        reset_fields,
        inputs=[],
        outputs=[question_display, user_answer_input, feedback_output]
    )

# Launch the Gradio interface
interactive_quiz.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b03b6334019d77a9eb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [4]:
from unsloth import FastLanguageModel
import gradio as gr
import re

# Enable faster inference
FastLanguageModel.for_inference(model)

def dataset_assistant(user_input):
    # Add a system role for better context
    messages = [
        {"role": "system", "content": "You are a knowledgeable assistant trained on the FineTome100k dataset, ready to answer detailed questions."},
        {"role": "user", "content": user_input}
    ]
    # Tokenize inputs
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # Generate output with an increased token limit
    outputs = model.generate(inputs, max_new_tokens=256, temperature=0.7)  # Increased max_new_tokens
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Apply formatting for better readability
    formatted_response = re.sub(
        r"(key insights:|statistics:|example data:|important factors:)",
        r'<span style="font-size:1.2em; font-weight:bold; color:#2C3E50;">\1</span>',
        response,
        flags=re.IGNORECASE
    )

    # Replace **text** with bold HTML tags and make the font size larger
    formatted_response = re.sub(
        r"\*\*(.*?)\*\*",
        r'<span style="font-size:1.2em; font-weight:bold; color:#D35400;">\1</span>',
        formatted_response
    )

    # Indent lines starting with "-" or numbers for a structured look
    formatted_response = re.sub(
        r"^- (.*)",
        r'<div style="margin-left: 20px; font-family: Arial, sans-serif; color:#34495E;">- \1</div>',
        formatted_response,
        flags=re.MULTILINE
    )
    formatted_response = re.sub(
        r"^\d+\.\s(.*)",
        r'<div style="margin-left: 20px; font-family: Arial, sans-serif; color:#34495E;">\g<0></div>',
        formatted_response,
        flags=re.MULTILINE
    )

    formatted_response = formatted_response.replace("\n", "<br>")  # Replace newlines with <br> for HTML

    return formatted_response

# Set up the Gradio interface with tailored descriptions for FineTome100k
interface = gr.Interface(
    fn=dataset_assistant,
    inputs=gr.Textbox(
        label="🧠 FineTome100k Dataset Assistant 📊",
        placeholder="E.g., What are the key insights on recent trends?",
        lines=2,  # Adjust for better user input
    ),
    outputs=gr.HTML(),  # Use rich HTML output
    title="📖 FineTome100k Knowledge Assistant 🌟",
    description=(
        "Your gateway to detailed insights and data-driven answers from the FineTome100k dataset. "
        "Ask anything about patterns, trends, or specific details!"
    ),
    theme="compact",  # Use a compact theme for a clean interface
    live=True,  # Enable dynamic updates
    examples=[
        ["Summarize the key insights from the dataset."],
        ["What trends are visible in the data?"],
        ["Can you provide examples related to category X?"]
    ],  # Provide example queries
    css=(
        "body { font-family: 'Roboto', sans-serif; background-color: #F8F9FA; } "
        ".gradio-title { color: #2C3E50; font-size: 2em; font-weight: bold; } "
        ".gradio-description { font-size: 1.2em; color: #7F8C8D; } "
    )
)

# Launch the Gradio interface
interface.launch(share=True)



Sorry, we can't find the page you are looking for.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c97d753ce1fe873e42.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [5]:
from unsloth import FastLanguageModel
import gradio as gr
import re

# Enable faster inference
FastLanguageModel.for_inference(model)

def chatbot(user_input):
    # Add a system role for better context
    messages = [
        {"role": "system", "content": "You are a professional assistant who provides accurate and detailed."},
        {"role": "user", "content": user_input}
    ]
    # Tokenize inputs
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # Generate output with an increased token limit
    outputs = model.generate(inputs, max_new_tokens=256, temperature=0.7)  # Increased max_new_tokens
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the recipe content (skip system and user roles)
    recipe_start = response.lower().find("preparation time")  # Look for where the recipe content begins
    if recipe_start != -1:
        response = response[recipe_start:]  # Trim everything before the recipe content

    # Bold specific labels like preparation time, cooking time, portions, etc.
    response = re.sub(
        r"(preparation time:|cooking time:|portions:|ingredients:|procedure:|nutrients:)",
        r'<span style="font-size:1.2em; font-weight:bold;">\1</span>',
        response,
        flags=re.IGNORECASE
    )

    # Replace **text** with bold HTML tags and make the font size larger
    formatted_response = re.sub(
        r"\*\*(.*?)\*\*",
        r'<span style="font-size:1.2em; font-weight:bold;">\1</span>',
        response
    )

    # Indent lines starting with "-" by wrapping them in a div with padding
    formatted_response = re.sub(
        r"^- (.*)",
        r'<div style="margin-left: 20px;">- \1</div>',
        formatted_response,
        flags=re.MULTILINE
    )

    # Indent lines starting with numbers (e.g., 1., 2., etc.)
    formatted_response = re.sub(
        r"^\d+\.\s(.*)",
        r'<div style="margin-left: 20px;">\g<0></div>',
        formatted_response,
        flags=re.MULTILINE
    )

    formatted_response = formatted_response.replace("\n", "<br>")  # Replace newlines with <br> for HTML

    # Ensure no trailing ** or incomplete text
    if "**" in formatted_response:
        formatted_response = formatted_response.replace("**", "")  # Remove unclosed asterisks

    return formatted_response

# Set up the Gradio interface with a user-friendly input label and placeholder
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(
        label="Ask a question about Italian cuisine or share an ingredient!",  # User-friendly label
        placeholder="e.g., How do I make pizza milkshake?",  # Example input for clarity
    ),
    outputs="html",  # Use "html" output for better formatting
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!",
    allow_flagging="never"  # Disable the flag button
)

# Launch the Gradio interface
interface.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a08a1cb29f48fde9c7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


