In [1]:
!pip install transformers torch
!pip install unsloth
!pip install bitsandbytes
!pip install huggingface_hub==0.25.2



In [2]:
# Unsloth patches transformers at import time, so it is imported first;
# all imports for this cell are grouped here.
from unsloth import FastLanguageModel
import torch
from transformers import AutoModel, AutoTokenizer

max_seq_length = 2048  # Choose any! We auto support ROPE scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, bFloat16 for Ampere+

# Hugging Face Hub repository that is passed to from_pretrained below.
model_name_or_path = "jacopoda/lora_model"

# Load the model and its tokenizer with 4-bit loading enabled.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name_or_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    # token = "hf_...",  # needed if our model is not public
    # Use one if using gated models like meta-llama/Llama-2-7b-hf
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.12.4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [3]:
!pip install gradio



In [None]:
from unsloth import FastLanguageModel
import gradio as gr

# Switch the loaded model into Unsloth's faster inference mode
FastLanguageModel.for_inference(model)

# Module-level state shared by the UI callbacks: the latest generated
# question together with its correct answer
current_question = dict(question="", answer="")

def generate_question(argument):
    """Generate a quiz question (and its reference answer) about a topic.

    Builds a chat prompt from ``argument``, runs the model, and splits the
    completion on the first "Answer:" marker. Both parts are stored in the
    module-level ``current_question`` dict so the answer can be shown later.

    Args:
        argument: Topic text entered by the user.

    Returns:
        The generated question text with surrounding whitespace removed.
        If the completion contains no "Answer:" marker, the whole completion
        is treated as the question and the stored answer is "Not provided".
    """
    messages = [
        {"role": "system", "content": "You are a knowledgeable assistant creating a quiz. Generate a question based on the given argument and provide the correct answer."},
        {"role": "user", "content": f"Generate a question about: {argument}"},
        {"role": "user", "content": f"Generate a answer about: {argument}"}
    ]

    # Tokenize the conversation and append the assistant generation prompt
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda")

    # NOTE(review): temperature only has an effect when sampling is enabled;
    # confirm the model's generation config sets do_sample=True.
    outputs = model.generate(inputs, max_new_tokens=256, temperature=0.7)

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the newly generated part, otherwise the system/user prompt text
    # ends up inside the question shown to the user.
    prompt_length = inputs.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Split into question and answer on the first "Answer:" marker, if any
    question, marker, answer = response.partition("Answer:")
    if not marker:
        answer = "Not provided"

    question = question.strip()
    current_question["question"] = question
    current_question["answer"] = answer.strip()

    return question

def evaluate_answer(user_answer):
    """Build the HTML feedback shown after the user submits an answer.

    The feedback only reveals the stored reference answer; the user's
    answer is not scored or compared against it.

    Args:
        user_answer: Text typed by the user. Accepted because the UI passes
            it in, but not used when building the feedback.

    Returns:
        An HTML snippet containing the reference answer from
        ``current_question``, HTML-escaped.
    """
    from html import escape  # local import keeps this callback self-contained

    # The answer is model-generated text that gets rendered as HTML; escape it
    # so characters such as "<" or "&" are displayed literally instead of
    # being interpreted as markup.
    reference_answer = escape(current_question["answer"])

    return f"<b>Correct Answer:</b> {reference_answer}<br>"

def reset_fields():
    """Clear the stored question/answer and blank the UI fields for a new topic.

    Returns:
        A tuple of three empty strings, one per output component wired to
        this callback.
    """
    for key in ("question", "answer"):
        current_question[key] = ""
    return ("",) * 3

# Build the quiz UI
with gr.Blocks() as interactive_quiz:
    gr.Markdown("## 🧠 Interactive Quiz with FineTome100k Dataset")
    gr.Markdown("Provide a topic, answer a generated question, and see how well you did!")

    # Top row: topic entry plus the two topic-level buttons
    with gr.Row():
        argument_input = gr.Textbox(
            label="Topic or Argument",
            placeholder="E.g., Machine Learning Trends",
        )
        generate_btn = gr.Button("Generate Question")
        change_topic_btn = gr.Button("Change Topic")

    # Question shown to the user (read-only), answer entry, and feedback area
    question_display = gr.Textbox(label="Generated Question", interactive=False)
    user_answer_input = gr.Textbox(
        label="Your Answer",
        placeholder="Type your answer here...",
    )
    evaluate_btn = gr.Button("Submit Answer")
    feedback_output = gr.HTML(label="Feedback")

    # Wire each button to its callback
    generate_btn.click(
        fn=generate_question,
        inputs=[argument_input],
        outputs=[question_display],
    )
    evaluate_btn.click(
        fn=evaluate_answer,
        inputs=[user_answer_input],
        outputs=[feedback_output],
    )
    # Changing the topic clears the question, the typed answer and the feedback
    change_topic_btn.click(
        fn=reset_fields,
        inputs=[],
        outputs=[question_display, user_answer_input, feedback_output],
    )

# Start the app with a public share link and debug mode enabled
interactive_quiz.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://fd728b7c0783bd0739.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
