In [6]:
import os
import random
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Load model and tokenizer.
# HF_TOKEN is forwarded to both from_pretrained() calls so that gated or private
# checkpoints can be downloaded; when the variable is unset, token=None leaves
# the library's default authentication behaviour in place.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", token=HF_TOKEN)

# Token ids that stop generation. Only EOS is listed: the former second entry
# looked up the empty string "", which names no token.
# NOTE(review): if a model-specific end-of-turn token was intended, add it here
# by its exact token string.
terminators = [
    tokenizer.eos_token_id,
]

@spaces.GPU(duration=120)
def mistral7b_v3_chatbot(message: str,
                         history: list,
                         temperature: float,
                         max_new_tokens: int
                        ) -> Iterator[str]:
    """Stream a model reply to ``message`` given the earlier conversation.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user, assistant)`` pairs.
        temperature: Sampling temperature. Values ``<= 0`` select greedy
            decoding and the temperature is then not forwarded to the model.
        max_new_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.

    Yields:
        The reply text accumulated so far, once per chunk produced by the
        streamer.
    """
    conversation = []
    for user, assistant in history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    # A non-positive temperature is not a valid sampling temperature, so treat
    # it as a request for deterministic (greedy) decoding.
    do_sample = temperature > 0
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        eos_token_id=terminators,
    )
    # Only pass the temperature when sampling; it is a sampling-only setting.
    if do_sample:
        generate_kwargs["temperature"] = temperature

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator consumes the streamer.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    partial = ""
    for text in streamer:
        partial += text
        yield partial

# Function to generate a random reading passage and create fill-in-the-blank questions
def generate_passage_and_questions():
    """Pick one built-in passage at random and hide one of its words.

    Returns:
        A ``(passage, question)`` tuple. ``passage`` is the chosen text as
        written; ``question`` is its whitespace-separated words re-joined with
        single spaces, with one randomly chosen word replaced by ``"____"``.
    """
    candidates = (
        "Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll. The process involves the conversion of carbon dioxide and water into glucose and oxygen.",
        "Quantum computing is a type of computation that takes advantage of quantum mechanics. Unlike classical computing, which relies on bits as the smallest unit of information, quantum computing uses qubits, which can represent both 0 and 1 simultaneously.",
        "Climate change refers to long-term changes in temperature, precipitation, and other atmospheric conditions on Earth. It is largely caused by human activities such as burning fossil fuels, deforestation, and industrial processes that release greenhouse gases into the atmosphere.",
    )

    chosen = random.choice(candidates)
    words = chosen.split()
    hidden_index = random.randint(0, len(words) - 1)
    with_blank = " ".join(
        "____" if position == hidden_index else word
        for position, word in enumerate(words)
    )
    return chosen, with_blank

# Function to check user's answer
def check_answer(passage: str, user_input: str) -> bool:
    """Return whether ``user_input`` is one of the words of ``passage``.

    The comparison is made on whole words, ignoring case and any punctuation or
    whitespace around the word. A plain substring test would accept the empty
    string, single letters and word fragments, so those are rejected here.

    Args:
        passage: The full reading passage.
        user_input: The answer typed by the user; ``None`` or blank input is
            treated as a wrong answer.

    Returns:
        ``True`` if the normalised answer equals a normalised passage word,
        otherwise ``False``.
    """
    import string

    def _normalize(word):
        # Drop surrounding whitespace/punctuation and compare case-insensitively.
        return word.strip().strip(string.punctuation).casefold()

    answer = _normalize(user_input or "")
    if not answer:
        return False
    passage_words = {_normalize(word) for word in passage.split()}
    return answer in passage_words

# UI strings: PLACEHOLDER is passed as the Chatbot widget's placeholder, and
# DESCRIPTION is rendered as the first Markdown element of the page.
PLACEHOLDER = "Chat with 〽️istral-7b-v0.3"
DESCRIPTION = "Welcome to the Mistral chat interface. Answer the fill-in-the-blank questions!"

# Gradio interface
# The passage/question pair is drawn once, when this module is executed, and is
# then reused by the page text and by handle_submission(); it does not change
# between submissions.
passage, question = generate_passage_and_questions()

def handle_submission(user_input):
    """Grade the user's answer to the current fill-in-the-blank question.

    The answer counts as correct only when it matches the word that was
    replaced by ``"____"`` in the module-level ``question``, ignoring case and
    surrounding punctuation or whitespace. Grading against the whole passage
    would accept any text that occurs in it, including the empty string.

    Args:
        user_input: Text typed into the answer box.

    Returns:
        ``"Correct!"`` or ``"Incorrect!"``.
    """
    import string

    def _normalize(word):
        return word.strip().strip(string.punctuation).casefold()

    # `question` is `passage` with exactly one word swapped for "____", so both
    # split into aligned word lists and the hidden word sits opposite the blank.
    expected = next(
        (original for original, shown in zip(passage.split(), question.split())
         if shown == "____"),
        None,
    )

    if expected is None:
        # No blank could be located; fall back to the passage-level check.
        is_correct = check_answer(passage, user_input)
    else:
        answer = _normalize(user_input or "")
        is_correct = bool(answer) and answer == _normalize(expected)

    return "Correct!" if is_correct else "Incorrect!"

# Chatbot widget configured with the placeholder text.
# NOTE(review): it is created outside the Blocks context and is not referenced
# by the layout below; confirm whether it was meant to be part of the page.
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

# "xiaobaiyuan/theme_brief" has the form of a theme identifier, not stylesheet
# text, so it is passed as `theme=`; given as `css=` it would be treated as raw
# CSS and style nothing.
with gr.Blocks(fill_height=True, theme="xiaobaiyuan/theme_brief") as demo:
    gr.Markdown(DESCRIPTION)
    gr.Markdown(f"Passage: {passage}")
    gr.Markdown(f"Question: {question}")
    user_input = gr.Textbox(label="Your Answer")
    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Result")
    # Clicking "Submit" grades the typed answer and shows the verdict in "Result".
    submit_button.click(handle_submission, inputs=user_input, outputs=output)

if __name__ == "__main__":
    # share=True additionally requests a public share URL for the app.
    demo.launch(share=True)


tokenizer_config.json:   0%|          | 0.00/138k [00:00<?, ?B/s]



Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://4817a127c983004d00.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
