In [1]:
import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from threading import Thread
import io
import sys
from io import StringIO

# --- Model and Tokenizer Configuration ---
CHECKPOINT_PATH = "Qwen2.5-7B-Instruct-1M"

def load_model_tokenizer():
    """Loads the pre-trained model and tokenizer."""
    print("Loading model and tokenizer...")
    
    tokenizer = AutoTokenizer.from_pretrained(
        CHECKPOINT_PATH,
        resume_download=True,
    )

    device_map = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        CHECKPOINT_PATH,
        torch_dtype="auto",
        device_map=device_map,
        resume_download=True,
    ).eval()
    
    print("Model and tokenizer loaded successfully.")
    return model, tokenizer

# --- Core Chat Logic ---
def chat_stream(model, tokenizer, query, history):
    """Generates a response from the model in a streaming fashion."""
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": query})

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # We need a TextIteratorStreamer for this to work
    from transformers import TextIteratorStreamer
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True
    )
    
    generation_kwargs = {
        **model_inputs,
        "streamer": streamer,
        "max_new_tokens": 2048,
        "do_sample": True,
        "top_p": 0.95,
        "top_k": 50,
        "temperature": 0.7,
    }
    
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    for new_text in streamer:
        yield new_text

# --- Main Application UI and Logic ---
def build_chatbot_ui(model, tokenizer):
    """Builds the Gradio web interface for the chatbot."""
    
    with gr.Blocks(
        theme=gr.themes.Soft(),
        css=".control-height { height: 500px; overflow: auto; }"
    ) as demo:
        df_state = gr.State(None)
        task_history = gr.State([])

        gr.Markdown(
            """
            <div style="text-align: center;">
                <h1>🤖 Conversational AI Data Analyst</h1>
                <p>Upload a CSV, ask a question, and the AI will analyze the data and explain the results conversationally.</p>
            </div>
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                file_uploader = gr.File(label="Upload your CSV", file_types=[".csv"])
            
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(label="Chatbox", elem_classes="control-height")
                query_box = gr.Textbox(lines=3, label="Your Question", placeholder="e.g., What percentage of conversations were on each topic?")
                with gr.Row():
                    submit_btn = gr.Button("Submit", variant="primary")
                    regenerate_btn = gr.Button("Regenerate")
                    clear_btn = gr.Button("Clear History")

        def load_csv_data(file, chatbot_history):
            if file is not None:
                try:
                    df = pd.read_csv(file.name)
                    chatbot_history.append(("✅ File loaded successfully.", "What would you like to know about the data?"))
                    return df, chatbot_history
                except Exception as e:
                    return None, chatbot_history + [("", f"❌ Error: {e}")]
            return None, chatbot_history

        def predict(query, chatbot_history, df, task_history_state):
            if df is None:
                chatbot_history.append((query, "Please upload a CSV file first."))
                yield chatbot_history
                return

            # --- Step 1: Generate and execute code to get the raw data ---
            string_buffer = io.StringIO()
            df.info(buf=string_buffer)
            df_info = string_buffer.getvalue()

            code_generation_prompt = f"""
You are a data analysis AI. Based on the user's question and the DataFrame schema, write a Python script using pandas to get the data needed to answer the question. The DataFrame is named `df`.
Your script must print the result. Provide only raw Python code.

**Schema (df.info()):**
{df_info}
**User Question:** "{query}"
"""
            chatbot_history.append((query, "🤔 Step 1: Analyzing data..."))
            yield chatbot_history

            # Use a non-streaming call to get the code block
            text = tokenizer.apply_chat_template([{"role": "user", "content": code_generation_prompt}], tokenize=False, add_generation_prompt=True)
            model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
            generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=1024, do_sample=False)
            response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            generated_code = response_text.split(code_generation_prompt)[-1].strip()
            if "```python" in generated_code:
                generated_code = generated_code.split("```python")[1].split("```")[0].strip()

            # Execute the code
            old_stdout = sys.stdout
            redirected_output = sys.stdout = StringIO()
            try:
                exec(generated_code, {'df': df, 'pd': pd})
                sys.stdout = old_stdout
                raw_result = redirected_output.getvalue()

                # --- Step 2: Summarize the result conversationally ---
                chatbot_history[-1] = (query, "✍️ Step 2: Summarizing results...")
                yield chatbot_history

                if not raw_result:
                    summary = "The analysis ran successfully but produced no specific data to summarize."
                else:
                    summarization_prompt = f"""
You are a helpful AI assistant. Your task is to explain the following data to a user in a clear, conversational way.
The user originally asked: "{query}"
Summarize the key findings from the data below to answer their question. Don't just list the numbers; explain what they mean.

**Data to Summarize:**
{raw_result}
"""
                    # Use the streaming function for the final conversational answer
                    final_summary = ""
                    for new_text in chat_stream(model, tokenizer, summarization_prompt, history=[]):
                        final_summary += new_text
                        chatbot_history[-1] = (query, final_summary)
                        yield chatbot_history
                    task_history_state.append((query, final_summary))
                    return

            except Exception as e:
                sys.stdout = old_stdout
                error_message = f"❌ **Error during code execution:**\n\n```\n{str(e)}\n```"
                chatbot_history[-1] = (query, error_message)
                task_history_state.append((query, error_message))
                yield chatbot_history

        def regenerate(chatbot_history, task_history_state, df):
            if not task_history_state:
                yield chatbot_history
                return
            last_query, _ = task_history_state.pop(-1)
            chatbot_history.pop(-1)
            yield from predict(last_query, chatbot_history, df, task_history_state)

        def clear_history():
            return [], None, []

        def reset_user_input():
            return gr.update(value="")

        # Wire up components
        file_uploader.upload(load_csv_data, [file_uploader, chatbot], [df_state, chatbot])
        submit_btn.click(predict, [query_box, chatbot, df_state, task_history], [chatbot]).then(reset_user_input, [], [query_box])
        regenerate_btn.click(regenerate, [chatbot, task_history, df_state], [chatbot])
        clear_btn.click(clear_history, [], [chatbot, df_state, task_history])
        
    return demo

def main():
    model, tokenizer = load_model_tokenizer()
    demo = build_chatbot_ui(model, tokenizer)
    demo.queue().launch(share=True, inbrowser=True)

if __name__ == "__main__":
    main()

Loading model and tokenizer...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model and tokenizer loaded successfully.


  chatbot = gr.Chatbot(label="Chatbox", elem_classes="control-height")


* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://9bf2429760b6d367c0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Skipping data after last boundary
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=i