In [None]:
!pip install --upgrade pip wheel setuptools

In [None]:
!pip install gradio flask ninja torch accelerate transformers

In [None]:
!MAX_JOBS=12 python -m pip -v install flash-attn --no-build-isolation  --use-pep517


In [None]:
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging
from transformers import set_seed
from peft import PeftModel

# Set a fixed random seed for reproducibility.
set_seed(0)
# Only surface errors from transformers; loading is otherwise very chatty.
logging.set_verbosity_error()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------- Sentiment Analysis ----------------
# Load a sentiment-analysis pipeline using DistilBERT.
# This SST-2 checkpoint emits the labels "POSITIVE" / "NEGATIVE", which is
# what respond() checks for when deciding whether to tag a message as angry.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)

# ---------------- Model Loading ----------------
# Load the Phi-3.5-mini-instruct model and tokenizer.
model_name = "microsoft/Phi-3.5-mini-instruct"
# Load the model (trust_remote_code must be True for this model).
# On GPU let the checkpoint choose its native dtype; on CPU fall back to
# float32, since half-precision kernels are slow or unavailable there.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto" if device.type == "cuda" else torch.float32,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create the text-generation pipeline using our model.
# (This pipeline expects a list of message dictionaries as input.)
# Passing `device` makes the pipeline move the model onto it.
chatbot = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# ---------------- Response Function ----------------
def respond(message: str,
            history: list,
            system_message: str,
            max_tokens: int,
            temperature: float,
            top_p: float):
    """
    Generate the assistant's reply to ``message`` given the prior conversation.

    The conversation sent to the model is rebuilt on every call as:
    the provided system message, then the prior turns from ``history``
    (reduced to their "role" and "content" keys), then the new user message.
    Rebuilding it each time guarantees the system message is present on every
    turn, not only the first one, and leaves the caller's ``history`` list
    untouched.

    Sentiment analysis is run on the new user message. If it is classified
    as NEGATIVE with a score above 0.85, the message is prefixed with
    "Angry: " before being sent to the model.

    Args:
        message: The new user message.
        history: Prior turns as a list of dicts with "role" and "content"
            keys. May be None or empty. A leading "system" entry, if any, is
            replaced by ``system_message``.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding, because sampling requires a strictly
            positive temperature.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The assistant's reply as a string.

    Raises:
        RuntimeError: If the module-level ``sentiment_pipeline`` or
            ``chatbot`` has not been initialised.

    Troubleshooting suggestions:
      - If the responses seem hallucinated or off-topic, try adjusting temperature (try higher for more creative, lower for deterministic) or top_p.
      - You can print the messages list inside this function to verify the conversation structure.
      - Ensure that the input history is a list of dictionaries with "role" and "content" keys.
    """
    if sentiment_pipeline is None or chatbot is None:
        raise RuntimeError(
            "sentiment_pipeline and chatbot must be loaded before calling respond()."
        )

    # Work on a copy so the caller's list is never mutated.
    prior_turns = list(history or [])
    # Drop a stale leading system entry; the provided one always wins.
    if prior_turns and prior_turns[0].get("role") == "system":
        prior_turns = prior_turns[1:]

    messages = [{"role": "system", "content": system_message}]
    # Keep only the keys the chat template needs; UI frameworks may attach
    # extra bookkeeping keys to each turn.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in prior_turns
    )

    # Run sentiment analysis on the user message. Truncate so that very long
    # messages do not exceed the classifier's maximum input length.
    sentiment = sentiment_pipeline(message, truncation=True)[0]
    is_angry = sentiment["label"] == "NEGATIVE" and sentiment["score"] > 0.85
    user_content = "Angry: " + message if is_angry else message
    messages.append({"role": "user", "content": user_content})

    # Sampling requires temperature > 0; fall back to greedy decoding otherwise.
    do_sample = temperature > 0
    generation_args = {
        "max_new_tokens": int(max_tokens),
        "return_full_text": False,  # Return only the newly generated reply.
        "do_sample": do_sample,
    }
    if do_sample:
        generation_args["temperature"] = float(temperature)
        generation_args["top_p"] = float(top_p)

    # Pass the full conversation (a list of dicts) directly to the pipeline.
    output = chatbot(messages, **generation_args)
    generated = output[0]["generated_text"]
    # With return_full_text=False this is normally a plain string; if the
    # pipeline returns the chat as a list of messages instead, take the
    # content of the final (assistant) message.
    if isinstance(generated, list):
        generated = generated[-1]["content"]
    assistant_reply = generated.strip()

    return assistant_reply

# ---------------- Gradio Chat Interface ----------------
def _build_demo():
    """Assemble the chat UI: `respond` plus its four tunable extra inputs."""
    # The order of these controls must match respond()'s extra parameters:
    # system_message, max_tokens, temperature, top_p.
    system_box = gr.Textbox(
        label="System message",
        value="You are a helpful AI assistant.",
    )
    max_tokens_slider = gr.Slider(
        label="Max new tokens",
        minimum=1,
        maximum=2048,
        step=1,
        value=500,
    )
    temperature_slider = gr.Slider(
        label="Temperature",
        minimum=0.0,
        maximum=4.0,
        step=0.1,
        value=1.0,
    )
    top_p_slider = gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    )

    blurb = (
        "A chatbot powered by microsoft/Phi-3.5-mini-instruct. "
        "It uses sentiment analysis to tag angry messages and accepts conversation history as a list of dicts. "
        "Adjust parameters below to test and troubleshoot responses."
    )

    return gr.ChatInterface(
        fn=respond,
        # Conversation history is a list of message dictionaries.
        type="messages",
        title="Phi-3.5-mini Chatbot",
        description=blurb,
        additional_inputs=[
            system_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
        ],
    )


demo = _build_demo()

# Start the app only when executed directly (script or notebook cell).
if __name__ == "__main__":
    demo.launch(debug=True, share=True)
