# LLM chat demo

For this demo, you need permission to use the Llama 3.2 1B model from Meta via Hugging Face. Request access first, then create a Hugging Face token; you will have to log in with that token in the next cell.

In [None]:
# Install Streamlit (quietly) and the localtunnel npm package; both are used by
# the final cell of this notebook to serve and expose the app.
!pip install -q streamlit
!npm install localtunnel
# Upgrade the Hugging Face Hub client, then log in with the access token
# described in the introduction above.
!pip install --upgrade huggingface_hub
from huggingface_hub import login
login()

In [None]:
#copypasta and create 

%%writefile app.py

### BACKEND
import os
import copy

#local demo imports and config
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, TextIteratorStreamer


# quantization_config = BitsAndBytesConfig(load_in_4bit=True)
# Choose the compute backend in order of preference: CUDA, then MPS, then CPU.
# `torch.backends.mps.is_available()` is the documented availability check;
# `torch.mps.is_available` is missing from older PyTorch releases, where the
# lookup raises AttributeError whenever CUDA is not available.
if torch.cuda.is_available():
    torch_device = "cuda"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"

# float16 on the accelerator backends, float32 when running on the CPU.
torch_dtype = torch.float16 if torch_device in ["cuda", "mps"] else torch.float32

# Load the instruct checkpoint onto the selected device with the selected dtype.
llama_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",
    # quantization_config=quantization_config,
    device_map=torch_device,
    torch_dtype=torch_dtype,
)

# Tokenizer that belongs to the same checkpoint.
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

# Streamer bound to the tokenizer; at present it is only referenced by the
# commented-out streaming code in this file.
streamer = TextStreamer(llama_tokenizer)


# def llama32_1b_streamchat(messages):
#     inputs = llama_tokenizer.apply_chat_template(messages, add_generation_prompt = True)
#     inputs = torch.tensor(inputs).to(torch_device).unsqueeze(0)
#     stream = llama_model.generate(inputs, streamer=streamer, max_new_tokens = 256)
#     return stream

# Text-generation pipeline built around the model and tokenizer loaded above,
# so nothing is loaded a second time here.
llama32_1b_pipe = pipeline(
    task="text-generation",
    tokenizer=llama_tokenizer,
    model=llama_model,
    # streamer=streamer,
)

def llama32_1b_chat(messages) -> str:
    """Run the chat pipeline on *messages* and return only the newest reply text.

    The pipeline is called with ``max_new_tokens=512``. From its result the
    last output's ``generated_text`` is taken, and the ``content`` of the final
    entry in that list is returned.

    ``messages`` is passed to the pipeline unchanged; the frontend in this file
    supplies a list of ``{"role": ..., "content": ...}`` dicts.
    """
    pipeline_result = llama32_1b_pipe(messages, max_new_tokens=512)
    conversation = pipeline_result[-1]["generated_text"]
    return conversation[-1]["content"]
    

### FRONTEND
import streamlit as st
import random
import time
# The BACKEND section of this same file already defines llama32_1b_chat, and
# this notebook only writes app.py, so an unconditional import from llm_demo
# fails with ModuleNotFoundError. Import it only when the frontend is run
# without the backend section above it.
if "llama32_1b_chat" not in globals():
    from llm_demo import llama32_1b_chat


# Streamed response emulator
def response_generator():
    """Yield one randomly chosen canned greeting, word by word.

    Each yielded item is a single word followed by a space, and the generator
    pauses for 0.05 seconds after every word to imitate a streamed reply.
    """
    canned_replies = [
        "Hello there! How can I assist you today?",
        "Hi, human! Is there anything I can help you with?",
        "Do you need help?",
    ]
    chosen_reply = random.choice(canned_replies)
    for token in chosen_reply.split():
        yield f"{token} "
        time.sleep(0.05)


st.title("llm chat demo")

# On the first run of a session, seed the history with the system prompt and
# an opening assistant greeting; later reruns keep whatever is already stored.
if "messages" not in st.session_state:
    system_message = {
        "role": "system",
        "content": "You are a helpful chatbot who will assist the end user as best as possible.",
    }
    greeting_message = {
        "role": "assistant",
        "content": "Hi there, how can I help you today?",
    }
    st.session_state.messages = [system_message, greeting_message]

# Replay the stored conversation each time the app reruns
for past_message in st.session_state.messages:
    if past_message["role"] == "system":
        continue  # system messages are kept in the history but never shown
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])

# Accept user input; the body runs only when the user has submitted a prompt.
if prompt := st.chat_input("What is up?"):
    # Record the user's turn first so the model sees it as part of the history.
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate and display the assistant response in its own chat container.
    with st.chat_message("assistant"):
        generated_response = llama32_1b_chat(st.session_state.messages)
        # The result of st.write is not kept: it returns None for plain text,
        # so the old `response = ...` binding only held an unused None.
        st.write(generated_response)
        # making this a stream with write_stream isn't that simple weirdly
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": generated_response})

# HOW TO RUN THE GUI:
Run the next cell, then click the link it generates. When the page asks for a password, copy and paste the printed IP address and press Enter. It should now work.

In [None]:
# Three commands chained with `&`: Streamlit runs app.py in the background with
# its output redirected to /content/logs.txt, localtunnel is started for port
# 8501, and curl prints this machine's public IPv4 address (the value to paste
# as the tunnel password, per the instructions above).
!streamlit run app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com
