In [None]:
# Install required libraries
# !pip install transformers accelerate streamlit --quiet
!pip cache purge

!pip install --upgrade pip setuptools wheel


In [None]:
# Refresh the apt package index, then install the compiler toolchain
# (build-essential) and the OpenSSL / libffi / Python development headers.
# NOTE(review): presumably these are here so pip can build packages with native
# extensions from source — confirm the cell is still required.
!apt-get update
!apt-get install -y build-essential libssl-dev libffi-dev python3-dev


In [None]:
!pip install -q transformers accelerate text-generation

In [None]:
# Disk-space snapshot taken before the model cell below runs:
#   df -h /  -> overall usage of the root filesystem
#   du -sh   -> size of each entry directly under /root, /usr and /content
#               (stderr is discarded via 2>/dev/null)
!df -h /
!du -sh /root/* /usr/* /content/* 2>/dev/null

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, pipeline
import torch

# Identifier handed to both `from_pretrained` calls below.
model_id = "deepseek-ai/deepseek-coder-6.7b-instruct"

# Options for loading the model weights, collected in one place:
#   device_map="auto"       - device placement is left to the library
#   torch_dtype             - weights are loaded as float16
#   trust_remote_code=True  - NOTE(review): this allows Python code shipped in
#                             the model repository to run locally; confirm it
#                             is actually needed for this checkpoint.
_model_load_kwargs = dict(
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_kwargs)

# Text-generation pipeline; later cells call it through the name `chatbot`.
chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# Same disk-space snapshot as earlier in the notebook, repeated after the model
# cell so the two outputs can be compared:
#   df -h /  -> overall usage of the root filesystem
#   du -sh   -> size of each entry directly under /root, /usr and /content
#               (stderr is discarded via 2>/dev/null)
!df -h /
!du -sh /root/* /usr/* /content/* 2>/dev/null

In [None]:
# check the GPU allocated in virtual environment
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [21]:
def build_prompt(user_input, domain="healthcare"):
    """Assemble the three-line text prompt sent to the model.

    Args:
        user_input: The customer's message, inserted after ``User: ``.
        domain: Business domain named in the instruction line.

    Returns:
        A string made of the instruction line, the ``User:`` line and a
        trailing ``Assistant:`` cue, separated by newlines.
    """
    prompt_lines = (
        f"You are a helpful assistant for {domain} customer service. Be accurate, empathetic and concise.",
        f"User: {user_input}",
        "Assistant:",
    )
    return "\n".join(prompt_lines)


In [None]:
def get_response(prompt, max_tokens=512):
    """Run ``prompt`` through the notebook-level ``chatbot`` pipeline.

    Sampling is switched on with temperature 0.7 and at most ``max_tokens``
    new tokens are requested.

    Returns:
        The ``generated_text`` field of the first pipeline result, unchanged.
    """
    generation_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
    }
    first_result = chatbot(prompt, **generation_options)[0]
    return first_result['generated_text']


In [None]:
user_input = "I need help understanding my insurance coverage for a recent surgery."
prompt = build_prompt(user_input, domain="healthcare")
response = get_response(prompt)
print(response)


In [23]:
import gradio as gr

def build_prompt(user_input, domain="healthcare"):
    """Build the prompt text for one user message (variant used by the chat UI).

    Args:
        user_input: The customer's message, placed on the ``User:`` line.
        domain: Business domain named in the instruction line.

    Returns:
        The instruction line, the ``User:`` line and a closing ``Assistant:``
        cue, joined by newlines.
    """
    persona = f"You are a helpful assistant for {domain} customer service. Be accurate, empathetic, funny and concise."
    user_turn = f"User: {user_input}"
    return persona + "\n" + user_turn + "\nAssistant:"

def get_response(prompt, max_tokens=512, generator=None):
    """Generate a reply for ``prompt`` and return only the newly generated text.

    Args:
        prompt: Full prompt string passed to the text-generation pipeline.
        max_tokens: Upper bound on newly generated tokens (``max_new_tokens``).
        generator: Optional callable with the pipeline's call signature
            (``generator(prompt, **options) -> [{"generated_text": ...}]``).
            When omitted, the notebook-level ``chatbot`` pipeline is used.

    Returns:
        The model's continuation with surrounding whitespace removed.
    """
    if generator is None:
        generator = chatbot

    response = generator(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
    generated_text = response[0]['generated_text']

    # The pipeline output starts with the prompt itself, so drop exactly that
    # prefix. Searching for the first "Assistant:" instead would cut in the
    # wrong place whenever the user's own message contains that marker, and
    # part of the prompt would leak into the reply.
    if generated_text.startswith(prompt):
        return generated_text[len(prompt):].strip()

    # Fallback for output that does not echo the prompt verbatim: keep the
    # text after the first "Assistant:" marker if there is one, otherwise
    # return everything.
    _, marker, tail = generated_text.partition("Assistant:")
    return tail.strip() if marker else generated_text.strip()


def respond(user_input, chat_history, selected_domain):
    """Handle one chat turn for the Gradio UI.

    Args:
        user_input: Text taken from the question box.
        chat_history: List of ``(user_message, bot_reply)`` tuples. A new pair
            is appended to this list in place when a reply is generated.
        selected_domain: Domain name passed on to ``build_prompt``.

    Returns:
        ``(chat_history, "")`` — the history to show in the chat display and
        an empty string that clears the input box.
    """
    # Treat None and whitespace-only submissions as "nothing to send" so the
    # model is not invoked (and no blank turn is recorded) for them.
    message = (user_input or "").strip()
    if not message:
        return chat_history, ""

    prompt = build_prompt(message, selected_domain)
    bot_response = get_response(prompt)

    # Record the turn in place; the same list object is handed back to the UI.
    chat_history.append((message, bot_response))

    return chat_history, ""


# Gradio front end: header, domain picker, conversation view and an input row.
with gr.Blocks() as demo:
    # Page title followed by a short disclaimer about the model in use.
    gr.Markdown("## 💬 DeepSeek Chatbot for Finance & Healthcare - Rajesh Aadi")
    gr.Markdown(
        """
        ### Watchout : This webapp uses AI Model, ```deepseek-coder-6.7b-instruct```,
        not really as smart as many other AI Models out there.
        """
    )

    # The selected value reaches `respond` as its `selected_domain` argument.
    domain = gr.Radio(["finance", "healthcare"], label="Choose Domain", value="finance")

    # Conversation display plus the per-session list that holds the turns.
    chatbot_output = gr.Chatbot(label="Conversation")
    chat_history_state = gr.State([])

    with gr.Row():
        chat_input = gr.Textbox(label="Your question", scale=4)
        submit_btn = gr.Button("Send", scale=1)

    # Clicking "Send" and pressing Enter in the textbox are wired identically:
    # both call `respond`, refresh the conversation view and clear the textbox.
    # NOTE(review): `chat_history_state` is an input but not an output here, so
    # history carries over between turns only through the in-place append done
    # inside `respond` — confirm this is intended.
    for register_listener in (submit_btn.click, chat_input.submit):
        register_listener(
            respond,
            inputs=[chat_input, chat_history_state, domain],
            outputs=[chatbot_output, chat_input],
        )

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)

  chatbot_output = gr.Chatbot(label="Conversation")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1dd22282dc16f53c72.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
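# Code below is commented out because the DeepSeek Coder 33B checkpoint demands
# serious memory capacity. (The "loads 37B params out of 685B total" figure
# describes the DeepSeek V3.x mixture-of-experts models, not this 33B model.)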
#from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the GPTQ build of DeepSeek Coder 33B from Hugging Face (not DeepSeek V3.1)
#model_id = "deepseek-ai/deepseek-coder-33b-instruct-GPTQ"
#tokenizer = AutoTokenizer.from_pretrained(model_id)
#model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")

# Create chatbot pipeline
#chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)

""" Below code never worked , could not install GPTQ model on CUDA
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch

model_id = "deepseek-ai/deepseek-coder-33b-instruct-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)
"""

In [None]:
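# Save this as app.py and run with: streamlit run app.py
# Note: the snippet calls build_prompt() and get_response(), which it does not
# define — app.py must also contain them (and the `chatbot` pipeline they use).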
"""
import streamlit as st

st.title("DeepSeek Chatbot")
domain = st.selectbox("Choose domain", ["healthcare", "finance"])
user_input = st.text_input("Ask your question")

if user_input:
    prompt = build_prompt(user_input, domain)
    response = get_response(prompt)
    st.write(response)
"""