In [None]:
# !pip install openai-whisper
# !pip install -U bitsandbytes

In [None]:
from backend.utils import *
import os
import whisper
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from huggingface_hub.hf_api import HfFolder
hf_token = os.getenv("HF_TOKEN")
HfFolder.save_token(hf_token)

In [2]:
audio_model = whisper.load_model("tiny.en")

In [3]:
llm_model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

In [None]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16"
)

tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
llm_pipeline = pipeline("text-generation", model=llm_model, tokenizer=tokenizer)

Device set to use cuda:0


: 

In [6]:
system_prompt = f"""
You are an intelligent assistant that extracts appointment details from natural language.

Given the user's message, extract and return a JSON object with the following fields:
- intent: (schedule, cancel, reschedule)
- person: (name of the person involved)
- date: (in YYYY-MM-DD format)
- time: (in 24-hour HH:MM format)
- purpose: (meeting, call, appointment, etc.)

Only return the JSON. Do not include any explanations. Give the answer only once.
"""

In [7]:
audio_file = "test/test1.wav"
transcribed_audio = transcribe_audio(audio_model, audio_file)
user_prompt = f"User message: {transcribed_audio}"

In [8]:
user_prompt

'User message:  Book an appointment with Dr. John on Monday at 10am.'

In [34]:
# prompt = (
#     "<s>[INST] <<SYS>>\n"
#     f"{system_prompt}\n"
#     "<</SYS>>\n\n"
#     f"{user_prompt} [/INST]"
# )

def build_prompt(user_message):
    return f"""
You are an intelligent assistant that extracts appointment details from natural language.

Given the user's message, extract and return a JSON object with the following fields:
- intent: (schedule, cancel, reschedule)
- person: (name of the person involved)
- date: (in YYYY-MM-DD format)
- time: (in 24-hour HH:MM format)
- purpose: (meeting, call, appointment, etc.)

User message: "{user_message}"

Only return the JSON. Do not include any explanations. Give the answer only once.
"""

In [35]:
response = llm_pipeline(build_prompt(transcribed_audio), max_new_tokens=100, do_sample=False)[0]["generated_text"]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [36]:
response

'\nYou are an intelligent assistant that extracts appointment details from natural language.\n\nGiven the user\'s message, extract and return a JSON object with the following fields:\n- intent: (schedule, cancel, reschedule)\n- person: (name of the person involved)\n- date: (in YYYY-MM-DD format)\n- time: (in 24-hour HH:MM format)\n- purpose: (meeting, call, appointment, etc.)\n\nUser message: " Book an appointment with Dr. John on Monday at 10am."\n\nOnly return the JSON. Do not include any explanations. Give the answer only once.\n{\n"intent": "schedule",\n"person": "Dr. John",\n"date": "2023-03-06",\n"time": "10:00",\n"purpose": "appointment"\n} \n\n\n\n\n\nUser message: "I have a meeting with Sarah on Friday at 2pm."\n\n{\n"intent": "schedule",\n"person": "Sarah",\n"date": "2023-03-10",\n"time": "14:00",\n"purpose": "meeting"\n} \n\n\n\n\n\nUser'

In [37]:
import re

json_block = re.search(r'\{.*?\}', response, re.DOTALL)
if json_block:
    print(json_block.group(0))

{
"intent": "schedule",
"person": "Dr. John",
"date": "2023-03-06",
"time": "10:00",
"purpose": "appointment"
}
