In [1]:
# !pip -q install git+https://github.com/huggingface/transformers #need to install from github
# !pip install -q datasets loralib sentencepiece
# !pip -q install bitsandbytes accelerate xformers einops
# !pip -q install langchain
# %pip install transformers

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

from langchain import HuggingFacePipeline
from langchain import PromptTemplate,  LLMChain

from langchain import LLMChain, PromptTemplate

from LangChain_chatbot_util import *
import os
# Obtain the API key from read_api_key() (presumably supplied by the wildcard
# import of LangChain_chatbot_util — verify) and publish it through the
# HUGGINGFACEHUB_API_TOKEN environment variable.
# `key` is kept as a notebook-level name because the clinic lookup cell passes it on.
key = read_api_key()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = key


2023-11-18 01:27:46.305942: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-18 01:27:46.947035: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Import Model

Models:

- `meta-llama/Llama-2-7b-chat-hf`
- `lmsys/vicuna-33b-v1.3` (note: the cells below actually load `lmsys/vicuna-13b-v1.3`)

In [3]:
# Load the tokenizer for the Vicuna checkpoint that the model cell also uses.
# Alternative checkpoint: "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(
    "lmsys/vicuna-13b-v1.3",
    use_auth_token=True,
)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
# Load the causal language model in half precision and let `device_map="auto"`
# decide where the weights are placed.
# Alternative checkpoint: "meta-llama/Llama-2-7b-chat-hf"
# Quantized loading is currently disabled: load_in_8bit=True / load_in_4bit=True
model = AutoModelForCausalLM.from_pretrained(
    "lmsys/vicuna-13b-v1.3",
    device_map="auto",
    torch_dtype=torch.float16,
    use_auth_token=True,
)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline;
# the LangChain section hands this object to HuggingFacePipeline.
# NOTE(review): the model cell loads the weights as torch.float16 while
# torch.bfloat16 is requested here — confirm which dtype is intended.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    max_new_tokens=100,
    do_sample=True,
    top_k=30,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [6]:
import textwrap

# Markers that get_prompt() places around the whole prompt (B_INST / E_INST)
# and around the system-prompt section (B_SYS / E_SYS).
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used by get_prompt() when the caller does not pass one.
# Two paragraphs separated by a blank line.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant. "
    "Always answer as helpfully as possible, while being safe. "
    "Your answers should not include any harmful, unethical, racist, sexist, "
    "toxic, dangerous, or illegal content. "
    "Please ensure that your responses are socially unbiased and positive in nature."
    "\n\n"
    "If a question does not make any sense, or is not factually coherent, "
    "explain why instead of answering something not correct. "
    "If you don't know the answer to a question, please don't share false information."
)



def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Build the full prompt string sent to the model.

    The result is, in order: B_INST, the system prompt wrapped in
    B_SYS / E_SYS, the instruction, and E_INST, concatenated with no
    extra separators.

    Args:
        instruction: Instruction text placed after the system section.
        new_system_prompt: System prompt text; defaults to
            DEFAULT_SYSTEM_PROMPT.

    Returns:
        The assembled prompt as a single string.
    """
    pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
    return "".join(pieces)

def cut_off_text(text, prompt):
    """Return the part of ``text`` that precedes the first ``prompt`` occurrence.

    Args:
        text: String to truncate.
        prompt: Cutoff phrase; everything from its first occurrence onwards
            is dropped.

    Returns:
        ``text`` up to (not including) the first occurrence of ``prompt``.
        ``text`` is returned unchanged when the phrase does not occur or
        when the phrase is empty.
    """
    # An empty phrase "matches" at index 0, which would wipe out the whole
    # text; treat it as "nothing to cut" instead.
    if not prompt:
        return text
    # partition() yields the full text as the head when the phrase is absent.
    head, _separator, _tail = text.partition(prompt)
    return head

def remove_substring(string, substring):
    """Return ``string`` with every occurrence of ``substring`` deleted.

    Args:
        string: Text to clean.
        substring: Text to strip out wherever it appears.

    Returns:
        A new string; ``string`` itself is not modified.
    """
    cleaned = string.replace(substring, "")
    return cleaned


def generate(text):
    """Run one instruction through the model directly and return the reply text.

    Uses the notebook-level ``tokenizer`` and ``model``. The instruction is
    wrapped with get_prompt() (default system prompt), tokenized, and passed to
    ``model.generate`` with at most 100 new tokens; the EOS token id is used
    both as the stop token and as the padding token.

    Args:
        text: Instruction to send to the model.

    Returns:
        The first decoded sequence, cut at the literal '</s>' if present and
        with the prompt text removed.
    """
    full_prompt = get_prompt(text)

    with torch.autocast('cuda', dtype=torch.bfloat16):
        # NOTE(review): inputs are moved to 'cuda' unconditionally — confirm this
        # matches where device_map="auto" placed the model.
        encoded = tokenizer(full_prompt, return_tensors="pt").to('cuda')
        generated_ids = model.generate(
            **encoded,
            max_new_tokens=100,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Only the first sequence of the batch is used.
        decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Plain string post-processing: trim at '</s>', then strip the echoed prompt.
    # NOTE(review): prompt removal relies on the decoded text reproducing the
    # prompt string exactly — verify for this tokenizer.
    truncated = cut_off_text(decoded, '</s>')
    return remove_substring(truncated, full_prompt)

def parse_text(text):
    """Wrap ``text`` to 100 columns, print it, and return it cut at "Key".

    Args:
        text: Text to format.

    Returns:
        The wrapped text truncated before the first occurrence of "Key"
        (unchanged if "Key" does not occur). The full wrapped text followed by
        two newlines is printed as a side effect.
    """
    wrapped = textwrap.fill(text, width=100)
    print(f"{wrapped}\n\n")
    return cut_off_text(wrapped, "Key")


## Set up LangChain

In [7]:
# Expose the transformers pipeline to LangChain as an LLM object.
# NOTE(review): the pipeline is created with do_sample=True while temperature 0
# is requested here — confirm whether model_kwargs takes effect when a
# ready-made pipeline is passed in, and which sampling behavior is intended.
llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs={"temperature": 0},
)

#### Single prompt only

In [8]:
# System prompt describing the keyword-extraction task and the expected
# START/END output layout. The text (including its leading indentation) is sent
# to the model verbatim.
# NOTE(review): the sentence ending in "categories: age, " looks truncated —
# confirm the intended category list.
system_prompt = """\
    You are an internal system assistant who is searching for keywords to search a table for.
    You categorize information in questions into the following categories: age, 

    System Format:
    START: <SORT>
    Required Categories: (age: toddler, child, adolescent, adult), (service: Therapy, Assessment, Diagnosis, Consultation, Advocacy, Educational)
    Additional Categories: (service type), (insurance: None, Medicaid), (language), (additional keywords)
    END: </SORT>
    End System Format

    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
    """

# Per-request instruction; "{text}" is the placeholder filled in by the
# PromptTemplate declared below.
instruction = "Please find and list the age of the child, the service needed, and any keywords that would be useful for a medical provider. If you can, format it like a list and use under 10 words. Please start and end the list with the word START and END: \n\n {text}"
# Combine instruction and system prompt into one template string and show it.
template = get_prompt(instruction, system_prompt)
print(template)

# Single-variable prompt template and the chain that runs it through `llm`.
prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

[INST]<<SYS>>
    You are an internal system assistant who is searching for keywords to search a table for.
    You categorize information in questions into the following categories: age, 

    System Format:
    START: <SORT>
    Required Categories: (age: toddler, child, adolescent, adult), (service: Therapy, Assessment, Diagnosis, Consultation, Advocacy, Educational)
    Additional Categories: (service type), (insurance: None, Medicaid), (language), (additional keywords)
    END: </SORT>
    End System Format

    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
    
<</SYS>>

Please find and list the age of the child, the service needed, and any keywords that would be useful for a medical provider. If you can, format it like a list and use under 10 words. Please start and end the list with the word START and END: 

 {text}[/IN

In [9]:
# Example question fed through the chain; adjacent literals concatenate into
# one string.
text = (
    "My 3-year-old had a high screening for autism. "
    "Where can I go for an evaluation? "
    "The referral given by the pediatrician is a long wait and I want other options."
)
output = llm_chain.run(text)

# parse_text() prints the wrapped model output and returns the search string,
# which is displayed as the cell result and reused by the clinic lookup cell.
search = parse_text(output)
search

  START:  1. Age: 3-years-old 2. Service: Evaluation for autism 3. Additional keywords: Long wait,
want other options  END:




'  START:  1. Age: 3-years-old 2. Service: Evaluation for autism 3. Additional keywords: Long wait,\nwant other options  END:'

In [10]:
from clinic_match import ClinicMatch

# Query the clinic matcher with the extracted search string.
cm = ClinicMatch(key)
output = cm.query(search)

# Print the first four results. Slicing (rather than indexing 0..3) avoids an
# IndexError when the query returns fewer than four entries.
# Assumes `output` is a list-like sequence of objects exposing
# `.page_content` — TODO confirm against ClinicMatch.query.
for doc in output[:4]:
    print("\n")
    print(doc.page_content)



Age Range: Infant to 18
 years
Support Type: Diagnosis
Therapy Services: 
Assessment or Diagnosis Services: Autism Diagnosis,  Complex Autism Diagnosis
Advocacy or Education Services: 
Insurance: 
Language: 
Notes: 


Age Range: 5 years and under
Support Type: Assessment
Therapy Services: 
Assessment or Diagnosis Services: Neuropsychological Assessment
Advocacy or Education Services: 
Insurance: Medicaid provider
Language: Translation may be available
Notes: Will not conduct Autism Diagnosis unless there is medical concern (epilepsy, brain injury)


Age Range: 18 months to 5 years
Support Type: Therapy
Therapy Services: Intensive therapy
Assessment or Diagnosis Services: Autism Diagnosis
Advocacy or Education Services: 
Insurance: Medicaid provider
Language: Spanish speaking staff
Notes: 


Age Range: 6 to 18 years
Support Type: Diagnosis, Therapy
Therapy Services: Non-intensive therapy
Assessment or Diagnosis Services: Autism plus other Diagnoses
Advocacy or Education Services: 
Ins