In [1]:
pip install -q -U langchain transformers accelerate optimum bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

# Notebook-wide settings: working directories and the Hub id of the model to load.
generative_model = "HuggingFaceH4/zephyr-7b-beta"

# Directories the notebook writes to; `paths` keeps them together so they can
# be created here and iterated over again later.
paths = ["cache/", "data/"]
cache_dir, data_path = paths

# Point the transformers download cache at the local cache directory.
# NOTE(review): recent transformers releases may deprecate this variable in
# favour of HF_HOME — confirm against the installed version.
os.environ["TRANSFORMERS_CACHE"] = cache_dir

# Create the working directories up front (no error if they already exist).
for path in paths:
    os.makedirs(path, exist_ok=True)

In [3]:
import transformers
from langchain import PromptTemplate
from langchain.schema.output_parser import OutputParserException
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
import torch



In [21]:
# Draw three patient messages from the dialogue dataset's "Patient" column.
# A fixed `random_state` makes the sample identical on every run, so the
# examples shown in later cells always correspond to the messages listed here.
patient_messages = (
    pd.read_json("/kaggle/input/diagnoise-me/en_medical_dialog.json")['Patient']
    .sample(n=3, random_state=42)
    .tolist()
)
patient_messages

["Why do I wake up with my arm or arms in the air?  sometimes they fall and I hit my husband who gets mad.  Is this a neurological issue as I've had 7 operations on my neck and back?  Is it due to med I take for pain or depression?  Stress induced?  Any ideas to stop this behavior? Thank you so very much for any input you may have.",
 'Hi I have a small solid lump on my chin. When squeezed thick white pus comes out and it smells bad.  No matter how hard I squeeze the lump remains. I believe it is a cyst.  Is this possible to treat topically or does it have to be removed surgically? Thank you',
 'hi sir my name is abdul im taking psychateric tablets for more 6yrs continuesly with different combination by my doctor(v.s.p. basiyam psychateric) advise but i felt tht he is not ready to stop my tablet but giving diffrent combination... so quit all tablet step by step by redu8cing dossage, but now im taking only duzella20mg for more 8months... i cant quit only thise. when i stoped for 1night,

In [5]:
# Fetch the model configuration from the Hub.
# NOTE(review): `load_in_8bit=True` is handed to the *config* here, while the
# model loader below requests bfloat16 weights and does not receive that flag,
# so 8-bit quantisation does not appear to be in effect — confirm the intent.
config = transformers.AutoConfig.from_pretrained(
        generative_model, trust_remote_code=True, load_in_8bit=True
)

# Load in Model
tokenizer = AutoTokenizer.from_pretrained(generative_model, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    generative_model,
    torch_dtype=torch.bfloat16,
    config=config,
    cache_dir=cache_dir,
    trust_remote_code=True,
    device_map="auto",
)

# Token id at which generation must stop.
# Previously this was `tokenizer.encode("### End")[0]`. `encode` adds the
# tokenizer's special tokens by default, so element 0 is the prepended
# start-of-sequence id rather than a stop marker; the pipeline then never saw a
# usable end-of-sequence id and kept generating past the answer. Use the
# tokenizer's own end-of-sequence token instead.
end_key_token_id = tokenizer.eos_token_id

# Text-generation pipeline with low-temperature sampling; `max_new_tokens` here
# is only the default and can be overridden per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=end_key_token_id,
    do_sample=True,
    temperature=0.2,
    max_new_tokens=100,
    repetition_penalty=1.1
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [18]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# Prompt text sent to the model. `{question}` is filled per patient message and
# `{format_instructions}` is filled once from the output parser below.
# NOTE(review): the role markers are written inline without turn terminators;
# check this against the chat format documented for the model in use.
template = """
<|system|>  You are a knowledgeable healthcare physician that is reviewing patient questions and responding in json. Your job depends on being correct and your job depends on responding in the json format described below. Stop after generating one json record.

<|user|>  The patient has the following question: 

{question}

Create a summary of the question, one word describing the problem, and severity of the question in the following output format. {format_instructions} 

Do not generate any additional text or comments besides the json record. Do not forget the comma after the value in the json schema.

<|assistant|>

"""

# One ResponseSchema per JSON field the model must return, built from
# (field name, instruction shown to the model) pairs.
response_schemas = [
    ResponseSchema(name=field, description=instruction)
    for field, instruction in (
        ("summary", "Write a brief summary of the patients question"),
        ("problem", "Using only one word summarize the most important aspect of the question"),
        ("severity", "Categorize the severity of the patient's question into one of these categories: mild or moderate or severe"),
        ("answer", "in a friendly and fact based manner answer the patient's question directly"),
    )
]

# The parser both produces the formatting instructions embedded in the prompt
# and later turns the model's reply back into a dict.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

prompt = PromptTemplate(
    input_variables=["question"],
    partial_variables=dict(format_instructions=format_instructions),
    template=template,
)

# Show the fully rendered prompt for one sampled message as a sanity check.
print(
    prompt.format_prompt(question=patient_messages[1]).to_string()
)


<|system|>  You are a knowledgeable healthcare physician that is reviewing patient questions and responding in json. Your job depends on being correct and your job depends on responding in the json format described below. Stop after generating one json record.

<|user|>  The patient has the following question: 

Hi doctor, I am a 26 year old male. I am 5 feet and 9 inches tall and weigh 255 pounds. When I eat spicy food, I poop blood. Sometimes when I have constipation as well, I poop a little bit of blood. I am really scared that I have colon cancer. I do have diarrhea often. I do not have a family history of colon cancer. I got blood tests done last night. Please find my reports attached.

Create a summary of the question, one word describing the problem, and severity of the question in the following output format. The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"summary": string  // 

In [23]:
def structure_patient_questions(question, num_tries=3, verbose=False, max_new_tokens=150):
    """Ask the model for a structured (JSON) assessment of one patient question.

    The prompt is rendered once, then the pipeline is sampled up to
    ``num_tries`` times until the structured output parser accepts the reply.

    Args:
        question: The patient's free-text message.
        num_tries: Maximum number of generation attempts.
        verbose: When true, print why each attempt failed to parse and a final
            message if every attempt failed.
        max_new_tokens: Generation budget passed to the pipeline per attempt.

    Returns:
        The parsed dict with the original text added under ``"question"``, or
        ``None`` when no attempt produced parseable output.
    """
    prompt_text = prompt.format_prompt(question=question).to_string()

    for attempt in range(1, num_tries + 1):
        generated = pipe(prompt_text, max_new_tokens=max_new_tokens)[0]['generated_text']

        # Keep only the completion: drop the echoed prompt, cut at any
        # follow-on "<|user|>" turn the model invents, and flatten newlines
        # before handing the text to the parser.
        completion = generated.split(prompt_text)[-1]
        completion = completion.split("<|user|>")[0].rstrip().replace("\n", "")

        try:
            parsed = output_parser.parse(completion)
        except OutputParserException as err:
            # Sampling is stochastic, so a malformed reply is retried; surface
            # the reason instead of discarding it silently.
            if verbose:
                print(f"Attempt {attempt}/{num_tries} could not be parsed: {err}")
            continue

        if parsed is not None:
            parsed["question"] = question
        return parsed

    if verbose:
        print(f"Could not parse question: {question}")
    return None

In [22]:
# Run every sampled message through the model. An entry is None when none of
# the attempts for that message produced output the parser accepted.
structured_patient_messages = list(map(structure_patient_questions, patient_messages))
structured_patient_messages

Could not parse question: Why do I wake up with my arm or arms in the air?  sometimes they fall and I hit my husband who gets mad.  Is this a neurological issue as I've had 7 operations on my neck and back?  Is it due to med I take for pain or depression?  Stress induced?  Any ideas to stop this behavior? Thank you so very much for any input you may have.




Could not parse question: hi sir my name is abdul im taking psychateric tablets for more 6yrs continuesly with different combination by my doctor(v.s.p. basiyam psychateric) advise but i felt tht he is not ready to stop my tablet but giving diffrent combination... so quit all tablet step by step by redu8cing dossage, but now im taking only duzella20mg for more 8months... i cant quit only thise. when i stoped for 1night, the next day was irritated with n severe gliddyness n headache neck pain etc....  pls advice n direct me how to stop DUZELA20mg...


[None,
 {'summary': 'Small solid lump on chin with foul-smelling pus discharge',
  'problem': 'abscess',
  'severity': 'moderate',
  'answer': "Based on the symptoms you describe, it's likely an abscess which requires drainage and antibiotics. While it's possible for topical treatments to help manage smaller cysts, this appears to be beyond their scope. We recommend scheduling an appointment for further evaluation and treatment.",
  'question': 'Hi I have a small solid lump on my chin. When squeezed thick white pus comes out and it smells bad.  No matter how hard I squeeze the lump remains. I believe it is a cyst.  Is this possible to treat topically or does it have to be removed surgically? Thank you'},
 None]

In [None]:
import os

def remove_directory(path):
    """Delete ``path`` and everything beneath it; do nothing if it is absent.

    Uses ``shutil.rmtree`` rather than a manual bottom-up walk. Symbolic links
    found inside the tree are removed as links and never followed, so their
    targets are left untouched; the manual walk instead raised when it tried to
    ``os.rmdir`` a link that pointed at a directory.

    Args:
        path: Directory to remove.

    Returns:
        None.
    """
    import shutil  # local import keeps this cell runnable on its own

    if os.path.exists(path):
        shutil.rmtree(path)

# Tear-down: delete every directory listed in `paths`, including whatever the
# notebook stored inside them.
for path in paths:
    remove_directory(path)