In [2]:
!pip install llama-cpp-python --no-cache-dir



Collecting llama-cpp-python
  Downloading llama_cpp_python-0.2.27.tar.gz (9.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting numpy>=1.20.0 (from llama-cpp-python)
  Downloading numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.2/61.2 kB[0m [31m424.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m257.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownlo

In [16]:
# List the entries of the ./app directory (relative to the kernel's working
# directory) as a quick check of what is available.
import os
os.listdir('./app')

['main.py', 'data']

In [17]:
# Read ./app/data/ through SimpleDirectoryReader and keep the result in `documents`.
# NOTE(review): SimpleDirectoryReader is imported in a separate cell; that cell
# must run before this one on a fresh kernel.
documents = SimpleDirectoryReader("./app/data/").load_data()


In [11]:
import logging
import sys
import torch

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import ServiceContext
from llama_index.embeddings import LangchainEmbedding


In [90]:
import re

import re

def check_for_cpt_code(text):
    """
    Look for a standalone 5-digit number (a candidate CPT code) in a string.

    Only the first run of exactly five digits that is delimited by word
    boundaries is considered; digit runs that are longer, or that touch
    letters or underscores, are not matched.

    Parameters:
    text (str): The string to scan.

    Returns:
    tuple:
        - True if a 5-digit code was found, otherwise False.
        - The code converted to int (leading zeros are therefore not
          preserved), or None when no code was found.
    """
    found = re.search(r'\b\d{5}\b', text)

    # Guard clause: nothing that looks like a CPT code.
    if found is None:
        return False, None

    return True, int(found.group())



def count_yes_no(strings):
    """
    Tally how many strings open with a 'yes' and how many open with a 'no'.

    Each string is split on whitespace and only its first five tokens are
    inspected, in order. The first token containing the whole word 'yes' or
    'no' (case-insensitive) decides the string; 'yes' is tested before 'no'
    within a token. A string contributes at most one count, and contributes
    nothing when none of its first five tokens match.

    Parameters:
    strings (list of str): The strings to classify.

    Returns:
    tuple: A tuple containing two integers:
        - The number of strings classified as 'Yes'.
        - The number of strings classified as 'No'.
    """
    yes_pattern = re.compile(r'\byes\b', re.IGNORECASE)
    no_pattern = re.compile(r'\bno\b', re.IGNORECASE)

    def _first_verdict(answer):
        # Return 'yes', 'no' or None for a single string.
        for word in answer.split()[:5]:
            if yes_pattern.search(word):
                return 'yes'
            if no_pattern.search(word):
                return 'no'
        return None

    verdicts = [_first_verdict(answer) for answer in strings]
    return verdicts.count('yes'), verdicts.count('no')


import re
from datetime import datetime

def calculate_age(dob_string, reference_date=None):
    """
    Calculate a patient's age from a date of birth embedded in free text.

    The first date in ``dob_string`` written as month/day/4-digit-year is
    taken as the date of birth; any later dates in the text are ignored.
    Month and day may be zero-padded ("06/05/1990") or not ("6/5/1990").

    Parameters:
    dob_string (str): Text containing the date of birth in mm/dd/yyyy format.
    reference_date (str | datetime | date, optional): The date at which the
        age is evaluated. A string must be in mm/dd/yyyy format; a ``date`` or
        ``datetime`` object is used as-is. Defaults to the current date when
        omitted (or otherwise falsy).

    Returns:
    int: The age in whole years at the reference date. The value is negative
        when the reference date precedes the date of birth.

    Raises:
    ValueError: If no date is found in ``dob_string``, or if the date found
        (or a string ``reference_date``) is not a valid calendar date.
    """
    # Accept one- or two-digit month/day: free-text answers do not always
    # zero-pad, and strptime's %m/%d parse both forms.
    dob_match = re.search(r'\b\d{1,2}/\d{1,2}/\d{4}\b', dob_string)
    if dob_match is None:
        raise ValueError("Date of birth not found in the provided string.")

    dob = datetime.strptime(dob_match.group(), '%m/%d/%Y')

    if not reference_date:
        reference_date = datetime.today()
    elif isinstance(reference_date, str):
        reference_date = datetime.strptime(reference_date, '%m/%d/%Y')
    # Otherwise reference_date is expected to expose .year/.month/.day already.

    # Subtract one year if this year's birthday has not been reached yet.
    birthday_reached = (reference_date.month, reference_date.day) >= (dob.month, dob.day)
    return reference_date.year - dob.year - (0 if birthday_reached else 1)

def detect_first_degree_relative(text):
    """
    Report whether a string mentions a first-degree relative, and which one.

    The search is case-insensitive and matches whole words only, so plurals
    such as "sons" and embedded words such as "grandson" or "person" are not
    matched. Only the earliest mention in the text is reported.

    Parameters:
    text (str): The string to scan.

    Returns:
    tuple:
        - True if a first-degree relative is mentioned, otherwise False.
        - The matched relative in lower case, or None if there is no match.
    """
    first_degree = ("mother", "father", "brother", "sister", "son", "daughter")

    # Whole-word alternation over the relatives listed above.
    finder = re.compile(r'\b(?:' + '|'.join(first_degree) + r')\b', re.IGNORECASE)

    hit = finder.search(text)
    return (True, hit.group().lower()) if hit else (False, None)


In [18]:

# Configure logging
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Load documents
documents = SimpleDirectoryReader("./app/data/").load_data()

# Initialize the LlamaCPP model
llm = LlamaCPP(
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf',
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Initialize the embedding model
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="thenlper/gte-large")
)

# Set up the service context
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model
)

# Measure execution time
%%time

# Create an index from documents
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Create a query engine
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("who founded dunnhumby?")
print(response)

Downloading url https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q5_K_M.gguf to path /Users/jasperhajonides/Library/Caches/llama_index/models/mistral-7b-instruct-v0.1.Q5_K_M.gguf
total size (MB): 5131.41


4894it [01:24, 57.74it/s]                                                         
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/jasperhajonides/Library/Caches/llama_index/models/mistral-7b-instruct-v0.1.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: thenlper/gte-large
Load pretrained SentenceTransformer: thenlper/gte-large
Load pretrained SentenceTransformer: thenlper/gte-large
Load pretrained SentenceTransformer: thenlper/gte-large


.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

onnx/config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

onnx/special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

onnx/tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

onnx/tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

onnx/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/670M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu
Use pytorch device: cpu
Use pytorch device: cpu
Use pytorch device: cpu


UsageError: Line magic function `%%time` not found.


In [19]:

# Create an index from documents
# (uses the `documents` and `service_context` objects defined in the setup cell)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Create a query engine
# `query_engine` is the object every later cell in this notebook queries.
query_engine = index.as_query_engine()

# Perform a query and print the response
response = query_engine.query("what is the diagnosis? ")
print(response)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 Based on the provided context information, the diagnosis for James Maddison is internal hemorrhoids.



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      18.69 ms /    22 runs   (    0.85 ms per token,  1177.16 tokens per second)
llama_print_timings: prompt eval time =   17856.35 ms /   914 tokens (   19.54 ms per token,    51.19 tokens per second)
llama_print_timings:        eval time =    2078.17 ms /    21 runs   (   98.96 ms per token,    10.11 tokens per second)
llama_print_timings:       total time =   20120.35 ms


In [91]:

# Ask the model which CPT codes appear in the report, then extract the first
# standalone 5-digit number from its free-text answer.
response = query_engine.query("Hospitals use CPT codes to treatments, these codes are 5 digits. Can you identify any of these codes related to treatments in this report?")
text = response.response
code_present, code = check_for_cpt_code(text)

# A missing code is None, which an f-string already renders as 'None'.
print(f"CPT code present: {code_present}", f"CPT code: {code}", sep="\n")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


 Yes, I can help with that. Based on the context information provided, there is one CPT code mentioned which is 45378. This code represents a colonoscopy procedure.
CPT code present: True
CPT code: 45378



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      14.74 ms /    41 runs   (    0.36 ms per token,  2781.74 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    4203.42 ms /    41 runs   (  102.52 ms per token,     9.75 tokens per second)
llama_print_timings:       total time =    4399.59 ms


In [43]:
# Ask for the requested procedure and show the model's full answer.
response = query_engine.query("What is the requested procedure for this report?")
print(response)

import re
text = response.response
# Regular expression to find 5 consecutive digits
# (word boundaries on both sides, so longer digit runs are not matched;
# only the first such number in the answer is used)
match = re.search(r'\b\d{5}\b', text)

if match:
    # Unlike check_for_cpt_code(), the code is kept as a string here.
    code = match.group()
    print(f"Found code: {code}")
else:
    print("No 5-digit code found.")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


 The requested procedure for this report is 45378, which represents a standard diagnostic colonoscopy.
Found code: 45378



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      16.41 ms /    24 runs   (    0.68 ms per token,  1462.52 tokens per second)
llama_print_timings: prompt eval time =    5722.14 ms /   479 tokens (   11.95 ms per token,    83.71 tokens per second)
llama_print_timings:        eval time =    2237.15 ms /    23 runs   (   97.27 ms per token,    10.28 tokens per second)
llama_print_timings:       total time =    8139.46 ms


In [47]:

# Ask the same yes/no question 15 times and tally the answers, to see how
# consistently the model responds.
# NOTE(review): the prompt contains what look like typos ("colonalrectal",
# "absominal") - presumably "colorectal" and "abdominal"; confirm before fixing,
# since changing the prompt changes what is sent to the model.
checks = []
for i in range(15):
    response = query_engine.query("Has there been a previous treatment that successfully improved colonalrectal or absominal discomfort? Answer with a yes or a no")
    checks.append(response.response)
# count_yes_no() only inspects the first five tokens of each answer, so answers
# that do not open with yes/no are counted in neither total.
yes_count, no_count = count_yes_no(checks)
print(f"'Yes' count: {yes_count}")
print(f"'No' count: {no_count}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.38 ms /     2 runs   (    0.69 ms per token,  1445.09 tokens per second)
llama_print_timings: prompt eval time =   10244.25 ms /   804 tokens (   12.74 ms per token,    78.48 tokens per second)
llama_print_timings:        eval time =      98.56 ms /     1 runs   (   98.56 ms per token,    10.15 tokens per second)
llama_print_timings:       total time =   10369.65 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.54 ms /     3 runs   (    0.51 ms per token,  1946.79 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     299.00 ms /     3 runs   (   99.67 ms per token,    10.03 tokens per second)
llama_print_timings:       total time =     318.04 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.48 ms /     2 runs   (    0.24 ms per token,  4149.38 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     202.19 ms /     2 runs   (  101.10 ms per token,     9.89 tokens per second)
llama_print_timings:       total time =     208.37 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.50 ms /     2 runs   (    0.25 ms per token,  4032.26 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     202.39 ms /     2 runs   (  101.20 ms per token,     9.88 tokens per second)
llama_print_timings:       total time =     210.51 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.02 ms /     3 runs   (    0.34 ms per token,  2955.67 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     304.98 ms /     3 runs   (  101.66 ms per token,     9.84 tokens per second)
llama_print_timings:       total time =     318.24 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.72 ms /     2 runs   (    0.36 ms per token,  2758.62 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     199.04 ms /     2 runs   (   99.52 ms per token,    10.05 tokens per second)
llama_print_timings:       total time =     209.45 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.46 ms /     2 runs   (    0.23 ms per token,  4357.30 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     206.19 ms /     2 runs   (  103.09 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =     214.53 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.65 ms /     2 runs   (    0.32 ms per token,  3100.78 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     196.68 ms /     2 runs   (   98.34 ms per token,    10.17 tokens per second)
llama_print_timings:       total time =     205.90 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.84 ms /     2 runs   (    0.42 ms per token,  2375.30 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     201.56 ms /     2 runs   (  100.78 ms per token,     9.92 tokens per second)
llama_print_timings:       total time =     211.73 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      21.19 ms /    34 runs   (    0.62 ms per token,  1604.68 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3359.99 ms /    34 runs   (   98.82 ms per token,    10.12 tokens per second)
llama_print_timings:       total time =    3617.76 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.67 ms /     2 runs   (    0.33 ms per token,  2998.50 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     200.15 ms /     2 runs   (  100.07 ms per token,     9.99 tokens per second)
llama_print_timings:       total time =     207.98 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      72.84 ms /   109 runs   (    0.67 ms per token,  1496.43 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   10912.32 ms /   109 runs   (  100.11 ms per token,     9.99 tokens per second)
llama_print_timings:       total time =   11696.06 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.02 ms /     3 runs   (    0.34 ms per token,  2938.30 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     304.49 ms /     3 runs   (  101.50 ms per token,     9.85 tokens per second)
llama_print_timings:       total time =     318.75 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.33 ms /     3 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     296.99 ms /     3 runs   (   99.00 ms per token,    10.10 tokens per second)
llama_print_timings:       total time =     312.31 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


'Yes' count: 0
'No' count: 13



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.55 ms /     2 runs   (    0.27 ms per token,  3663.00 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     201.28 ms /     2 runs   (  100.64 ms per token,     9.94 tokens per second)
llama_print_timings:       total time =     211.96 ms


In [50]:
# Ask whether the report indicates a Juvenile polyposis syndrome diagnosis,
# passing the criteria to the model verbatim as a multi-line prompt.
# NOTE(review): the prompt contains what look like typos ("hte", "abdominalpain");
# confirm before fixing, since changing the prompt changes what is sent to the model.
response = query_engine.query("""
We'd like to check if hte patient had Juvenile polyposis syndrome diagnosis indicated by 1 or more of the following:
- Age 12 years or older and symptomatic(eg, abdominalpain, iron deficiency anemia, rectal bleeding,
telangiectasia)
- Younger than 12 years and symptomatic(eg, abdominalpain, iron deficiency anemia, rectal bleeding,
telangiectasia)

Is there any indication? Yes or no?
""")
# Print only the answer text of the response.
print(response.response)




Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      19.57 ms /    29 runs   (    0.67 ms per token,  1482.09 tokens per second)
llama_print_timings: prompt eval time =    4718.74 ms /   438 tokens (   10.77 ms per token,    92.82 tokens per second)
llama_print_timings:        eval time =    2799.90 ms /    28 runs   (  100.00 ms per token,    10.00 tokens per second)
llama_print_timings:       total time =    7750.70 ms


In [92]:
# Ask for the patient's date of birth; the answer is kept in `response` and is
# not printed by this cell.
response = query_engine.query("""What is the date of birth of the patient in this report?
 """)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      30.27 ms /    45 runs   (    0.67 ms per token,  1486.77 tokens per second)
llama_print_timings: prompt eval time =    8191.06 ms /   792 tokens (   10.34 ms per token,    96.69 tokens per second)
llama_print_timings:        eval time =    4366.60 ms /    44 runs   (   99.24 ms per token,    10.08 tokens per second)
llama_print_timings:       total time =   12960.64 ms


In [93]:

# Example usage
dob_string = response.response
# Age is calculated as of today. To calculate it relative to the date the
# report was created, pass that date (mm/dd/yyyy) as the second argument to
# calculate_age(). Only the first date found in the text is used as the date
# of birth, so check `dob_string` if the answer mentions more than one date.
age = calculate_age(dob_string)
# Reuse the stored value instead of computing the age a second time.
print(f"Patient's age: {age}")


Patient's age: 41


In [94]:
# Show the computed age next to the raw answer it was derived from.
f"{age} ({dob_string})"

'41 ( The date of birth for the patient in this report is 06/16/1982 for James Freeman and 03/15/1965 for James Maddison.)'

In [69]:


# Ask the same yes/no question 5 times and tally the answers.
# NOTE(review): "possible scheduled" in the prompt looks like a typo for
# "possibly scheduled"; confirm before fixing, since changing the prompt changes
# what is sent to the model.
# NOTE(review): the saved output for this cell reports 12 'No' answers, which
# 5 iterations cannot produce (count_yes_no counts each answer at most once) -
# the output appears to come from an earlier version of the cell; re-run to refresh.
checks = []
for i in range(5):
    response = query_engine.query("Check if patient already had a colonoscopy in past 10 years, apart from one that is possible scheduled, yes or no?")
    checks.append(response.response)
yes_count, no_count = count_yes_no(checks)
print(f"'Yes' count: {yes_count}")
print(f"'No' count: {no_count}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      56.46 ms /    81 runs   (    0.70 ms per token,  1434.57 tokens per second)
llama_print_timings: prompt eval time =     853.42 ms /    40 tokens (   21.34 ms per token,    46.87 tokens per second)
llama_print_timings:        eval time =    7709.97 ms /    80 runs   (   96.37 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    9211.11 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      25.91 ms /    40 runs   (    0.65 ms per token,  1543.87 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3862.91 ms /    40 runs   (   96.57 ms per token,    10.35 tokens per second)
llama_print_timings:       total time =    4148.94 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      29.46 ms /    40 runs   (    0.74 ms per token,  1357.87 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3860.77 ms /    40 runs   (   96.52 ms per token,    10.36 tokens per second)
llama_print_timings:       total time =    4170.10 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      26.74 ms /    40 runs   (    0.67 ms per token,  1495.83 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3887.81 ms /    40 runs   (   97.20 ms per token,    10.29 tokens per second)
llama_print_timings:       total time =    4165.71 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      28.44 ms /    40 runs   (    0.71 ms per token,  1406.62 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3854.97 ms /    40 runs   (   96.37 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    4166.26 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      26.86 ms /    40 runs   (    0.67 ms per token,  1489.37 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3872.40 ms /    40 runs   (   96.81 ms per token,    10.33 tokens per second)
llama_print_timings:       total time =    4163.73 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      23.62 ms /    31 runs   (    0.76 ms per token,  1312.45 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    2985.70 ms /    31 runs   (   96.31 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    3223.57 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      30.67 ms /    41 runs   (    0.75 ms per token,  1336.81 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3962.94 ms /    41 runs   (   96.66 ms per token,    10.35 tokens per second)
llama_print_timings:       total time =    4269.74 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      53.23 ms /    79 runs   (    0.67 ms per token,  1484.13 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    7610.94 ms /    79 runs   (   96.34 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    8212.73 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      27.65 ms /    40 runs   (    0.69 ms per token,  1446.81 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3877.39 ms /    40 runs   (   96.93 ms per token,    10.32 tokens per second)
llama_print_timings:       total time =    4164.18 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      30.57 ms /    40 runs   (    0.76 ms per token,  1308.56 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3858.31 ms /    40 runs   (   96.46 ms per token,    10.37 tokens per second)
llama_print_timings:       total time =    4161.21 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      40.26 ms /    60 runs   (    0.67 ms per token,  1490.42 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    5782.96 ms /    60 runs   (   96.38 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    6240.54 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      28.32 ms /    40 runs   (    0.71 ms per token,  1412.18 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3843.47 ms /    40 runs   (   96.09 ms per token,    10.41 tokens per second)
llama_print_timings:       total time =    4152.80 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      26.83 ms /    40 runs   (    0.67 ms per token,  1490.87 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3846.70 ms /    40 runs   (   96.17 ms per token,    10.40 tokens per second)
llama_print_timings:       total time =    4143.78 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


'Yes' count: 0
'No' count: 12



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      27.42 ms /    40 runs   (    0.69 ms per token,  1458.90 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3859.79 ms /    40 runs   (   96.49 ms per token,    10.36 tokens per second)
llama_print_timings:       total time =    4152.75 ms


In [76]:


# Put the same yes/no symptom question to the query engine five times,
# keep each answer's text, and tally the replies with count_yes_no.
symptom_question = "Is the patient symptomatic (e.g. abdominal pain, iron deficiency anemia, rectal bleeding)? Answer just yes or no."

checks = []
for i in range(5):
    response = query_engine.query(symptom_question)
    checks += [response.response]  # store only the answer text, not the whole response object

print(checks)

# count_yes_no hands back a pair that is unpacked as (yes tally, no tally).
yes_count, no_count = count_yes_no(checks)
print(f"'Yes' count: {yes_count}", f"'No' count: {no_count}", sep="\n")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       2.76 ms /     3 runs   (    0.92 ms per token,  1086.56 tokens per second)
llama_print_timings: prompt eval time =   10478.48 ms /   866 tokens (   12.10 ms per token,    82.65 tokens per second)
llama_print_timings:        eval time =     202.03 ms /     2 runs   (  101.02 ms per token,     9.90 tokens per second)
llama_print_timings:       total time =   10713.64 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.07 ms /     3 runs   (    0.36 ms per token,  2793.30 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     303.25 ms /     3 runs   (  101.08 ms per token,     9.89 tokens per second)
llama_print_timings:       total time =     320.54 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.61 ms /     3 runs   (    0.54 ms per token,  1864.51 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     320.96 ms /     3 runs   (  106.99 ms per token,     9.35 tokens per second)
llama_print_timings:       total time =     339.61 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit

llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       0.89 ms /     3 runs   (    0.30 ms per token,  3363.23 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     305.01 ms /     3 runs   (  101.67 ms per token,     9.84 tokens per second)
llama_print_timings:       total time =     317.28 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


[' Yes.', ' Yes.', ' Yes.', ' Yes.', ' Yes.']
'Yes' count: 5
'No' count: 0



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =       1.24 ms /     3 runs   (    0.41 ms per token,  2421.31 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     296.82 ms /     3 runs   (   98.94 ms per token,    10.11 tokens per second)
llama_print_timings:       total time =     310.47 ms


In [95]:


# Ask the family-history question five times. Each answer's text is kept in
# `checks`, and the flag returned by detect_first_degree_relative for that
# answer is kept in `relative_yn`.
family_history_question = "Is there any family history of colorectal cancer? If yes, answer just with the family relationship"

checks, relative_yn = [], []
for i in range(5):
    response = query_engine.query(family_history_question)
    answer_text = response.response
    checks.append(answer_text)

    # The detector yields a (flag, relative) pair for the answer text.
    relative_present, relative = detect_first_degree_relative(answer_text)
    relative_yn.append(relative_present)
    print(f"First-degree relative present: {relative_present}")
    # A falsy `relative` is reported as the literal text 'None'.
    print(f"Identified relative: {relative or 'None'}")

print(checks)
print(relative_yn)



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


First-degree relative present: True
Identified relative: father



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      25.97 ms /    32 runs   (    0.81 ms per token,  1232.05 tokens per second)
llama_print_timings: prompt eval time =    1958.17 ms /    33 tokens (   59.34 ms per token,    16.85 tokens per second)
llama_print_timings:        eval time =    3063.94 ms /    31 runs   (   98.84 ms per token,    10.12 tokens per second)
llama_print_timings:       total time =    5298.15 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


First-degree relative present: True
Identified relative: father



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      24.36 ms /    32 runs   (    0.76 ms per token,  1313.68 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3165.18 ms /    32 runs   (   98.91 ms per token,    10.11 tokens per second)
llama_print_timings:       total time =    3411.96 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


First-degree relative present: True
Identified relative: father



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      24.31 ms /    32 runs   (    0.76 ms per token,  1316.44 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3168.37 ms /    32 runs   (   99.01 ms per token,    10.10 tokens per second)
llama_print_timings:       total time =    3409.14 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


First-degree relative present: True
Identified relative: father



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      13.28 ms /    21 runs   (    0.63 ms per token,  1581.44 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    2097.53 ms /    21 runs   (   99.88 ms per token,    10.01 tokens per second)
llama_print_timings:       total time =    2249.38 ms


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Llama.generate: prefix-match hit


First-degree relative present: True
Identified relative: father
[" Yes, there is a family history of colorectal cancer. The patient's father had colorectal cancer at age 68.", " Yes, there is a family history of colorectal cancer. The patient's father had colorectal cancer at age 68.", " Yes, there is a family history of colorectal cancer. The patient's father had colorectal cancer at age 68.", " Yes, James Freeman's father had colorectal cancer at age 68.", " Yes, there is a family history of colorectal cancer. The patient's father had colorectal cancer at age 68."]
[True, True, True, True, True]



llama_print_timings:        load time =   13681.85 ms
llama_print_timings:      sample time =      19.47 ms /    32 runs   (    0.61 ms per token,  1643.72 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3220.88 ms /    32 runs   (  100.65 ms per token,     9.94 tokens per second)
llama_print_timings:       total time =    3413.14 ms


5

In [85]:
# Hard-coded sample answers: four copies of the generic phrasing and one
# that names the patient, in third position.
_generic_answer = " Yes, there is a family history of colorectal cancer. The patient's father had colorectal cancer at age 68."
_named_answer = " Yes, James Freeman's father had colorectal cancer at age 68."
checks = [
    _generic_answer,
    _generic_answer,
    _named_answer,
    _generic_answer,
    _generic_answer,
]

# Example usage: take the last of the five answers as the text to examine.
# NOTE(review): the captured output under this cell shows a first-degree-relative
# result, but no call producing it is visible in the cell — confirm nothing was dropped.
text = checks[-1]


First-degree relative present: True
Identified relative: father
