In [2]:
import os
import random
import json
import pandas as pd
import numpy as np
import spacy
from sklearn.feature_extraction.text import CountVectorizer




In [3]:
dir = os.getcwd()
data_dir = os.path.join(dir, 'data')
os.makedirs(data_dir, exist_ok=True)
output_dir = os.path.join(dir, 'output')
os.makedirs(output_dir, exist_ok=True)

In [4]:
with open(os.path.join(output_dir, "kqa_answers_gpt4_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
    gpt4 = [json.loads(line) for line in jsonl_file]

In [5]:
with open(os.path.join(output_dir, "kqa_answers_gpt4_five_1.jsonl"), 'r', encoding='utf-8') as jsonl_file:
    gpt4_old = [json.loads(line) for line in jsonl_file]

In [6]:
with open(os.path.join(output_dir, "kqa_answers_llama_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
    llama = [json.loads(line) for line in jsonl_file]

In [7]:
with open(os.path.join(output_dir, "kqa_answers_llama_five_1.jsonl"), 'r', encoding='utf-8') as jsonl_file:
    llama_old = [json.loads(line) for line in jsonl_file]

In [8]:
for i in random.sample(llama, 5):
    print(i)
    print("__________________________")

{'Question': 'Is that normal for shoulder pain to come with breathing', 'Free_form_answer': 'If you are experiencing shoulder pain while breathing, it could be due to musculoskeletal problems. However, it could also be linked to issues with your lungs, pleura, abdomen or heart. It is important to consult a doctor for an evaluation. If you experience chest pain or shortness of breath along with your shoulder pain, seek immediate medical attention.', 'Must_have': ['Shoulder pain associated with breathing can be the result of abdominal issues.', ' You should seek immediate medical attention if your shoulder pain is accompanied by shortness of breath.', ' Shoulder pain associated with breathing can be the result of lung and pleura issues.', ' Shoulder pain associated with breathing can be the result of musculoskeletal issues.', ' You should seek immediate medical attention if you have chest pain.', ' Shoulder pain associated with breathing should be evaluated by a doctor.', ' Shoulder pain

# Number of words of GPT-4 and Llama answers

In [9]:
stats = []

for typ in ['gpt4', 'llama', 'physician']:

    key = typ
    if typ == 'physician':
        key = 'gpt4'
    with open(os.path.join(output_dir, f"kqa_answers_{key}_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
        data = [json.loads(line) for line in jsonl_file]
    
    # Load the spaCy English model
    nlp = spacy.load("en_core_web_sm")
    # Process the text with spaCy to segment into sentences
    n_words = []
    for d in data:
        if typ == 'physician':
            doc = nlp(d['Free_form_answer'])
        else:
            doc = nlp(d['answer'])
        n_words.append(len([token for token in doc]))
        if len([token for token in doc]) < 10:
            print(d)
    
    stats.append({'Type': typ,
                  'Mean # Tokens': np.mean(n_words),
                  'Q1 # Tokens': np.percentile(n_words, 25),
                  'Q2 # Tokens': np.percentile(n_words, 75),
                  'Max # Tokens': np.max(n_words),
                  'Min # Tokens': np.min(n_words),})

pd.DataFrame(stats)

Unnamed: 0,Type,Mean # Tokens,Q1 # Tokens,Q2 # Tokens,Max # Tokens,Min # Tokens
0,gpt4,124.74,93.75,136.25,291,43
1,llama,169.59,130.25,213.0,300,68
2,physician,106.36,73.0,122.5,330,25


# Number of sentences in GPT-4 and Llama answers

In [10]:
stats = []

for typ in ['gpt4', 'llama', 'physician']:

    key = typ
    if typ == 'physician':
        key = 'gpt4'
    with open(os.path.join(output_dir, f"kqa_answers_{key}_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
        data = [json.loads(line) for line in jsonl_file]
    
    # Load the spaCy English model
    nlp = spacy.load("en_core_web_sm")
    # Process the text with spaCy to segment into sentences
    n_sentences = []
    for d in data:
        if typ == 'physician':
            doc = nlp(d['Free_form_answer'])
        else:
            doc = nlp(d['answer'])
        n_sentences.append(len([sentence.text for sentence in doc.sents]))
    
    stats.append({'Type': typ,
                  'Mean # Sentences': np.mean(n_sentences),
                  'Q1 # Sentences': np.percentile(n_sentences, 25),
                  'Q2 # Sentences': np.percentile(n_sentences, 75),
                  'Max # Sentences': np.max(n_sentences),
                  'Min # Sentences': np.min(n_sentences),})

pd.DataFrame(stats)

Unnamed: 0,Type,Mean # Sentences,Q1 # Sentences,Q2 # Sentences,Max # Sentences,Min # Sentences
0,gpt4,5.89,4.0,6.0,21,2
1,llama,7.19,5.0,8.0,20,3
2,physician,4.62,3.0,6.0,15,1


# Number of cutoff responses

In [7]:
cutoff_stats = []
for typ in ['gpt4', 'llama']:
    
    for version in ['old', 'new']:
        if version == 'old':
            path = os.path.join(output_dir, f"kqa_answers_{typ}_five_1.jsonl")
        if version == 'new':
            path = os.path.join(output_dir, f"kqa_answers_{typ}_five.jsonl")
    
        with open(path, 'r', encoding='utf-8') as jsonl_file:
            data = [json.loads(line) for line in jsonl_file]
        
        flag = 0
        for i in data:
            if i['answer'][-1] != '.':
                if version == 'new':
                    print(typ, version, i['id'])
                    print(i['answer'], '\n')
                flag += 1
        
        d = {'answer_type': typ,
            'version': version,
            'cutoff responses #': flag
            }
        cutoff_stats.append(d)
    

gpt4 new 59
To measure blood pressure, you will need a blood pressure cuff (sphygmomanometer) and a stethoscope. Here are the steps to correctly measure blood pressure:

1. **Prepare the Patient**: The patient should relax, sitting comfortably with their back supported, legs uncrossed, and not having smoked, exercised, or consumed caffeine within 30 minutes prior to measurement.

2. **Position the Arm**: Arm should be supported on a flat surface at heart level. The upper arm should be bare, with clothing removed or sleeves raised.

3. **Place the Cuff**: Wrap the cuff around the upper arm, making sure it’s snug but not too tight. The lower edge of the cuff should be about an inch above the bend of the elbow.

4. **Position the Stethoscope**: Place the stethoscope’s earpieces in your ears and the diaphragm on the inside of the elbow crease directly over the brachial artery.

5. **Inflate the Cuff**: Inflate the cuff by pumping the bulb until the gauge reads about 20-30 mmHg above usual 

In [4]:
pd.DataFrame(cutoff_stats)

Unnamed: 0,answer_type,version,cutoff responses #
0,gpt4,old,4
1,gpt4,new,1
2,llama,old,6
3,llama,new,6


In [4]:
cutoff_stats = []
for typ in ['gpt4', 'llama']:
    
    for version in ['old', 'new']:
        path = os.path.join(output_dir, f"pilot_{typ}_answers.jsonl")
    
        with open(path, 'r', encoding='utf-8') as jsonl_file:
            data = [json.loads(line) for line in jsonl_file]
        
        flag = 0
        for i in data:
            if i['answer'][-1] != '.':
                if version == 'new':
                    print(typ, version, i['id'])
                    print(i['answer'], '\n')
                flag += 1
        
        d = {'answer_type': typ,
            'version': version,
            'cutoff responses #': flag
            }
        cutoff_stats.append(d)

llama new 146
If you are diagnosed with prostatitis, your treatment will depend on the underlying cause. 

For bacterial prostatitis, you will likely be prescribed antibiotics. The most commonly used antibiotics for bacterial prostatitis are fluoroquinolones, such as ciprofloxacin (Cipro) or levofloxacin (Levaquin), and sulfonamides, such as trimethoprim-sulfamethoxazole (Bactrim or Septra). The duration of treatment can range from 4-12 weeks.

If you have chronic bacterial prostatitis, you may need a longer course of antibiotics, often 3-4 months. In some cases, low-dose antibiotics may be prescribed for an extended period to help manage symptoms.

For non-bacterial prostatitis, also known as chronic pelvic pain syndrome (CPPS), treatment may focus on managing symptoms. Alpha-blockers, such as alfuzosin (Uroxatral) or tamsulosin (Flomax), may be prescribed to help relax the muscles in the prostate and bladder neck, making it easier to urinate. Pain relievers, such as acetaminophen (Ty