In [1]:
import os
import random
import json
import pandas as pd
import numpy as np
import spacy
from sklearn.feature_extraction.text import CountVectorizer




In [2]:
# Anchor everything at the current working directory; `data/` and `output/`
# sit directly beneath it.
# NOTE: the name `dir` shadows the built-in `dir()` for the rest of the session.
dir = os.getcwd()
data_dir, output_dir = (os.path.join(dir, name) for name in ('data', 'output'))

# Create both folders up front; exist_ok=True keeps this cell safe to re-run.
for folder in (data_dir, output_dir):
    os.makedirs(folder, exist_ok=True)

In [11]:
# Current GPT-4 answers: the file holds one JSON object per line.
gpt4_path = os.path.join(output_dir, "kqa_answers_gpt4_five.jsonl")
with open(gpt4_path, mode='r', encoding='utf-8') as handle:
    gpt4 = list(map(json.loads, handle))

In [13]:
# Earlier GPT-4 answers (the `_1` file): one JSON object per line.
gpt4_old_path = os.path.join(output_dir, "kqa_answers_gpt4_five_1.jsonl")
with open(gpt4_old_path, mode='r', encoding='utf-8') as handle:
    gpt4_old = list(map(json.loads, handle))

In [4]:
# Current Llama answers: the file holds one JSON object per line.
llama_path = os.path.join(output_dir, "kqa_answers_llama_five.jsonl")
with open(llama_path, mode='r', encoding='utf-8') as handle:
    llama = list(map(json.loads, handle))

In [5]:
# Earlier Llama answers (the `_1` file): one JSON object per line.
llama_old_path = os.path.join(output_dir, "kqa_answers_llama_five_1.jsonl")
with open(llama_old_path, mode='r', encoding='utf-8') as handle:
    llama_old = list(map(json.loads, handle))

In [None]:
# Eyeball five randomly chosen Llama records, each followed by a separator line.
# No seed is set here, so a different sample is shown on each run.
sampled_records = random.sample(llama, 5)
for record in sampled_records:
    print(record)
    print("__________________________")

# Number of tokens in GPT-4, Llama, and physician answers

In [None]:
# Token-count statistics for each answer source.
# Physician answers are stored in the GPT-4 output file under the
# 'Free_form_answer' field; model answers are stored under 'answer'.

# Load the spaCy English model once; it is the same for every source.
nlp = spacy.load("en_core_web_sm")

stats = []

for typ in ['gpt4', 'llama', 'physician']:

    is_physician = typ == 'physician'
    key = 'gpt4' if is_physician else typ
    text_field = 'Free_form_answer' if is_physician else 'answer'

    with open(os.path.join(output_dir, f"kqa_answers_{key}_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
        data = [json.loads(line) for line in jsonl_file]

    # Count spaCy tokens per answer; len(doc) is the number of tokens.
    n_words = []
    for d in data:
        n_tokens = len(nlp(d[text_field]))
        n_words.append(n_tokens)
        # Print suspiciously short answers (< 10 tokens) for manual inspection.
        if n_tokens < 10:
            print(d)

    # The 25th / 75th percentiles are the first and third quartiles (Q1 / Q3).
    stats.append({'Type': typ,
                  'Mean # Tokens': np.mean(n_words),
                  'Q1 # Tokens': np.percentile(n_words, 25),
                  'Q3 # Tokens': np.percentile(n_words, 75),
                  'Max # Tokens': np.max(n_words),
                  'Min # Tokens': np.min(n_words),})

pd.DataFrame(stats)

# Number of sentences in GPT-4, Llama, and physician answers

In [None]:
# Sentence-count statistics for each answer source.
# Physician answers are stored in the GPT-4 output file under the
# 'Free_form_answer' field; model answers are stored under 'answer'.

# Load the spaCy English model once; it is the same for every source.
nlp = spacy.load("en_core_web_sm")

stats = []

for typ in ['gpt4', 'llama', 'physician']:

    is_physician = typ == 'physician'
    key = 'gpt4' if is_physician else typ
    text_field = 'Free_form_answer' if is_physician else 'answer'

    with open(os.path.join(output_dir, f"kqa_answers_{key}_five.jsonl"), 'r', encoding='utf-8') as jsonl_file:
        data = [json.loads(line) for line in jsonl_file]

    # Process each answer with spaCy and count the sentences it segments.
    n_sentences = []
    for d in data:
        doc = nlp(d[text_field])
        # doc.sents is a generator, so count without materialising the texts.
        n_sentences.append(sum(1 for _ in doc.sents))

    # The 25th / 75th percentiles are the first and third quartiles (Q1 / Q3).
    stats.append({'Type': typ,
                  'Mean # Sentences': np.mean(n_sentences),
                  'Q1 # Sentences': np.percentile(n_sentences, 25),
                  'Q3 # Sentences': np.percentile(n_sentences, 75),
                  'Max # Sentences': np.max(n_sentences),
                  'Min # Sentences': np.min(n_sentences),})

pd.DataFrame(stats)

# Number of cutoff responses

In [7]:
# Count answers that look truncated, per model and per output version.
# Heuristic: an answer is treated as cut off when it does not end with a period.
cutoff_stats = []
for typ in ['gpt4', 'llama']:

    # 'old' is the `_1` file, 'new' is the current file for the same model.
    version_files = {
        'old': f"kqa_answers_{typ}_five_1.jsonl",
        'new': f"kqa_answers_{typ}_five.jsonl",
    }

    for version, filename in version_files.items():
        path = os.path.join(output_dir, filename)

        with open(path, 'r', encoding='utf-8') as jsonl_file:
            data = [json.loads(line) for line in jsonl_file]

        n_cutoff = 0
        for record in data:
            # endswith() also copes with an empty answer, which indexing [-1]
            # would crash on; an empty answer is counted as cut off.
            if not record['answer'].endswith('.'):
                # Only the current version is printed for manual inspection.
                if version == 'new':
                    print(typ, version, record['id'])
                    print(record['answer'], '\n')
                n_cutoff += 1

        cutoff_stats.append({'answer_type': typ,
                             'version': version,
                             'cutoff responses #': n_cutoff})
    

gpt4 new 59
To measure blood pressure, you will need a blood pressure cuff (sphygmomanometer) and a stethoscope. Here are the steps to correctly measure blood pressure:

1. **Prepare the Patient**: The patient should relax, sitting comfortably with their back supported, legs uncrossed, and not having smoked, exercised, or consumed caffeine within 30 minutes prior to measurement.

2. **Position the Arm**: Arm should be supported on a flat surface at heart level. The upper arm should be bare, with clothing removed or sleeves raised.

3. **Place the Cuff**: Wrap the cuff around the upper arm, making sure it’s snug but not too tight. The lower edge of the cuff should be about an inch above the bend of the elbow.

4. **Position the Stethoscope**: Place the stethoscope’s earpieces in your ears and the diaphragm on the inside of the elbow crease directly over the brachial artery.

5. **Inflate the Cuff**: Inflate the cuff by pumping the bulb until the gauge reads about 20-30 mmHg above usual 

In [4]:
# Show the records accumulated in `cutoff_stats` as a table (one row per record).
pd.DataFrame(cutoff_stats)

Unnamed: 0,answer_type,version,cutoff responses #
0,gpt4,old,4
1,gpt4,new,1
2,llama,old,6
3,llama,new,6
