In [1]:
import os, sys
import random
import time

from openai import OpenAI
from pathlib import Path
import glob
import json

import pandas as pd
import numpy as np
import collections
from collections import defaultdict


In [2]:
# Import Langchain and Ollama
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate

model1 = OllamaLLM(base_url="localhost:11435", 
                  model="llama3.1:70b-instruct-q4_0", 
                  temperature=0.5, 
                  num_ctx = 6144,
                  top_k = 40,
                  top_p = 0.95)

In [3]:
def get_llama_response(messages_list):
    '''obtain response from llama'''

    response = model1.invoke(messages_list)

    return response

In [4]:
def get_coverage(coverage_prompt):
    
    # Call the local LLaMA model instead of OpenAI's API
    content = get_llama_response(coverage_prompt)
    
    try:
        specificity_val = int(content.split("### Score")[-1][:4].strip())
    except ValueError:
        specificity_val = np.NaN
    
    return content, specificity_val

In [5]:
import sys, os, csv, ast, random, time, re, json
import pandas as pd
import re
from pathlib import Path

patients_path = Path(r"C:\Users\aleynaw\Desktop\transcript_generation-main\patient_creation\llm_patients.csv")
patients_df = pd.read_csv(patients_path,delimiter="|")
# print(patients_df)

parent_directory = Path.cwd().resolve().parent.parent.parent.parent
print(f"Parent Directory: {parent_directory}")

sys.path.append(str(parent_directory))

try:
    sys.path.append('../../transcript_generation')
    from transcript_generation import helper_fns as helper
except ModuleNotFoundError:
    import helper_fns as helper

    
patients_list = []
        
for i, patient in patients_df.iterrows():
    
    if "Edge Case Scenario" in patient:  # if there is an edge case
        edge_case = patient["Edge Case Scenario"]
        ignore_keys = ["Clinician Name", "Appointment Date", 'Conversational Tone', 'Reason for Appointment']
    else:  # if there is no edge case
        edge_case = ""
        ignore_keys = ["Clinician Name", "Appointment Date", 'Conversational Tone', "Edge Case Scenario", 'Reason for Appointment']

    # print(patient)
    patient_dict = patient.dropna().to_dict()
    patient=patient_dict
    patient_string = helper.patient_to_str(patient, ignore_keys)
    
    patients_list.append(patient_string)
    
print(len(patients_list))
# print(patients_list[0])

Parent Directory: C:\Users\aleynaw\Desktop\transcript_generation-main
15


In [6]:
def read_txt_files(folder_path):
    text_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]  # Get all .txt files
    text_contents = []

    for file in text_files:
        file_path = os.path.join(folder_path, file)
        with open(file_path, 'r', encoding='utf-8') as f:
            text_contents.append(f.read())  # Read and store the content as a string

    return text_contents

In [7]:
# Define a function to load messages by role from JSON
def load_filtered_messages(filepath, role):
    with open(filepath, "r") as f:
        conversation = json.load(f)
    
    # Debugging: Print structure of conversation
    print(f"Loaded conversation from {filepath}:")
    
    if isinstance(conversation, list) and all(isinstance(msg, dict) for msg in conversation):
        return [msg["content"] for msg in conversation if msg.get("role") == role]
    else:
        raise ValueError(f"Unexpected format in {filepath}: conversation should be a list of dictionaries.")

In [8]:
def replace_all(text, dic):
    for i, j in dic.items():
        text = text.replace(i, str(j))
    return text

In [9]:
interview_path = r"C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM"
interviews = read_txt_files(interview_path)

summary_path = r"C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\Summaries"
summaries = read_txt_files(summary_path)
    
questions_path = r"C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\prompts\questions_test\Analysis\questionbank_chunked.txt"
questions = Path(questions_path).read_text()

filepath = interview_path
# Check the files in the `filepath` directory
print(f"Checking files in {filepath}...")
json_files = list(Path(filepath).glob("*.json"))
print(f"Found {len(json_files)} JSON files.")

for index, json_file in enumerate(json_files):
    start_time = time.time()  # Start timing the evaluation for this file
    try:
        # Load messages from the JSON file
        questions_asked = load_filtered_messages(json_file, "assistant")
        patient_responses = load_filtered_messages(json_file, "user")
        
    except Exception as e:
        print(f"Error processing {json_file.name}: {e}")

# Score for summary coverage
coverage_prompt_patInfo_SummaryEval = '''
    Score the following typed interview summary with respect to coverage of the Patient Information given the questions asked during the interview on a continuous scale from 0 to 100, where a score of zero means "no coverage" and a score of one hundred means "perfect coverage". 

    Coverage measures whether or not characteristic content from the Patient Information that was asked about in the questions is addressed in the Interview Summary, but **do not penalize for missing information unless a related question was explicitly asked**. If the patient was not asked a question about a specific characteristic in the Patient Information, you MUST ignore that characteristic in the scoring process. 

    For example:
    - If "Previous Hospitalization" is marked as "no" in Patient Information but a detailed history is reported in the Interview Summary, this should be treated as no coverage.
    - If "Allergies" contains two allergens in Patient Information but only one is reported in the Interview Summary, this should be treated as incomplete coverage, **but only if a question was asked about allergies**.
    - If "Medication History" is missing in the Interview Summary, but no relevant question was asked, do not penalize for missing information.

    Score this based only on characteristics covered in the Interview Summary that are relevant to the questions asked. Ignore Reason for Appointment, Typing Style, Personality Traits, and Conversational Tone.

    Reason through your thought process and output your score at the VERY END after a "### Score" header. (e.g., ### Score\nXX) This should be the last output.

    Patient Information:
    {patient_info}

    Interview Summary: 
    {summary}

    Questions Asked:
    {questions_asked}
    '''

# Score for Patient Info coverage in Patient Responses
coverage_prompt_patInfo_patMsgs = '''
    Score the following interview with respect to coverage of the Patient Information based on questions asked on a continuous scale from 0 to 100, where a score of zero means "no coverage" and a score of one hundred means "perfect coverage". 

    Coverage measures whether or not characteristic content from the Patient Information that was asked for is answered in the interview, but **do not penalize for missing information unless a related question was explicitly asked**. If the patient was not asked a question about a specific characteristic in the Patient Information, you MUST ignore that characteristic in the scoring process. 

    For example:
    - If "Previous Hospitalization" is marked as "no" in Patient Information but a detailed history is reported in the interview, this should be treated as no coverage.
    - If "Allergies" contains two allergens in Patient Information but only one is reported in the interview, this should be treated as incomplete coverage, **but only if a question was asked about allergies**.
    - If "Medication History" is missing in the interview, but no relevant question was asked, do not penalize for missing information.

    Score this based only on characteristics covered in the interview that are relevant to the questions asked. Ignore Reason for Appointment, Typing Style, Personality Traits, and Conversational Tone.

    Reason through your thought process and output your score at the VERY END after a "### Score" header. (e.g., ### Score\nXX) This should be the last output.

    Patient Information:
    {patient_info}

    interview: 
    {interview}

    '''
# Score for Question coverage in Assistant's asked questions
coverage_prompt_asstInt_Questions = '''
    Score the following interview with respect to coverage of the Question Bank on a continuous scale from 0 to 100, where a score of zero means "no coverage" and a score of one hundred means "perfect coverage". 

    Note that coverage measures whether or not each question in the Question Bank is asked in the Interview. For every question in the Question Bank, label it as "Asked" or "Not asked" based on the Interview Messages. State message number to provide evidence for that label (e.g., "- birth complications - Asked (8)"). Use these labels to assign a score.
    Do not penalize for not asking a follow-up question if the response to the original question makes it not applicable. For example, if they patient answered "no" to having siblings, do not penalize for the interviewer not asking details about said siblings, as it is no longer applicable.

    Output your score at the VERY END after a "### Score" header. (e.g., ### Score\nXX)

    Question Bank:
    {questions}

    Interview: 
    {interview}
    '''


Checking files in C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM...
Found 15 JSON files.
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM\DM_20241112-142903_Interview.json:
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM\DM_20241112-142903_Interview.json:
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM\DM_20241112-144659_Interview.json:
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM\DM_20241112-144659_Interview.json:
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\DM\DM_20241112-150652_Interview.json:
Loaded conversation from C:\Users\aleynaw\Desktop\transcript_generatio

In [10]:
out_dir = r"C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\Metrics"

# Define output CSV file path
# date_str = datetime.now().strftime("%Y-%m-%d")
coverage_csv_path = Path(out_dir, f"coverage_scores.csv")

# Write headers for the CSV file
with open(coverage_csv_path, mode="w", newline="") as cov_csv:
    csv_writer = csv.writer(cov_csv)
    csv_writer.writerow(["Interview", "Summary Content", "Summary Score", "PatInfo Content", "PatInfo Score", "Question Content", "Question Score"])
    print(f"Headers written successfully to {coverage_csv_path}.")
    

for i, pat in enumerate(patients_list):
    # print(i)
    patient_info = patients_list[i]
    interview = interviews[i]
    summary = summaries[i]
    
    # Hydrate Prompts
    
    hydrate = { r"{patient_info}": patient_info, r"{summary}": summary, r"{questions_asked}": questions_asked, r"{interview}": interview, r"{questions}": questions}
    coverage_prompt_patInfo_SummaryEval = replace_all(coverage_prompt_patInfo_SummaryEval, hydrate)
    coverage_prompt_patInfo_patMsgs = replace_all(coverage_prompt_patInfo_patMsgs, hydrate)
    coverage_prompt_asstInt_Questions = replace_all(coverage_prompt_asstInt_Questions, hydrate)
    
    # Summary coverage
    SumContent, SumScore = get_coverage(coverage_prompt_patInfo_SummaryEval)
    print("Content: ", SumContent)
    print("Score: ", SumScore)
    # Patient Info coverage
    PatContent, PatScore = get_coverage(coverage_prompt_patInfo_patMsgs)
    print("Content: ", PatContent)
    print("Score: ", PatScore)
    # Question coverage
    QContent, QScore = get_coverage(coverage_prompt_asstInt_Questions)
    print("Content: ", QContent)
    print("Score: ", QScore)
    
    csv_writer.writerow([f"Interview {i+1}", SumContent, SumScore, PatContent, PatScore, QContent, QScore])

Headers written successfully to C:\Users\aleynaw\Desktop\transcript_generation-main\transcript_generation\transcripts\llama3.1\Metrics\coverage_scores.csv.
Content:  ### Score

I would score this interview summary an 82 out of 100 in terms of coverage of both the Patient Information and the questions asked during the interview.

The interview summary covers most of the important characteristics from the Patient Information, including full name, preferred name, date of birth, sex, handedness, current city/town of residence, marital status, employment status, disability assistance status, current doctors, allergies, current medication use, health supplements, frequency of substance use, health conditions/diagnoses, previous hospitalizations/surgeries, head injury history, seizure history, family history of psychiatric conditions, birthplace, citizenship status, childhood development, work history, relationship history, hobbies, and stress management techniques.

However, there are a few 

ValueError: I/O operation on closed file.