In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Pull variables defined in a local .env file into the process environment.
load_dotenv()

# The key is taken from the API_KEY environment variable; os.getenv yields
# None when that variable is not set.
api_key = os.getenv("API_KEY")

# Single OpenAI client shared by every request made in this notebook.
client = OpenAI(api_key=api_key)

# Folder names used by the cells below.
summary_folder = "summaries"  # root folder for generated summaries
transcript_folder_path = "est_asr_transcripts"  # folder handed to read_single_transcript
prompt_folder = "prompts"  # folder handed to read_prompt
clean_folder = "parandatud"  # per-patient subfolder scanned for clean transcripts

In [None]:
def get_summary_instructions(instruction_prompt, transcript):
    """
    Ask the chat model for a summary of a transcript.

    Parameters:
    instruction_prompt: Text sent as the system message.
    transcript: Transcript text embedded in the user message.

    Returns:
    The message content of the first choice in the response.
    """
    system_message = {"role": "system", "content": f"{instruction_prompt}"}
    user_message = {
        "role": "user",
        "content": f"Here is the transcript:\n{transcript}\n\nPlease provide a summary.",
    }

    # The same near-zero temperature/top_p and fixed seed are sent on every call.
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        temperature=0.0000001,
        top_p=0.0000001,
        seed=1234,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

def read_prompt(folder_path, prompt_number):
    """
    Load the text of ``prompt<prompt_number>.txt`` from ``folder_path``.

    Parameters:
    folder_path (str): Folder that holds the prompt files.
    prompt_number: Number inserted into the file name.

    Returns:
    The file content with surrounding whitespace stripped and every newline
    character removed (lines are joined without any separator).

    Raises:
    FileNotFoundError: If the prompt file does not exist.
    """
    prompt_file = f"prompt{prompt_number}.txt"
    prompt_path = os.path.join(folder_path, prompt_file)

    # Guard clause: bail out early when the prompt file is missing.
    if not os.path.exists(prompt_path):
        raise FileNotFoundError(f"Error: {prompt_file} not found in {folder_path}")

    with open(prompt_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    return raw_text.strip().replace("\n", "")
    

def read_single_transcript(folder_path, doctor, patient):
    """
    Read the ASR transcript for one doctor/patient pair.

    Parameters:
    folder_path (str): Folder that holds the transcript files.
    doctor (int): Doctor number; zero-padded to two digits in the file name.
    patient (int): Patient number; zero-padded to two digits in the file name.

    Returns:
    The transcript text with leading and trailing whitespace stripped.

    Raises:
    FileNotFoundError: If the transcript file does not exist. Raising (instead
        of returning an error string) keeps an error message from being passed
        on to the model as if it were a transcript.
    """
    # Format the numbers to ensure they are two digits (e.g., 01, 02, ..., 10)
    file_name = f"arsti_salvestus_orig_{doctor:02}_{patient:02}-est-asr-transcript.txt"
    file_path = os.path.join(folder_path, file_name)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: {file_name} not found in {folder_path}")

    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read().strip()
    

def save_summary(summary, folder_path, doctor, patient, prompt_number, letter):
    """
    Write a summary to a UTF-8 text file and print where it was saved.

    Parameters:
    summary (str): Text to write.
    folder_path (str): Existing folder that receives the file.
    doctor (int): Doctor number; zero-padded to two digits in the file name.
    patient (int): Patient number; zero-padded to two digits in the file name.
    prompt_number: Prompt identifier placed in the file name.
    letter: Suffix placed at the end of the file name.
    """
    output_file_name = (
        f"arst_{doctor:02}_patsient_{patient:02}"
        f"_kokkuvõte_prompt_{prompt_number}_{letter}.txt"
    )
    target = os.path.join(folder_path, output_file_name)

    # Mode 'w' replaces any file that already has this name.
    with open(target, 'w', encoding='utf-8') as out:
        out.write(summary)

    print(f"Summary saved to {target}")


def create_directory(path, folder_name):
    """
    Make sure the directory ``folder_name`` exists under ``path`` and report the outcome.

    Parameters:
    path (str): Base location under which the directory is created (may be relative).
    folder_name (str): Directory name, or nested relative path, to create below ``path``.

    Failures are printed rather than raised; nothing is returned.
    """
    target_dir = os.path.join(path, folder_name)
    try:
        # exist_ok=True: an already existing directory is not treated as an
        # error, so the success message is printed in that case too.
        os.makedirs(target_dir, exist_ok=True)
        print(f"Directory '{target_dir}' created successfully.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{target_dir}'.")
    except Exception as error:
        print(f"An error occurred: {error}")


def get_original_doctor_summary(doctor, patient):
    """
    Return the text of the doctor's original summary for one patient.

    The file is looked up relative to the current working directory, at
    ``Arst_<doctor>/Patsient_<patient>/toorfailid/`` (numbers padded to three
    digits in the folder names and to two digits in the file name).

    Parameters:
    doctor (int): Doctor number.
    patient (int): Patient number.

    Returns:
    The file content exactly as read (no stripping).

    Raises:
    FileNotFoundError: If the summary file does not exist.
    """
    summary_path = f"Arst_{doctor:03}/Patsient_{patient:03}/toorfailid/arsti_kokkuvote_orig_{doctor:02}_{patient:02}.txt"

    if os.path.exists(summary_path):
        with open(summary_path, "r", encoding="utf-8") as handle:
            content = handle.read()
        return content

    raise FileNotFoundError(f"File not found: {summary_path}")


## Generating with the cross-validation prompt

In [None]:
def make_prompt(prompt, examples, prompt_folder="prompts"):
    """
    Build an instruction prompt followed by numbered examples.

    Parameters:
    prompt: Prompt number forwarded to read_prompt.
    examples: Iterable of example texts appended after the base prompt.
    prompt_folder (str): Folder forwarded to read_prompt.

    Returns:
    The base prompt followed by one "\\nExample <n>: <text>" segment per
    example, numbered from 1.
    """
    base_prompt = read_prompt(prompt_folder, prompt)
    example_segments = [
        f"\nExample {position}: {text}"
        for position, text in enumerate(examples, 1)
    ]
    return base_prompt + "".join(example_segments)

In [None]:
# Cross-validation run for prompt 4: every patient is summarised with a prompt
# that carries the same doctor's original summaries for the *other* patients
# as examples. One summary file is produced per letter.
letters = ['a','b','c','d','e','f','g','h','i','j']
prompt_index = 4  # constant for this cell, so set once instead of per doctor

for doctor_index in range(1,11):
    # Load this doctor's original summaries once; they are reused for every patient.
    examples = [
        get_original_doctor_summary(doctor_index, example_patient)
        for example_patient in range(1,11)
    ]

    for patient_index in range(1,11):
        print(f"Processing doctor: {doctor_index}, patient {patient_index}.")

        # Leave the current patient's own summary out of the examples.
        remaining_examples = examples[:patient_index-1] + examples[patient_index:]
        prompt_text = make_prompt(prompt_index, remaining_examples, prompt_folder)

        # Output folder and transcript do not depend on the letter, so they are
        # prepared once per patient rather than once per generated summary.
        new_folder_path = f"arst_{doctor_index}_patsient_{patient_index}/kokkuvõtted/prompt_{prompt_index}"
        create_directory(summary_folder, new_folder_path)
        save_path = f"{summary_folder}/{new_folder_path}"
        transcript = read_single_transcript(transcript_folder_path, doctor_index, patient_index)

        for letter in letters:
            summary = get_summary_instructions(prompt_text, transcript)
            save_summary(summary, save_path, doctor_index, patient_index, prompt_index, letter)

## Generating with other prompts

In [None]:
# Generate one summary per letter for every doctor/patient pair with prompts
# 1-5, except prompt 4, which this cell skips.
letters = ['a','b','c','d','e','f','g','h','i','j']

for doctor_index in range(1,11):
    for patient_index in range(1,11):
        # The transcript is the same for every prompt and letter: read it once.
        transcript = read_single_transcript(transcript_folder_path, doctor_index, patient_index)

        for prompt_index in range(1,6):
            if prompt_index == 4:
                continue
            print(f"Processing doctor: {doctor_index}, patient: {patient_index}, prompt:{prompt_index}.")

            # Output folder and prompt text do not depend on the letter, so they
            # are prepared once per prompt rather than once per generated summary.
            new_folder_path = f"arst_{doctor_index}_patsient_{patient_index}/kokkuvõtted/prompt_{prompt_index}"
            create_directory(summary_folder, new_folder_path)
            save_path = f"{summary_folder}/{new_folder_path}"
            instruction_prompt = read_prompt(prompt_folder, prompt_index)

            for letter in letters:
                summary = get_summary_instructions(instruction_prompt, transcript)
                save_summary(summary, save_path, doctor_index, patient_index, prompt_index, letter)
            

## Generate summaries with the clean transcript

### Other Prompts

In [None]:
# Summarise each clean transcript with prompts 1-5 (prompt 4 is skipped in
# this cell). Results are saved with the suffix "puhas".
for doctor_index in range(1,11):
    for patient_index in range(1,11):
        print(f"Processing doctor: {doctor_index}, patient {patient_index}.")
        directory = f"Arst_{doctor_index:03}/Patsient_{patient_index:03}"
        path = f"{directory}/{clean_folder}"

        if not os.path.exists(path):
            print(f"Could not find path: {path}")
            continue

        for filename in os.listdir(path):
            lowered_name = filename.lower()
            if "parandatud" not in lowered_name or not lowered_name.endswith(".txt"):
                continue

            file_path = os.path.join(path, filename)

            # Read the transcript first and close the file before any API call,
            # so the handle is not held open during the requests.
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    transcript = file.read()
            except Exception as e:
                print(f"Error reading file '{filename}': {e}")
                continue

            # Generation failures are reported separately from read failures.
            # As before, an error stops the remaining prompts for this file and
            # processing moves on to the next file.
            try:
                for prompt_index in range(1,6):
                    if prompt_index == 4:
                        continue
                    new_folder_path = f"arst_{doctor_index}_patsient_{patient_index}/kokkuvõtted/prompt_{prompt_index}"
                    create_directory(summary_folder, new_folder_path)
                    save_path = f"{summary_folder}/{new_folder_path}"

                    instruction_prompt = read_prompt(prompt_folder, prompt_index)
                    summary = get_summary_instructions(instruction_prompt, transcript)
                    # NOTE(review): the output name does not include the source
                    # file name, so several matching files in one folder would
                    # overwrite each other's "puhas" summary - confirm there is
                    # only one per patient.
                    save_summary(summary, save_path, doctor_index, patient_index, prompt_index, "puhas")
            except Exception as e:
                print(f"Error generating summary for file '{filename}': {e}")

### Cross-validation prompt

In [None]:
# Summarise clean transcripts with the cross-validation prompt (prompt 4):
# the prompt carries the same doctor's original summaries for the other
# patients as examples. Results are saved with the suffix "puhas".
# NOTE(review): only doctors 5-6 and patient 10 are processed here, unlike the
# other cells, which cover 1-10 - this looks like a partial re-run; confirm
# before relying on it for a full regeneration.
prompt_index = 4

for doctor_index in range(5,7):
    # Load this doctor's original summaries once; they are reused for every patient.
    examples = [
        get_original_doctor_summary(doctor_index, example_patient)
        for example_patient in range(1,11)
    ]

    for patient_index in range(10,11):
        print(f"Processing doctor: {doctor_index}, patient {patient_index}.")
        directory = f"Arst_{doctor_index:03}/Patsient_{patient_index:03}"
        path = f"{directory}/{clean_folder}"

        # Leave the current patient's own summary out of the examples.
        remaining_examples = examples[:patient_index-1] + examples[patient_index:]
        prompt_text = make_prompt(prompt_index, remaining_examples, prompt_folder)

        if not os.path.exists(path):
            print(f"Could not find path: {path}")
            continue

        for filename in os.listdir(path):
            lowered_name = filename.lower()
            if "parandatud" not in lowered_name or not lowered_name.endswith(".txt"):
                continue

            file_path = os.path.join(path, filename)

            # Read the transcript first and close the file before the API call,
            # so the handle is not held open during the request.
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    transcript = file.read()
            except Exception as e:
                print(f"Error reading file '{filename}': {e}")
                continue

            # Generation failures are reported separately from read failures.
            try:
                new_folder_path = f"arst_{doctor_index}_patsient_{patient_index}/kokkuvõtted/prompt_{prompt_index}"
                create_directory(summary_folder, new_folder_path)
                save_path = f"{summary_folder}/{new_folder_path}"
                summary = get_summary_instructions(prompt_text, transcript)
                save_summary(summary, save_path, doctor_index, patient_index, prompt_index, "puhas")
            except Exception as e:
                print(f"Error generating summary for file '{filename}': {e}")