# In [None]:
import openai
import sounddevice as sd
import numpy as np
import webrtcvad
import scipy.io.wavfile as wav
import os
from pydub import AudioSegment
from pydub.playback import play
import sounddevice as sd
import webrtcvad
import numpy as np
import soundfile as sf
import csv
import uuid
import pyaudio
import wave
import re


# Shared OpenAI client used by every API call in this file. The key is taken
# from the OPENAI_API_KEY environment variable so a real secret never has to
# be written into the source; the original placeholder stays as the fallback.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "xxxxxx"))


def generate_unique_id():
    """Return a freshly generated random (version 4) UUID in its string form."""
    fresh_uuid = uuid.uuid4()
    return f"{fresh_uuid}"


def speech_to_text(audio_file_path):
    """Transcribe the audio file at ``audio_file_path`` with the "whisper-1" model.

    Returns the transcription text. If opening the file or the API call raises,
    the error is printed and ``None`` is returned instead.
    """
    try:
        with open(audio_file_path, "rb") as source:
            result = client.audio.transcriptions.create(model="whisper-1", file=source)
            transcript = result.text
    except Exception as err:
        print(f"Error in speech to text conversion: {err}")
        return None
    return transcript


def text_to_speech(question, speech_file_path):
    """Speak ``question`` aloud and keep the generated audio on disk.

    The text is synthesised with the "tts-1" model and the "alloy" voice,
    written to ``speech_file_path``, then loaded back as an MP3 and played.

    Returns:
        ``speech_file_path``, unchanged.
    """
    synthesized = client.audio.speech.create(model="tts-1", voice="alloy", input=question)
    synthesized.stream_to_file(speech_file_path)

    # Load the file that was just written and play it through pydub.
    play(AudioSegment.from_mp3(speech_file_path))
    return speech_file_path


def _capture_until_silence(stream, vad, chunk, fs, frame_duration, vad_timeout):
    """Read frames from ``stream`` until the silence threshold is hit or an error occurs."""
    frames = []
    silent_runs = 0
    try:
        # Loop-invariant: number of consecutive non-speech frames that ends the recording.
        silence_limit = vad_timeout * (1000 // frame_duration)
        while True:
            data = stream.read(chunk, exception_on_overflow=False)
            frames.append(data)

            # A frame with speech resets the silence counter; anything else extends it.
            if vad.is_speech(data, fs):
                silent_runs = 0
            else:
                silent_runs += 1

            if silent_runs >= silence_limit:
                break
    except Exception as e:
        # Best effort: keep whatever was captured before the failure.
        print(f"Error processing audio: {e}")
    return frames


def record_response_with_vad(filename="output.wav", fs=16000, frame_duration=30, vad_timeout=3):
    """Record microphone audio until a run of silence is detected, then save it as WAV.

    Audio is captured as 16-bit mono in frames of ``frame_duration`` milliseconds,
    and every frame is passed to a WebRTC voice-activity detector (mode 1).
    Recording ends once ``vad_timeout * (1000 // frame_duration)`` consecutive
    frames contain no speech, or as soon as reading/classifying a frame raises
    (the error is printed and the frames captured so far are still saved).

    Args:
        filename: Path of the WAV file to write.
        fs: Sample rate in Hz.
        frame_duration: Length of each analysed frame in milliseconds.
        vad_timeout: Approximate seconds of continuous silence that end the recording.

    Returns:
        ``filename``, after the captured frames have been written to it.
    """
    chunk = int(fs * frame_duration / 1000)  # Samples per frame
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1

    vad = webrtcvad.Vad()
    vad.set_mode(1)

    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    try:
        # Query the sample width while the PortAudio session is still alive.
        sample_width = p.get_sample_size(sample_format)

        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            print('Recording')
            frames = _capture_until_silence(stream, vad, chunk, fs, frame_duration, vad_timeout)
            print('Finished recording')
        finally:
            # Always release the microphone, even if recording was interrupted.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded data as a WAV file; the context manager closes it on error too.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

    return filename


def _classify_confirmation(reply):
    """Return True for an affirmative reply, False for a negative one, None if unclear."""
    affirmative_words = ("yes", "correct", "right", "exactly", "that's right", "absolutely", "sure")
    negative_words = ("no", "incorrect", "wrong", "not really", "not exactly", "not quite")

    def mentions(phrases):
        # Whole-word matching so "know" is not read as "no" and "incorrect" is
        # not read as "correct".
        return any(re.search(rf"\b{re.escape(phrase)}\b", reply) for phrase in phrases)

    # Negatives are tested first: phrases such as "not exactly" contain an
    # affirmative word and must not be mistaken for a confirmation.
    if mentions(negative_words):
        return False
    if mentions(affirmative_words):
        return True
    return None


def _record_reply(wav_path):
    """Record one spoken reply into ``wav_path``, transcribe it, delete the file, return lower-cased text."""
    recorded_file = record_response_with_vad(wav_path)
    try:
        transcript = speech_to_text(recorded_file)
    finally:
        os.remove(recorded_file)
    # speech_to_text returns None on failure; treat that as an empty reply.
    return (transcript or "").lower()


def confirm_response(question, patient_response, interpreted_response, question_type, writer, run_id):
    """Read the interpreted answer back to the patient and log the outcome.

    The patient is asked aloud whether ``interpreted_response`` is correct.
      * Affirmative reply: the answer is written to ``writer`` and returned.
      * Negative reply: the rejected interpretation is logged, the patient is
        asked to answer again, the new answer is re-interpreted with
        ``process_response`` and the confirmation is repeated.
      * Anything else (including a failed transcription): a
        "Could not confirm response" row is written.

    Args:
        question: The survey question text.
        patient_response: The patient's transcribed answer.
        interpreted_response: The interpretation to be confirmed.
        question_type: Passed through to ``process_response`` for re-answers.
        writer: Object with a ``writerow`` method receiving each result row.
        run_id: Identifier stored in the first column of every row.

    Returns:
        ``(True, interpreted_response)`` when confirmed, otherwise ``(False, None)``.
    """
    # Iterative rather than recursive so repeated rejections cannot grow the stack.
    while True:
        confirmation_question = f"'{interpreted_response}'. Is this correct? Please answer with 'yes' or 'no'."
        text_to_speech(confirmation_question, "confirmation.mp3")
        verdict = _classify_confirmation(_record_reply("confirmation_response.wav"))

        if verdict is True:
            writer.writerow([run_id, question, patient_response, interpreted_response])  # Record the confirmed response
            return True, interpreted_response

        if verdict is None:
            writer.writerow([run_id, question, patient_response, "Could not confirm response"])  # Record unconfirmed response
            return False, None

        # Negative reply: ask the patient to answer again.
        re_answer_question = "Please provide the correct value or elaborate on your response."
        text_to_speech(re_answer_question, "re_answer.mp3")
        re_answer_response = _record_reply("re_answer_response.wav")

        # Record the initial response and the fact that it was not confirmed
        writer.writerow([run_id, question, patient_response, f"{interpreted_response} (patient does not aggree this score)"])

        # Interpret the new answer and go round again to confirm it.
        patient_response = re_answer_response
        interpreted_response = process_response(question, re_answer_response, question_type)







def process_response(question, response, question_type):
    """Ask the chat model to score or categorise one patient answer.

    For ``question_type == "richards_campbell"`` the prompt asks for an RCSQ
    score and includes the scoring guideline stored for ``question`` (or
    "No scoring info available" when there is none). For any other type the
    prompt asks the model to pick one of the options stored for ``question``
    (or "Appropriate categories" when there are none).

    Returns:
        The message content of the model's first completion choice.
    """
    print(f"Processing response for question: {question}")

    if question_type != "richards_campbell":
        # Allowed categories for the non-RCSQ questions, keyed by question text.
        category_options = {
            "Was your room too hot or too cold last night?":
                "'Too hot', 'Too cold', 'Just right'",
            "Was there too much light in your room last night?":
                "'Too much light', 'Adequate light', 'Not enough light'",
            "Would an eye mask, ear plugs, or a white noise machine have helped you sleep better?":
                "'Eye mask would help.', 'Ear plugs would help.', 'White noise machine would help.', 'Patient just said Yes.', 'No aids needed.'",
        }
        options = category_options.get(question, "Appropriate categories")
        prompt_lines = [
            "Task: Categorize the patient's response based on the question.",
            f"Question: '{question}'",
            f"Patient's Response: '{response}'",
            f"Options: {options}",
            "Please analyze the patient's response and categorize it based on the given options. You must choose it from the given options, no other options are allowed.",
        ]
    else:
        # Scoring guidelines for the Richards-Campbell Sleep Questionnaire, keyed by question text.
        rcsq_guidelines = {
            "How would you rate the depth of your sleep last night, from deep sleep(100) to light sleep(0)?":
                "Sleep depth: 0 for very light sleep, 25 for light sleep, 50 for neutral, 75 for moderately deep, 100 for deep sleep",
            "How quickly did you fall asleep last night? Did you fall asleep almost immediately(100) or just never could fall asleep(0)?":
                "Sleep latency: 0 for never really fell asleep, 25 for took long, 50 for after a while, 75 for quite quickly, 100 for immediately",
            "How often did you wake up last night? Were you awake very little(100) or awake all night long(0)?":
                "Awakenings: 0 for almost all night, 25 for frequently, 50 for several times, 75 for once or twice, 100 for not at all",
            "When you woke up last night, how easily could you get back to sleep? Did you get back to sleep immediately(100) or couldn’t get back to sleep at all(0)?":
                "Returning to sleep: 0 for couldn’t get back to sleep, 25 for with difficulty, 50 for after some time, 75 for fairly quickly, 100 for immediately",
            "How would you describe your sleep quality last night? Was it a good night’s sleep(100) or a bad night’s sleep(0)?":
                "Sleep quality: 0 for very poor, 25 for poor, 50 for fair, 75 for good, 100 for excellent",
        }
        score_info = rcsq_guidelines.get(question, "No scoring info available")
        print(f"Scoring details: {score_info}")
        prompt_lines = [
            "Task: Score a single question from the Richards-Campbell Sleep Questionnaire (RCSQ) based on the patient's response.",
            f"Question from RCSQ: '{question}'",
            f"Patient's Response: '{response}'",
            f"Scoring Guidelines for This Question: {score_info}",
            "Please analyze the patient's response to this specific question and provide the corresponding RCSQ score for this specific question.",
        ]

    message_content = "\n".join(prompt_lines)

    system_message = {
        "role": "system",
        "content": "You are an assistant helping to analyze and categorize sleep survey responses. Assuming you are speaking face to face with a patient. Do not repeat the question, be brief and clear, and do not add limitations or unnecessary details. ",
    }
    user_message = {"role": "user", "content": message_content}
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=[system_message, user_message],
    )

    # Echo the prompt and the model's reply for debugging.
    print(message_content)
    answer = completion.choices[0].message.content
    print(f"Completion: {answer}")
    return answer


def main():
    """Run the spoken sleep survey once and append the results to ``qa_pairs.csv``.

    For each question the text is spoken aloud, the patient's answer is
    recorded and transcribed, interpreted with ``process_response`` and then
    confirmed with ``confirm_response`` (which writes the result rows). When a
    Richards-Campbell answer is confirmed, the first integer in it counts
    towards the average score printed at the end.
    """
    questions = [
        {"text": "How would you rate the depth of your sleep last night, from deep sleep(100) to light sleep(0)?", "type": "richards_campbell"},
        {"text": "How quickly did you fall asleep last night? Did you fall asleep almost immediately(100) or just never could fall asleep(0)?", "type": "richards_campbell"},
        {"text": "How often did you wake up last night? Were you awake very little(100) or awake all night long(0)?", "type": "richards_campbell"},
        {"text": "When you woke up last night, how easily could you get back to sleep? Did you get back to sleep immediately(100) or couldn’t get back to sleep at all(0)?", "type": "richards_campbell"},
        # The comma after the next entry was missing, which made the file a SyntaxError.
        {"text": "How would you describe your sleep quality last night? Was it a good night’s sleep(100) or a bad night’s sleep(0)?", "type": "richards_campbell"},
        {"text": "Was your room too hot or too cold last night?", "type": "other"},
        {"text": "Was there too much light in your room last night?", "type": "other"},
        {"text": "Would an eye mask, ear plugs, or a white noise machine have helped you sleep better?", "type": "other"},
    ]
    run_id = generate_unique_id()
    csv_file_path = 'qa_pairs.csv'
    total_score = 0
    rc_question_count = 0

    # Headers are only written when the CSV file does not exist yet.
    write_headers = not os.path.isfile(csv_file_path)

    with open(csv_file_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter='\t')
        if write_headers:
            writer.writerow(['Run ID', 'Question', 'Patient Response', 'Answer'])

        for index, q in enumerate(questions):
            question, question_type = q["text"], q["type"]
            speech_file_path = f"{question_type}_{index}_question.mp3"
            text_to_speech(question, speech_file_path)
            response_file = record_response_with_vad()
            patient_response = speech_to_text(response_file)
            os.remove(response_file)

            if patient_response is None:
                # Transcription failed: log it instead of asking the model to
                # score the literal text "None".
                writer.writerow([run_id, question, "", "Could not transcribe response"])
                continue

            categorized_response = process_response(question, patient_response, question_type)
            confirmed, interpreted_response = confirm_response(question, patient_response, categorized_response, question_type, writer, run_id)

            if question_type == "richards_campbell" and confirmed:
                # The first standalone integer in the confirmed answer is the score.
                match = re.search(r'\b(\d+)\b', interpreted_response or "")
                if match:
                    total_score += int(match.group(1))
                    rc_question_count += 1

    # Calculate the average score after processing all questions
    if rc_question_count > 0:
        average_score = total_score / rc_question_count
        print(f"Average RCSQ score: {average_score}")
    else:
        print("No confirmed Richards-Campbell questions.")

# Run the survey only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

