In [1]:
import logging
import os
import pickle
import re

import openai
import pandas as pd


In [2]:
# Route INFO-and-above records to processing.log, each prefixed with a
# timestamp and the record's level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='processing.log',
)


In [3]:
def extract_number(answer):
    """Return the first run of digits found in ``answer`` as an int.

    Args:
        answer: Text to scan for digits.

    Returns:
        The integer value of the first digit run, or None when the text
        contains no digits. Any exception raised while scanning (for
        example when ``answer`` is not a string) is logged and also
        results in None.
    """
    try:
        first_hit = re.search(r'\d+', answer)
        if first_hit is None:
            return None
        return int(first_hit.group(0))
    except Exception as e:
        logging.error(f"Error extracting number from answer '{answer}': {e}")
        return None


In [4]:
def estimate_number(answer):
    """Guess a score from qualitative wording when an answer has no digits.

    Keywords are matched as whole words and case-insensitively, so "know"
    or "enough" no longer count as "no", and "Not" counts the same as "not".

    Args:
        answer: Free-text answer. Non-string values are treated as
            un-estimable instead of raising.

    Returns:
        0 if the answer contains a negating word ("not", "never", "no",
        "cannot"); 100 if it contains "always", "completely" or "entirely";
        otherwise 101. Negation is checked first, so "not always" yields 0.
        101 is outside the 0-100 range and marks "could not estimate".
    """
    if not isinstance(answer, str):
        return 101
    # "cannot" is listed explicitly because whole-word matching would
    # otherwise stop recognising the "not" embedded in it.
    if re.search(r"\b(?:not|never|no|cannot)\b", answer, flags=re.IGNORECASE):
        return 0
    if re.search(r"\b(?:always|completely|entirely)\b", answer, flags=re.IGNORECASE):
        return 100
    # No recognised keyword: return the out-of-range marker rather than
    # inventing a score.
    return 101

In [5]:
def generate_concise_response_gpt3(answer, question, number):
    """Ask the OpenAI completion endpoint for a 2-3 word summary of an answer.

    One of two few-shot prompts is built: a numeric one when ``number`` is
    not None, and a qualitative one when it is None.

    Args:
        answer: The free-text answer to summarise.
        question: The question text the answer responds to.
        number: Score associated with the answer, or None to request a
            purely qualitative summary.

    Returns:
        The stripped text of the first completion choice. If anything
        raises (prompt construction, credentials, the API call), the error
        is logged and the string "Error in summary" is returned instead.
    """
    try:
        if number is not None:
            prompt = f"""Please provide a 2-3 word summary based on the number associated with the answer:

            Question: How would you rate your sleep depth last night?
            Answer: "I had a very deep and restful sleep."
            Number: 90
            Summary: Restful sleep

            Question: How quickly did you fall asleep?
            Answer: "It took me hours to fall asleep, I was tossing and turning."
            Number: 10
            Summary: Long to sleep

            Question: How often did you wake up during the night?
            Answer: "I woke up several times, and my sleep was very interrupted."
            Number: 30
            Summary: Often awake

            Now, for this answer:
            Question: {question}
            Answer: "{answer}"
            Number: {number}
            Summary:"""
        else:
            prompt = f"""Since no numerical value is provided, please give a 2-3 word qualitative summary of this answer:

            Question: Was the room too hot/cold?
            Answer: "The room was slightly chilly, but it was bearable."
            Summary: Slightly chilly

            Question: Was there too much light?
            Answer: "There was a streetlight outside my window, but it didn't bother me much."
            Summary: Minimal light

            Question: Would eye mask, ear plugs, white noise machine help with sleep?
            Answer: "I think an eye mask might help block out the light."
            Summary: Eye mask helpful

            Question: Would eye mask, ear plugs, white noise machine help with sleep?
            Answer: "I think an eye mask, ear plugs and white noise machine could help."
            Summary: Eye Mask, White Noise Machine, Ear Plugs
            
            Now, for this answer:
            Question: {question}
            Answer: "{answer}"
            Summary:"""

        # SECURITY: never embed the API key in the notebook. It is read from
        # the OPENAI_API_KEY environment variable; when that is unset, any
        # key already configured on the openai module is left untouched.
        # Any key that was ever saved in this file should be revoked.
        api_key = os.environ.get("OPENAI_API_KEY")
        if api_key:
            openai.api_key = api_key

        # NOTE(review): openai.Completion and text-davinci-003 appear to be
        # the legacy (pre-1.0 SDK) interface and model - confirm the pinned
        # openai version and the model are still available.
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            max_tokens=10  # A 2-3 word summary needs few tokens; adjust as needed
        )
        summary = response['choices'][0]['text'].strip()
        return summary
    except Exception as e:
        logging.error(f"Error generating concise response for answer '{answer}' with question '{question}': {e}")
        return "Error in summary"


In [6]:
# Main processing function
def process_answers():
    """Map every patient's answers to RCSQ questions and export them to CSV.

    Reads ``Answers/answers.pkl`` (a mapping of patient id to a sequence of
    answers, where position ``i`` corresponds to question ``Q{i+1}``) and
    ``richards_campbell_options.csv`` (columns ``QuestionID`` and
    ``QuestionText``). For each answer with a matching question it extracts
    or estimates a number, requests a short summary, and finally writes all
    rows to ``Answers_final.csv``.

    Raises:
        Exception: Any error during processing is logged with its traceback
            and then re-raised unchanged.
    """
    # Value estimate_number returns when it finds no usable keyword.
    unestimated_marker = 101

    try:
        # SECURITY: pickle.load can execute arbitrary code; only open pickle
        # files that come from a trusted source.
        with open('Answers/answers.pkl', 'rb') as f:
            answers = pickle.load(f)

        # Load RCSQ questions from the CSV
        rcsq_questions_df = pd.read_csv('richards_campbell_options.csv')

        # Build the id -> text lookup once instead of scanning the frame for
        # every answer. Keeping the first row per id matches a first-match
        # lookup on the frame.
        question_text_by_id = (
            rcsq_questions_df
            .drop_duplicates(subset='QuestionID')
            .set_index('QuestionID')['QuestionText']
            .to_dict()
        )

        # Rows for the exported DataFrame
        mapped_answers_list = []

        for patient_id, patient_answers in answers.items():
            for position, answer in enumerate(patient_answers, start=1):
                question_id = f"Q{position}"
                if question_id not in question_text_by_id:
                    logging.warning(f"No match found for question ID: {question_id}")
                    continue

                question_text = question_text_by_id[question_id]
                number = extract_number(answer)
                summary_number = number
                if number is None:
                    number = estimate_number(answer)
                    logging.info(f"Estimated number for answer '{answer}' as {number}")
                    # The marker is not a real score: hand the summariser None
                    # so it uses its qualitative prompt instead of treating
                    # 101 as a rating. The exported Number column is unchanged.
                    summary_number = None if number == unestimated_marker else number

                concise_response = generate_concise_response_gpt3(answer, question_text, summary_number)
                mapped_answers_list.append([patient_id, question_id, question_text, answer, number, concise_response])

        # Create a DataFrame for the CSV export
        df = pd.DataFrame(mapped_answers_list, columns=['Patient ID', 'Question ID', 'Question Text', 'Answer', 'Number', 'Structured Response'])

        # Export to CSV
        df.to_csv('Answers_final.csv', index=False)
        logging.info("CSV file created successfully.")

    except Exception:
        # logging.exception records the active traceback; re-raise so the
        # failure is still visible to the caller.
        logging.exception("An exception occurred during processing: ")
        raise

In [7]:
# Run the full processing pipeline when this code is executed as the
# top-level program; the guard skips the call if the code is imported.
if __name__ == "__main__":
    process_answers()