# Combined textual features
### This should combine the finetuned DeproBERTa probabilities (3) and the answers to the llm-driven questions (11)

In [1]:
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
import os

In [2]:

os.environ["GOOGLE_API_KEY"] = "AIzaSyCAQ7iYFHHlLi9pqKku_j_elEp9hOVO5Ng" 
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)


In [14]:
def build_interview_prompt(interview_text):
    questions = [
        "Have you felt emotionally and physically well lately?",
        "Have you experienced a noticeable change in your overall mood or feelings recently?",
        "Have you had trouble sleeping or experienced changes in your sleep patterns?",
        "Have you experienced a loss of interest or pleasure in usual activities?",
        "Have you felt tired or had little energy?",
        "Have you had difficulty concentrating or making decisions?",
        "Have you experienced feelings of worthlessness or excessive guilt?",
        "Have you had thoughts of death or suicide, or have you made any suicide attempts?",
        "Have you noticed significant changes in appetite or weight?",
        "Have you been diagnosed with depression or received mental health treatment in the past?",
        "Have you been withdrawing from social interaction or isolating yourself from others?"
    ]

    question_block = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])

    prompt = f"""You are analyzing an interview transcript to extract depression-related indicators. Your task is to answer each question below based ONLY on what the interviewee explicitly states or clearly implies in their responses.

RESPONSE FORMAT: Answer each question with exactly one of these four options:
- YES: Clear evidence the person experiences this symptom/condition
- NO: Clear evidence the person does NOT experience this symptom/condition  
- TO_SOME_EXTENT: Partial or mild symptoms are mentioned
- NOT_MENTIONED: No relevant information found in the transcript

ANALYSIS GUIDELINES:
- Base answers strictly on the interviewee's own words and descriptions
- Look for both direct statements and clear behavioral indicators
- Consider the timeframe mentioned (recent vs. past experiences)
- If multiple conflicting statements exist, prioritize the most recent or predominant pattern
- Do not infer beyond what is reasonably supported by the text
- Pay attention to severity and frequency when distinguishing between YES and TO_SOME_EXTENT

Interview Transcript:
\"\"\"
{interview_text}
\"\"\"

Questions to Answer:
{question_block}

ANSWER FORMAT:
1. [YOUR_ANSWER]
2. [YOUR_ANSWER]
...
11. [YOUR_ANSWER]

Remember: Use only YES, NO, TO_SOME_EXTENT, or NOT_MENTIONED for each response. No explanations or additional comments are needed. Just provide the answers in the specified format."""

    return prompt


In [15]:
import pickle

with open("result_base_600.pkl", "rb") as f:
    result_data = pickle.load(f)
    
print(result_data)

{'text': " Let's check everything находится. Zoom in here, virtual viewing, parties up here for just a second. And then I'll go ahead and seek we're back up every minute. So there she is. So I'm going to go ahead and shrink her back down while I can continue setting some things up. Okay. Okay. Okay. Okay. Okay. Okay. Okay. Hi. I'm Ellie. I'm not a therapist, but I'm very learned about people and I'm loved to learn about you. I'll ask you some questions to get started. And please feel free to tell me anything. Your answers are totally confidential. Okay. Are you okay with this? Yes. Okay. I'm fine. Let's go. Yes. Yes. The weather. What are the things you know where they're like about the weather? The crime. I don't know what that means. I don't know what that means. Sometimes when I'm feeling tense, I turn on the fist and scream saber. Hey, I know it's not polite, but it's the best I've got. What do you do to relax? Rhyp poetry. What are some things that make you really not? What are so

In [19]:
prompt = build_interview_prompt(result_data)
results = llm.invoke(prompt)
for line in results.content.split('\n'):
    print(line)

1. YES
2. YES
3. TO_SOME_EXTENT
4. TO_SOME_EXTENT
5. NOT_MENTIONED
6. NOT_MENTIONED
7. NOT_MENTIONED
8. TO_SOME_EXTENT
9. NOT_MENTIONED
10. TO_SOME_EXTENT
11. NO


In [20]:
# Encode answers to numerical values
# Define a mapping from string responses to numerical values
response_mapping = {
    "YES": 1,
    "TO_SOME_EXTENT": 0.5,
    "NO": 0,
    "NOT_MENTIONED": None
}

In [21]:
# encode the responses
encoded_answers = []
for line in results.content.split('\n'):
    if line.strip():  # Check if the line is not empty
        answer = line.split('.')[1].strip()  # Get the answer part after the number
        encoded_value = response_mapping.get(answer, None)  # Map to numerical value
        encoded_answers.append(encoded_value)

print("Encoded Answers:", encoded_answers)

Encoded Answers: [1, 1, 0.5, 0.5, None, None, None, 0.5, None, 0.5, 0]
