In [2]:
import pandas as pd
import openai
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel
from sklearn.utils.class_weight import compute_class_weight
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import torch.optim as optim
from transformers import BertForSequenceClassification
import torch.nn.functional as F
import random

In [16]:
# Directory that holds the input data, resolved under the current user's home folder.
base_path = os.path.expanduser("~/Desktop/support_data/")

# Load the file
csv_path = os.path.join(base_path, "dataForCoding.csv")
result = pd.read_csv(csv_path, encoding='latin1')
result.info()

In [18]:
# Read the OpenAI API key from the environment instead of embedding it in the notebook.
# The previous unquoted `sk-xxx-new` was parsed as a subtraction of undefined names
# (NameError), and a literal key in a notebook is easy to leak when the file is shared.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )
openai.api_key = _api_key

In [None]:
# Prompts for the informational-support features.
# Each key becomes a column of the result frame and each value is passed to
# get_response() as the system prompt.
# "Post_Advice Requests" is evaluated on the post text and asks for a YES/NO answer;
# every other prompt is evaluated on the comment text and asks for the matching
# passages, or 'NO' when there are none.
inf_prompts = {
    # Applied to the post: does the writer explicitly ask for advice?
    "Post_Advice Requests": (
        "From the given texts, identify whether the writer explicitly asks for advice, recommendations, or practical solutions to a specific issue. "
        "Please answer 'YES' if such sentences or paragraphs are found, or 'NO' if they are not."
    ),
    # Links, references, or recommended external resources.
    "Link or Reference": (
        "From the given texts, extract all sentences or paragraphs that include links, references, or recommendations for external resources. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Advice, recommendations, or suggestions given to the reader.
    "Solution": (
        "From the given texts, extract and return all sentences or paragraphs where the author provides advice, recommendations, or suggestions to the reader. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Personal experiences, or experiences of others.
    "Experience": (
        "From the given texts, identify and return all sentences or paragraphs where the writer shares personal experiences or the experiences of others. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Past personal experience only, with no advice or modal/imperative forms.
     "exp_only": (
        "From the given texts, identify and return all sentences or paragraphs that describe a past personal experience "
        "using simple past tense verbs and do not include any form of advice, suggestions, recommendations, or instructions. "
        "Please exclude any sentences that contain modal verbs like 'should', 'could', 'might', or imperative forms. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Internal thoughts or emotional states only, with no directives.
    "feel_only": (
        "From the given texts, identify and return all sentences or paragraphs that express the speaker’s internal thoughts "
        "or emotional states without including any directives, commands, or suggestions for actions. "
        "Avoid any content that implies an action to be taken or advice being given. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Indirect, tentative, or non-committal advice.
    "weak_advice": (
        "From the given texts, identify and return all sentences or paragraphs that appear to offer advice or suggestions, "
        "but in an indirect, tentative, or non-committal manner. This includes questions, conditional statements, or advice "
        "that lacks specificity or clear guidance. These sentences may sound helpful but do not provide concrete or actionable information. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # General advice not tailored to the writer's situation.
    "general_advice": (
        "From the given texts, extract and return all sentences or paragraphs where the speaker gives advice, "
        "recommendations, or suggestions that are general or non-specific in nature, rather than directly addressing the writer's unique situation. "
        "These may include broad life advice or statements applicable to anyone. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Practical solutions or step-by-step instructions.
    "direct_solution": (
        "From the given texts, identify and return all sentences or paragraphs that clearly provide practical solutions or "
        "step-by-step instructions to resolve a problem or situation. This includes direct recommendations, explicit procedural steps, "
        "or actionable advice aimed at helping the reader take specific action. Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Concrete, named external sources backing the advice.
    "concrete_reference": (
        "From the given texts, identify and return all sentences or paragraphs that include concrete references to external resources, "
        "such as named websites, books, medical sources, or research-backed information. These references should support the advice or information "
        "provided and enhance the credibility or specificity of the support. Please answer 'NO' if no such sentences or paragraphs are found."
    )
}

In [21]:
# Prompts for the emotional-support features.
# Each value is passed to get_response() as the system prompt.
# Every key except "sentiment" is evaluated on the comment text and stored in a
# column of the same name. The "sentiment" prompt is evaluated separately on the
# post and on the comment, producing "post_sentiment" and "comment_sentiment".
emt_prompts = {
    # Empathy or understanding toward the reader.
    "Empathy": (
        "From the given texts, identify and return all sentences or paragraphs where the author expresses empathy "
        "or understanding toward the reader's feelings or situation. Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Encouragement or support for the reader.
    "Encouragement": (
        "From the given texts, extract and return all sentences or paragraphs where the author provides encouragement or support "
        "to the reader. Please answer 'NO' if no such sentences or paragraphs are found."
    ),

    # Explicit requests to be contacted, or contact details offered.
    "Contact_Request": (
        "From the given texts, identify and return all sentences or paragraphs where the writer explicitly requests to be contacted "
        "or provides a way to initiate contact. Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Language that looks supportive but does not actually support the recipient.
    "false_emt": (
        "From the given texts, identify and return all sentences or paragraphs that contain language which may initially "
        "appear empathetic or emotionally supportive, but in context, do not actually provide comfort, encouragement, or "
        "emotional support to the recipient. These may include expressions of the speaker’s own emotional state, observations, "
        "or personal reflections that are not directed at supporting the other person. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Emotionally neutral or detached statements.
    "detached_comment": (
        "From the given texts, extract and return all sentences or paragraphs that are emotionally neutral or detached, where "
        "the speaker provides information, observation, or general statements without directly acknowledging, validating, or "
        "emotionally engaging with the writer’s situation. These include responses that are impersonal or distanced in tone, "
        "even if they seem polite or factual. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Emotional language without clear supportive intent.
    "ambig_emt": (
        "From the given texts, identify and return all sentences or paragraphs that include emotional or reflective language "
        "but do not clearly offer emotional support, encouragement, problem-solving advice, or personal experience sharing. "
        "These may include vague expressions of empathy, indirect emotional comments, or ambiguous feelings that lack clear "
        "supportive intent. Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Deep empathy or strong emotional support.
    "deep_empathy": (
        "From the given texts, extract and return all sentences or paragraphs that express deep empathy, emotional presence, or "
        "strong emotional support toward the recipient. These should include expressions that directly acknowledge the recipient's "
        "feelings and offer meaningful comfort, such as “I’m so sorry you’re experiencing this” or “You are not alone and I care "
        "about you.” "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
    # Active, personal emotional support (offers of help, personal concern).
    "active_support": (
        "From the given texts, extract all sentences or paragraphs that show active emotional support, such as offering to help, "
        "showing emotional engagement, or explicitly encouraging the recipient in a personal and meaningful way. These should go "
        "beyond polite or general statements, and instead demonstrate personal concern or emotional commitment. "
        "Please answer 'NO' if no such sentences or paragraphs are found."
    ),
     # Sentiment score on a -2..+2 Likert scale, returned together with a rationale.
    "sentiment": (
        "From the given texts, score the writer’s sentiment on a Likert scale ranging from -2 to +2, where -2 represents strong negative sentiment, "
        "-1 represents slightly negative sentiment, 0 represents neutral sentiment, +1 represents slightly positive sentiment, and +2 represents very positive sentiment. "
        "Provide the score with the rationales of scoring. Please answer “NO” when there is no obvious evidence of sentiment in the given texts."
    )
}

In [6]:
# Helper that calls the OpenAI chat-completions API
def get_response(text, prompt, model="gpt-4o"):
    """Send one text to the chat-completions endpoint and return the reply.

    Parameters
    ----------
    text : str
        Content of the user message (the post or comment to analyse).
    prompt : str
        Content of the system message (the instruction for the model).
    model : str, optional
        Model name passed to the API. Defaults to "gpt-4o", the value that
        was previously hardcoded, so existing two-argument calls are unchanged.

    Returns
    -------
    str
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the API returns no text content.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": text},
        ],
    )
    content = response.choices[0].message.content
    # `content` is nullable in the API response; calling `.strip()` on None
    # would raise AttributeError and abort the whole annotation loop.
    if content is None:
        return ""
    return content.strip()

In [None]:
# Column buffers for the output frame: the six fields carried over from the
# input first, then one answer column per informational prompt.
base_columns = ["pid", "cid", "post", "comment", "inf", "emt"]
results = {column: [] for column in base_columns}
results.update({key: [] for key in inf_prompts})

for _, row in result.iterrows():
    # Read every carried-over field before appending anything.
    carried = [(column, row[column]) for column in base_columns]
    for column, value in carried:
        results[column].append(value)

    for key, prompt in inf_prompts.items():
        # Only the advice-request prompt looks at the post; the rest look at the comment.
        text_input = row['post'] if key == "Post_Advice Requests" else row['comment']
        results[key].append(get_response(text_input, prompt))

# Build the result DataFrame
result_df = pd.DataFrame(results)
print(result_df.head())

In [22]:
# Temporary buffers for the emotional-support answers: one list per
# non-sentiment prompt, followed by the two sentiment columns.
emt_results = {key: [] for key in emt_prompts if key != "sentiment"}
emt_results["post_sentiment"] = []
emt_results["comment_sentiment"] = []

for _, row in result_df.iterrows():
    post, comment = row["post"], row["comment"]

    # Regular emotional-support prompts are evaluated on the comment only.
    for key, prompt in emt_prompts.items():
        if key != "sentiment":
            emt_results[key].append(get_response(comment, prompt))

    # The sentiment prompt is evaluated on the post and the comment separately.
    sentiment_prompt = emt_prompts["sentiment"]
    emt_results["post_sentiment"].append(get_response(post, sentiment_prompt))
    emt_results["comment_sentiment"].append(get_response(comment, sentiment_prompt))

# Merge the new columns into result_df
for column, values in emt_results.items():
    result_df[column] = values

# Quick check
print(result_df.head())

In [None]:
import re

def extract_sentiment_score(text):
    """Parse a sentiment score in the range -2..+2 out of a model reply.

    The reply is expected to contain the score together with a free-text
    rationale, so the text may hold other numbers (years, counts, "gpt-4").
    The first standalone integer whose value lies in [-2, 2] is returned;
    integers outside that range and digits embedded in words, decimals, or
    hyphenated tokens are skipped.

    Parameters
    ----------
    text : Any
        The raw reply. Non-string values (None, NaN, an already-parsed int)
        are converted with ``str()`` first, so re-applying the function to an
        already converted column leaves valid scores unchanged.

    Returns
    -------
    int
        The parsed score, or 0 when no in-range integer is present (this
        includes the "NO" answer the prompt allows).

    Notes
    -----
    If the reply restates the scale before giving the score (e.g. "on a scale
    of -2 to +2 ..."), the first in-range number is still the one returned.
    """
    # Normalise the Unicode minus sign so "−1" is read as -1 rather than +1.
    normalized = str(text).replace("\u2212", "-")
    # The lookbehind rejects digits that continue a word, a decimal, or a
    # hyphenated token ("covid-19", "3.5", "gpt-4").
    for match in re.finditer(r'(?<![\w.+-])[-+]?\d+', normalized):
        score = int(match.group())
        if -2 <= score <= 2:
            return score
    return 0

# Replace the raw sentiment replies with their parsed integer scores.
for sentiment_column in ("post_sentiment", "comment_sentiment"):
    result_df[sentiment_column] = result_df[sentiment_column].apply(extract_sentiment_score)

In [19]:
# Persist the annotated frame to the working directory.
output_csv = "final_data.csv"
result_df.to_csv(output_csv)