In [2]:
import ast
import os
from typing import Any, Optional

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

# Read the API key from the environment (load_dotenv() is expected to populate
# it from a local .env file) so the secret never appears in the notebook.
load_dotenv()
API_KEY = os.getenv('API_KEY')

client = OpenAI(api_key=API_KEY)

# Analysis export: one row per paper, with at least a `doi` column and a
# `summaries` column.
df = pd.read_csv('result/analyze.csv')

# Map each DOI to its decoded `summaries` value. The column holds the text of a
# Python literal (presumably a list of statement strings -- it is iterated
# later), so ast.literal_eval is used: it only accepts literals and never
# executes code. If a DOI occurs on several rows, the last row wins.
findings = {
    doi: ast.literal_eval(summaries)
    for doi, summaries in zip(df['doi'], df['summaries'])
}

In [37]:
# Instruction text sent with every statement. Elements are written as
# "Subject|Feature"; ":" nests within a subject or a feature, and "#" marks a
# perception feature. The few-shot examples must follow that grammar exactly,
# because the model imitates them.
prompt = """Convert research statements about human-AI interaction into structured relationships using the format:
[cause, relationship, effect]

## Element Structure (Cause/Effect)
Format: "Subject|Feature"

### Subject Categories
- Human: Individual actors (e.g., human, human:student, human:clinician)
- AI: AI systems/components (e.g., ai:generative, ai:chatbot)
- CO: Concepts/Objects (e.g., co:project, co:justice, co:interaction)

### Feature Guidelines
- One word when possible
- Can be no nested (e.g. human) or 1-level nested (human:student)
- Specific over general terms

## Relationship Types
1. INCREASES/DECREASES
   - For direct impact on measurable attributes
   - Example: [ai|assistance, INCREASES, human|productivity]

2. INFLUENCES 
   - For complex/indirect effects on behavior/perception
   - Example: [ai|embodiment, INFLUENCES, human|#trust]

## Examples
Input: "Interactive Machine Learning interfaces enhance artists' creativity in writing by integrating user feedback"
Output: [ai:interactive|interface, INCREASES, human:artist|creativity:writing]

Input: "Engagement mechanisms improve users' positive perceptions of the robot"
Output: [ai|engagement, INCREASES, human|#robot]

## Rules
1. Focus on primary causal relationship
2. Use specific terms over general ones
   - ❌ "system features" -> ✅ "AI explanation interface"
3. Standardize nested subjects
   - One word when possible
   - No redundant terms (e.g., "generative AI" -> "ai:generative")
4. Remove numerical metrics
   - ❌ "AI performance by 27%" -> ✅ "ai|performance"

## Feature Special Cases
1. Perception (#)
   - Use for beliefs/perspectives/idea
   - Example: "perception of trust" -> "#trust"
2. Nested Features
   - Use least levels as possible
   - General cases including nouns with prepositions (e.g., "of", "in", "about")
   - Example: "reliance on AI" -> "reliance:ai", "misconception about AI" -> "misconception:ai"
"""

from pydantic import BaseModel
class Triplet(BaseModel):
    # Response schema handed to the API as `response_format`: one
    # [cause, relationship, effect] statement.
    # NOTE(review): documented with comments rather than a docstring on
    # purpose -- a class docstring may be emitted as the schema description and
    # would then change what is sent to the model; confirm before adding one.

    # "Subject|Feature" element acting as the cause.
    cause: str
    # Relationship label; the prompt asks for INCREASES, DECREASES or
    # INFLUENCES, but the type itself does not enforce that.
    relationship: str
    # "Subject|Feature" element being affected.
    effect: str

def get_summary(abstract, model="gpt-4o", temperature=0.5):
    """Convert one research statement into a structured ``Triplet``.

    Sends the module-level ``prompt`` followed by the statement as a single
    user message and asks the API to parse the reply into ``Triplet``.

    Parameters
    ----------
    abstract : str
        Text of the statement to convert. (The caller in this notebook passes
        one finding sentence, not a full abstract.)
    model : str, optional
        Chat model name; defaults to the previously hard-coded ``"gpt-4o"``.
    temperature : float, optional
        Sampling temperature; defaults to the previously hard-coded ``0.5``.

    Returns
    -------
    Triplet
        The parsed cause / relationship / effect object.

    Raises
    ------
    ValueError
        If the reply contains no parsed object (for example because the model
        refused). Previously ``None`` was returned silently and only failed
        later, when the caller accessed ``.cause``.
    """
    chat_completion = client.beta.chat.completions.parse(
        messages=[
            {
                "role": "user",
                "content": prompt + " The following is the statement: " + abstract,
            },
        ],
        model=model,
        response_format=Triplet,
        temperature=temperature,
    )

    message = chat_completion.choices[0].message
    if message.parsed is None:
        # `refusal` carries the model's explanation when it declines to answer;
        # getattr keeps this safe if the attribute is absent.
        reason = getattr(message, "refusal", None) or "no parsed content in response"
        raise ValueError(f"Model returned no triplet: {reason}")
    return message.parsed

In [38]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
import pandas as pd

@dataclass
class ProcessingResult:
    """Outcome of converting a single statement into a triplet."""

    # Text handed to get_summary (in this notebook: the statement itself).
    finding_id: str
    # Key the statement was grouped under in `findings` (in this notebook: a DOI).
    category: str
    # Parsed triplet on success, None on failure. Typed as Any because the
    # concrete model class is defined in another cell.
    summary: Any
    # True when a summary was produced.
    success: bool
    # Error message on failure, otherwise None.
    error: Optional[str] = None

def process_single_finding(finding_id, category):
    """Convert one statement and wrap the outcome in a ``ProcessingResult``.

    Never raises for API or parsing problems: any exception from
    ``get_summary`` becomes a failed result, so one bad statement cannot abort
    a whole batch.

    Parameters
    ----------
    finding_id : str
        Text passed to ``get_summary`` (the statement itself).
    category : str
        Grouping key carried through to the result unchanged.

    Returns
    -------
    ProcessingResult
        ``success=True`` with the summary, or ``success=False`` with ``error``
        set to a message.
    """
    try:
        summary = get_summary(finding_id)
    except Exception as e:
        return ProcessingResult(finding_id, category, None, False, str(e))

    # An empty reply must not be reported as a success: the consumer
    # dereferences `summary.cause` for every successful result.
    if summary is None:
        return ProcessingResult(finding_id, category, None, False,
                                "no parsed triplet returned")
    return ProcessingResult(finding_id, category, summary, True)

def process_findings_concurrent(findings, max_workers=None):
    """Convert every statement in ``findings`` to a triplet using a thread pool.

    Parameters
    ----------
    findings : dict
        Maps a grouping key to an iterable of statements (in this notebook:
        DOI -> statements).
    max_workers : int or None, optional
        Passed straight to ``ThreadPoolExecutor``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully converted statement, with columns
        ``finding_id`` (the statement text), ``category`` (its grouping key),
        ``cause``, ``relationship`` and ``effect``. Rows are in completion
        order, not input order. Failures are printed and left out.
    """
    columns = ['finding_id', 'category', 'cause', 'relationship', 'effect']
    tasks = [
        (finding_id, category)
        for category, finding_ids in findings.items()
        for finding_id in finding_ids
    ]

    # Collect plain rows and build the frame once at the end; growing a
    # DataFrame with .loc inside the loop copies data on every insert.
    rows = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_finding, finding_id, category)
            for finding_id, category in tasks
        ]

        for future in as_completed(futures):
            result = future.result()
            summary = result.summary
            # A "successful" result without a summary would raise on
            # `.cause` and abort the whole batch, so treat it as a failure.
            if not result.success or summary is None:
                reason = result.error or "no parsed triplet returned"
                print(f"Error processing {result.finding_id}: {reason}")
                continue

            rows.append([result.finding_id, result.category, summary.cause,
                         summary.relationship, summary.effect])
            print(f"{result.finding_id}: {summary.cause} -> {summary.relationship} -> {summary.effect}")

    return pd.DataFrame(rows, columns=columns)

# Convert every statement collected in `findings`; this makes one API request
# per statement and prints each outcome as it completes.
result = process_findings_concurrent(findings)

AI products offering more user control tend to be more inclusive.: ai|user-control -> INCREASES -> co|inclusivity
AI systems provoke deep reflections by engaging users in evaluative processes.: ai|system -> INFLUENCES -> human|reflection
End-users prefer XAI explanations that provide practical, actionable information over technical system details.: ai|explanation -> INFLUENCES -> human|preference:#actionable_information
The AI music application prototypes exhibit distinct user control levels in intermittent, continuous, and proactive interaction paradigms.: ai:music|application -> INFLUENCES -> human|control:interaction
AI systems create indirect communities by celebrating user goals and behaviors.: ai|systems -> INFLUENCES -> co|community
AI products' inclusivity is influenced by whether they learn from individual or collective data.: ai|data -> INFLUENCES -> ai|inclusivity
Information presentation beyond explanations moderates trust formation during human-AI interactions.: co:informa

In [39]:
# Persist the extracted triplets. With to_csv's defaults the row index is
# written too, as an unnamed first column.
result.to_csv('triplets-new.csv')