In [4]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
import ast

# Read variables from a local .env file into the process environment, then
# build the OpenAI client from the API_KEY entry.
load_dotenv()
API_KEY = os.getenv('API_KEY')

client = OpenAI(api_key=API_KEY)

df = pd.read_csv('data/27-oct-extracted.csv')

# Map each row's `doi` to its parsed `summaries` cell. The cell is stored as
# text holding a Python literal (presumably a list of statement strings --
# confirm against the CSV), so it is turned back into an object with
# ast.literal_eval. A repeated DOI keeps the value from its last row.
findings = {
    record['doi']: ast.literal_eval(record['summaries'])
    for _, record in df.iterrows()
}

In [7]:
# Instruction text sent ahead of every statement. The guidelines must agree with
# the rules: numerical metrics are stripped from the triplet, never included.
prompt = """Convert research statements about human-AI interaction into structured causal relationships using this format:

[subject:cause, relationship, subject:effect]

Relationship Types (Choose ONE):
- ENHANCES/DIMINISHES
  - For impacts on performance, capability, or quality
  - Example: [AI explanations, ENHANCES, user understanding]
- INCREASES/DECREASES
  - For measurable changes or quantifiable effects
  - Example: [AI assistance, INCREASES, labeling accuracy]
- INFLUENCES
  - For complex relationships affecting perceptions, behaviors, or attitudes
  - Example: [AI embodiment, INFLUENCES, perceived intelligence]

Rules:
- Cause and Effect element should be one of:
  - AI feature/capability (e.g., "AI responsiveness", "generative AI tools")
  - User characteristic and behavior (e.g., "user beliefs", "perceived credibility")
  - System property (e.g., "model accuracy", "training data")
- Remove numerical metrics (e.g. AI performance by 27% -> AI performance)
- Use specific terms over general ones (e.g. "system features" -> "AI explanation interface")

Sample:
- "Interactive Machine Learning interfaces enhance system learning efficiency by integrating user feedback" -> [interactive ML interfaces, INCREASES, system learning efficiency]
- "Learning Engagement mechanisms improve users' positive perceptions of the robot" -> [learning engagement features, ENHANCES, users' perception of robot]
- "Machine learning methods automate mundane UX research tasks, enhancing efficiency" -> [ML automation, INCREASES, UX research efficiency]

Guidelines:
- Prioritize specific over general relationships
- Leave quantitative metrics out of the triplet even when the statement reports them
- Maintain clear causation direction
- Focus on primary relationship when multiple exist"""

from pydantic import BaseModel
# Schema for one causal triplet. get_summary passes this class as `response_format`,
# and the parsed reply is read back through these three attributes.
class Triplet(BaseModel):
    cause: str  # subject acting as the cause
    # Relationship label. The prompt asks the model for ENHANCES/DIMINISHES,
    # INCREASES/DECREASES or INFLUENCES, but this field accepts any string.
    relationship: str
    effect: str  # subject acting as the effect

def get_summary(abstract):
    """Ask the model to convert one research statement into a causal ``Triplet``.

    Args:
        abstract: The statement text. It is appended to the module-level
            ``prompt`` and sent as a single user message.

    Returns:
        The parsed ``Triplet`` taken from the first choice of the response.

    Raises:
        ValueError: If the response carries no parsed triplet (for example
            because the model refused), so callers never receive ``None``.
    """
    chat_completion = client.beta.chat.completions.parse(
        messages=[
            {
                "role": "user",
                "content": prompt + " The following is the statement: " + abstract,
            },
        ],
        model="gpt-4o",
        response_format=Triplet,
        temperature=0.5
    )

    message = chat_completion.choices[0].message
    if message.parsed is None:
        # Returning None here would only fail later, when a caller reads
        # `.cause` on it; fail at the source with the refusal text if any.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"No structured triplet returned for statement: {refusal or 'empty response'}"
        )
    return message.parsed

In [8]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Any, Optional

import pandas as pd

@dataclass
class ProcessingResult:
    """Outcome of running ``get_summary`` on a single statement."""
    # The value handed to get_summary (the statement text), kept for reporting.
    finding_id: str
    # The key of the `findings` mapping this statement was listed under.
    category: str
    # Whatever get_summary returned; None when the call failed.
    summary: Any
    # True when get_summary returned without raising.
    success: bool
    # str() of the caught exception on failure, otherwise None.
    error: Optional[str] = None

def process_single_finding(finding_id, category):
    """Run ``get_summary`` on one statement and wrap the outcome.

    Args:
        finding_id: The value passed to ``get_summary`` (the statement text).
        category: Label carried through unchanged into the result.

    Returns:
        A ``ProcessingResult``. On success it holds the value returned by
        ``get_summary``; if any ``Exception`` is raised it holds
        ``summary=None``, ``success=False`` and the exception text, so this
        function itself does not propagate errors from ``get_summary``.
    """
    try:
        outcome = get_summary(finding_id)
    except Exception as exc:
        return ProcessingResult(
            finding_id=finding_id,
            category=category,
            summary=None,
            success=False,
            error=str(exc),
        )
    return ProcessingResult(
        finding_id=finding_id,
        category=category,
        summary=outcome,
        success=True,
    )

def process_findings_concurrent(findings, max_workers=None):
    """Extract a causal triplet for every statement in ``findings`` on a thread pool.

    Args:
        findings: Mapping whose keys end up in the ``category`` column and whose
            values are iterables of statement texts (stored as ``finding_id``).
        max_workers: Passed straight to ``ThreadPoolExecutor``; ``None`` leaves
            the pool size to the executor's default.

    Returns:
        A DataFrame with columns ``finding_id``, ``category``, ``cause``,
        ``relationship`` and ``effect``. It has one row per statement that
        produced a triplet, indexed 0..n-1 in completion order (not input
        order). Statements that failed are reported with ``print`` and left out.
    """
    columns = ['finding_id', 'category', 'cause', 'relationship', 'effect']
    tasks = [(finding_id, category)
             for category, finding_ids in findings.items()
             for finding_id in finding_ids]

    # Rows are gathered in a plain list and the frame is built once at the end;
    # inserting into a DataFrame one row at a time re-scans the index and
    # reallocates on every insert.
    rows = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_single_finding, finding_id, category)
                   for finding_id, category in tasks]

        for future in as_completed(futures):
            result = future.result()
            if not result.success:
                print(f"Error processing {result.finding_id}: {result.error}")
                continue

            summary = result.summary
            if summary is None:
                # A result flagged successful can still carry no triplet; skip it
                # instead of failing the whole batch on an attribute lookup.
                print(f"Error processing {result.finding_id}: no triplet returned")
                continue

            rows.append([result.finding_id, result.category, summary.cause,
                         summary.relationship, summary.effect])
            print(f"{result.finding_id}: {summary.cause} -> {summary.relationship} -> {summary.effect}")

    return pd.DataFrame(rows, columns=columns)

# Run the extraction over every statement in `findings`. One line is printed per
# statement as its request completes: the triplet on success, the error otherwise.
result = process_findings_concurrent(findings)

AI systems provoke deep reflections by engaging users in evaluative processes.: AI systems -> INFLUENCES -> user reflections
XAI explanations are used by end-users for tasks such as calibrating trust, improving skills, and giving feedback.: XAI explanations -> ENHANCES -> user task performance
AI products offering more user control tend to be more inclusive.: AI products offering more user control -> ENHANCES -> inclusivity
AI in games reveals misconceptions about AI systems by integrating interactions from other domains.: AI in games -> INFLUENCES -> user misconceptions about AI systems
Explainable AI facilitates trust by providing human-level explanations and can be processed affectively.: explainable AI -> ENHANCES -> user trust
AI products' inclusivity is influenced by whether they learn from individual or collective data.: data learning approach -> INFLUENCES -> AI products' inclusivity
AI systems create indirect communities by celebrating user goals and behaviors.: AI systems -> 

In [9]:
# Save the extracted triplets. With to_csv's default settings the DataFrame index
# is written as well, as an unnamed first column.
result.to_csv('triplets.csv')