In [1]:
import pandas as pd
import ast
import os
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd

load_dotenv()
API_KEY = os.getenv('API_KEY')

df = pd.read_csv('result/triplets.csv')
cause = [k.lower().split("|")[0] for k in df['cause'].to_list()]
effect = [k.lower().split("|")[0] for k in df['effect'].to_list()]

pool = set(cause + effect)

In [2]:
embedding = {}

from openai import OpenAI
client = OpenAI(api_key=API_KEY)

def get_embedding(text, model="text-embedding-3-large"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding


In [3]:
import concurrent.futures

def process_item(item):
    print(f"Processing item: {item}")
    embedding = get_embedding(item)
    return (item, embedding)

def get_concurrent_embeddings(kw_set, max_workers=None):
    embeddings = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_item = {
            executor.submit(process_item, item): item 
            for item in kw_set
        }
        
        for future in concurrent.futures.as_completed(future_to_item):
            try:
                item, embedding = future.result()
                embeddings[item] = embedding
            except Exception as e:
                print(f"Error processing {future_to_item[future]}: {str(e)}")
    
    return embeddings

In [4]:
embedding = get_concurrent_embeddings(pool)

Processing item: ai:xpath
Processing item: co:software_engineering
Processing item: co:creative
Processing item: ai:liplearner
Processing item: ai:visualization
Processing item: co:basic
Processing item: ai:language-model
Processing item: co:transparency
Processing item: ai:deep_neural_network
Processing item: ai:feature-based
Processing item: ai:lexicon
Processing item: ai:example-based
Processing item: human:middle-aged
Processing item: human:domain-knowledgeable
Processing item: human:practitioner
Processing item: ai:participatory
Processing item: co:mid-roll
Processing item: ai:jitai
Processing item: ai:steering
Processing item: co:ai_trust_score
Processing item: human:young
Processing item: co:segmentation
Processing item: human:community
Processing item: human:emergent
Processing item: co:procedure
Processing item: co:user-ai
Processing item: ai:recommender
Processing item: co:rhetorical
Processing item: ai:sensemate
Processing item: ai:decision_support
Processing item: ai:virtua

In [5]:
x = pd.DataFrame({k: str(embedding[k]) for k in embedding}, index=[0]).T
x.to_csv("result/embedding-triplets-keys.csv")

In [10]:
import ast

df2 = pd.read_csv('result/analyze.csv')
kw = [si.lower() for i in df2['keywords'].to_list() for si in ast.literal_eval(i) ]

kw_embedding = get_concurrent_embeddings(kw)

Processing item: theory of mindProcessing item: human-ai interaction

Processing item: social intelligence
Processing item: human-ai interaction
Processing item: user experience
Processing item: design guidelines
Processing item: human-ai interaction
Processing item: user values
Processing item: prayer experiences
Processing item: clinical decision-making
Processing item: patient-centered care
Processing item: ai interaction models
Processing item: explainable ai
Processing item: trust formation
Processing item: human-ai interactions
Processing item: user burden
Processing item: sexual assault reporting
Processing item: algorithmic evaluation resistance
Processing item: appropriate trust
Processing item: human-ai interaction
Processing item: research opportunities
Processing item: inclusivity
Processing item: problem-solving diversity
Processing item: demographic diversity
Processing item: human-ai interaction
Processing item: interaction paradigms
Processing item: system design
Proces

In [11]:
x = pd.DataFrame({k: str(kw_embedding[k]) for k in kw_embedding}, index=[0]).T
x.to_csv("result/embedding-keywords.csv")

In [4]:
stmt = set(df['finding_id'].to_list())
stmt_embedding = get_concurrent_embeddings(stmt)

Processing item: DuetDraw enhances user satisfaction by providing detailed instructions during collaborative drawing tasks.
Processing item: Generative AI enhances music composition by balancing authorial control with unpredictability.
Processing item: Optometrists' risk-averse tendencies significantly influence their interpretation of AI clinical decision support system outputs.
Processing item: Trust calibration alone is insufficient for improving AI-assisted decision making outcomes.
Processing item: User demographics, including age and familiarity with probability, influence human-AI collaborative decision-making.
Processing item: AI models detect hyperactivity in children with 85.2% accuracy using parent-provided activity labels.
Processing item: AI explanations are most effective when they reveal reasons to doubt incorrect AI classifications or trust correct ones.
Processing item: False positives in AI grading harm learning by reducing students' engagement with feedback.
Processi

In [6]:
x = pd.DataFrame({k: str(stmt_embedding[k]) for k in stmt_embedding}, index=[0]).T
x.to_csv("result/embedding-stmt.csv")