In [41]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Pull settings from a local .env file (if any) into the process environment,
# then read the API key from there.
load_dotenv()
API_KEY = os.environ.get('API_KEY')  # None when the variable is not set

In [42]:
import pandas as pd

# Load the paper table; the summarization loop reads its 'title' and 'abstract' columns.
df = pd.read_csv('data/sample.csv')

In [123]:
from pydantic import BaseModel
# Schema requested from the model (passed as `response_format`) when summarizing one abstract.
# Documented with comments rather than docstrings so the generated schema is left untouched.
class AbstractSummary(BaseModel):
    # A single extracted finding.
    class Summary(BaseModel):
        bullet: str  # the finding itself, as one statement
        effect: str  # prompt asks for "Positive" | "Negative" | "Neutral"; plain str, so not validated
        
    keywords: list[str]  # topical keywords for the paper
    summaries: list[Summary]  # one entry per extracted finding
    theme: str  # prompt asks for one of its listed themes; plain str, so not validated
    
# Schema requested from the model (passed as `response_format`) when relating two findings.
class FindingsRelation(BaseModel):
    relation_type: str  # compared against "unrelated" by the edge-building cells
    explanation: str  # free-text justification for the chosen relation
    polarity: str  # relation prompt asks for "positive" | "negative"; plain str, so not validated

In [145]:
# Shared API client used by get_summary and get_relation.
client = OpenAI(api_key=API_KEY)

# Instruction text for get_summary; the abstract is appended to it at call time.
# The output keys it describes correspond to the fields of AbstractSummary.
prompt = """Extract key findings from the abstract of this paper in bullet points focusing on the effect of AI,
emphasizing complete and clear statements of results or conclusions (e.g., \"AI Chatbot can induce creativity in brainstorming session\" or \"AI-enabled knowledge graph generation can be used for taxonomical tasks\").
Avoid including any context or procedural details (e.g. the paper hosts workshops sessions to test the hypothesis).
Each of the statement should be self-explanatory, independent from other bullets, and informative.
Terms like higher than (...), control condition, must be elaborated and explained or avoided.
Superlative terms, e.g., strongest or highest, must be modified relatively to other findings or avoid the terms.
Number of bullets should be as small as possible, corresponding to the number of significant, statistical, and practical findings.

The format of the output should be as follow:
summary.bullet -> summary
summary.effect -> "Positive" | "Negative" | "Neutral" analyzing the effects and implications of the finding on human and society.
keywords -> topics and themes of the paper. With these papers being about AI and HCI, terms related to the two, e.g. "AI application", "Human-centered design", "Human-AI interaction", must be omitted. The included terms should be around the thematic concerns of the paper, e.g. "Medical", "Creativity", "Productivity".
theme -> the main theme of the finding based on these themes:
    - Health: e.g. personalized monitoring, diagnosis, interventions for physical and mental health
    - Learning: e.g. Personalized, always present tutor for situated learning
    - Work: e.g. Amplifying creativity, performance, discovery, decision making and more
    - Elderly: e.g. supporting healthy independent aging
    - Entertainment: e.g. Immersive, personalized experiences and interactive storytelling
    - Social Justice: e.g. Data-driven policy making, bias detection, and equitable resource allocation
"""

# Instruction text for get_relation; the two statements are appended to it at call time.
# The output keys named at the bottom must match the FindingsRelation field names
# (relation_type, explanation, polarity), since that model is the response schema.
relation_prompt = """You will be given two statements to analyze and identify the semantic relations between them and classify it according to the following schema:

Relation types:
1. Generalization: The second statement broadens or generalizes the first.
2. Instantiation: The second statement provides a specific example or narrows down the first.
3. Restatement: The second statement rephrases or elaborates on the first without changing the core meaning.
4. Similarity: The statements address similar themes without a direct hierarchical relationship.
5. Unrelated: The two statements are unrelated.

Important notes:
- Analyze ONLY the given statements. Do not add or assume any context beyond what is explicitly stated.
- Focus solely on the context and scope of the statements, not on whether they agree or disagree.
- "Restatement" should be used when the statements discuss the same specific context, even if they present opposing views.

Examples:
1. Statement 1: "AI can improve memory"
   Statement 2: "AI can reduce memory"
   Relation: Restatement
   Explanation: Both statements address AI's impact on memory, even though they present opposing effects.

2. Statement 1: "AI can improve creativity"
   Statement 2: "AI can reduce creativity in digital art"
   Relation: Instantiation
   Explanation: The second statement narrows the context from creativity in general to creativity specifically in digital art.

Polarity:
- Positive: The statements agree with or support each other.
- Negative: The statements contrast or disagree with each other.

The format of the output should be as follow:
    "relation_type" -> "generalization" | "instantiation" | "restatement" | "similarity" | "unrelated"
    "explanation" -> explanation of the analysis
    "polarity" -> "positive" | "negative"
"""

def get_summary(abstract):
    """Ask the chat model for a structured summary of one abstract.

    The module-level ``prompt`` followed by ``abstract`` is sent as a single
    user message, with ``AbstractSummary`` as the response schema.

    Args:
        abstract: abstract text; it is concatenated onto the prompt, so it
            must be a ``str``.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    user_message = {
        "role": "user",
        "content": prompt + " The following is the abstract: " + abstract,
    }
    response = client.beta.chat.completions.parse(
        messages=[user_message],
        model="gpt-4o-mini",
        response_format=AbstractSummary,
    )
    first_choice = response.choices[0]
    return first_choice.message.parsed

def get_relation(finding1, finding2):
    """Ask the chat model how two finding statements relate to each other.

    The module-level ``relation_prompt`` followed by both statements is sent
    as a single user message, with ``FindingsRelation`` as the response schema.

    Args:
        finding1: text presented to the model as the first statement (``str``).
        finding2: text presented to the model as the second statement (``str``).

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    content = (
        relation_prompt
        + " The first statement is: " + finding1
        + " The second statement is: " + finding2
    )
    response = client.beta.chat.completions.parse(
        messages=[{"role": "user", "content": content}],
        model="gpt-4o-mini",
        response_format=FindingsRelation,
    )
    first_choice = response.choices[0]
    return first_choice.message.parsed

def print_summary(summary):
    """Print a parsed summary: theme, keywords, then one line per finding.

    Args:
        summary: object exposing ``theme``, ``keywords`` and ``summaries``;
            each element of ``summaries`` exposes ``bullet`` and ``effect``.
    """
    print(f"Theme: {summary.theme}")
    print(f"Keywords: {summary.keywords}")
    for finding in summary.summaries:
        line = f"- {finding.bullet} | Effect: {finding.effect}"
        print(line)

In [97]:
# Create the result columns "summaries", "theme" and "keywords", filled with None,
# so the summarization loop can assign into them cell by cell.
for new_column in ('summaries', 'theme', 'keywords'):
    df[new_column] = None

In [46]:
# Summarize every abstract and store the structured result on the same row.
# NOTE(review): rows are read by position (iloc) but written by label (at);
# the two agree only while df keeps its default 0..n-1 index.
for i in range(len(df)):
    row = df.iloc[i]
    print(i, row['title'])
    parsed = get_summary(row['abstract'])
    # add result to df
    findings = []
    for item in parsed.summaries:
        findings.append({"summary": item.bullet, "effect": item.effect})
    df.at[i, 'summaries'] = findings
    df.at[i, 'theme'] = parsed.theme
    df.at[i, 'keywords'] = parsed.keywords

0 Theory of Mind in Human-AI Interaction
1 Assessing Human-AI Interaction Early through Factorial Surveys: A Study on the Guidelines for Human-AI Interaction
2 Spiritual AI: Exploring the Possibilities of a Human-AI Interaction Beyond Productive Goals
3 Development and translation of human-AI interaction models into working prototypes for clinical decision-making
4 Exploration of Explainable AI for Trust Development on Human-AI Interaction
5 Re-examining User Burden in Human-AI Interaction: Focusing on a Domain-Specific Approach
6 A Systematic Review on Fostering Appropriate Trust in Human-AI Interaction: Trends, Opportunities and Challenges
7 Measuring User Experience Inclusivity in Human-AI Interaction via Five User Problem-Solving Styles
8 Human-AI interaction: intermittent, continuous, and proactive
9 Designing for Human-AI Interaction: Comparing Intermittent, Continuous, and Proactive Interactions for a Music Application
10 "Help Me Help the AI": Understanding How Explainability C

In [50]:
# Persist the enriched table. The list-valued cells ('summaries', 'keywords') end up as text
# in the CSV, which is why later cells parse 'summaries' with ast.literal_eval.
df.to_csv('data/sample-summary.csv', index=False)

In [166]:
# Reload the table written by the summarization step; this rebinds `df`.
df = pd.read_csv('data/sample-summary.csv')

In [167]:
# DOIs of every paper in the table; used to keep only references that point at papers in this dataset.
doi_list = df['doi'].to_list()

def get_name(doi):
    """Return the title of the first row in ``df`` whose 'doi' equals ``doi``.

    Raises:
        IndexError: if no row has that DOI.
    """
    matching_titles = df.loc[df['doi'] == doi, 'title']
    return matching_titles.values[0]

In [190]:
import networkx as nx
import json
import ast

# for index, row in df.iterrows():
#     # print(row.references)
#     # print(ast.literal_eval(row.references))
#     references = ast.literal_eval(row.references)
#     for ref in references:
#         if(ref in doi_list):
#             graph["links"].append({"source": row.doi, "target": ref})

# Graph of papers and their findings.
#   - paper node:   id = DOI,            kind="paper"
#   - finding node: id = "<DOI>:<index>", kind="finding"
#   - paper-paper edge (kind="paper") for every reference that is itself in the dataset
G = nx.Graph()

# add papers and findings
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    doi = paper['doi']
    # 'summaries' was round-tripped through CSV, so it holds the text form of a list of dicts.
    summaries = ast.literal_eval(paper['summaries'])
    # Distinct index name: the original reused the outer loop variable here.
    for finding_idx, summary in enumerate(summaries):
        G.add_node(f"{doi}:{finding_idx}", name=summary['summary'], effect=summary['effect'], paper=doi, kind="finding")

    # The value read from the row may be a NumPy scalar, which json.dumps rejects when
    # this graph is exported later; convert it to a plain Python number when possible.
    year = paper['year']
    if hasattr(year, 'item'):
        year = year.item()
    G.add_node(f"{doi}", name=get_name(doi), kind="paper", summaries=len(summaries), theme=paper['theme'], keywords=paper['keywords'], year=year)

# add literal references
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    # 'references' is also stored as text in the CSV and parsed back into a Python list.
    references = ast.literal_eval(paper['references'])
    for ref in references:
        # Only link to papers that are part of this dataset.
        if ref in doi_list:
            G.add_edge(f"{paper['doi']}", f"{ref}", kind="paper")

In [None]:
# Snapshot the edge list first: the loop below adds edges to G while iterating.
all_edges = list(G.edges())
# Keep only paper-to-paper edges.
edges = [(src, dst) for src, dst in all_edges if G[src][dst]['kind'] == 'paper']

for u, v in edges:
    nu = G.nodes[u]
    nv = G.nodes[v]
    print("Analyzing:", nu['name'], "->", nv['name'])
    # Compare every finding of u with every finding of v.
    for i in range(nu['summaries']):
        finding_u = f"{u}:{i}"
        for j in range(nv['summaries']):
            finding_v = f"{v}:{j}"
            # v's finding is passed as the first statement, u's as the second.
            relation = get_relation(G.nodes[finding_v]['name'], G.nodes[finding_u]['name'])
            if relation.relation_type == "unrelated":
                continue
            G.add_edge(
                finding_u,
                finding_v,
                relation=relation.relation_type,
                explanation=relation.explanation,
                polarity=relation.polarity,
                kind="finding",
            )

In [191]:
import concurrent.futures
from functools import partial

def process_edge(G, u, v):
    """Compare every finding of node ``u`` with every finding of node ``v``.

    Finding nodes are looked up as ``"<u>:<i>"`` and ``"<v>:<j>"``, where the
    counts come from each paper node's ``summaries`` attribute. ``G`` is only
    read, never modified.

    Args:
        G: graph whose ``nodes`` mapping holds the paper and finding attributes.
        u: id of the first paper node.
        v: id of the second paper node.

    Returns:
        A list of ``(u_finding_id, v_finding_id, attrs)`` tuples, one for each
        pair whose relation type is not ``"unrelated"``.
    """
    node_u = G.nodes[u]
    node_v = G.nodes[v]
    print(f"Analyzing: {node_u['name']} -> {node_v['name']}")
    found = []
    for i in range(node_u['summaries']):
        key_u = f"{u}:{i}"
        for j in range(node_v['summaries']):
            key_v = f"{v}:{j}"
            # v's finding is passed as the first statement, u's as the second.
            relation = get_relation(G.nodes[key_v]['name'], G.nodes[key_u]['name'])
            if relation.relation_type == "unrelated":
                continue
            attrs = {
                'relation': relation.relation_type,
                'explanation': relation.explanation,
                'polarity': relation.polarity,
                'kind': "finding",
            }
            found.append((key_u, key_v, attrs))
    return found

def parallel_process_edges(G, edges, max_workers=None):
    """Run ``process_edge`` for every ``(u, v)`` pair on a thread pool.

    Args:
        G: graph handed to each ``process_edge`` call.
        edges: iterable of ``(u, v)`` node-id pairs.
        max_workers: forwarded to ``ThreadPoolExecutor``.

    Returns:
        One flat list with the results of all successful calls, in completion
        order. A pair whose call raises is reported with ``print`` and
        contributes nothing to the result.
    """
    collected = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        worker = partial(process_edge, G)
        # Remember which pair each future belongs to, for error reporting.
        pending = {}
        for u, v in edges:
            pending[pool.submit(worker, u, v)] = (u, v)

        for finished in concurrent.futures.as_completed(pending):
            u, v = pending[finished]
            try:
                collected.extend(finished.result())
            except Exception as exc:
                print(f"Edge {u} -> {v} generated an exception: {exc}")

    return collected

# Main execution
# Take a snapshot of G's edges and keep only the paper-to-paper ones.
all_edges = list(G.edges())
edges = [(src, dst) for src, dst in all_edges if G[src][dst]['kind'] == 'paper']

# Compute the finding-to-finding relations on worker threads, then
# add all new edges to the graph in one step from this thread.
new_edges = parallel_process_edges(G, edges)
G.add_edges_from(new_edges)

Analyzing: Theory of Mind in Human-AI Interaction -> Towards Mutual Theory of Mind in Human-AI Interaction: How Language Reflects What Students Perceive About a Virtual Teaching Assistant
Analyzing: Assessing Human-AI Interaction Early through Factorial Surveys: A Study on the Guidelines for Human-AI Interaction -> Re-examining Whether, Why, and How Human-AI Interaction Is Uniquely Difficult to Design
Analyzing: Assessing Human-AI Interaction Early through Factorial Surveys: A Study on the Guidelines for Human-AI Interaction -> Guidelines for Human-AI Interaction
Analyzing: Assessing Human-AI Interaction Early through Factorial Surveys: A Study on the Guidelines for Human-AI Interaction -> Measuring User Experience Inclusivity in Human-AI Interaction via Five User Problem-Solving Styles
Analyzing: Assessing Human-AI Interaction Early through Factorial Surveys: A Study on the Guidelines for Human-AI Interaction -> Understanding the Dynamics in Creating Domain-Specific AI Design Guidelin

In [195]:
# Link every paper node to each of its finding nodes ("<doi>:<index>").
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    summaries = ast.literal_eval(paper.summaries)
    for finding_idx in range(len(summaries)):
        G.add_edge(f"{paper.doi}", f"{paper.doi}:{finding_idx}", kind="paper_to_finding")

# Write the whole graph in node-link form, with the edge list stored under the "edges" key.
graph_data = nx.node_link_data(G, edges="edges")
with open('graph-relation.json', 'w') as out_file:
    serialized = json.dumps(graph_data)
    out_file.write(serialized)

In [206]:
import networkx as nx
import json
import ast

# for index, row in df.iterrows():
#     # print(row.references)
#     # print(ast.literal_eval(row.references))
#     references = ast.literal_eval(row.references)
#     for ref in references:
#         if(ref in doi_list):
#             graph["links"].append({"source": row.doi, "target": ref})

# Second graph: papers, findings, in-dataset references and paper-to-finding links.
# It contains no finding-to-finding relation edges; this cell never adds any.
H = nx.Graph()

# add papers and findings
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    doi = paper['doi']
    # 'summaries' is stored as text in the CSV and parsed back into a list of dicts.
    summaries = ast.literal_eval(paper['summaries'])
    # Distinct index name: the original reused the outer loop variable here.
    for finding_idx, summary in enumerate(summaries):
        H.add_node(f"{doi}:{finding_idx}", name=summary['summary'], effect=summary['effect'], paper=doi, kind="finding")
    # .item() turns the year into a plain Python number so json.dumps accepts it.
    H.add_node(f"{doi}", name=get_name(doi), kind="paper", summaries=len(summaries), theme=paper['theme'], keywords=paper['keywords'], year=paper['year'].item())

# add literal references
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    references = ast.literal_eval(paper['references'])
    for ref in references:
        # Only link to papers that are part of this dataset.
        if ref in doi_list:
            H.add_edge(f"{paper['doi']}", f"{ref}", kind="paper")

# add paper to finding
for row_idx in range(len(df)):
    paper = df.iloc[row_idx]
    summaries = ast.literal_eval(paper['summaries'])
    for finding_idx in range(len(summaries)):
        H.add_edge(f"{paper['doi']}", f"{paper['doi']}:{finding_idx}", kind="paper_to_finding")
        print(paper['doi'], f"{paper['doi']}:{finding_idx}")

# The original cell read these from G and exported G, leaving H unused.
# NOTE(review): assumes H was the intended output of this cell — confirm before applying.
all_edges = [(u, v) for u, v, d in H.edges(data=True)]
edges = [(u, v) for u, v in all_edges if H[u][v]['kind'] == 'paper']

# export to json
graph_data = nx.node_link_data(H, edges="edges")
with open('graph-2.json', 'w') as f:
    f.write(json.dumps(graph_data))

10.1145/3613905.3636308 10.1145/3613905.3636308:0
10.1145/3613905.3636308 10.1145/3613905.3636308:1
10.1145/3613905.3636308 10.1145/3613905.3636308:2
10.1145/3511605 10.1145/3511605:0
10.1145/3511605 10.1145/3511605:1
10.1145/3511605 10.1145/3511605:2
10.1145/3613905.3650743 10.1145/3613905.3650743:0
10.1145/3613905.3650743 10.1145/3613905.3650743:1
10.1145/3613905.3650743 10.1145/3613905.3650743:2
10.1145/3643834.3660697 10.1145/3643834.3660697:0
10.1145/3643834.3660697 10.1145/3643834.3660697:1
10.1145/3639592.3639625 10.1145/3639592.3639625:0
10.1145/3639592.3639625 10.1145/3639592.3639625:1
10.1145/3639592.3639625 10.1145/3639592.3639625:2
10.1145/3613905.3638186 10.1145/3613905.3638186:0
10.1145/3613905.3638186 10.1145/3613905.3638186:1
10.1145/3613905.3638186 10.1145/3613905.3638186:2
10.1145/3696449 10.1145/3696449:0
10.1145/3696449 10.1145/3696449:1
10.1145/3696449 10.1145/3696449:2
10.1145/3696449 10.1145/3696449:3
10.1145/3663740 10.1145/3663740:0
10.1145/3663740 10.1145/3663