# Data Analysis for Relational AI Group Experiment

Authors: Elijah Claggett, Faria Huq

In [None]:
# Important Variables

# Location of Empirica data files (this directory is scanned for *.json files below)
data_path = './data/'

In [None]:
# Imports
from scipy.stats import ttest_ind, bootstrap
from convokit import PolitenessStrategies
from datetime import datetime
from colorama import Fore
from scipy import stats
import seaborn as sns
import pandas as pd
import numpy as np
import requests
import spacy
import json
import re
import math
import os
from utils import prettyPrintMulti, prettyPrintList, prettyPrintChats, bf, readDataFiles, empiricaColumnExists, getMultiGameData, getMsgCount
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import networkx as nx
from itertools import combinations

## 1) Load data

In [None]:
# Collect the JSON files found in data_path, ordered by filename, and list them
trials = sorted(entry for entry in os.listdir(data_path) if entry.endswith('.json'))

print(bf('All trials:'))
prettyPrintList(trials)

In [None]:
# Global switch for the verbose prints used throughout the notebook
debugPrintFlag = False
game_dict_multi, player_dict_multi, stage_dict_multi, playerStage_dict_multi = readDataFiles(trials, data_path)

# For every game keep the 'val' of the last 'chatParticipants' entry.
# It is later JSON-decoded and read as {participant: {"opinion": ...}}.
participant_opinions_by_game = {}
for gameID in game_dict_multi:
    try:
        participant_opinions_by_game[gameID] = game_dict_multi[gameID]['chatParticipants'][-1][-1]['val']
    except Exception as err:
        # Best-effort: a game without usable chatParticipants data is reported and skipped.
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working, and the
        # reason is printed so missing data can be told apart from a real bug.
        print('failed for', gameID, f'({type(err).__name__}: {err})')

In [None]:
def truncate_to_microseconds(ts):
    """Normalise an ISO-8601 timestamp string for ``datetime.fromisoformat``.

    Fractional seconds are cut to at most six digits (microseconds) and a
    trailing ``Z`` is rewritten as ``+00:00``.

    Args:
        ts: Timestamp string, e.g. ``"2024-05-01T12:00:00.123456789Z"``.

    Returns:
        The timestamp with the fraction truncated. A missing or ``Z`` zone
        becomes ``+00:00``; an explicit offset that follows the fraction
        (e.g. ``+00:00``) is kept as written instead of being mangled.
    """
    if "." not in ts:
        return ts.replace("Z", "+00:00")

    base, _, rest = ts.partition(".")
    # Split `rest` into the leading run of digits (the fraction) and whatever
    # zone designator follows it.
    fraction_len = len(rest) - len(rest.lstrip("0123456789"))
    digits, zone = rest[:fraction_len], rest[fraction_len:]
    offset = "+00:00" if zone in ("", "Z") else zone
    return f"{base}.{digits[:6]}{offset}"

In [None]:
# filter out the stage times 
import numpy as np

# Per trial: list of {'name', 'startTime'} where startTime is Unix epoch
# milliseconds, or np.inf when the 'started' cell is NaN.
updated_stage_dict_multi = {}
required_columns = ('name', 'ended', 'started')
for each_trial in stage_dict_multi.keys():
    trial_stages = stage_dict_multi[each_trial]
    # Trials missing any of the required columns are left out entirely
    if any(column not in trial_stages.keys() for column in required_columns):
        continue

    stage_list = []
    for each_entry_name, each_entry_time in zip(trial_stages['name'], trial_stages['started']):
        never_started = isinstance(each_entry_time, float) and math.isnan(each_entry_time)
        if never_started:
            iso_unix_ms = np.inf
        else:
            iso_dt = datetime.fromisoformat(truncate_to_microseconds(each_entry_time[0]['dt']))
            iso_unix_ms = int(iso_dt.timestamp() * 1000)
        stage_list.append({'name': each_entry_name[0]['val'], 'startTime': iso_unix_ms})
    updated_stage_dict_multi[each_trial] = stage_list
# print(updated_stage_dict_multi)

## 2) Clean data

Remove:

- People who didn't finish the entire experiment
- Test data (eli / faria)

In [None]:
# Game IDs skipped by the analysis loops below (presumably local test runs — confirm)
exclude_local_trails = ['01JTP5XG9V373HW0JF06P59GNZ', '01JTP48A46PS57YC8B2MC6T1MT', '01JTP4B3CMETA57BXMM3E66WFN', '01JTQ4YH098ZARPFM1TSYTR1XP', '01JTQ6FRYG87XERGETTKVQW098', '01JTQ7FBBK75WQ7RXXFVDRT1NV']

In [None]:
# Data cleaning

notFinishedStudy = 0
totalRecruited = 0
for gameID in player_dict_multi:
    players = player_dict_multi[gameID]
    for participantID in players.index:
        totalRecruited += 1
        row = players.loc[participantID]

        identifier = row['participantIdentifier']
        if type(identifier) is list:
            eID = json.loads(identifier[-1]['val'])

        # Keep anyone whose row has a 'summary' entry.
        # NOTE(review): `in` on a row tests the row's labels, so this looks like a
        # per-game "column exists" check rather than a per-participant one — confirm.
        if 'summary' in row:
            continue

        # No summary: the participant did not complete the entire experiment
        players.drop([participantID], inplace=True)
        notFinishedStudy += 1


In [None]:
# Summarize the data cleaning process

def getSummaries(p):
    """Return the JSON-decoded 'val' of the last 'summary' entry of p.

    Returns None when empiricaColumnExists reports no 'summary' for p.
    """
    if not empiricaColumnExists(p, 'summary'):
        return None
    last_entry = p['summary'][-1]
    return json.loads(last_entry['val'])

summaries = getMultiGameData(getSummaries, game_dict_multi, player_dict_multi)

# Count the participant entries of every game that is not excluded
numParticipants = 0
for gameID in summaries:
    if gameID not in exclude_local_trails:
        numParticipants += sum(1 for _ in summaries[gameID])

if debugPrintFlag:
    report_lines = (
        f'Total participants recruited: {totalRecruited}',
        f'Total participants kept: {numParticipants}',
        '---------------------------',
        f'Total participants not finish study: {notFinishedStudy}',
    )
    for line in report_lines:
        print(line)

In [None]:
# Get Prolific metadata

def getParticipantIdentifier(p):
    """Return the JSON-decoded 'val' of the last 'participantIdentifier' entry, or None if absent."""
    if not empiricaColumnExists(p, 'participantIdentifier'):
        return None
    return json.loads(p['participantIdentifier'][-1]['val'])

def getProlificSession(p):
    """Return the JSON-decoded 'val' of the last 'sessionID' entry, or None if absent."""
    if not empiricaColumnExists(p, 'sessionID'):
        return None
    return json.loads(p['sessionID'][-1]['val'])
def getProlificStudy(p):
    """Return the JSON-decoded 'val' of the last 'studyID' entry, or None if absent."""
    if not empiricaColumnExists(p, 'studyID'):
        return None
    return json.loads(p['studyID'][-1]['val'])


pIDs = getMultiGameData(getParticipantIdentifier, game_dict_multi, player_dict_multi)
sessionIDs = getMultiGameData(getProlificSession, game_dict_multi, player_dict_multi)
studyIDs = getMultiGameData(getProlificStudy, game_dict_multi, player_dict_multi)

# Lookup tables keyed by participantID: first collected identifier / session / study
e2p, e2session, e2study = {}, {}, {}
for gameID in pIDs:
    for participantID in pIDs[gameID]:
        e2p[participantID] = pIDs[gameID][participantID][0]

    # Session and study IDs are optional: only record them when one was collected
    for participantID in sessionIDs[gameID]:
        found_sessions = sessionIDs[gameID][participantID]
        if len(found_sessions) > 0:
            e2session[participantID] = found_sessions[0]

    for participantID in studyIDs[gameID]:
        found_studies = studyIDs[gameID][participantID]
        if len(found_studies) > 0:
            e2study[participantID] = found_studies[0]


## 3) Utility Functions to load Tajriba Data Log

In [None]:
# Participants

def getChatIdentities(p):
    """Return the JSON-decoded 'val' of the last 'selfIdentity' entry, or None if absent."""
    if not empiricaColumnExists(p, 'selfIdentity'):
        return None
    return json.loads(p['selfIdentity'][-1]['val'])

chatIdentities = getMultiGameData(getChatIdentities, game_dict_multi, player_dict_multi)

# Debug listing: last collected chat identity of every participant, per trial
for gameID in chatIdentities:
    if gameID in exclude_local_trails:
        continue
    if debugPrintFlag:
        print(bf('Trial:'), gameID)
    for participantID in chatIdentities[gameID]:
        if not debugPrintFlag:
            continue
        latest_identity = chatIdentities[gameID][participantID][-1]
        print(bf('Participant:'), e2p[participantID], bf('Chat Identity:'), latest_identity)

In [None]:
# Rooms joined by each participant after the game became ready

def getjoinedrooms(p):
    """Return the latest 'joinedRooms' entry recorded after the first 'ready' step.

    Args:
        p: Player record; reads its 'step', 'joinedRooms' and 'participantID' entries.

    Returns:
        A dict with 'ID', 'joinedRooms', 'time' and 'minutes_after_game_start',
        or None when the 'step' or 'joinedRooms' data is missing, when no step
        value contains 'ready', or when the latest join is not after that step.
    """
    if not empiricaColumnExists(p, 'step'):
        return None
    # Without this guard a player who never joined a room fails on the loop below
    if not empiricaColumnExists(p, 'joinedRooms'):
        return None

    # Reference time: the first step whose value mentions 'ready'
    t0 = None
    for each_step in p['step']:
        if 'ready' in each_step['val']:
            t0 = datetime.fromisoformat(truncate_to_microseconds(each_step['dt']))
            break
    if t0 is None:
        # No 'ready' step recorded, so there is nothing to measure against
        return None

    latest_entry = None
    max_time = None
    for each_entry in p['joinedRooms']:
        t1 = datetime.fromisoformat(truncate_to_microseconds(each_entry['dt']))
        if max_time is None or t1 > max_time:
            max_time = t1
            latest_entry = each_entry

    if max_time is None or max_time <= t0:
        return None

    duration_minutes = round((max_time - t0).total_seconds() / 60, 2)
    return {
        'ID': p['participantID'][-1]['val'],
        'joinedRooms': latest_entry['val'],
        'time': latest_entry['dt'],
        'minutes_after_game_start': duration_minutes
    }

joinedrooms = getMultiGameData(getjoinedrooms, game_dict_multi, player_dict_multi)

# Debug listing: joined-room info collected for every participant, per trial
for gameID in joinedrooms:
    if gameID in exclude_local_trails:
        continue
    if debugPrintFlag:
        print(bf('Trial:'), gameID)
    for participantID in joinedrooms[gameID]:
        if not debugPrintFlag:
            continue
        print(bf('Participant:'), e2p[participantID], bf('joined room:'), joinedrooms[gameID][participantID])


In [None]:
# Tutorial Stage and Duration

def getTutorialDuration(p):
    """Return [participantID value, minutes between the last 'participantID' and 'passedTutorial' entries].

    The duration is rounded to two decimals. Returns None when either column is missing.
    """
    for column in ('participantID', 'passedTutorial'):
        if not empiricaColumnExists(p, column):
            return None

    joined = p['participantID'][-1]
    passed = p['passedTutorial'][-1]

    started_at = datetime.fromisoformat(truncate_to_microseconds(joined['dt']))
    passed_at = datetime.fromisoformat(truncate_to_microseconds(passed['dt']))

    elapsed_minutes = (passed_at - started_at).total_seconds() / 60
    return [joined['val'], round(elapsed_minutes, 2)]
        

def getTutorialProgress(p):
    """Return [participantID value, last 'LastTutorialStage' entry, minutes to reach it].

    The middle element is the whole last log entry (not only its 'val'); the
    duration is measured from the last 'participantID' entry and rounded to two
    decimals. Returns None when either column is missing.
    """
    for column in ('participantID', 'LastTutorialStage'):
        if not empiricaColumnExists(p, column):
            return None

    joined = p['participantID'][-1]
    stage = p['LastTutorialStage'][-1]

    started_at = datetime.fromisoformat(truncate_to_microseconds(joined['dt']))
    reached_at = datetime.fromisoformat(truncate_to_microseconds(stage['dt']))
    elapsed_minutes = (reached_at - started_at).total_seconds() / 60

    return [joined['val'], stage, round(elapsed_minutes, 2)]

# Initial survey results
def getSurveyResults(p):
    """Return the JSON-decoded 'val' of the last 'surveyAnswers' entry, or None if absent."""
    if not empiricaColumnExists(p, 'surveyAnswers'):
        return None
    return json.loads(p['surveyAnswers'][-1]['val'])

surveyResults = getMultiGameData(getSurveyResults, game_dict_multi, player_dict_multi)
TutorialProgress = getMultiGameData(getTutorialProgress, game_dict_multi, player_dict_multi)
TutorialDurations = getMultiGameData(getTutorialDuration, game_dict_multi, player_dict_multi)

# This report is always printed: force the debug flag on for the duration of the loop
debugPrintFlag = True

for gameID in surveyResults:
    if gameID in exclude_local_trails:
        continue
    if debugPrintFlag:
        print(bf('Trial:'), gameID,  game_dict_multi[gameID]['trial_name'].iloc[0])
        print('\n----Trial Started----\n')

    for participantID in surveyResults[gameID]:
        # Identifiers '2' and '24' are left out of the report
        if e2p[participantID] in ('2', '24'):
            continue
        answers = surveyResults[gameID][participantID]
        # Nothing to report for someone with neither survey answers nor tutorial progress
        if len(answers) == 0 and len(TutorialProgress[gameID][participantID]) == 0:
            continue
        if not debugPrintFlag:
            continue

        print(bf('Participant:'), e2p[participantID])
        # $1 without survey answers, $5 with them
        print('payment: $1' if len(answers) == 0 else 'payment: $5')
        print(bf('Survey Results:'), answers)
        print(bf('Tutorial Progress with stage:'), TutorialProgress[gameID][participantID])
        print(bf('Tutorial Duration (minute):'), TutorialDurations[gameID][participantID])
        print('\n--------\n')

    print('\n----Trial Ended----\n')

debugPrintFlag = False

In [None]:
# Chat messages

def getCreatedRooms(p):
    """Return the rooms this player created after their first 'ready' step.

    Args:
        p: Player record; reads its 'step', 'createRoom', 'participantIdx'
            and 'participantID' entries.

    Returns:
        A list of dicts ('ID', 'createdRoom', 'participantIdx', 'time'), one per
        'createRoom' entry logged after the 'ready' step. None when a required
        column is missing or no step value contains 'ready'.
    """
    for column in ('step', 'createRoom', 'participantIdx'):
        if not empiricaColumnExists(p, column):
            return None

    # Reference time: the first step whose value mentions 'ready'
    t0 = None
    for each_step in p['step']:
        if 'ready' in each_step['val']:
            t0 = datetime.fromisoformat(truncate_to_microseconds(each_step['dt']))
            break
    if t0 is None:
        # Previously this fell through to an unbound `t0` below
        return None

    results = []
    for each_entry in p['createRoom']:
        t1 = datetime.fromisoformat(truncate_to_microseconds(each_entry['dt']))
        if t1 > t0:
            results.append(
                {
                    'ID': p['participantID'][-1]['val'],
                    'createdRoom': each_entry['val'],
                    'participantIdx': p['participantIdx'][-1]['val'],
                    'time': each_entry['dt']
                }
            )

    return results

# Results of getCreatedRooms gathered via getMultiGameData (presumably keyed by game, then participant — confirm)
createdRooms = getMultiGameData(getCreatedRooms, game_dict_multi, player_dict_multi)

def getparticipantIdx(p):
    """Return a one-element list pairing the player's last 'participantID' and 'participantIdx' values."""
    entry = {
        'ID': p['participantID'][-1]['val'],
        'participantIdx': p['participantIdx'][-1]['val'],
    }
    return [entry]

participantIdx = getMultiGameData(getparticipantIdx, game_dict_multi, player_dict_multi)

# Chat rooms: per trial, print condition/topic and the message count of every room
for gameID in game_dict_multi:
    game = game_dict_multi[gameID]
    print(bf('Trial:'), gameID,  game['trial_name'].iloc[0])
    gameparams = json.loads(game['gameParams'][0][-1]['val'])
    topic_ID = json.loads(game['topic'][0][-1]['val'])
    print('condition', gameparams['condition'])
    print('topic', gameparams['topics'][int(topic_ID)])

    if gameID in exclude_local_trails:
        continue
    if 'chatRooms' not in game.keys():
        print('no chatroom found for', gameID)
        continue

    # A float cell here means the column exists but holds no value for this game
    if isinstance(game['chatRooms'][0], float):
        print('no chatroom found for', gameID)
        continue

    rooms = json.loads(game['chatRooms'][0][-1]['val'])
    if debugPrintFlag: print(bf('Trial:'), gameID)
    if debugPrintFlag: print(bf('Chat rooms:'), rooms)

    # participantIdx value -> {'participantIdx': [...], 'prolificID': ...}
    participantIdx_mapping = {}
    for participantID in participantIdx[gameID]:
        if len(participantIdx[gameID][participantID]) > 0:
            if debugPrintFlag: print('Participant', bf(e2p[participantID]), 'pid', participantIdx[gameID][participantID][-1])
            participantIdx_mapping[participantIdx[gameID][participantID][-1][-1]['participantIdx']] = {'participantIdx':participantIdx[gameID][participantID][-1], 'prolificID': e2p[participantID]}

    all_senders = set()
    message_count = 0
    for roomID in rooms:
        if debugPrintFlag: print('roomID', roomID)
        if debugPrintFlag: print(bf('#'+rooms[roomID]['title']), 'messages:')
        roomMessages = json.loads(game['chatChannel-'+roomID][0][-1]['val'])
        print(f'total message count in room: {roomID} is {len(roomMessages)}')
        stagetimeStamps = updated_stage_dict_multi[gameID]
        if debugPrintFlag: prettyPrintChats(roomMessages, stagetimeStamps)

        # Accumulate the per-game total (previously never updated, so it always printed 0)
        message_count += len(roomMessages)
        for message in roomMessages:
            all_senders.add(message['sender'])

    all_senders = list(all_senders)

    if debugPrintFlag: print('total message count', message_count)
    if debugPrintFlag: print('List of participants who participated in the group conversation')
    for each_sender in all_senders:
        # Senders without a participantIdx entry are skipped
        if each_sender not in participantIdx_mapping:
            continue
        if debugPrintFlag: print(participantIdx_mapping[each_sender])

In [None]:
# gameID -> list of {"stage": name, "rooms": {roomID: sorted senders}} built from chat messages
stage_room_senders = {}

for gameID in game_dict_multi:
    game = game_dict_multi[gameID]
    if gameID in exclude_local_trails or 'chatRooms' not in game:
        continue

    if isinstance(game['chatRooms'][0], float):
        continue

    rooms = json.loads(game['chatRooms'][0][-1]['val'])
    stagetimeStamps = updated_stage_dict_multi[gameID]

    # Index: stage -> roomID -> set of senders
    stage_data = defaultdict(lambda: defaultdict(set))

    for roomID in rooms:
        chat_key = 'chatChannel-' + roomID
        if chat_key not in game:
            continue
        messages = json.loads(game[chat_key][0][-1]['val'])

        for msg in messages:
            msg_time = msg['dt']
            sender = msg['sender']

            # Sender -1 is ignored
            if int(sender) == -1:
                continue

            # Locate the stage whose start/next-start window contains the message.
            # A message that falls in a 'ready' or 'transition' window is
            # attributed to the stage that follows it.
            stage_name = None
            for current, upcoming in zip(stagetimeStamps, stagetimeStamps[1:]):
                if current['startTime'] <= msg_time <= upcoming['startTime']:
                    stage_name = current['name']
                    if 'ready' in stage_name or 'transition' in stage_name:
                        stage_name = upcoming['name']
                    break

            if not stage_name:
                continue
            if 'ready' in stage_name or 'transition' in stage_name:
                continue
            stage_data[stage_name][roomID].add(sender)

    # Format for output
    if debugPrintFlag: print('gameID', bf(gameID))
    formatted_stages = [
        {
            "stage": stage_name,
            "rooms": {room: sorted(senders) for room, senders in room_senders.items()}
        }
        for stage_name, room_senders in stage_data.items()
    ]
    stage_room_senders[gameID] = formatted_stages

    # Only print the current game's stages
    if debugPrintFlag:
        print(json.dumps(formatted_stages, indent=2))

In [None]:
# attempted to get chatParticipants per stage numerally, but it did not work

# gameID -> list of {'stage': name, 'rooms': {room_id: [participant ids]}} taken from
# the last 'chatParticipants' log written before each transition/summary stage.
updated_stage_room_senders = {}
for gameID in game_dict_multi:
    if 'chatParticipants' not in game_dict_multi[gameID]:
        continue

    if debugPrintFlag:
        print('gameID', gameID)
        gameparams = json.loads(game_dict_multi[gameID]['gameParams'][0][-1]['val'])
        topic_ID = json.loads(game_dict_multi[gameID]['topic'][0][-1]['val'])
        print('condition', gameparams['condition'])
        print('topic', gameparams['topics'][int(topic_ID)])

    chatParticipantsLog = game_dict_multi[gameID]['chatParticipants']
    stageTimeStamps = updated_stage_dict_multi[gameID]
    sender_info_per_game = []

    # Step 1: parse every chatParticipants entry once and sort by timestamp.
    # Entries with a NaN or unparsable timestamp are dropped.
    all_logs = []
    for log in chatParticipantsLog[0]:
        try:
            if isinstance(log['dt'], float) and math.isnan(log['dt']):
                continue
            iso_dt = datetime.fromisoformat(truncate_to_microseconds(log['dt']))
            all_logs.append({'iso_unix_ms': int(iso_dt.timestamp() * 1000), 'val': log['val']})
        except (KeyError, TypeError, ValueError, AttributeError):
            continue

    all_logs.sort(key=lambda x: x['iso_unix_ms'])
    if debugPrintFlag: print('stage_room_senders', stage_room_senders[gameID])

    # Step 2: for each transition/summary stage, take the latest log before it.
    # The pre-parsed, sorted list is used so that a NaN entry can no longer be
    # compared with the previous entry's timestamp, and the early `break` is
    # valid regardless of the order the logs were stored in.
    for stage in stageTimeStamps[1:]:
        stage_name = stage['name']
        if 'transition' not in stage_name and 'summary' not in stage_name:
            continue
        stage_time = stage['startTime']

        latest_log = None
        for log in all_logs:
            if log['iso_unix_ms'] >= stage_time:
                break
            latest_log = log
        if latest_log is None:
            continue

        # Group participants by the room recorded for them in that log
        room_info = json.loads(latest_log['val'])
        room_entries = {}
        for participant_id in room_info:
            if '-1' in participant_id:
                continue
            room_id = str(room_info[participant_id]['room'])
            room_entries.setdefault(room_id, []).append(participant_id)

        # A transition stage is labelled with its name minus the 'transition-'
        # prefix; any other matched stage is labelled as the third discussion.
        if 'transition' in stage_name:
            stage_label = stage_name.replace('transition-', '', 1)
        else:
            stage_label = '"group-discussion-3"'
        sender_info_per_game.append({'stage': stage_label, 'rooms': room_entries})

    updated_stage_room_senders[gameID] = sender_info_per_game
    if debugPrintFlag: print('updated_stage_room_senders', updated_stage_room_senders[gameID])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
import hashlib

# Assign fixed colors to each participant using a hash
def get_color(participant_id):
    """Return a stable pseudo-random RGB tuple (floats in [0, 1)) for a participant id.

    The colour is derived from the MD5 digest of the id, so the same id always
    gets the same colour. A private ``random.Random`` instance is used so the
    module-level random generator is not reseeded as a side effect.

    Args:
        participant_id: Participant identifier as a string.
    """
    seed = int(hashlib.md5(participant_id.encode()).hexdigest(), 16)
    rng = random.Random(seed)
    return (rng.random(), rng.random(), rng.random())

# Fix vertical axis range so room boxes are vertically centered and evenly spaced
# Updated plot with participant IDs as text labels inside each circle


# Modified version to generate separate plots for each gameID

def plot_each_game_separately(data):
    """Draw one figure per game showing who shared a room in each stage.

    Stages run left to right and rooms are stacked top to bottom (ordered by
    int(room_id)); every participant is a labelled dot coloured by get_color.

    Args:
        data: Mapping game_id -> list of {"rooms": {room_id: [participant ids]}}.
            Participant ids must be strings. Games with an empty stage list
            are skipped, since there is nothing to lay out.
    """
    room_width = 3
    room_height = 1.5
    h_spacing = 4
    v_spacing = 2.5
    dot_radius = 0.12
    dot_margin = 0.2
    # Number of dots that fit side by side inside one room box
    max_per_row = int((room_width - 2 * dot_margin) / (2 * dot_radius))

    for game_id, stages in data.items():
        if not stages:
            # max() below would raise ValueError on an empty stage list
            continue

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title(f"Game: {game_id}", fontsize=12)

        max_rooms = max(len(stage["rooms"]) for stage in stages)
        ax.set_xlim(-1, len(stages) * h_spacing + 1)
        ax.set_ylim(-v_spacing * max_rooms + room_height, room_height + 1)
        ax.axis('off')

        for round_idx, stage_data in enumerate(stages):
            rooms = stage_data["rooms"]
            room_keys_sorted = sorted(rooms.items(), key=lambda x: int(x[0]))
            for room_idx, (room_id, participants) in enumerate(room_keys_sorted):
                x0 = round_idx * h_spacing
                y0 = -room_idx * v_spacing

                # Draw room
                rect = patches.FancyBboxPatch(
                    (x0, y0),
                    room_width,
                    room_height,
                    boxstyle="round,pad=0.02",
                    edgecolor='black',
                    facecolor='#f0f0f0',
                    linewidth=1
                )
                ax.add_patch(rect)

                # Draw participants with labels, wrapping to a new row when full
                for i, pid in enumerate(participants):
                    col = i % max_per_row
                    row = i // max_per_row
                    px = x0 + dot_margin + dot_radius + col * 2 * dot_radius
                    py = y0 + room_height - dot_margin - dot_radius - row * 2 * dot_radius
                    ax.add_patch(plt.Circle((px, py), dot_radius, color=get_color(pid)))
                    ax.text(px, py, pid, color='black', fontsize=6, ha='center', va='center')

                ax.text(x0 + room_width / 2, y0 + room_height + 0.1,
                        f"R{round_idx+1}-Room{room_id}",
                        ha='center', va='bottom', fontsize=8)

        # NOTE(review): every game writes to the same file, so only the last
        # game's figure survives on disk — confirm whether that is intended.
        plt.savefig('plot.jpg')
        plt.show()

# Room-layout figures are only drawn when debug output is enabled
if debugPrintFlag: plot_each_game_separately(updated_stage_room_senders)

In [None]:
# Color based on opinion
def opinion_color(opinion):
    """Map an opinion level (0-6, anything int() accepts) to a hex colour.

    0 is the 'Strongly Disagree' end, 3 is neutral and 6 the 'Strongly Agree'
    end. Any other level prints a notice and gets the grey fallback colour.
    """
    palette = {
        0: "#b2182b",  # Strongly Disagree
        1: "#ef8a62",
        2: "#fddbc7",
        3: "#f7f7f7",  # Neutral
        4: "#d1e5f0",
        5: "#67a9cf",
        6: "#2166ac",  # Strongly Agree
    }
    level = int(opinion)
    if level in palette:
        return palette[level]

    print('assigning fallback color', level)
    return "#e0e0e0"  # fallback / missing

In [None]:
# Determine edge weight based on opinion compatibility
def edge_weight(op1, op2):
    """Score the opinion compatibility of two participants.

    Returns 0 when either opinion is the neutral value 3, 1 when both fall in
    the same non-neutral group ({0, 1, 2} or {4, 5, 6}), and -1 otherwise.
    """
    first, second = int(op1), int(op2)
    if 3 in (first, second):
        return 0

    disagree_side = (0, 1, 2)
    agree_side = (4, 5, 6)
    both_disagree = first in disagree_side and second in disagree_side
    both_agree = first in agree_side and second in agree_side
    return 1 if (both_disagree or both_agree) else -1

# Plot network graphs
def plot_graph_networks(data, opinions_data):
    """Draw, per game, one network graph per stage of who shared a room.

    Room members are laid out on a circle per room and fully connected; nodes
    are coloured by opinion_color and edges by edge_weight (green > 0,
    red < 0, grey = 0).

    Args:
        data: Mapping game_id -> list of {"rooms": {room_id: [participant ids]}}.
        opinions_data: Mapping game_id -> JSON string that decodes to
            {participant id: {"opinion": ...}}. Games missing here are skipped,
            as are games with an empty stage list.
    """
    for game_id, stages in data.items():
        if game_id not in opinions_data:
            continue
        if not stages:
            # A figure with zero columns cannot be created
            continue

        # Decode once per game instead of once per node and per edge
        opinions = json.loads(opinions_data[game_id])

        fig, axs = plt.subplots(1, len(stages), figsize=(5 * len(stages), 5))
        if len(stages) == 1:
            axs = [axs]

        # Read the params of the game being plotted: the loop variable, not
        # whatever a global `gameID` happens to hold from an earlier cell
        gameparams = json.loads(game_dict_multi[game_id]['gameParams'][0][-1]['val'])
        topic_ID = json.loads(game_dict_multi[game_id]['topic'][0][-1]['val'])

        fig.suptitle(f"Game: {game_id}, \nCondition: {gameparams['condition']}, \nTopic: {gameparams['topics'][int(topic_ID)]}", fontsize=14)

        for round_idx, stage_data in enumerate(stages):
            G = nx.Graph()
            pos = {}
            node_colors = []

            room_offset = 0
            for room_id, participants in sorted(stage_data["rooms"].items(), key=lambda x: int(x[0])):
                # Rooms are spread horizontally; members sit on a circle of radius 2
                center_x, center_y = room_offset * 6, 0
                for i, pid in enumerate(participants):
                    pid_str = str(pid)
                    node_id = f"{round_idx}_{pid_str}"
                    opinion = opinions[pid_str]["opinion"]
                    G.add_node(node_id, opinion=opinion, label=pid_str)
                    angle = 2 * np.pi * i / len(participants)
                    pos[node_id] = (center_x + 2 * np.cos(angle), center_y + 2 * np.sin(angle))
                    node_colors.append(opinion_color(opinion))

                # Connect every pair of room members, weighted by compatibility
                for first, second in combinations(participants, 2):
                    weight = edge_weight(opinions[str(first)]["opinion"], opinions[str(second)]["opinion"])
                    G.add_edge(f"{round_idx}_{str(first)}", f"{round_idx}_{str(second)}", weight=weight)

                room_offset += 1

            ax = axs[round_idx]
            weights = [G[u][v]['weight'] for u, v in G.edges()]
            edge_colors = ['green' if w > 0 else 'red' if w < 0 else 'grey' for w in weights]
            nx.draw(
                G,
                pos,
                ax=ax,
                with_labels=True,
                labels=nx.get_node_attributes(G, "label"),
                node_color=node_colors,
                edge_color=edge_colors,
                node_size=500,
                font_size=8
            )
            edge_labels = nx.get_edge_attributes(G, "weight")
            nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, ax=ax)
            ax.set_title(f"Round {round_idx + 1}")

        plt.tight_layout()
        plt.show()

# Network figures are only drawn when debug output is enabled
if debugPrintFlag: plot_graph_networks(updated_stage_room_senders, participant_opinions_by_game)


# NetworkX Metric Calculation

In [None]:
def extract_node_scores(game_id, opinions_data):
    """Return {int participant id: opinion - 3} for one game.

    opinions_data[game_id] is a JSON string decoding to
    {participant id: {"opinion": number}}. Ids containing '-1' are left out.
    Subtracting 3 centres the scale on its neutral value.
    """
    decoded = json.loads(opinions_data[game_id])
    return {
        int(pid): entry['opinion'] - 3
        for pid, entry in decoded.items()
        if '-1' not in pid
    }

def extract_round_groupings(stage_data):
    """Return the stage's rooms as lists of int participant ids, one list per room, in dict order."""
    groupings = []
    for members in stage_data["rooms"].values():
        groupings.append([int(member) for member in members])
    return groupings

def analyze_round(node_scores, round_data):
    """Compute grouping metrics for one round.

    Args:
        node_scores: Mapping participant id -> numeric score.
        round_data: List of rooms, each a list of participant ids.

    Returns:
        Dict with
        - "rdr": mean within-room score distance / mean distance over all pairs,
        - "assortativity": numeric assortativity of the room co-membership graph,
        - "norm_sep": mean distance between room means / mean distance over all pairs,
        - "wb_ratio": mean within-room distance / mean distance between room means.
        Each ratio is 0 when its denominator is not positive.
    """
    def score_gap(pair):
        left, right = pair
        return abs(node_scores[left] - node_scores[right])

    def mean_or_zero(values):
        return np.mean(values) if values else 0

    def ratio_or_zero(numerator, denominator):
        return numerator / denominator if denominator > 0 else 0

    # Baseline: average score distance over every pair in the population
    population_gaps = [score_gap(pair) for pair in combinations(list(node_scores.keys()), 2)]
    pop_avg_distance = mean_or_zero(population_gaps)

    # Average score distance over pairs that shared a room
    within_gaps = []
    for room in round_data:
        if len(room) > 1:
            within_gaps.extend([score_gap(pair) for pair in combinations(room, 2)])
    avg_within_group_distance = mean_or_zero(within_gaps)

    # Graph with an edge between every two participants of the same room
    graph = nx.Graph()
    graph.add_nodes_from(node_scores.keys())
    for room in round_data:
        for left, right in combinations(room, 2):
            graph.add_edge(left, right)
    nx.set_node_attributes(graph, node_scores, "ideology")
    assortativity = nx.numeric_assortativity_coefficient(graph, "ideology")

    # Separation between rooms, measured on the room mean scores
    room_means = [np.mean([node_scores[member] for member in room]) for room in round_data]
    mean_gaps = [abs(a - b) for a, b in combinations(room_means, 2)]
    between_group_distance = mean_or_zero(mean_gaps)

    return {
        "rdr": ratio_or_zero(avg_within_group_distance, pop_avg_distance),
        "assortativity": assortativity,
        "norm_sep": ratio_or_zero(between_group_distance, pop_avg_distance),
        "wb_ratio": ratio_or_zero(avg_within_group_distance, between_group_distance)
    }

# game_id -> list of per-stage metric dicts (analyze_round output plus a "stage" key)
all_metrics = {}

for game_id, stages in stage_room_senders.items():
    # Metrics need opinion scores; games without them are left out
    if game_id not in participant_opinions_by_game:
        continue

    node_scores = extract_node_scores(game_id, participant_opinions_by_game)
    all_metrics[game_id] = [
        {**analyze_round(node_scores, extract_round_groupings(stage)), "stage": stage["stage"]}
        for stage in stages
    ]

In [None]:
def plot_opinion_color_variant_with_metrics(data, opinions_data, metrics_data, game_dict_multi):
    """Save and show, per game, a room-layout figure coloured by opinion with metrics.

    Each row is one stage (first stage on top), each box one room with its
    participants drawn as dots coloured by opinion_color. The room title shows
    the stage's message count, and the right margin shows the stage's
    assortativity and within/between ratio when metrics exist for that stage.
    The figure is written to '<game_id>.jpg'.

    Args:
        data: Mapping game_id -> list of {"stage": name, "rooms": {room_id: [ids]}}.
        opinions_data: Mapping game_id -> JSON string decoding to
            {participant id: {"opinion": ...}}.
        metrics_data: Mapping game_id -> list of metric dicts with "stage",
            "assortativity" and "wb_ratio" keys.
        game_dict_multi: Per-game data providing 'gameParams', 'topic',
            'trial_name' and the 'chatChannel-<room>' columns.

    Games missing from opinions_data or game_dict_multi, or with no stages,
    are skipped.
    """
    room_width = 3
    room_height = 1.5
    h_spacing = 4
    v_spacing = 2.5
    dot_radius = 0.12
    dot_margin = 0.2
    # Number of dots that fit side by side inside one room box
    max_per_row = int((room_width - 2 * dot_margin) / (2 * dot_radius))

    for game_id, stages in data.items():
        if game_id not in opinions_data: continue
        if game_id not in game_dict_multi: continue
        if not stages:
            # max() below would raise ValueError on an empty stage list
            continue

        # Looked up only for games that are actually plotted
        stagetimeStamps = updated_stage_dict_multi[game_id]
        # Decode once per game instead of once per participant
        opinions = json.loads(opinions_data[game_id])
        # stage name -> metrics; a later entry for the same stage wins, as before
        game_metrics = metrics_data[game_id] if game_id in metrics_data else []
        metrics_by_stage = {entry['stage']: entry for entry in game_metrics}

        fig, ax = plt.subplots(figsize=(14, 9))

        gameparams = json.loads(game_dict_multi[game_id]['gameParams'][0][-1]['val'])
        topic_ID = json.loads(game_dict_multi[game_id]['topic'][0][-1]['val'])
        ax.set_title(f"Session {game_id}, {game_dict_multi[game_id]['trial_name'].iloc[0]}\nCondition: {gameparams['condition']}, \nTopic: {gameparams['topics'][int(topic_ID)]}", fontsize=14, loc='center', pad=20)

        max_rooms = max(len(stage["rooms"]) for stage in stages)
        num_rounds = len(stages)

        ax.set_xlim(-1, h_spacing * max_rooms + 2)
        ax.set_ylim(-1, v_spacing * num_rounds + 2)
        ax.axis('off')

        # Plot rooms and participants
        for round_idx, stage_data in enumerate(stages):
            rooms = stage_data["rooms"]
            y0 = v_spacing * (num_rounds - round_idx - 1)

            for room_idx, (room_id, participants) in enumerate(sorted(rooms.items(), key=lambda x: int(x[0]))):
                roomMessages = json.loads(game_dict_multi[game_id]['chatChannel-'+room_id][0][-1]['val'])
                allcounts = getMsgCount(roomMessages, stagetimeStamps)
                x0 = room_idx * h_spacing

                rect = patches.FancyBboxPatch(
                    (x0, y0),
                    room_width,
                    room_height,
                    boxstyle="round,pad=0.02",
                    edgecolor='black',
                    facecolor='#f0f0f0',
                    linewidth=1
                )
                ax.add_patch(rect)

                for i, pid in enumerate(participants):
                    col = i % max_per_row
                    row = i // max_per_row
                    px = x0 + dot_margin + dot_radius + col * 2 * dot_radius
                    py = y0 + room_height - dot_margin - dot_radius - row * 2 * dot_radius
                    color = opinion_color(opinions[str(pid)]["opinion"])
                    ax.add_patch(plt.Circle((px, py), dot_radius, color=color))
                    ax.text(px, py, str(pid), color='black', fontsize=6, ha='center', va='center')

                # Retrieve message count for this room
                msg_count = allcounts[stage_data['stage']]
                ax.text(x0 + room_width / 2, y0 + room_height + 0.1,
                        f"Round {round_idx+1} - Room {room_id} ({msg_count} msgs)",
                        ha='center', va='bottom', fontsize=9)

            # Add metrics text on the right side of the plot, but only when this
            # stage really has metrics. Previously a missing match either raised
            # on an unbound name or repeated the previous stage's numbers.
            stage_metrics = metrics_by_stage.get(stage_data['stage'])
            if stage_metrics is not None:
                metric_str = f'{stage_metrics["assortativity"]:.3f}\n{stage_metrics["wb_ratio"]:.3f}'
                ax.text(h_spacing * (max_rooms + 0.5), y0 + room_height / 2,
                        metric_str, va='center', ha='center', fontsize=11)

        # Add metric labels
        ax.text(h_spacing * (max_rooms + 0.5), v_spacing * num_rounds + 0.5, "Assortativity\nWithin/Between Ratio", ha='center', va='bottom', fontsize=12)

        plt.tight_layout()
        plt.savefig(game_id + '.jpg')
        print('Saved:', game_id + '.jpg')
        plt.show()

# Always runs (not gated by debugPrintFlag): writes one '<game_id>.jpg' per plotted game
plot_opinion_color_variant_with_metrics(updated_stage_room_senders, participant_opinions_by_game, all_metrics, game_dict_multi)

In [None]:
# Suggestions

def getProvidedSuggestions(p):
    """Collect the 'val' of every entry stored under p['suggestedReply'].

    Returns a list of those values in stored order, or None when
    empiricaColumnExists(p, 'suggestedReply') is falsy.
    """
    if not empiricaColumnExists(p, 'suggestedReply'):
        return None
    return [entry['val'] for entry in p['suggestedReply']]
def getCopiedSuggestions(p):
    """Collect the 'val' of every entry stored under p['copySuggestion'].

    Returns a list of those values in stored order, or None when
    empiricaColumnExists(p, 'copySuggestion') is falsy.
    """
    if not empiricaColumnExists(p, 'copySuggestion'):
        return None
    # Build a fresh list per call. The accumulator was never initialised in
    # this function, so it either raised NameError or appended into whatever
    # module-level `results` happened to exist in the notebook.
    return [each_suggestion['val'] for each_suggestion in p['copySuggestion']]
def getAcceptedSuggestions(p):
    """Collect the 'val' of every entry stored under p['sendSuggestion'].

    Returns a list of those values in stored order, or None when
    empiricaColumnExists(p, 'sendSuggestion') is falsy.
    """
    if not empiricaColumnExists(p, 'sendSuggestion'):
        return None
    # Build a fresh list per call. The accumulator was never initialised in
    # this function, so it either raised NameError or appended into whatever
    # module-level `results` happened to exist in the notebook.
    return [each_suggestion['val'] for each_suggestion in p['sendSuggestion']]
    
# Run each suggestion extractor over all games/players via getMultiGameData
providedSuggestions = getMultiGameData(getProvidedSuggestions, game_dict_multi, player_dict_multi)
copiedSuggestions = getMultiGameData(getCopiedSuggestions, game_dict_multi, player_dict_multi)
acceptedSuggestions = getMultiGameData(getAcceptedSuggestions, game_dict_multi, player_dict_multi)

# Debug dump: nothing is printed unless debugPrintFlag is set
for gameID in providedSuggestions:
    # Skip games listed in exclude_local_trails
    if gameID in exclude_local_trails:
        continue
    if debugPrintFlag:
        print(bf('Trial:'), gameID)
    for participantID in providedSuggestions[gameID]:
        if not debugPrintFlag:
            continue
        print(bf('Participant:'), e2p[participantID])
        print('\t', bf('Suggestions provided:'), providedSuggestions[gameID][participantID])
        print('\t', bf('Suggestions edited:'), copiedSuggestions[gameID][participantID])
        print('\t', bf('Suggestions accepted:'), acceptedSuggestions[gameID][participantID])

In [None]:
# Summaries

def getSummaryText(p):
    """JSON-decode the 'val' of the last entry in p['summaryText'].

    Returns the decoded value, or None when
    empiricaColumnExists(p, 'summaryText') is falsy.
    """
    if not empiricaColumnExists(p, 'summaryText'):
        return None
    latest_entry = p['summaryText'][-1]
    return json.loads(latest_entry['val'])

def getSummaryAgreement(p):
    """JSON-decode the 'val' of the last entry in p['summaryAgreement'].

    Returns the decoded value, or None when
    empiricaColumnExists(p, 'summaryAgreement') is falsy.
    """
    if not empiricaColumnExists(p, 'summaryAgreement'):
        return None
    latest_entry = p['summaryAgreement'][-1]
    return json.loads(latest_entry['val'])
def getSuggestionRating(p):
    """JSON-decode the 'val' of the last entry in p['suggestionRating'].

    Returns the decoded value, or None when
    empiricaColumnExists(p, 'suggestionRating') is falsy.
    """
    if not empiricaColumnExists(p, 'suggestionRating'):
        return None
    latest_entry = p['suggestionRating'][-1]
    return json.loads(latest_entry['val'])
    
def getSuggestionExplanation(p):
    """JSON-decode the 'val' of the last entry in p['suggestionExplanation'].

    Returns the decoded value, or None when
    empiricaColumnExists(p, 'suggestionExplanation') is falsy.
    """
    if not empiricaColumnExists(p, 'suggestionExplanation'):
        return None
    latest_entry = p['suggestionExplanation'][-1]
    return json.loads(latest_entry['val'])

# Run each summary/rating extractor over all games/players via getMultiGameData
summaryText = getMultiGameData(getSummaryText, game_dict_multi, player_dict_multi)
summaryAgreement = getMultiGameData(getSummaryAgreement, game_dict_multi, player_dict_multi)
suggestionRating = getMultiGameData(getSuggestionRating, game_dict_multi, player_dict_multi)
suggestionExplanation = getMultiGameData(getSuggestionExplanation, game_dict_multi, player_dict_multi)

# Debug dump: nothing is printed unless debugPrintFlag is set
for gameID in summaryText:
    # Skip games listed in exclude_local_trails
    if gameID in exclude_local_trails:
        continue
    if debugPrintFlag:
        print(bf('Trial:'), gameID)
    for participantID in summaryText[gameID]:
        if not debugPrintFlag:
            continue
        print(bf('Participant:'), e2p[participantID])
        print('\t', bf('Summary Text:'), summaryText[gameID][participantID])
        print('\t', bf('Summary Agreement:'), summaryAgreement[gameID][participantID])
        print('\t', bf('Suggestion Rating:'), suggestionRating[gameID][participantID])
        print('\t', bf('Suggestion Explanation:'), suggestionExplanation[gameID][participantID])

## 4) Figures

In [None]:
# Put figure code here