In [313]:
import pandas as pd
import numpy as np
import os
import re
from glob import glob
from io import StringIO
import requests
import ast
import json
import itertools
import random
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

# Load in necessary Raw Score Data for computing Synergy (Group Advantage) from Scratch

In [314]:
# python equivalent of CSV loader
def load_CSVs(pattern, rename=True):
	"""Load and stack every `<pattern>.csv` export found under a `Wave N data` folder.

	Files are discovered recursively below `../..`; only paths matching
	`Wave [0-9] data.*/<pattern>.csv` are read. Columns whose name contains
	`data.score` or `duplicateCellID` are cast to `str`, the per-file frames are
	concatenated, and exact duplicate rows are dropped.

	Parameters
	----------
	pattern : str
		File stem to look for (e.g. "games", "player-rounds").
	rename : bool, default True
		When True, derive an id column name from `pattern` (trailing "s" removed,
		"-x" turned into "X", "Id" appended), drop existing columns that start
		with that name, and rename every column ending in "_id" to it.

	Returns
	-------
	pandas.DataFrame
		The combined, de-duplicated table.
	"""
	wave_path_regex = rf'Wave [0-9] data.*/{pattern}.csv'
	candidate_paths = glob(os.path.join('../..', f'**/{pattern}.csv'), recursive=True)

	def read_and_process(file_path):
		# Read one export and cast its score / duplicate-cell columns to str
		frame = pd.read_csv(file_path, low_memory=False)
		string_columns = {}
		for column in frame.columns:
			if re.search(r"(data\.score|duplicateCellID)", column):
				string_columns[column] = 'str'
		return frame.astype(string_columns)

	# Only files that live below a "Wave N data" folder are loaded
	per_file_frames = []
	for path in candidate_paths:
		if re.search(wave_path_regex, path):
			per_file_frames.append(read_and_process(path))
	data = pd.concat(per_file_frames).drop_duplicates()

	if not rename:
		return data

	# "player-rounds" -> "player-round" -> "playerRound" -> "playerRoundId"
	id_column = re.sub(r's$', '', pattern)
	id_column = re.sub(r'-(\w)', lambda match: match.group(1).upper(), id_column) + "Id"
	stale_id_columns = [col for col in data.columns if col.startswith(id_column)]
	data = data.drop(columns=stale_id_columns)
	return data.rename(columns=lambda x: id_column if re.search(r"_id$", x) else x)

# Load every raw export table. Each call stacks the matching CSVs from all
# "Wave N data" folders and (by default) renames the `_id` column to `<table>Id`.
games = load_CSVs("games")
game_lobbies = load_CSVs("game-lobbies")
treatments = load_CSVs("treatments")  
factors = load_CSVs("factors")
factor_types = load_CSVs("factor-types")
lobby_configs = load_CSVs("lobby-configs")
batches = load_CSVs("batches")
rounds = load_CSVs("rounds")
stages = load_CSVs("stages")
players = load_CSVs("players")
player_logs = load_CSVs("player-logs")
player_rounds = load_CSVs("player-rounds")
player_stages = load_CSVs("player-stages")
player_inputs = load_CSVs("player-inputs")
# rename=False: the offline scoring table keeps its original column names
offline_scoring = load_CSVs("offline scoring", False)

Cleaning all the main data

In [315]:
# hard-coded list of which wave a task is in
# Each entry is {'wave': <int>, 'tasks': [<task display names>]}; get_wave() scans
# this list to look up the wave of a task name.
tasks_in_waves = [
	{
		'wave': 1,
		'tasks': [
			"Moral Reasoning",
			"Allocating Resources",
			"Writing Story",
			"Divergent Association",
			"Room Assignment",
			"Wolf Goat Cabbage",
			"Guess the Correlation",
			"Sudoku",
			"Whac a Mole",
			"Word Construction"
		]
	},
	{
		'wave': 2,
		'tasks': [
			"Logic Problem",
			"Unscramble Words",
			"Recall Word Lists",
			"Random Dot Motion",
			"Typing"
		]
	},
	{
		'wave': 3,
		'tasks': [
			"Putting Food Into Categories",
			"Recall Association",
			"hk_22482ca42486c72f Writing",
			"Wildcat Wells",
			"WildCam"
		]
	}
]

# Read in the task map
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
task_map_url = "https://raw.githubusercontent.com/Watts-Lab/task-mapping/master/task_map.csv"
task_map_response = requests.get(task_map_url, timeout=30)
task_map_response.raise_for_status()
task_map_csv = task_map_response.text
task_map = pd.read_csv(StringIO(task_map_csv))

# all the renamings
# Map the task-map names onto the display names used in tasks_in_waves;
# names that are not listed here are left unchanged by .replace().
task_map['task'] = task_map['task'].replace({
	"Allocating resources to programs": "Allocating Resources",
	"Guessing the correlation": "Guess the Correlation",
	"Moral Reasoning (Disciplinary Action Case)": "Moral Reasoning",
	"Whac-A-Mole": "Whac a Mole",
	"Divergent Association Task": "Divergent Association",
	"Room assignment task": "Room Assignment",
	"Wolf, goat and cabbage transfer": "Wolf Goat Cabbage",
	"Word construction from a subset of letters": "Word Construction",
	"Writing story": "Writing Story",
	"Unscramble words (anagrams)": "Unscramble Words",
	"Wildcam Gorongosa (Zooniverse)": "WildCam",
	"Putting food into categories": "Putting Food Into Categories",
	"Recall association": "Recall Association",
	"hk_22482ca42486c72f writing": "hk_22482ca42486c72f Writing",
	"Random dot motion": "Random Dot Motion",
	"Typing game": "Typing",
	"Recall word lists": "Recall Word Lists"
})

# get the wave for a given task
def get_wave(task):
	"""Return the wave number whose task list contains `task`, or `pd.NA` if none does."""
	matching_waves = (entry['wave'] for entry in tasks_in_waves if task in entry['tasks'])
	# The first matching entry wins; pd.NA marks a task that is in no wave
	return next(matching_waves, pd.NA)

# Tag each task-map row with its wave (pd.NA when the task is in no wave list)
task_map['wave'] = task_map['task'].apply(get_wave)
# NOTE(review): this bare expression is not the last statement of the cell, so it displays nothing
task_map
# drop null tasks
task_map = task_map.dropna(subset=['task'])

# Get the McGrath categorical rating for all tasks
mcgrath_mapping = pd.read_csv("../data/20_task_map_mcgrath_manually_updated.csv")
# Reshape the `*_cat` columns from wide to long: one row per (task, category column)
mcgrath_mapping = mcgrath_mapping.melt(
	id_vars='task',
	value_vars=[col for col in mcgrath_mapping.columns if col.endswith('_cat')],
	var_name='mcgrath_type',
	value_name='value'
)
# Keep only the (task, category) rows whose indicator value is 1
mcgrath_mapping = mcgrath_mapping[mcgrath_mapping['value'] == 1]
# Strip the '_cat' marker so only the category name remains
mcgrath_mapping['mcgrath_type'] = mcgrath_mapping['mcgrath_type'].str.replace('_cat', '')
# The indicator column is constant (1) after the filter above, so it is dropped
mcgrath_mapping.drop(columns='value', inplace=True)

# Attach each factor's type name to its value
factor_info = factors[["factorId", "value", "factorTypeId"]].merge(
	factor_types[["factorTypeId", "name"]],
	on="factorTypeId",
	how="inner"
)
# Keep the three factor types of interest and link every factor to the treatments that
# list it: `treatments.factorIds` is split on commas and exploded to one row per factorId.
# The merge has no explicit `on=`, so it joins on the columns the two frames share.
factor_info = factor_info[factor_info["name"].isin({"unitsSeed", "unitsIndex", "playerCount"})].merge(
	treatments.assign(factorId=treatments['factorIds'].str.split(',')).explode('factorId')[["factorId", "treatmentId"]],
).filter(items=["value", "name", "treatmentId"]).drop_duplicates()
# One row per treatment with one column per factor name; treatments missing any factor are dropped
conditions = factor_info.pivot(index="treatmentId", columns="name", values="value").reset_index().dropna()

# Resolve each player's condition: player -> game (via player_rounds) -> treatment (via games)
# -> factor values (via conditions). The first join is a left join; the next two are inner
# joins, so players without a game/treatment match or without a complete condition row are removed.
player_conditions = players.merge(
	player_rounds[["playerId", "gameId"]].drop_duplicates(),
	on="playerId",
	how="left"
).merge(
	games[["gameId", "treatmentId"]],
	on="gameId",
	how="inner"
).merge(
	conditions,
	on="treatmentId",
	how="inner"
).drop(columns=["treatmentId", "gameId"])

# Ordered category labels; complexity_levels is indexed by the zero-based instance code below
complexity_levels = ["Low", "Medium", "High"]
# NOTE(review): playerCountLevels and synergy_levels are not referenced in the visible cells — confirm they are still needed
playerCountLevels = [1, 3, 6]
synergy_levels = ["None", "Weak", "Strong"]

# Keep the stages whose display name contains neither "Practice" nor "Intro" and that carry task constants
task_instances = stages[~(stages["displayName"].str.contains("Practice")) & ~(stages["displayName"].str.contains("Intro"))].dropna(subset=["data.constants"])
task_instances = (
	task_instances
	.assign(
		# Extracting the instance name
		initial_instance=lambda df: df['data.constants'].str.extract(r'"name":"(.*?)"', expand=False),
		
		# Mapping the name to numbers
		# np.select takes the first condition that is True, so the spelled-out numbers
		# are tried before the digits; names matching neither get NaN.
		instance_number=lambda df: np.select(
			[
				df['initial_instance'].str.contains(r'zero', na=False),
				df['initial_instance'].str.contains(r'one', na=False),
				df['initial_instance'].str.contains(r'two', na=False),
				df['initial_instance'].str.contains(r'three', na=False),
				df['initial_instance'].str.contains(r'0', na=False),
				df['initial_instance'].str.contains(r'1', na=False),
				df['initial_instance'].str.contains(r'2', na=False),
				df['initial_instance'].str.contains(r'3', na=False)
			],
			[0, 1, 2, 3, 0, 1, 2, 3],
			default=np.nan
		)
	)
	.assign(
		# Calculating final instance
		# Names containing "dat instance" are shifted up by one; all others keep their number
		instance=lambda df: np.where(
			df['initial_instance'].str.contains(r'dat instance', na=False),
			df['instance_number'] + 1,
			df['instance_number']
		)
	)
)
# Python is zero-indexed; so we need to transform this to get the codes to work
# NOTE(review): astype(int) raises if any instance is still NaN (the np.select default) — confirm every name matches
task_instances["instance"] = task_instances["instance"].astype(int)-1 
# Codes 0/1/2 index into complexity_levels ("Low"/"Medium"/"High")
task_instances['complexity'] = pd.Categorical.from_codes(
	task_instances['instance'],
	categories=complexity_levels,
	ordered=True
)

task_instances = task_instances[['stageId', 'instance', 'data.constants', 'complexity']]
task_instances["instance"] = task_instances["instance"].astype(int)+1 # reset indexing to match R

# Build one row per (player, stage) with the player's condition, the stage metadata,
# the task instance / complexity and any offline score. All joins are left joins;
# rows without a complexity (i.e. stages that are not in task_instances) are dropped at the end.
merged_score_info = player_conditions[["playerId", "playerCount", "data.playerIds"]].dropna().merge(
	player_stages,
	on="playerId",
	how = "left"
).merge(
	stages[["stageId", "displayName", "startTimeAt", "data.stageLength", "data.defaultStageLength"]],
	on="stageId",
	how="left"
).merge(
	task_instances,
	on="stageId",
	how="left"
).merge(
	offline_scoring,
	on="stageId",
	how="left"
).dropna(subset = ["complexity"])

merged_score_info = (
	merged_score_info
	.assign(
		# Task name = display name with the trailing " Round..." part removed
		task=lambda df: df['displayName'].str.replace(r" Round.*", "", regex=True),
		# Use `score` when present, otherwise fall back to `data.score`; non-numeric values become NaN
		score=lambda df: pd.to_numeric(np.where(df['score'].isna(), df['data.score'], df['score']), errors='coerce'),
		playerCount=lambda df: pd.Categorical(df['playerCount'], ordered=True)
	)
	# Rows that still have no numeric score are removed
	.dropna(subset=['score'])
)

# data.stageLength / 60000 — presumably milliseconds to minutes; confirm the unit of data.stageLength
merged_score_info['duration'] = merged_score_info['data.stageLength'] / 60000

# Rescale scores within each (task, complexity) cell, then attach efficiency and wave.
# The per-group frames are collected in a list and concatenated once at the end:
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows on
# every iteration and starts from a column-less empty frame.
scaled_groups = []
for group_keys, group_df in merged_score_info.groupby(['task', 'complexity'], observed=True):
	# Work on a private copy so the assignments below never write into a groupby slice
	group_df = group_df.copy()

	max_score = group_df['score'].dropna().max()
	# "Random Dot Motion" scores are passed through unscaled in both branches
	is_random_dot_motion = group_df['task'] == "Random Dot Motion"
	if np.isnan(max_score) or max_score == 0:
		# No positive maximum to scale by: every other task's score becomes 0
		group_df['score'] = np.where(is_random_dot_motion, group_df['score'], 0)
	else:
		# Percent of the group's best score, floored at 0
		group_df['score'] = np.where(
			is_random_dot_motion,
			group_df['score'],
			np.clip(100 * group_df['score'] / max_score, 0, None)
		)

	group_df['efficiency'] = group_df['score'] / group_df['duration']
	group_df['wave'] = group_df['task'].apply(get_wave)

	scaled_groups.append(group_df)

# An input with no groups yields an empty frame, as the original loop did
merged_score_info_with_wave_assigned = (
	pd.concat(scaled_groups, ignore_index=True) if scaled_groups else pd.DataFrame()
)

# merged_score_info_with_wave_assigned should have 17,789 rows

# Select and aggregate fields
# Keep only the analysis columns, remove exact duplicate rows, and drop rows lacking a score or efficiency
merged_score_info_with_wave_assigned = merged_score_info_with_wave_assigned[['wave', 'task', 'complexity', 'playerCount', 'stageId', 'score', 'duration', 'efficiency', 'data.playerIds']].drop_duplicates()
merged_score_info_with_wave_assigned = merged_score_info_with_wave_assigned.dropna(subset=['efficiency', 'score'])

# The first three steps of this chain repeat the select / drop_duplicates / dropna done just
# above, so they change nothing here; the groupby collapses the remaining rows to one row per
# (stage, task, complexity, playerCount, wave, playerIds) with the best score, shortest
# duration and best efficiency.
# NOTE(review): groupby excludes rows whose key columns are NA by default (dropna=True), and
# `wave` is pd.NA for any task missing from tasks_in_waves — possibly the source of the
# unexpected drops flagged below; confirm.
raw_score_data = (
	merged_score_info_with_wave_assigned
	.loc[:, ['wave', 'task', 'complexity', 'playerCount', 'stageId', 'score', 'duration', 'efficiency', 'data.playerIds']]
	.drop_duplicates()
	.dropna(subset=['efficiency', 'score']) ### THIS IS DROPPING MORE THAN I WANT
	.groupby(['stageId', 'task', 'complexity', 'playerCount', 'wave', 'data.playerIds'], observed=True)
	.agg({'score': 'max', 'duration': 'min', 'efficiency': 'max'})
	.reset_index()
)

# Rename columns
raw_score_data.rename(columns={'data.playerIds': 'playerIds'}, inplace=True)

# raw_score data should have 5,972 rows

In [316]:
# playerCount was stored as an ordered categorical; cast it to int so it can be compared numerically
raw_score_data["playerCount"] = raw_score_data["playerCount"].astype(int)
# Split into real teams (more than one player) and individuals (exactly one player)
team_raw_score_data = raw_score_data[raw_score_data["playerCount"] > 1]
individual_raw_score_data = raw_score_data[raw_score_data["playerCount"] == 1]

# Recalculating nominal teams using deduplicated sum

In the main paper, we use two types of statistical aggregations for the 'nominal' teams: using the 'best' score from an individual player in a nominal team, and using the 'average' score from an individual player (aka a randomly selected player) from a nominal team.

For a subset of tasks, it might be possible to add another baseline: **combining (then de-duplicating) the individual units of output**. For example, a standard practice for idea generation tasks is to take all the individually-generated ideas, de-duplicate them, and then look at how groups do in contrast to that.

In our data, 4 tasks for which we can do this are:
1. **Word Construction**
2. **Unscramble Words**
3. **Putting Food Into Categories**
4. **Recall Association**

In [317]:
# Raw per-stage answers for the summable tasks; the preview below shows the columns
# stageId, data.type, data.constants and data.answers (the last two hold JSON text)
df_summable_answers_raw = pd.read_csv('../outputs/summable_answers_raw.csv')

In [318]:
# sample() is unseeded here, so the five rows shown differ from run to run
df_summable_answers_raw.sample(5) # peek at raw score formats

Unnamed: 0,stageId,data.type,data.constants,data.answers
52,2cbSrYP43jpgTwKfu,PuttingFoodIntoCategories,"{""name"":""Putting Food Into Categories instance...","{""name"":""nBkXhWcAi5zEX5Jov"",""word"":""red vs. bl..."
606,xdGeoybpJ2xWknN2F,WordConstruction,"{""name"":""Word Construction instance zero"",""cal...","{""name"":""Cck3Sp73QHgk4JAeA"",""word"":""peak""}"
490,kFXF6GXyikCeooX7n,WordConstruction,"{""name"":""Word Construction instance zero"",""cal...","{""name"":""XCmbKAg7L2mErEgsf"",""word"":""peak""},{""n..."
1436,GPa6KAvjqqmWH38bj,RecallAssociation,"{""name"":""recall association instance 0"",""calcu...","{""target"":""fruit"",""words"":[{""player"":""A1ZIIU5H..."
598,4PYJSpeysv2X4KZzQ,WordConstruction,"{""name"":""Word Construction instance one"",""calc...","{""name"":""TS8EHZeFszZQBBFF6"",""word"":""arbs""},{""n..."


### Deduplicated Sum Evaluations

In [319]:
# task name -> compiled frame of real (observed) raw scores; filled in by the per-task cells below
original_score_dfs = {}

#### Unscramble Words

In [320]:
# Raw answer rows for the Unscramble Words task only
unscramble_words = df_summable_answers_raw[df_summable_answers_raw["data.type"]=="UnscrambleWords"]

# evaluate unscramble words for a dataframe of players ("nominal team")
def evaluate_nominal_team_unscramble_words(df):
    """Pool the correctly unscrambled words of every row in `df` into one de-duplicated set.

    `df` must have a `data.answers` column; each entry is scored with
    `evaluate_unscramble_words` and the per-row sets are unioned.
    """
    per_player_sets = (evaluate_unscramble_words(raw_answers) for raw_answers in df['data.answers'])
    return set().union(*per_player_sets)

# evaluate unscramble words for ONE player
def evaluate_unscramble_words(data_answers):
    """Return the set of words that one `data.answers` cell unscrambled correctly.

    Parameters
    ----------
    data_answers : str
        Either a JSON array or a bare, comma-separated run of JSON objects, each
        with a `word` (the target) and an `input` (what was typed). Python-literal
        strings accepted by the previous `ast.literal_eval` parser still work.
        A missing (non-string) or blank cell is treated as "no answers".

    Returns
    -------
    set of str
        Lower-cased, stripped target words whose input matched them exactly
        (case- and surrounding-whitespace-insensitive).

    Raises
    ------
    KeyError
        If an answer object lacks the `word` or `input` key.
    """
    # Missing or blank cells carry no answers
    if not isinstance(data_answers, str) or not data_answers.strip():
        return set()

    answers_str = data_answers.strip()
    try:
        # Parse as JSON first (like the other evaluators) so JSON literals such as
        # null/true/false do not crash the parser; a bare run of objects is wrapped
        # into an array.
        answer_list = json.loads(answers_str if answers_str.startswith("[") else f"[{answers_str}]")
    except json.JSONDecodeError:
        # Fall back to the original parser for Python-literal style strings
        answer_list = ast.literal_eval(answers_str)
        if isinstance(answer_list, dict):  # in case there's only one answer, make it a list
            answer_list = [answer_list]

    correct_words = set()
    for answer in answer_list:
        correct_ans = answer['word']
        input_ans = answer['input']
        # A null word or input can never be a correct match
        if not isinstance(correct_ans, str) or not isinstance(input_ans, str):
            continue
        correct_ans = correct_ans.lower().strip()
        if correct_ans == input_ans.lower().strip():
            correct_words.add(correct_ans)
    return correct_words

# get the "original scores" for the REAL data
# Copy first so the new column is added to an independent frame rather than a filtered slice
unscramble_words = unscramble_words.copy()
# Raw score = number of distinct correctly unscrambled words in the row's answers
unscramble_words["score_real_raw"] = unscramble_words["data.answers"].apply(
    lambda x: len(evaluate_unscramble_words(x))
)
unscramble_words_orig_data = raw_score_data[raw_score_data["task"]=="Unscramble Words"]
# Left join: stages with no row in the raw answers keep a NaN score_real_raw
# NOTE(review): the Word Construction cell fills these NaNs with 0 but this cell does not — confirm which is intended
unscramble_words_orig_data = unscramble_words_orig_data.merge(
   unscramble_words[["stageId", "score_real_raw"]],
   on="stageId",
   how="left"
)
unscramble_words_compiled = unscramble_words_orig_data[["stageId", "task", "complexity", "playerCount", "score_real_raw"]]

original_score_dfs["Unscramble Words"] = unscramble_words_compiled

#### Word Construction

In [321]:
# Raw answer rows for the Word Construction task only
word_construction = df_summable_answers_raw[df_summable_answers_raw["data.type"]=="WordConstruction"]

# evaluate word construction for a dataframe of players ("nominal team")
def evaluate_nominal_team_word_construction(df):
    """Pool the valid words of every row in `df` into one de-duplicated set.

    `df` must have `data.constants` and `data.answers` columns; each row is scored
    with `evaluate_word_construction` and the per-row sets are unioned.
    """
    constants_and_answers = zip(df['data.constants'], df['data.answers'])
    return set().union(
        *(evaluate_word_construction(constants, answers) for constants, answers in constants_and_answers)
    )

# evaluate word construction for ONE player
def evaluate_word_construction(data_constants, data_answers):
    """Return the set of valid words submitted in one Word Construction row.

    Parameters
    ----------
    data_constants : str
        JSON text holding a `possible_trie`: nested nodes with optional `children`
        (letter -> node) and an `end` flag marking the end of a valid word.
    data_answers : str
        JSON array, or a bare comma-separated run of JSON objects, each with a `word`.

    Returns
    -------
    set of str
        Lower-cased, stripped submitted words that are spelled out by the trie.
    """
    constants = json.loads(data_constants)

    # answers might be array or raw string of JSON objects
    if data_answers.strip().startswith("["):
        answers = json.loads(data_answers)
    else:
        answers = json.loads(f"[{data_answers}]")

    # build set of valid words: walk the trie with an explicit stack, carrying the
    # letters collected on the way down to each node
    valid_words = set()
    pending = [(constants["possible_trie"], "")]
    while pending:
        node, prefix = pending.pop()
        if node.get("end", False):
            valid_words.add(prefix)
        for letter, child in node.get("children", {}).items():
            pending.append((child, prefix + letter))

    # answers attempted by player
    submitted = set()
    for answer in answers:
        submitted.add(answer["word"].lower().strip())

    # correct = intersection
    return submitted & {word.lower() for word in valid_words}


# apply to the dataframe
# Copy first so the new column is added to an independent frame rather than a filtered slice
word_construction = word_construction.copy()
# Raw score = number of distinct valid words in the row's answers
word_construction["score_real_raw"] = word_construction.apply(
    lambda row: len(evaluate_word_construction(row["data.constants"], row["data.answers"])),
    axis=1
)

# merge back with raw_score_data (like unscramble words)
word_construction_orig_data = raw_score_data[raw_score_data["task"]=="Word Construction"]
# Left join: stages with no row in the raw answers get NaN here (filled with 0 below)
word_construction_orig_data = word_construction_orig_data.merge(
   word_construction[["stageId", "score_real_raw"]],
   on="stageId",
   how="left"
)

# compile clean dataframe
word_construction_compiled = word_construction_orig_data[["stageId", "task", "complexity", "playerCount", "score_real_raw"]]
word_construction_compiled = word_construction_compiled.copy()
# Stages without any raw answers count as a score of 0
word_construction_compiled["score_real_raw"] = word_construction_compiled["score_real_raw"].fillna(0)

# add to dict
original_score_dfs["Word Construction"] = word_construction_compiled

#### Recall Association

In [322]:
# Raw answer rows for the Recall Association task only
recall_association = df_summable_answers_raw[df_summable_answers_raw["data.type"]=="RecallAssociation"]

# evaluate RecallAssociation for ONE player
def evaluate_recall_association(data_constants, data_answers):
    """Return the set of correctly recalled words in one Recall Association row.

    Parameters
    ----------
    data_constants : str
        JSON text with a `lists` array; each entry has a `target` and its valid `words`.
    data_answers : str
        JSON array, or a bare comma-separated run of JSON objects, each with a
        `target` and a `words` array of objects carrying a `word`.

    Returns
    -------
    set of str
        Lower-cased, stripped submitted words that belong to the list of the
        target they were given for. Responses for unknown targets are ignored.
    """
    constants = json.loads(data_constants)

    # answers are not always a proper JSON array: wrap a bare run of objects in brackets
    payload = data_answers.strip()
    answers = json.loads(payload if payload.startswith("[") else f"[{payload}]")

    # build dictionary: target -> set(valid words)
    valid_lookup = {}
    for word_list in constants["lists"]:
        valid_lookup[word_list["target"].lower()] = {w.lower().strip() for w in word_list["words"]}

    correct_words = set()
    for response in answers:
        valid_words = valid_lookup.get(response["target"].lower())
        if valid_words is None:
            continue  # ignore unknown target
        submitted = {entry["word"].lower().strip() for entry in response["words"]}
        correct_words.update(submitted & valid_words)

    return correct_words

# pooled evaluation across MULTIPLE rows (nominal team)
def evaluate_nominal_team_recall_association(df):
    """Pool the correctly recalled words of every row in `df` into one de-duplicated set.

    `df` must have `data.constants` and `data.answers` columns; each row is scored
    with `evaluate_recall_association` and the per-row sets are unioned.
    """
    pooled_words = set()
    for constants, answers in zip(df["data.constants"], df["data.answers"]):
        pooled_words.update(evaluate_recall_association(constants, answers))
    return pooled_words


# Copy first so the new column is added to an independent frame rather than a filtered slice
recall_association = recall_association.copy()

# score for each row (one player)
# Raw score = number of distinct correctly recalled words in the row's answers
recall_association["score_real_raw"] = recall_association.apply(
    lambda row: len(evaluate_recall_association(row["data.constants"], row["data.answers"])),
    axis=1
)

# merge back with raw_score_data
recall_association_orig_data = raw_score_data[raw_score_data["task"]=="Recall Association"]
# Left join: stages with no row in the raw answers keep a NaN score_real_raw
# NOTE(review): the Word Construction cell fills these NaNs with 0 but this cell does not — confirm which is intended
recall_association_orig_data = recall_association_orig_data.merge(
   recall_association[["stageId", "score_real_raw"]],
   on="stageId",
   how="left"
)

# compile clean dataframe
recall_association_compiled = recall_association_orig_data[["stageId", "task", "complexity", "playerCount", "score_real_raw"]]

# add to dict
original_score_dfs["Recall Association"] = recall_association_compiled

#### Putting Food Into Categories

In [323]:
# TBD on this one; it's too complicated due to GPT API scoring...

### Nominal Team Generation --> Scaled Score

- We need to compute a new scaled score with the nominal team score
- Scaled score: min-max normalization * 100
- With the 'summed' values, there are now potentially greater max scores than before

So, what we'll do instead:
- For each task in the 4 summable tasks (only 3 are evaluated below; Putting Food Into Categories is still TBD):
    * generate n = 100 nominal teams
    * log the new team AND nominal team AND individual scores
    * create a min-max normalization across {team, individual, nominal team}

In [324]:
def get_all_possible_nominal_teams(num_players, individual_data, task, complexity, MAX_ITER = 100000):
	"""List candidate nominal teams (tuples of row index labels) for one task/complexity cell.

	Parameters
	----------
	num_players : int
		Size of each nominal team.
	individual_data : pandas.DataFrame
		Individual rows with `task` and `complexity` columns; its index labels identify players' rows.
	task, complexity :
		Values used to filter `individual_data`.
	MAX_ITER : int, default 100000
		Upper bound on the number of teams returned.

	Returns
	-------
	list of tuple
		Every `num_players`-sized combination of the filtered index labels when there are
		at most `MAX_ITER` of them (in `itertools.combinations` order); otherwise `MAX_ITER`
		distinct combinations drawn at random with the `random` module.
	"""
	# filter to the task and complexity
	filtered_data = individual_data[(individual_data["task"] == task) & (individual_data["complexity"] == complexity)]
	index_labels = list(filtered_data.index)

	# Pull at most MAX_ITER + 1 combinations: enough to tell whether the cap is exceeded
	# without building the full list (which can reach billions of tuples for teams of 6).
	first_teams = list(itertools.islice(itertools.combinations(index_labels, num_players), MAX_ITER + 1))
	if len(first_teams) <= MAX_ITER:
		return first_teams

	# Too many combinations: draw distinct teams by rejection sampling on row positions.
	# Sorting the positions keeps each tuple in index order, as combinations() would.
	positions = range(len(index_labels))
	seen_positions = set()
	sampled_teams = []
	while len(sampled_teams) < MAX_ITER:
		picked = tuple(sorted(random.sample(positions, num_players)))
		if picked not in seen_positions:
			seen_positions.add(picked)
			sampled_teams.append(tuple(index_labels[i] for i in picked))
	return sampled_teams

In [325]:
def get_scores_for_nominal_team(nominal_teams_list, nominal_team_eval_func, individual_raw_score_data=individual_raw_score_data, df_summable_answers_raw=df_summable_answers_raw):
    """Score each nominal team by the size of its pooled, de-duplicated answer set.

    Parameters
    ----------
    nominal_teams_list : iterable of tuple
        Each tuple holds index labels of `individual_raw_score_data` rows forming one team.
    nominal_team_eval_func : callable
        Takes the raw-answer rows of a team and returns the pooled set of correct answers.
    individual_raw_score_data, df_summable_answers_raw : pandas.DataFrame
        Default to the notebook-level frames as they exist when this cell is run.

    Returns
    -------
    list of int
        One score per team, in input order.
    """
    def score_one_team(team_index_labels):
        # Look up the team's stages, then pull the raw answers recorded for those stages
        members = individual_raw_score_data.loc[list(team_index_labels)]
        belongs_to_team = df_summable_answers_raw["stageId"].isin(members["stageId"].values)
        return len(nominal_team_eval_func(df_summable_answers_raw[belongs_to_team]))

    return [score_one_team(team) for team in nominal_teams_list]

In [326]:
# Pooled-answer evaluator for each task that supports a de-duplicated sum
evaluation_functions = {
    "Unscramble Words": evaluate_nominal_team_unscramble_words,
    "Word Construction": evaluate_nominal_team_word_construction,
    "Recall Association": evaluate_nominal_team_recall_association
}
nominal_dfs = {}

# for each task condition, sample 100 nominal teams and evaluate their deduplicated sum scores
NUM_NOMINAL_TEAMS = 100
# (playerCount, complexity) cells, in the order they are enumerated and sampled
NOMINAL_TEAM_CONDITIONS = [
    (player_count, complexity)
    for player_count in (3, 6)
    for complexity in ("Low", "Medium", "High")
]

for task in ["Unscramble Words", "Word Construction", "Recall Association"]:
    individual_raw_score_data_task = individual_raw_score_data[individual_raw_score_data["task"]==task]

    # Enumerate the candidate teams of every condition BEFORE seeding, so the random
    # draws after the seed happen in the same order as in the copy-pasted version.
    candidate_teams = {
        (player_count, complexity): get_all_possible_nominal_teams(
            player_count, individual_raw_score_data_task, task, complexity
        )
        for player_count, complexity in NOMINAL_TEAM_CONDITIONS
    }

    # Re-seed per task so each task's sample is reproducible on its own
    random.seed(19104)

    # put together a dataframe that saves the playerCount, complexity, and deduplicated sum scores
    condition_dfs = []
    for player_count, complexity in NOMINAL_TEAM_CONDITIONS:
        # random.sample raises ValueError if a condition has fewer than NUM_NOMINAL_TEAMS candidates
        sampled_teams = random.sample(candidate_teams[(player_count, complexity)], NUM_NOMINAL_TEAMS)
        condition_dfs.append(pd.DataFrame({
            "task": task,
            "playerCount": player_count,
            "complexity": complexity,
            "deduplicated_sum_score": get_scores_for_nominal_team(sampled_teams, evaluation_functions[task])
        }))

    # append all the dataframes together
    all_nominal_teams_df = pd.concat(condition_dfs, ignore_index=True)

    nominal_dfs[task] = all_nominal_teams_df

In [327]:
def min_max_normalize_to_100(series):
    """Min-max scale a Series to the 0-100 range.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        `(series - min) / (max - min) * 100`, with the same index. A constant series
        has no spread to scale by, so every entry is set to 1 (the same convention
        `normalize_task_scores` uses for constant groups).
    """
    min_val = series.min()
    max_val = series.max()
    if max_val == min_val:
        # Return here: falling through would divide by zero
        return pd.Series(1.0, index=series.index, name=series.name)
    return ((series - min_val) / (max_val - min_val)) * 100

def normalize_task_scores(nominal_df, compiled_df, complexity_col="complexity"):
    """Min-max scale nominal-team and real scores together, per (task, complexity).

    Parameters
    ----------
    nominal_df : pandas.DataFrame
        Needs `task`, `complexity_col` and `deduplicated_sum_score`.
    compiled_df : pandas.DataFrame
        Needs `task`, `complexity_col` and `score_real_raw`.
    complexity_col : str, default "complexity"
        Name of the complexity column in both frames.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Copies of the two inputs with a fresh 0..n-1 index and one added column each:
        `deduplicated_sum_score_normalized` and `score_real_raw_normalized`. Values are
        scaled to 0-100 using the min and max of BOTH frames' scores within the group;
        groups whose min equals their max are set to 1.
    """
    key_cols = ["task", complexity_col]

    def _score_part(frame, score_col, source):
        # Group keys plus the frame's score under a shared column name
        part = frame[key_cols].copy()
        part["value"] = frame[score_col]
        part["_src"] = source
        return part

    # Nominal rows first, real rows second; row order is the positional key used to split again
    stacked = pd.concat(
        [
            _score_part(nominal_df, "deduplicated_sum_score", "nom"),
            _score_part(compiled_df, "score_real_raw", "comp"),
        ],
        ignore_index=True,
    )

    # Groupwise min–max within (task, complexity)
    per_group = stacked.groupby(key_cols)["value"]
    group_min = per_group.transform("min")
    spread = per_group.transform("max") - group_min
    is_constant = spread == 0

    # Divide by 1 instead of 0 for constant groups, then overwrite those rows with 1
    scaled = ((stacked["value"] - group_min) / spread.where(~is_constant, 1)) * 100
    scaled = scaled.mask(is_constant, 1)

    # Hand each frame its own slice of the scaled values, by position
    n_nominal = len(nominal_df)
    nominal_out = nominal_df.reset_index(drop=True).copy()
    nominal_out["deduplicated_sum_score_normalized"] = scaled.iloc[:n_nominal].to_numpy()
    compiled_out = compiled_df.reset_index(drop=True).copy()
    compiled_out["score_real_raw_normalized"] = scaled.iloc[n_nominal:].to_numpy()

    return nominal_out, compiled_out

In [328]:
# Scale each task's nominal-team scores together with its real scores, so both share one 0-100 range per (task, complexity)
nominal_unscrambled, regular_unscrambled = normalize_task_scores(nominal_dfs["Unscramble Words"], unscramble_words_compiled)
nominal_word_construction, regular_word_construction = normalize_task_scores(nominal_dfs["Word Construction"], word_construction_compiled)
nominal_recall_association, regular_recall_association = normalize_task_scores(nominal_dfs["Recall Association"], recall_association_compiled)

In [329]:
# Stack the normalized nominal-team scores of all three tasks
nominal_all = (
    pd.concat([nominal_unscrambled, nominal_word_construction, nominal_recall_association], ignore_index=True)
)

# Mean normalized nominal-team score per (task, complexity, playerCount);
# this is the baseline that real team scores are divided by later
nominal_avg = (
    nominal_all
    .groupby(["task", "complexity", "playerCount"])["deduplicated_sum_score_normalized"]
    .mean()
    .rename("nominal_avg_dedup_sum")
    .reset_index()
)

In [330]:
# Display the per-condition nominal baselines
nominal_avg

Unnamed: 0,task,complexity,playerCount,nominal_avg_dedup_sum
0,Recall Association,High,3,48.920455
1,Recall Association,High,6,73.670455
2,Recall Association,Low,3,58.433333
3,Recall Association,Low,6,80.9
4,Recall Association,Medium,3,47.830508
5,Recall Association,Medium,6,70.644068
6,Unscramble Words,High,3,78.0
7,Unscramble Words,High,6,93.125
8,Unscramble Words,Low,3,94.625
9,Unscramble Words,Low,6,99.375


In [334]:
### divide the score for each "real" team with the average score for the corresponding nominal team
def normalize_against_nominal(regular_df, score_col="team_score"):
    """Divide each row's score by the average nominal-team score of its condition.

    `regular_df` is left-joined to the notebook-level `nominal_avg` on
    (task, complexity, playerCount); the result gains `nominal_avg_dedup_sum` and
    `normalized_score = <score_col> / nominal_avg_dedup_sum`. Rows with no
    matching nominal average get NaN.
    """
    with_baseline = regular_df.merge(nominal_avg, on=["task", "complexity", "playerCount"], how="left")
    ratio = with_baseline[score_col] / with_baseline["nominal_avg_dedup_sum"]
    return with_baseline.assign(normalized_score=ratio)

# Only real teams (playerCount > 1) are compared against the nominal baseline;
# the score used is the 0-100 normalized real score from normalize_task_scores
team_unscrambled_normalized = normalize_against_nominal(regular_unscrambled[regular_unscrambled["playerCount"]>1], score_col="score_real_raw_normalized")
team_word_construction_normalized = normalize_against_nominal(regular_word_construction[regular_word_construction["playerCount"]>1], score_col="score_real_raw_normalized")
team_recall_association_normalized = normalize_against_nominal(regular_recall_association[regular_recall_association["playerCount"]>1], score_col="score_real_raw_normalized")

In [340]:
# Stack the per-task team tables and keep the combined frame in memory.
# DataFrame.to_csv returns None when given a path, so the write is a separate
# statement; chaining it into the assignment left this variable set to None.
deduplicated_sum_synergy_df = pd.concat([
    team_unscrambled_normalized,
    team_word_construction_normalized,
    team_recall_association_normalized
], ignore_index=True)
deduplicated_sum_synergy_df.to_csv('../outputs/deduplicated_sum_synergy_df.csv', index=False)