In [7]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
import matplotlib.pyplot as plt

In [8]:
def greedy_maximin(data, remaining, selected,  n, metric="euclidean"):
    """
        data: pandas dataframe(rows=points, cols=dimensions)
            points to choose from
            
        remaining: indexes of `data` that are candidates for the next point
        
        selected: indexes of `data` that have already been included
        
        n: number of additional points to include
        
        metric: "euclidean" "cityblock"
            distance measures from scipy.spatial.distance
    """
    distances = []
    for _ in range(n):
        dists = distance.cdist(data.loc[selected], data.loc[remaining], metric)
        closest = dists.min(axis=0)  # distance between candidates and closest selection
        distances.append(closest.max())
        add = remaining[np.argmax(closest)]
        selected += [add]
        remaining.remove(add)

    return selected, distances

In [9]:
def greedy_minimin(data, remaining, selected,  n, metric="euclidean"):
    """
        data: pandas dataframe(rows=points, cols=dimensions)
            points to choose from
            
        remaining: indexes of `data` that are candidates for the next point
        
        selected: indexes of `data` that have already been included
        
        n: number of additional points to include
        
        metric: "euclidean" "cityblock"
            distance measures from scipy.spatial.distance
    """
    distances = []
    for _ in range(n):
        dists = distance.cdist(data.loc[selected], data.loc[remaining], metric)
        closest = dists.min(axis=0)  # distance between candidates and closest selection
        distances.append(closest.min())
        add = remaining[np.argmin(closest)]
        selected += [add]
        remaining.remove(add)

    return selected, distances

In [10]:
def within_set_distance(data, selected, metric="euclidean"):
    return distance.cdist(data.loc[selected],data.loc[selected], metric) 

In [11]:
task_map = pd.read_csv("../task_map.csv", index_col="task")

In [12]:
task_map.head()

Unnamed: 0_level_0,Q1concept_behav,Q3type_1_planning,Q4type_2_generate,Q6type_5_cc,Q7type_7_battle,Q8type_8_performance,Q9divisible_unitary,Q10maximizing,Q11optimizing,Q13outcome_multip,...,Q18ans_recog,Q19time_solvability,Q20type_3_type_4,Q22confl_tradeoffs,Q23ss_out_uncert,Q24eureka_question,Q2intel_manip_1,Q21intellective_judg_1,Q5creativity_input_1,Q25_type6_mixed_motive
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Categorization problem,0.0,0.166667,0.166667,0.125,0.333333,0.916667,0.217391,0.458333,0.875,0.958333,...,0.875,1.0,1.0,0.708333,0.75,0.25,0.054167,0.945833,0.266667,0
Mastermind,0.0,0.115385,0.115385,0.076923,0.2,0.076923,0.16,0.64,0.846154,0.923077,...,0.807692,0.923077,1.0,0.269231,0.461538,0.115385,0.103846,0.934615,0.234615,0
Logic Problem,0.0,0.269231,0.153846,0.115385,0.230769,0.153846,0.269231,0.576923,0.769231,0.961538,...,0.923077,0.961538,0.961538,0.384615,0.692308,0.307692,0.026923,0.961538,0.2,0
Sudoku,0.045455,0.136364,0.136364,0.090909,0.227273,0.954545,0.272727,0.272727,0.909091,0.954545,...,1.0,1.0,1.0,0.181818,0.045455,0.227273,0.077273,0.990909,0.181818,0
"Rank cities by population, rank words by familiarity",0.0,0.043478,0.0,0.086957,0.043478,0.0,0.136364,0.565217,0.73913,0.956522,...,0.608696,0.869565,1.0,0.086957,0.863636,0.043478,0.026087,0.947826,0.113043,0


Drop tasks that cannot be used to measure synergy:
- Either has no correct answer (e.g., designed to be 'impossible')
- Or has no individual component (e.g., Mixed-Motive = 0)

In [13]:
# # drop things that we can't play
# task_map.drop('Railroad Route Construction game (Impossible Version)', inplace = True)

# # drop things that are not playable by individuals (Mixed-Motive = 0)
# task_map = task_map[task_map["Q25_type6_mixed_motive"] == 0]

In [14]:
scores_dict = {}
for task in task_map.index:
    selected = [task]
    remaining = list(task_map.index.difference(selected))
    choices,scores = greedy_maximin(task_map, remaining, selected, n=10)
    scores_dict[task] = scores[9]

In [15]:
print(np.quantile(list(scores_dict.values()), [0.5, 0.95]))

[1.55209764 1.60547509]


In [16]:
pd.Series(scores_dict).idxmax()

'Railroad Route Construction game (Impossible Version)'

In [17]:
selected = [pd.Series(scores_dict).idxmax()]
remaining = list(task_map.index.difference(selected))
choices,scores = greedy_maximin(task_map, remaining, selected, n=9) # needs to be 9 because we already selected 1
choices

['Railroad Route Construction game (Impossible Version)',
 'Reproducing arts',
 'Best job candidate (hidden-profile)',
 'Putting food into categories',
 'Checkers',
 'Minimal Group Paradigm (study diversity)',
 'Whac-A-Mole',
 '9 Dot Problem',
 'Shopping plan',
 'TOPSIM - general mgmt business game']

In [18]:
# print the scores
scores

[2.5496655359581437,
 2.188780982478159,
 1.9504622562723126,
 1.9314821335512227,
 1.927431393517628,
 1.8565276466507805,
 1.7812782820507724,
 1.700180875381155,
 1.6703836894544493]

# Try to replicate for a 'closest cluster'

In [19]:
scores_dict_min = {}
for task in task_map.index:
    selected = [task]
    remaining = list(task_map.index.difference(selected))
    choices,scores = greedy_minimin(task_map, remaining, selected, n=10)
    scores_dict_min[task] = scores[9]

In [20]:
selected = [pd.Series(scores_dict_min).idxmin()]
remaining = list(task_map.index.difference(selected))
choices,scores = greedy_minimin(task_map, remaining, selected, n=9)
choices

['Arithmetic problem 1',
 'Euclidean traveling salesperson',
 'Abstract grid task',
 'Mastermind',
 'Logic Problem',
 'Guessing the correlation',
 'Random dot motion',
 'Letters-to-numbers problems (cryptography)',
 'Computer maze',
 'Recall images']