In [462]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.decomposition import PCA
import statsmodels.stats.api as sms
from statsmodels.stats.multitest import multipletests

# Sanity Checks
Ensure the data look right.

In [463]:
# Filenames
# The four conversation-level feature CSVs share one directory and filename prefix.
_CONV_OUTPUT_PREFIX = '../output/conv/multi_task_output_conversation_level_'

multitask_cumulative_by_stage = _CONV_OUTPUT_PREFIX + 'stageId_cumulative.csv'
multitask_cumulative_by_stage_and_task = _CONV_OUTPUT_PREFIX + 'stageId_cumulative_within_task.csv'
multitask_cumulative_by_round_dv_last = _CONV_OUTPUT_PREFIX + 'roundId_last_cumulative.csv'
multitask_by_round_dv_last = _CONV_OUTPUT_PREFIX + 'roundId_last.csv'


In [464]:
# params
num_conversation_components = 5  # number of principal components kept from the conversation features (PCA n_components)
min_num_chats = 0  # rows need sum_num_messages >= this value to be kept
desired_target = "score"  # one of the DV column names; not referenced in this section — presumably used by the modelling code, TODO confirm
N_ITERS = 100  # not referenced in this section — presumably the number of repeated model runs, TODO confirm

In [465]:
def drop_invariant_columns(df):
	"""
	Drop feature columns that never vary, since they cannot be useful predictors.

	A column counts as invariant when it has at most one distinct non-missing value.
	That covers constant columns (e.g., the entire column is 0) and also columns that
	are entirely NaN: `nunique()` reports 0 distinct values for those, so an `== 1`
	check would let them through to the later standardization / PCA step.

	@df: the dataframe containing the features (this should be X).

	Returns a new dataframe without the invariant columns; `df` itself is not modified.
	"""
	# nunique() ignores NaN by default, so an all-NaN column yields 0 here.
	distinct_counts = df.nunique()
	invariant_cols = distinct_counts[distinct_counts <= 1].index
	return df.drop(columns=invariant_cols)

In [466]:
def read_and_preprocess_data(path, min_num_chats, num_conversation_components, conversation_id, task_map_path='../utils/task_map.csv'):
	"""
	Load a conversation-level CSV and split it into the four feature groups used downstream.

	Steps:
	1. Read `path` and fill missing numeric values with the column mean.
	2. Keep rows whose `sum_num_messages` is at least `min_num_chats`.
	3. Slice out the dependent variables, the team-composition columns and the task columns
	   (task names are mapped and joined with the task map; 'complexity' is one-hot encoded).
	4. Standardize the remaining numeric (conversation) columns and reduce them with PCA.

	@path: CSV file to read.
	@min_num_chats: minimum value of `sum_num_messages` for a row to be kept.
	@num_conversation_components: number of principal components to keep.
	@conversation_id: not used by this function; kept so existing calls keep working.
	@task_map_path: CSV that is left-joined onto the task names via its "task" column.

	Returns (composition, task, conversation, dvs). All four carry a fresh 0..n-1 index,
	so they line up row by row (for `task` this presumes the task map has at most one
	row per task name — a left join would otherwise duplicate rows; verify).
	"""
	conv_data = pd.read_csv(path)

	# Fill NA with mean (only numeric columns have a mean, so only those are filled)
	conv_data = conv_data.fillna(conv_data.mean(numeric_only=True))

	# Filter this down to teams that have at least min_num of chats.
	# The index is reset because `task` (rebuilt by pd.merge) and `conversation` (rebuilt from
	# the PCA output) always get a fresh RangeIndex; without the reset, `composition` and `dvs`
	# would keep the pre-filter labels and stop lining up with them once rows are removed.
	conv_data = conv_data[conv_data["sum_num_messages"] >= min_num_chats].reset_index(drop=True)

	# Save the important information

	# DV
	dvs = conv_data[["score","speed","efficiency","raw_duration_min","default_duration_min"]]

	# Team Composition
	composition_colnames = [
		'birth_year', 'CRT', 'income_max', 'income_min', 'IRCS_GS', 'IRCS_GV', 'IRCS_IB', 'IRCS_IR',
		'IRCS_IV', 'IRCS_RS', 'political_fiscal', 'political_social', 'RME', 'country', 'education_level',
		'gender', 'marital_status', 'political_party', 'race', 'playerCount', 'conversation_num',
	]

	# Select columns that contain the specified keywords (substring match, so any column
	# whose name merely contains one of the keywords is picked up as well)
	composition = conv_data[[col for col in conv_data.columns if any(keyword in col for keyword in composition_colnames)]]

	# Task
	task = conv_data[['task', 'complexity', 'conversation_num']].copy()
	task_map = pd.read_csv(task_map_path) # get task map

	# Translate the task names found in the data into the names used by the task map
	task_name_mapping = {
		"Moral Reasoning": "Moral Reasoning (Disciplinary Action Case)",
		"Wolf Goat Cabbage": "Wolf, goat and cabbage transfer",
		"Guess the Correlation": "Guessing the correlation",
		"Writing Story": "Writing story",
		"Room Assignment": "Room assignment task",
		"Allocating Resources": "Allocating resources to programs",
		"Divergent Association": "Divergent Association Task",
		"Word Construction": "Word construction from a subset of letters",
		"Whac a Mole": "Whac-A-Mole"
	}
	task.loc[:, 'task'] = task['task'].replace(task_name_mapping)
	task = pd.merge(left=task, right=task_map, on="task", how='left')

	# Create dummy columns for 'complexity', then drop the raw categorical columns
	complexity_dummies = pd.get_dummies(task['complexity'])
	task = pd.concat([task, complexity_dummies], axis=1).drop(['complexity', 'task'], axis=1)

	# Conversation features: every numeric column that is neither a DV nor a composition column.
	# NOTE(review): _get_numeric_data() is a private pandas API; kept to preserve column selection.
	conversation = conv_data.drop(columns=list(dvs.columns) + list(composition.columns))._get_numeric_data()
	conversation = drop_invariant_columns(conversation) # drop invariant conv features

	# additional preprocess --- get PC's of conversation to reduce dimensionality issues
	# random_state is fixed so that sklearn's randomized SVD solver (which it may select
	# automatically for large inputs) yields the same components on every run.
	pca = PCA(n_components=num_conversation_components, random_state=0)
	pca_result = pca.fit_transform(conversation.transform(lambda x: (x - x.mean()) / x.std()))
	print("PCA explained variance:")
	print(np.sum(pca.explained_variance_ratio_))
	conversation = pd.DataFrame(pca_result, columns=[f'PC{i+1}' for i in range(pca_result.shape[1])])

	return composition, task, conversation, dvs

In [467]:
# Feature groups for the cumulative-by-stage file
composition_mcbs, task_mcbs, conversation_mcbs, dvs_mcbs = read_and_preprocess_data(
    path=multitask_cumulative_by_stage,
    min_num_chats=min_num_chats,
    num_conversation_components=num_conversation_components,
    conversation_id="stageId",
)

PCA explained variance:
0.40402318586039965


In [468]:
# Feature groups for the cumulative-by-stage-and-task file
composition_mcbst, task_mcbst, conversation_mcbst, dvs_mcbst = read_and_preprocess_data(
    path=multitask_cumulative_by_stage_and_task,
    min_num_chats=min_num_chats,
    num_conversation_components=num_conversation_components,
    conversation_id="stageId",
)

PCA explained variance:
0.34705540681863056


In [469]:
# Feature groups for the cumulative-by-round file
composition_mcbr, task_mcbr, conversation_mcbr, dvs_mcbr = read_and_preprocess_data(
    path=multitask_cumulative_by_round_dv_last,
    min_num_chats=min_num_chats,
    num_conversation_components=num_conversation_components,
    conversation_id="roundId",
)

PCA explained variance:
0.4255788257848803


In [470]:
# Feature groups for the by-round file
composition_mbr, task_mbr, conversation_mbr, dvs_mbr = read_and_preprocess_data(
    path=multitask_by_round_dv_last,
    min_num_chats=min_num_chats,
    num_conversation_components=num_conversation_components,
    conversation_id="roundId",
)

PCA explained variance:
0.3724445336757018


## Assert that non-conversation columns are exactly the same

In [471]:
def show_non_matching_rows(df1, df2):
    """
    Return the rows at which two identically-labelled DataFrames disagree.

    A row is reported when any of its cells differs between `df1` and `df2`.
    Cells that are missing (NaN) in both frames count as equal; a plain `==`
    would flag them, because NaN never compares equal to itself.

    @df1, @df2: DataFrames with the same index and columns, including a
        "conversation_num" column (used to sort the result).

    Returns the differing rows of both frames stacked together, tagged with an
    'Origin' column ('DF1' / 'DF2') and sorted by "conversation_num". The result
    has no rows when the two frames match.
    """
    # Cell-wise agreement, treating NaN-vs-NaN as a match; computed once for both frames
    cells_agree = (df1 == df2) | (df1.isna() & df2.isna())
    row_differs = ~cells_agree.all(axis=1)

    # Identify non-matching rows
    non_matching_rows_df1 = df1[row_differs].copy()
    non_matching_rows_df2 = df2[row_differs].copy()

    # Add indicator column and concatenate DataFrames
    non_matching_rows_df1['Origin'] = 'DF1'
    non_matching_rows_df2['Origin'] = 'DF2'
    result_df = pd.concat([non_matching_rows_df1, non_matching_rows_df2])
    result_df = result_df.sort_values(by=["conversation_num"])

    return result_df


### Task and composition features match for the stage-based analysis

In [472]:
# Stage-based task features: a table with no rows (header only) means the two frames agree on every row.
show_non_matching_rows(task_mcbs, task_mcbst)

Unnamed: 0,conversation_num,Q1concept_behav,Q3type_1_planning,Q4type_2_generate,Q6type_5_cc,Q7type_7_battle,Q8type_8_performance,Q9divisible_unitary,Q10maximizing,Q11optimizing,...,Q23ss_out_uncert,Q24eureka_question,Q2intel_manip_1,Q21intellective_judg_1,Q5creativity_input_1,Q25_type6_mixed_motive,High,Low,Medium,Origin


In [473]:
# Stage-based composition features: a table with no rows (header only) means the two frames agree on every row.
show_non_matching_rows(composition_mcbs, composition_mcbst)

Unnamed: 0,conversation_num,playerCount,CRT_nanmean,CRT_nanstd,IRCS_GS_nanmean,IRCS_GS_nanstd,IRCS_GV_nanmean,IRCS_GV_nanstd,IRCS_IB_nanmean,IRCS_IB_nanstd,...,income_min_nanstd,marital_status_nanmean,marital_status_nanstd,political_fiscal_nanmean,political_fiscal_nanstd,political_party_nanmean,political_party_nanstd,political_social_nanmean,political_social_nanstd,Origin


### Task and composition features match for the round-based analysis

In [474]:
# Round-based task features: a table with no rows (header only) means the two frames agree on every row.
show_non_matching_rows(task_mcbr, task_mbr)

Unnamed: 0,conversation_num,Q1concept_behav,Q3type_1_planning,Q4type_2_generate,Q6type_5_cc,Q7type_7_battle,Q8type_8_performance,Q9divisible_unitary,Q10maximizing,Q11optimizing,...,Q23ss_out_uncert,Q24eureka_question,Q2intel_manip_1,Q21intellective_judg_1,Q5creativity_input_1,Q25_type6_mixed_motive,High,Low,Medium,Origin


In [475]:
# Round-based composition features: a table with no rows (header only) means the two frames agree on every row.
show_non_matching_rows(composition_mcbr, composition_mbr)

Unnamed: 0,conversation_num,playerCount,CRT_nanmean,CRT_nanstd,IRCS_GS_nanmean,IRCS_GS_nanstd,IRCS_GV_nanmean,IRCS_GV_nanstd,IRCS_IB_nanmean,IRCS_IB_nanstd,...,income_min_nanstd,marital_status_nanmean,marital_status_nanstd,political_fiscal_nanmean,political_fiscal_nanstd,political_party_nanmean,political_party_nanstd,political_social_nanmean,political_social_nanstd,Origin


# Analysis of Results

In [476]:
# Load the four saved result tables (two stage-based, two round-based) in one pass
mcbs_results, mcbst_results, mcbr_results, mbr_results = (
    pd.read_csv(results_path)
    for results_path in (
        './multi_task_results/multitask_cumulative_by_stage_category_solo.csv',
        './multi_task_results/multitask_cumulative_by_stage_and_task_category_solo.csv',
        './multi_task_results/multitask_cumulative_by_round_dv_last_category_solo.csv',
        './multi_task_results/multitask_by_round_dv_last_category_solo.csv',
    )
)

In [477]:
def drop_unnamed_index(df):
    """
    Return `df` without any column whose name contains 'Unnamed'.

    Presumably these are index columns that were written to CSV and read back
    as 'Unnamed: 0'-style data columns — verify against how the files were saved.

    All matching columns are removed in a single selection, so two columns that
    share the same 'Unnamed' label no longer trigger a KeyError on the second drop.

    @df: DataFrame whose column names are strings.

    Returns a new DataFrame; `df` itself is not modified.
    """
    keep_column = ['Unnamed' not in col for col in df.columns]
    return df.loc[:, keep_column]

In [478]:
# Strip the 'Unnamed' columns from all four result tables
mcbs_results, mcbst_results, mcbr_results, mbr_results = (
    drop_unnamed_index(results_table)
    for results_table in (mcbs_results, mcbst_results, mcbr_results, mbr_results)
)


In [493]:
def get_mean_sd_of_results(df):
    """
    Summarize every column of `df` by its mean, standard deviation and the
    95% confidence interval of its mean.

    @df: DataFrame of numeric results. Missing values are ignored: pandas skips
        them for mean/std, and they are dropped before the interval is computed.

    Returns a DataFrame indexed by the columns of `df`, with columns
    'mean', 'std', '95% CI Lower' and '95% CI Upper', rounded to 4 decimals.
    """
    # Mean and standard deviation, one row per input column
    moments = df.agg(['mean', 'std']).T

    # t-based interval for the mean of each column (tconfint_mean's default alpha is 0.05)
    ci_bounds = {
        column: sms.DescrStatsW(df[column].dropna()).tconfint_mean()
        for column in df.columns
    }
    intervals = pd.DataFrame.from_dict(
        ci_bounds, orient='index', columns=['95% CI Lower', '95% CI Upper']
    )

    # Side-by-side summary, rounded for display
    return pd.concat([moments, intervals], axis=1).round(4)

In [495]:
def get_pairwise_t_tests(results, equal_var=True):
    """
    Run an independent two-sample t-test for every pair of columns and return
    the p-values after Benjamini-Hochberg (FDR) correction.

    @results: DataFrame with one column of observations per facet; NaNs are
        dropped separately for each column before testing.
    @equal_var: forwarded to `scipy.stats.ttest_ind`. The default (True) keeps the
        pooled-variance test this function has always run; pass False for Welch's
        test. NOTE(review): the per-facet standard deviations reported in this
        notebook differ by up to an order of magnitude, so Welch may be the better fit.

    Returns a DataFrame with columns "Facet 1", "Facet2" and "p-value" (corrected,
    rounded to 3 decimals), one row per pair of columns. With fewer than two
    columns there is nothing to compare and the result has no rows.
    """
    # "Facet2" (without a space) is kept as-is so existing consumers of the table keep working.
    column_names = ["Facet 1", "Facet2", "p-value"]

    comparison1 = []
    comparison2 = []
    p_values = []

    facets = list(results.columns)
    for position, facet1 in enumerate(facets):
        obs1 = results[facet1].dropna()
        for facet2 in facets[position + 1:]:
            obs2 = results[facet2].dropna()

            _, p_value = stats.ttest_ind(obs1, obs2, equal_var=equal_var)

            comparison1.append(facet1)
            comparison2.append(facet2)
            p_values.append(p_value)

    if not p_values:
        # No pairs to test: skip the correction step, which fails on an empty list of p-values.
        return pd.DataFrame(columns=column_names)

    _, p_values_corrected, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh')

    pairwise_t_test = pd.DataFrame({"Facet 1": comparison1, "Facet2": comparison2, "p-value": p_values_corrected})
    pairwise_t_test["p-value"] = pairwise_t_test["p-value"].apply(lambda x: round(x, 3))

    return pairwise_t_test

In [487]:
# Give the communication column a table-specific name so it stays a separate column when the stage tables are concatenated.
mcbs_results = mcbs_results.rename(columns={"Communication Process": "Communication Process (Cumulative)"})

In [488]:
# Give the communication column a table-specific name so it stays a separate column when the stage tables are concatenated.
mcbst_results = mcbst_results.rename(columns={"Communication Process": "Communication Process (Task-Specific)"})

In [489]:
# Stack both stage tables row-wise. The two renamed communication columns do not overlap, so each is NaN
# for the rows of the other table; columns present in both tables hold the rows of both.
# NOTE(review): confirm that pooling the shared columns across the two tables is intended.
stage_results = pd.concat([mcbs_results, mcbst_results])

In [497]:
# Build the summary once, save it, then show it as the cell output
stage_summary = get_mean_sd_of_results(stage_results)
stage_summary.to_csv('./multi_task_results/stage_based_results.csv')
stage_summary

Unnamed: 0,mean,std,95% CI Lower,95% CI Upper
Team Composition,0.0096,0.0113,0.0081,0.0112
Team Size,-0.0006,0.0031,-0.0011,-0.0002
Task Attributes,0.2187,0.0244,0.2153,0.2221
Task Complexity,0.0107,0.0086,0.0095,0.0119
Communication Process (Cumulative),0.0032,0.01,0.0012,0.0052
Communication Process (Task-Specific),0.0691,0.0202,0.0651,0.0731


In [456]:
# Pairwise facet comparisons (FDR-corrected p-values) for the cumulative stage-based results.
# NOTE(review): the saved output shows 4 decimals while get_pairwise_t_tests rounds to 3 — re-run to refresh.
get_pairwise_t_tests(mcbs_results)

Unnamed: 0,Facet 1,Facet2,p-value
0,Team Composition,Team Size,0.0
1,Team Composition,Task Attributes,0.0
2,Team Composition,Task Complexity,0.0445
3,Team Composition,Communication Process (Cumulative),0.0003
4,Team Size,Task Attributes,0.0
5,Team Size,Task Complexity,0.0
6,Team Size,Communication Process (Cumulative),0.0007
7,Task Attributes,Task Complexity,0.0
8,Task Attributes,Communication Process (Cumulative),0.0
9,Task Complexity,Communication Process (Cumulative),0.0


In [458]:
# Pairwise facet comparisons (FDR-corrected p-values) for the task-specific stage-based results.
# NOTE(review): the saved output shows 4 decimals while get_pairwise_t_tests rounds to 3 — re-run to refresh.
get_pairwise_t_tests(mcbst_results)

Unnamed: 0,Facet 1,Facet2,p-value
0,Team Composition,Team Size,0.0
1,Team Composition,Task Attributes,0.0
2,Team Composition,Task Complexity,0.8383
3,Team Composition,Communication Process (Task-Specific),0.0
4,Team Size,Task Attributes,0.0
5,Team Size,Task Complexity,0.0
6,Team Size,Communication Process (Task-Specific),0.0
7,Task Attributes,Task Complexity,0.0
8,Task Attributes,Communication Process (Task-Specific),0.0
9,Task Complexity,Communication Process (Task-Specific),0.0


In [433]:
# Give the communication column a table-specific name so it stays a separate column when the round tables are concatenated.
mcbr_results = mcbr_results.rename(columns={"Communication Process": "Communication Process (Cumulative)"})

In [434]:
# Give the communication column a table-specific name so it stays a separate column when the round tables are concatenated.
mbr_results = mbr_results.rename(columns={"Communication Process": "Communication Process (Task-Specific)"})

In [435]:
# Stack both round tables row-wise. The two renamed communication columns do not overlap, so each is NaN
# for the rows of the other table; columns present in both tables hold the rows of both.
# NOTE(review): confirm that pooling the shared columns across the two tables is intended.
round_results = pd.concat([mcbr_results, mbr_results])

In [459]:
# Pairwise facet comparisons (FDR-corrected p-values) for the cumulative round-based results.
# NOTE(review): the saved output shows 4 decimals while get_pairwise_t_tests rounds to 3 — re-run to refresh.
get_pairwise_t_tests(mcbr_results)

Unnamed: 0,Facet 1,Facet2,p-value
0,Team Composition,Team Size,0.0018
1,Team Composition,Task Attributes,0.0
2,Team Composition,Task Complexity,0.0076
3,Team Composition,Communication Process (Cumulative),0.4565
4,Team Size,Task Attributes,0.0
5,Team Size,Task Complexity,0.0
6,Team Size,Communication Process (Cumulative),0.0
7,Task Attributes,Task Complexity,0.0
8,Task Attributes,Communication Process (Cumulative),0.0
9,Task Complexity,Communication Process (Cumulative),0.0


In [460]:
# Pairwise facet comparisons (FDR-corrected p-values) for the task-specific round-based results.
# NOTE(review): the saved output shows 4 decimals while get_pairwise_t_tests rounds to 3 — re-run to refresh.
get_pairwise_t_tests(mbr_results)

Unnamed: 0,Facet 1,Facet2,p-value
0,Team Composition,Team Size,0.0086
1,Team Composition,Task Attributes,0.0
2,Team Composition,Task Complexity,0.0003
3,Team Composition,Communication Process (Task-Specific),0.0
4,Team Size,Task Attributes,0.0
5,Team Size,Task Complexity,0.0
6,Team Size,Communication Process (Task-Specific),0.0
7,Task Attributes,Task Complexity,0.0
8,Task Attributes,Communication Process (Task-Specific),0.0
9,Task Complexity,Communication Process (Task-Specific),0.0


In [498]:
# Build the summary once, save it, then show it as the cell output
round_summary = get_mean_sd_of_results(round_results)
round_summary.to_csv('./multi_task_results/round_based_results.csv')
round_summary

Unnamed: 0,mean,std,95% CI Lower,95% CI Upper
Team Composition,0.0045,0.0243,0.0011,0.0079
Team Size,-0.0048,0.0067,-0.0057,-0.0038
Task Attributes,0.1634,0.0431,0.1574,0.1694
Task Complexity,0.0395,0.025,0.036,0.043
Communication Process (Cumulative),-0.022,0.0278,-0.0275,-0.0165
Communication Process (Task-Specific),0.0304,0.0368,0.0231,0.0377
