## Load Data, Define Categories

In [1]:
from pprint import pprint
from importlib import reload
import pandas as pd
from questionnaire_helpers import scoring
from scipy.stats import ttest_ind, f_oneway, ks_2samp, pearsonr
reload(scoring)

# When True, the loaded synchrony results are reduced to the
# "Synchrony" and "Lag" columns before questionnaire scores are joined.
recompute = True

# Dyad numbers belonging to each experimental condition.
conditions = dict(
    LipSync=[6, 7, 8, 9, 11],
    FaceTracking=[1, 2, 3, 4, 10],
)

# Positional column span of every questionnaire section:
# category -> (first column, last column), both bounds inclusive.
categories = {
    name: (first_col, last_col)
    for name, first_col, last_col in (
        ("demographic", 0, 2),
        ("acquaintance", 3, 3),
        ("self-rating", 4, 18),
        ("isq-pre", 19, 23),
        ("ssq-pre", 24, 39),
        ("ssq-post", 40, 55),
        ("isq-post", 56, 60),
        ("overlap", 61, 61),
        ("iqq", 62, 75),
        ("other-rating", 76, 90),
    )
}

def get_answers_by_condition(df, c_name):
    """Return the rows of ``df`` whose dyad belongs to condition ``c_name``.

    Row labels are expected to look like ``"<dyad>_<subject>"`` (e.g. ``"10_2"``).
    The dyad part in front of the first underscore is compared as a whole
    against the dyad numbers listed in ``conditions[c_name]``.

    Comparing only the first character against ``str(list)`` (as done before)
    matched dyad 1, 10 and 11 to *both* conditions, because the character
    ``"1"`` occurs in the text of either list.

    :param df: DataFrame indexed by subject ID.
    :param c_name: Key of ``conditions`` ("LipSync" or "FaceTracking").
    :return: pandas.DataFrame with the rows of the requested condition.
    :raises KeyError: if ``c_name`` is not a known condition.
    """
    dyads = {str(dyad) for dyad in conditions[c_name]}
    mask = [str(label).split('_')[0] in dyads for label in df.index]
    # .loc keeps all columns even when the mask is empty.
    return df.loc[mask]

def get_answers_by_category(df, q_name, q_cols=None):
    """Return all answers for a certain questionnaire.

    :param df: df with all answers.
    :param q_name: Name of questionnaire.
    :param q_cols: Dict with columns of questionnaire as number pairs
        (from, to), both inclusive. Defaults to the module-level
        ``categories`` mapping when omitted.
    :return: pandas.DataFrame with answers for questionnaire.
    :raises KeyError: if ``q_name`` is not a key of the column mapping.
    """
    if q_cols is None:
        q_cols = categories
    from_col, to_col = q_cols[q_name]
    # iloc's stop is exclusive, the stored span is inclusive.
    return df.iloc[:, from_col:to_col + 1]

def group_by_dyad(_df):
    """Group the rows of ``_df`` by dyad number.

    The dyad is the part of each index label in front of the first
    underscore (``"10_2"`` -> ``10.0``). The grouping key is stored in a
    "Dyad" column of a *copy*, so the caller's frame is left untouched.

    :param _df: DataFrame indexed by ``"<dyad>_<subject>"`` strings.
    :return: pandas GroupBy object keyed by the float dyad number.
    """
    keyed = _df.copy()
    keyed["Dyad"] = keyed.index.str.split('_').map(lambda parts: parts[0])
    keyed["Dyad"] = keyed["Dyad"].astype(float)
    return keyed.groupby("Dyad")

def group_by_suffix(_df):
    """Group the rows of ``_df`` by the subject suffix of the index label.

    The suffix is the part between the first and second underscore
    (``"10_2"`` -> ``2.0``). The grouping key is stored in a "Subject"
    column of a *copy*, so the caller's frame is left untouched.

    :param _df: DataFrame indexed by ``"<dyad>_<subject>"`` strings.
    :return: pandas GroupBy object keyed by the float subject number.
    """
    keyed = _df.copy()
    keyed["Subject"] = keyed.index.str.split('_').map(lambda parts: parts[1])
    keyed["Subject"] = keyed["Subject"].astype(float)
    return keyed.groupby("Subject")

# Questionnaire answers, one row per subject ID.
df = pd.read_csv(
    "./prepped_data/questionnaire/prepped_questionnaire.csv",
    index_col='ID',
)

# Previously computed results, one row per dyad.
df_res = pd.read_csv(
    "./out/result/results_sync.csv",
    index_col='Dyad',
)
if recompute:
    # Keep only the synchrony measures; the remaining columns are rebuilt below.
    df_res = df_res.loc[:, ["Synchrony", "Lag"]]

df_res

FileNotFoundError: [Errno 2] No such file or directory: './prepped_data/questionnaire/prepped_questionnaire.csv'

In [None]:
# Expand the dyad-level results to subject level: every dyad row is
# duplicated for subject "<dyad>_1" and "<dyad>_2" so it can be joined with
# the per-subject questionnaire scores.
# Rows are collected in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
subject_rows = []
for idx, row in df_res.iterrows():
    for subject in (1, 2):
        subject_row = row.copy()
        subject_row.name = f"{idx}_{subject}"
        subject_rows.append(subject_row)
df_new_res = pd.DataFrame(subject_rows)
df_res = df_new_res

## Demographic Data

In [None]:
# Summarise the demographic answers (age and gender distribution).
df_demo = get_answers_by_category(df, "demographic")
print("Mean age:", df_demo["Please enter your age.."].mean(),
      # .std() is the standard deviation, so label it as such (was "Var age:").
      "Std age:", df_demo["Please enter your age.."].std(),
      "Gender:", df_demo["Please select your gender..."].value_counts(),
      sep='\n')

In [None]:
# Oldest participant in the sample.
age_answers = df_demo["Please enter your age.."]
age_answers.max()

## Big Five Personality Traits
### Get Answers for Self- and Other-Rating

In [None]:
# Score the Big Five items once for the self-rating and once for the
# rating of the negotiation partner.
df_self_rating = get_answers_by_category(df, "self-rating")
df_self_scores = scoring.score_df(df_self_rating, scoring.sk_bfi, 8, "I see myself as someone who... [{}]")

df_other_rating = get_answers_by_category(df, "other-rating")
df_other_scores = scoring.score_df(df_other_rating, scoring.sk_bfi, 8, "I see my negotiation partner as someone who... [{}]")

# Side-by-side overview. The prefixes were swapped before (self scores were
# labelled OTHER_ and vice versa).
pd.concat([df_self_scores.add_prefix("SELF_"), df_other_scores.add_prefix("OTHER_")], axis=1)

### Calculate Differences
How did the rating of others deviate from self-rating?
Use absolute errors

In [None]:
# For every subject: partner's rating of the subject minus the subject's
# own self-rating, per scored trait.
# Rows are collected in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0.
diff_rows = []
for subj, self_scores in df_self_scores.iterrows():
    # The partner shares the dyad number and has the other suffix.
    if subj.endswith('_1'):
        other_idx = subj.split('_')[0] + '_2'
    else:
        other_idx = subj.split('_')[0] + '_1'
    diff_row = df_other_scores.loc[other_idx] - self_scores
    diff_row.name = subj
    diff_rows.append(diff_row)
df_bfi_diff = pd.DataFrame(diff_rows)

# NOTE(review): the surrounding text asks for absolute errors, but the
# conversion below is disabled, so the differences stay signed — confirm.
#df_bfi_diff = df_bfi_diff.applymap(abs)
df_bfi_diff

In [None]:
# Distribution of the per-subject sum of trait differences.
bfi_diff_totals = df_bfi_diff.sum(axis=1)
bfi_diff_totals.hist(figsize=(10, 10))

Mean differences:

In [None]:
# Split the rating differences by experimental condition.
bfi_diff_ls = get_answers_by_condition(df_bfi_diff, "LipSync")
bfi_diff_ft = get_answers_by_condition(df_bfi_diff, "FaceTracking")

# Descriptive statistics per condition, one underlined section each.
report_lines = [
    "LipSync",
    "=======",
    str(bfi_diff_ls.describe()),
    '',
    "FaceTracking",
    "============",
    str(bfi_diff_ft.describe()),
]
print("\n".join(report_lines))

In [None]:
# Correlate, per subject, the partner's rating of the subject with the
# subject's self-rating across the scored traits (Pearson r and its p-value).
# Records are collected in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0.
ia_records = []
for subj, self_scores in df_self_scores.iterrows():
    # The partner shares the dyad number and has the other suffix.
    if subj.endswith('_1'):
        other_idx = subj.split('_')[0] + '_2'
    else:
        other_idx = subj.split('_')[0] + '_1'
    r, p = pearsonr(df_other_scores.loc[other_idx], self_scores)
    ia_records.append({"Subject": subj, "IA": r, "IA_p": p})

# Explicit columns keep the frame well-formed even when there are no subjects.
df_ia_res = pd.DataFrame(ia_records, columns=["Subject", "IA", "IA_p"])
df_ia_res = df_ia_res.set_index("Subject")
df_ia_res

In [None]:
# Show both frames before joining them on the subject ID.
print(f"{df_res}\n{df_ia_res}")

In [None]:
# Add the IA correlation to the results; its p-value is not carried over.
df_res = df_res.join(
    df_ia_res.drop("IA_p", axis=1)
)
df_res

In [None]:
# One-sided independent t-test per trait
# (alternative="less": FaceTracking mean below LipSync mean).
results = {
    trait: ttest_ind(bfi_diff_ft[trait], bfi_diff_ls[trait], alternative="less")
    for trait in (
        "Neuroticism",
        "Extraversion",
        "Openness",
        "Agreeableness",
        "Conscientiousness",
    )
}

pprint(results)

In [None]:

# One-sided two-sample Kolmogorov-Smirnov test per trait,
# FaceTracking sample first, LipSync sample second.
results = {
    trait: ks_2samp(bfi_diff_ft[trait], bfi_diff_ls[trait], alternative="less")
    for trait in (
        "Neuroticism",
        "Extraversion",
        "Openness",
        "Agreeableness",
        "Conscientiousness",
    )
}

pprint(results)

## Simulator Sickness Questionnaire
### Get Answers for SSQ, Compare Before and After
Scoring from https://conservancy.umn.edu/bitstream/handle/11299/201892/SSQ%20Scoring.pdf?sequence=9&isAllowed=y

In [None]:
def apply_ssq_conversion(df):
    """Return a copy of ``df`` with the SSQ scores multiplied by their weights.

    The "Nausea", "Oculomotor" and "Disorientation" columns are scaled by
    9.54, 7.58 and 13.92 respectively; "Total" is scaled by 3.74 * (3/7).
    The input frame is not modified.

    :param df: DataFrame with the four score columns named above.
    :return: pandas.DataFrame with the weighted scores.
    """
    weighted = df.copy()
    subscale_weights = (
        ("Nausea", 9.54),
        ("Oculomotor", 7.58),
        ("Disorientation", 13.92),
    )
    for subscale, weight in subscale_weights:
        weighted[subscale] = weighted[subscale] * weight
    # NOTE(review): the extra 3/7 factor presumably compensates for how the
    # scoring key builds "Total" — confirm against scoring.sk_ssq.
    weighted["Total"] = weighted["Total"] * 3.74 * (3/7)
    return weighted

# PRE NEGOTIATION
# ==============================
df_ssq_pre = get_answers_by_category(df, "ssq-pre")

df_ssq_pre_score = scoring.score_df(
    df_ssq_pre, scoring.sk_ssq, 3,
    "Please rate each item on the scale. Consider how you are feeling right now. [{}]")
df_ssq_pre_score = apply_ssq_conversion(df_ssq_pre_score)


# POST NEGOTIATION
# ==============================
df_ssq_post = get_answers_by_category(df, "ssq-post")
# Remove the '.1' suffix that marks the repeated (post) questions.
# str.rstrip('.1') strips any trailing run of the characters '.' and '1',
# not the suffix, so an anchored regex is used instead.
df_ssq_post.columns = df_ssq_post.columns.str.replace(r"\.1$", "", regex=True)

df_ssq_post_score = scoring.score_df(
    df_ssq_post, scoring.sk_ssq, 3,
    "Please rate each item on the scale. Consider how you are feeling right now. [{}]")

df_ssq_post_score = apply_ssq_conversion(df_ssq_post_score)

In [None]:

import scipy.stats as stats

# Shapiro-Wilk normality test on the pre-negotiation SSQ total.
ssq_pre_total = df_ssq_pre_score["Total"]
stats.shapiro(ssq_pre_total)

In [None]:
# Shapiro-Wilk normality test on the post-negotiation SSQ total.
ssq_post_total = df_ssq_post_score["Total"]
stats.shapiro(ssq_post_total)

In [None]:
# Wilcoxon signed-rank test: post- vs. pre-negotiation SSQ total.
stats.wilcoxon(
    df_ssq_post_score["Total"],
    df_ssq_pre_score["Total"],
)

In [None]:
# Raw totals behind the test above: post first, then pre.
ssq_totals = (df_ssq_post_score["Total"], df_ssq_pre_score["Total"])
print(*ssq_totals)

## Interaction Quality Questionnaire

In [None]:
# Raw answers of the interaction quality questionnaire.
df_iqq = get_answers_by_category(df, q_name="iqq")
df_iqq

In [None]:
df_iqq_score = scoring.score_df(
    df_iqq,
    scoring.sk_iqq,
    max_score=7,
    str_fmt="Please select the answer that best describes your experience during the negotiation. [{}]",
)

# Divide every scale by a fixed divisor.
# NOTE(review): presumably the number of items per scale, turning sums
# into means — confirm against scoring.sk_iqq.
for scale, divisor in (("Trust", 5), ("Rapport", 5), ("Smoothness", 2), ("Similarity", 2)):
    df_iqq_score[scale] = df_iqq_score[scale] / divisor

# Scores stay at subject level (no aggregation per dyad).
df_iqq_res = df_iqq_score

In [None]:
# Attach the IQQ scale scores to the results, matched on the subject ID.
df_res = df_res.join(df_iqq_res, how="left")


## Inventory Satisfaction Questionnaire

In [None]:
# Pre- and post-negotiation satisfaction answers side by side.
df_isq = pd.concat(
    [get_answers_by_category(df, part) for part in ("isq-pre", "isq-post")],
    axis=1,
)


def _scale_small_amount(amount):
    """Multiply amounts below 100 by 1000, leave all other values as they are."""
    # NOTE(review): presumably such answers were entered in thousands — confirm.
    if amount < 100:
        return amount * 1000
    return amount


# Positions of the columns that get the rescaling above.
for col_pos in (0, 1, 2, 3, 6):
    df_isq.iloc[:, col_pos] = df_isq.iloc[:, col_pos].apply(_scale_small_amount)

# Recode the answer codes of the standards question: 3 -> 2, 2 -> 1, 1 -> 3.
standards_col = "Did your standards for satisfaction…"
df_isq[standards_col] = df_isq[standards_col].replace({3: 2, 2: 1, 1: 3})
df_isq

In [None]:
# Condense the ISQ answers into three per-subject measures.
optimal_col = "Please fill in the settlement amount which would result in the following levels of satisfaction for you (note: marginally satisfactory must equal the bottom line you've been given in your Confidential Instructions) [Optimal]"
df_isq_res = pd.DataFrame({
    "outcome_satisfaction": df_isq["How would you now describe the outcome?"],
    # Settled amount minus the amount the subject named as optimal.
    "compromise": df_isq["For what amount?"] - df_isq[optimal_col],
    "satisfaction_change": df_isq["Did your standards for satisfaction…"],
})

df_res = df_res.join(df_isq_res)

## Self-Other Overlap

In [None]:
# get_answers_by_category returns a one-column DataFrame; setting `.name`
# on a DataFrame does not rename its column, so the joined column kept the
# raw question text. Take the single column as a Series first.
s_overlap = get_answers_by_category(df, "overlap").iloc[:, 0]
#s_overlap = group_by_dyad(s_overlap).mean().drop(1).iloc[:, 0]
s_overlap.name = "Overlap"
df_res = df_res.join(s_overlap)


## Prior Knowledge

In [None]:
# The acquaintance section is a single column; take it as a Series and
# give it a short name before joining it to the results.
df_know_res = get_answers_by_category(df, "acquaintance").iloc[:, 0]
df_know_res.name = "Prior_Knowing"
df_res = df_res.join(df_know_res)

## Visualizations

In [None]:
# Persist the combined per-subject results, ordered by subject ID.
results_sorted = df_res.sort_index()
results_sorted.to_csv("out/result/results_dyadic.csv")

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def corr_sig(df=None):
    """Return the matrix of p-values of the pairwise Pearson correlations.

    Each pair of columns is tested on the rows where *both* values are
    present. This matches the pairwise-complete handling of
    ``DataFrame.corr()``, so the p-values belong to the coefficients shown
    next to them. (Replacing missing values by 0, as done before, tests a
    different data set.)

    :param df: DataFrame with numeric columns.
    :return: Symmetric numpy array of shape (n_columns, n_columns). The
        diagonal is 0; pairs with fewer than two complete rows are NaN.
    """
    values = df.to_numpy(dtype=float)
    n_cols = values.shape[1]
    p_matrix = np.zeros(shape=(n_cols, n_cols))
    for i in range(n_cols):
        for j in range(i + 1, n_cols):
            complete = ~(np.isnan(values[:, i]) | np.isnan(values[:, j]))
            try:
                _, p = pearsonr(values[complete, i], values[complete, j])
            except ValueError:
                # Raised by pearsonr when too few complete rows remain.
                p = np.nan
            # The test is symmetric, so compute once and mirror.
            p_matrix[i, j] = p_matrix[j, i] = p
    return p_matrix

fig, ax = plt.subplots(figsize=(10,10))         # Sample figsize in inches

corr = df_res.corr()
p_values = corr_sig(df_res)
# Hide everything except lower-triangle cells with p < 0.05.
p_mask = np.invert(np.tril(p_values<0.05))

# Build the text annotations in one go as an object-dtype frame. Writing
# strings cell-by-cell into the float `corr` copy relies on implicit dtype
# upcasting, which newer pandas versions deprecate.
annot = pd.DataFrame(
    [
        [f"r={r:.2f}\np={p:.2f}" for r, p in zip(r_row, p_row)]
        for r_row, p_row in zip(corr.to_numpy(), p_values)
    ],
    index=corr.index,
    columns=corr.columns,
)


sns.heatmap(
    corr,
    mask=p_mask,
    xticklabels=corr.columns,
    yticklabels=corr.columns,
    annot=annot,
    fmt='',
    vmin=-1, vmax=1,
    cmap=sns.color_palette("magma", as_cmap=True),
    ax=ax
)
ax.set_title("Pearson correlations (only p < 0.05 shown)")

