In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statistics import mean
from scipy import stats

In [None]:
# Input CSV files are looked up in the directory the notebook is run from.
results_folder = os.getcwd()
csv_details = "EXPERIMENTS.csv"  # loaded as exp_details; its ID and BEHAVIOR columns are used below
csv_explicit = "explicit.csv"    # loaded as survey_explicit
csv_implicit = "implicit.csv"    # loaded as survey_implicit
csv_init = "init.csv"            # loaded as survey_init

# Folder the plotting cells save their figures into.
folder_plots = "plots/trust/"

In [None]:
# Load the experiment log and the three questionnaire exports.
exp_details = pd.read_csv(os.path.join(results_folder, csv_details))
survey_explicit = pd.read_csv(os.path.join(results_folder, csv_explicit))
survey_implicit = pd.read_csv(os.path.join(results_folder, csv_implicit))
survey_init = pd.read_csv(os.path.join(results_folder, csv_init))

# Isolate columns that we want.
# Column layout of the raw exports, expressed as counts of columns to discard.
# NOTE(review): the leading columns are presumably export metadata - confirm.
N_LEADING_INIT = 3    # leading columns dropped from init.csv
N_LEADING_POST = 1    # leading column dropped from explicit.csv / implicit.csv
N_GODSPEED = 24       # trailing godspeed columns (post-interaction surveys only)
N_TRUST_EXTRA = 12    # trailing "trust extra" columns (all three surveys)

# One positional slice per survey replaces the repeated single-column in-place
# drops; .copy() makes each result an independent frame.
survey_init = survey_init.iloc[:, N_LEADING_INIT:-N_TRUST_EXTRA].copy()
survey_implicit = survey_implicit.iloc[
    :, N_LEADING_POST:-(N_GODSPEED + N_TRUST_EXTRA)
].copy()
survey_explicit = survey_explicit.iloc[
    :, N_LEADING_POST:-(N_GODSPEED + N_TRUST_EXTRA)
].copy()

# CREATE TRUST CSV
The 14 items of the trust subscale are: Function successfully; Act consistently; Reliable; Predictable; Dependable; Follow directions; Meet the needs of the mission; Perform exactly as instructed; Have errors (Reverse Coded); Provide appropriate information; Unresponsive (Reverse Coded); Malfunction (Reverse Coded); Communicate with people; Provide feedback.

In [None]:
def get_trust_scores(df_survey):
    """Compute one trust score per survey row.

    Every cell of ``df_survey`` must be a percentage string such as ``"85%"``.
    Each answer is converted to a fraction (``0.85``), reverse-coded items are
    replaced by ``1 - value``, and the row score is the mean of all items
    rounded to 4 decimals.

    Reverse-coded items are located through their column header: any column
    whose name contains "reverse coded" (case-insensitive) is reversed.  When
    no header carries that marker the last three columns are reversed, which
    is what this function always did before.

    Parameters
    ----------
    df_survey : pandas.DataFrame
        One row per respondent, one column per subscale item (14 expected;
        a message is printed when the count differs).

    Returns
    -------
    list
        Trust scores in row order.
    """
    if df_survey.shape[1] != 14:
        print("Error: subscale hasn't 14 items.")

    # Positions of the reverse-coded items, taken from the headers so the
    # result does not depend on those items being the last columns.
    reverse_positions = [
        position
        for position, column_name in enumerate(df_survey.columns)
        if "reverse coded" in str(column_name).lower()
    ]
    if not reverse_positions:
        # No marker in the headers: keep the historical "last three" rule.
        reverse_positions = [-1, -2, -3]

    trust_scores = []
    for index in range(len(df_survey)):
        answers = df_survey.iloc[index].tolist()
        # From percentage (xx%) to float (0.xx): "85%" -> "85e-2" -> 0.85
        trust_subscale = [float(answer.replace('%', 'e-2')) for answer in answers]

        for position in reverse_positions:
            trust_subscale[position] = 1 - trust_subscale[position]

        trust_scores.append(np.round(mean(trust_subscale), 4))

    return trust_scores

In [None]:
# Trust score of every survey, one list per questionnaire.
trust_init_scores = get_trust_scores(survey_init)
trust_implicit_scores = get_trust_scores(survey_implicit)
trust_explicit_scores = get_trust_scores(survey_explicit)

# Behaviour each participant experienced first and second (explicit / implicit),
# read from the BEHAVIOR rows of that participant in the experiment log.
first_surveys, second_surveys = [], []
for participant_id in np.unique(exp_details['ID']):
    behaviors = exp_details.loc[exp_details['ID'] == participant_id, 'BEHAVIOR'].tolist()
    first_surveys.append(behaviors[0])
    second_surveys.append(behaviors[1])

# Re-order the explicit / implicit scores into "first survey" / "second survey".
scores_by_behavior = {
    'explicit': trust_explicit_scores,
    'implicit': trust_implicit_scores,
}
valid_sequences = (('explicit', 'implicit'), ('implicit', 'explicit'))

first_trust, second_trust = [], []
for position, sequence in enumerate(zip(first_surveys, second_surveys)):
    if sequence not in valid_sequences:
        print("ERROR: in explicit or implicit trust sequence.")
        continue
    first_trust.append(scores_by_behavior[sequence[0]][position])
    second_trust.append(scores_by_behavior[sequence[1]][position])

In [None]:
# One row per participant: initial trust plus the trust measured after the
# first and the second behaviour.
df = pd.DataFrame({
    'ID': np.unique(exp_details['ID']),
    'INIT_TRUST': trust_init_scores,
    'FIRST_SURVEY': first_surveys,
    'FIRST_TRUST': first_trust,
    'SECOND_SURVEY': second_surveys,
    'SECOND_TRUST': second_trust,
})
df

In [None]:
# Persist the per-participant trust table (without the row index) to the
# current working directory.
df.to_csv("results_trust.csv", index=False)

# ANALYSIS

In [None]:
# Disabled code: everything below is a string literal, so update_ax is never
# defined when this cell runs.
'''
def update_ax(df, ax, indexes_1, indexes_2, color):
    col_0 = df["INIT_TRUST"][indexes_1].tolist()
    col_1 = df["FIRST_TRUST"][indexes_1].tolist()
    col_2 = df["FIRST_TRUST"][indexes_2].tolist()
    col_3 = df["SECOND_TRUST"][indexes_2].tolist()
    ax.plot(["INIT", "FIRST"], [col_0, col_1], color+'o-')
    ax.plot(["FIRST", "SECOND"], [col_2, col_3], color+'o-') 
    for i in range(len(indexes_1)):
        plt.text(['INIT'], col_0[i], indexes_1[i], fontsize=12)
    for i in range(len(indexes_2)):
        plt.text(['SECOND'], col_3[i], indexes_2[i], fontsize=12)
'''

In [None]:
# Plots' utilities
def update_ax_mean(df, ax, indexes_1, indexes_2, color_1, color_2):
    """Plot the mean trust trajectory of the participants in both id lists.

    The participants considered are those present in ``indexes_1`` and in
    ``indexes_2``.  When more than one participant remains, two dashed
    segments are drawn on ``ax``: INIT -> FIRST in ``color_1`` and
    FIRST -> SECOND in ``color_2``, using the mean of the matching ``*_TRUST``
    column multiplied by 100.

    Participants are looked up through the ``ID`` column when ``df`` has one,
    so the result no longer depends on the row labels happening to equal the
    participant ids.  Without an ``ID`` column the row labels are used, as
    before.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide INIT_TRUST, FIRST_TRUST and SECOND_TRUST columns.
    ax : object with a matplotlib-style ``plot`` method
    indexes_1, indexes_2 : iterable of participant ids
    color_1, color_2 : str
        Single-letter matplotlib colours, prepended to the ``'o--'`` format.

    Returns
    -------
    int
        Number of participants common to both lists (also returned when
        nothing is drawn because there is at most one of them).

    Raises
    ------
    KeyError
        If a common id is missing from ``df`` (raised by the label lookup).
    """
    indexes = list(set(indexes_1).intersection(set(indexes_2)))
    if len(indexes) > 1:
        # Address rows by participant id rather than by positional row label.
        subjects = df.set_index("ID") if "ID" in df.columns else df
        init_mean = np.mean(subjects["INIT_TRUST"].loc[indexes].tolist())*100
        first_mean = np.mean(subjects["FIRST_TRUST"].loc[indexes].tolist())*100
        second_mean = np.mean(subjects["SECOND_TRUST"].loc[indexes].tolist())*100
        ax.plot(["INIT", "FIRST"], [init_mean, first_mean], color_1+'o--', markersize=7)
        ax.plot(["FIRST", "SECOND"], [first_mean, second_mean], color_2+'o--', markersize=7)
    return len(indexes)
    

# Single-letter matplotlib colours, one per prediction outcome.
color_true_pred, color_younger_pred, color_older_pred = "g", "b", "r"

# Plot legend
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

# One dashed proxy line per outcome colour, shared by all mean-trust plots.
legend_elements = [
    Line2D([0], [0], color=outcome_color, linestyle="--", label=outcome_label)
    for outcome_color, outcome_label in (
        (color_true_pred, "Right age group"),
        (color_younger_pred, "Younger age group"),
        (color_older_pred, "Older age group or wrong gender"),
    )
]

In [None]:
# Rebind df to the table stored in results_trust.csv (the file written by the
# df.to_csv cell), so the analysis below starts from the saved scores.
df = pd.read_csv("results_trust.csv")
# Disabled: would remove the row(s) whose ID is 59 from the analysis.
#df.drop(df[df['ID'] == 59].index, inplace=True)
df

In [None]:
# Split the participants by the behaviour they experienced first.
explicit_first = df['FIRST_SURVEY'].eq("explicit")
implicit_first = df['FIRST_SURVEY'].eq("implicit")

df_ei = df.loc[explicit_first]   # explicit first, then implicit
df_ie = df.loc[implicit_first]   # implicit first, then explicit

In [None]:
# Disabled code: everything below is a string literal, so statistics_ei,
# statistics_ie and get_statistics are never defined when this cell runs.
'''
sections = ['INIT', 'FIRST', 'SECOND']
statistics_ei = pd.DataFrame(sections, columns = ['TRUST'])
statistics_ie = pd.DataFrame(sections, columns = ['TRUST'])

def get_statistics(statistics, df, indexes, title):
    for section in sections:
        col = df[section+"_TRUST"][indexes].tolist()            
        row = statistics[statistics['TRUST'] == section].index
        statistics.loc[row, 'MEAN_'+title] = np.round(mean(col), 2)
        statistics.loc[row, 'STD_'+title] = np.round(np.std(col), 2)

        #statistic, pvalue = stats.kstest(col_1, col_2)
        #statistics.loc[row, 'PVALUE'] = np.round(pvalue, 2)
    return statistics
'''

### EXPLICIT + IMPLICIT

In [None]:
# Participant ids of the E+I group (explicit first, implicit second), grouped by
# prediction outcome.  The lists without suffix are passed to update_ax_mean as
# the first-part groups, the *_2 lists as the second-part groups.
# The "true" / "younger" / "older" lists are drawn with the colours that the
# legend labels "Right age group" / "Younger age group" /
# "Older age group or wrong gender".

# WRONG PREDICTION SAID IN THE first PART WITH explicit BEHAVIOR
id_true_pred = [0, 4, 6, 8, 10, 14, 16, 18, 20, 24, 26, 28, 36, 40, 42, 44, 46, 50, 54]
id_younger_pred = [34, 48, 52, 56, 60]
id_older_pred = [2, 12, 22, 30, 32, 38, 58]

# WRONG PREDICTION IN THE second PART WITH implicit BEHAVIOR
id_true_pred_2 = [4, 6, 8, 10, 12, 16, 20, 24, 28, 30, 38, 40, 42, 44, 46, 50, 54, 58, 60]
id_younger_pred_2 = [0, 18, 26, 34, 36, 48, 52, 56]
id_older_pred_2 = [2, 14, 22, 32]

In [None]:
# Disabled code: both blocks below are string literals and do not run.  They
# call get_statistics, which is itself only present inside a string literal.
# IGNORED: because in this case there isn't correlation between first and second results
# it is considered only when it's right or wrong in the first part
'''
statistics_ei = get_statistics(statistics_ei, df_ei, id_true_pred, "TRUE")
statistics_ei = get_statistics(statistics_ei, df_ei, id_younger_pred, "YOUNGER")
#statistics_ei = get_statistics(statistics_ei, df_ei, id_older_pred, "OLDER")
print("Statistics when wrong prediction was said in the FIRST part that was EXPLICIT")
statistics_ei
'''

# it is considered only when it's right or wrong in the second part
'''statistics_ei = get_statistics(statistics_ei, df_ei, id_true_pred_2, "TRUE")
statistics_ei = get_statistics(statistics_ei, df_ei, id_younger_pred_2, "YOUNGER")
statistics_ei = get_statistics(statistics_ei, df_ei, id_older_pred_2, "OLDER")
print("Statistics when wrong prediction was said in the SECOND part that was IMPLICIT")
statistics_ei
'''

In [None]:
# Disabled code: the whole cell body is a string literal and does not run.  It
# calls update_ax, which is itself only present inside a string literal.
# ALL subjects E+I
'''
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

update_ax(df_ei, ax, id_true_pred, id_true_pred_2, color_true_pred)
update_ax(df_ei, ax, id_younger_pred, id_younger_pred_2, color_younger_pred)
update_ax(df_ei, ax, id_older_pred, id_older_pred_2, color_older_pred)

plt.show()

# TRUE-TRUE
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

color_true_pred = "g"
color_younger_pred = "b"
color_older_pred = "r"

id_both_true_pred = list(set(id_true_pred).intersection(set(id_true_pred_2)))

update_ax(df_ei, ax, id_both_true_pred, id_both_true_pred, color_true_pred)

plt.show()

# YOUNGER-OLDER or OLDER-YOUNGER
# TRUE-YOUNGER or YOUNGER-TRUE
# TRUE-OLDER or OLDER-TRUE
# YOUNGER-YOUNGER or OLDER-OLDER
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

id_not_both_true_pred = list(set(id_true_pred) ^ (set(id_true_pred_2)))

update_ax(df_ei, ax, id_not_both_true_pred, id_not_both_true_pred, color_true_pred)
update_ax(df_ei, ax, id_younger_pred, id_younger_pred_2, color_younger_pred)
update_ax(df_ei, ax, id_older_pred, id_older_pred_2, color_older_pred)

plt.show()
'''

In [None]:
# Mean trust trajectory (INIT -> FIRST -> SECOND) of the E+I participants, one
# pair of dashed segments per combination of first-part and second-part
# prediction outcome.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))

# (outcome name, participant ids, line colour) for each part of the session.
first_part_groups = [
    ("true", id_true_pred, color_true_pred),
    ("younger", id_younger_pred, color_younger_pred),
    ("older", id_older_pred, color_older_pred),
]
second_part_groups = [
    ("true", id_true_pred_2, color_true_pred),
    ("younger", id_younger_pred_2, color_younger_pred),
    ("older", id_older_pred_2, color_older_pred),
]

# numerosity["<first>-<second>"] = number of participants in that combination.
# The nested loops visit the nine combinations in the same order as the former
# hand-written calls (true-true, true-younger, ..., older-older).
numerosity = {}
for first_name, first_ids, first_color in first_part_groups:
    for second_name, second_ids, second_color in second_part_groups:
        numerosity[first_name + "-" + second_name] = update_ax_mean(
            df_ei, ax, first_ids, second_ids, first_color, second_color
        )

plt.yticks(np.arange(50, 105, 5))
plt.grid(axis='y')
ax.set_ylabel("Mean trust score (%)")
ax.set_title("E+I: mean trust by prediction outcome in the first and second part")
ax.legend(handles=legend_elements, loc='lower left')

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(folder_plots, exist_ok=True)
plt.savefig(os.path.join(folder_plots, "ei.jpg"))

In [None]:
# Bar chart of how many subjects fall into each prediction combination,
# using the counts gathered in `numerosity`.
pair_labels = list(numerosity)
subject_counts = [numerosity[pair_label] for pair_label in pair_labels]

fig = plt.figure(figsize=(13, 5))

plt.bar(pair_labels, subject_counts, width=0.4, color='maroon')

plt.title("Number of subjects in E+I with that conditions")
plt.xlabel("Predictions")
plt.ylabel("Number of subjects")
plt.show()

### IMPLICIT + EXPLICIT

In [None]:
# Keep references to the E+I id lists under *_all names before the unsuffixed
# names are rebound to the I+E ids.
id_true_pred_1_all, id_younger_pred_1_all, id_older_pred_1_all = (
    id_true_pred,
    id_younger_pred,
    id_older_pred,
)

id_true_pred_2_all, id_younger_pred_2_all, id_older_pred_2_all = (
    id_true_pred_2,
    id_younger_pred_2,
    id_older_pred_2,
)

In [None]:
# Participant ids of the I+E group (implicit first, explicit second), grouped by
# prediction outcome.  These assignments rebind the same names that previously
# held the E+I ids; the E+I lists stay reachable through the *_all variables.

# WRONG PREDICTION IN THE first PART WITH implicit BEHAVIOR
id_true_pred = [3, 5, 9, 13, 15, 17, 19, 23, 25, 31, 33, 37, 41, 43, 47, 51]
id_younger_pred = [29, 39, 45, 53, 55, 59, 61]
id_older_pred = [1, 7, 11, 21, 27, 35, 49, 57]

# WRONG PREDICTION SAID IN THE second PART WITH explicit BEHAVIOR
id_true_pred_2 = [1, 5, 9, 11, 13, 17, 19, 23, 25, 31, 33, 35, 37, 41, 43, 47, 49, 51, 57]
id_younger_pred_2 = [29, 39, 45, 53, 55, 59, 61]
id_older_pred_2 = [3, 7, 15, 21, 27]

In [None]:
# Disabled code: both blocks below are string literals and do not run.
# NOTE(review): the "second part" block passes the first-part lists
# (id_true_pred, id_younger_pred, id_older_pred) rather than the *_2 lists -
# check before re-enabling.
# IGNORED: because in this case there isn't correlation between first and second results
# it is considered only when it's right or wrong in the first part
'''statistics_ie = get_statistics(statistics_ie, df_ie, id_true_pred, "TRUE")
statistics_ie = get_statistics(statistics_ie, df_ie, id_younger_pred, "YOUNGER")
statistics_ie = get_statistics(statistics_ie, df_ie, id_older_pred, "OLDER")
print("Statistics when wrong prediction was said in the FIRST part that was IMPLICIT")
statistics_ie'''
# it is considered only when it's right or wrong in the second part
'''statistics_ie = get_statistics(statistics_ie, df_ie, id_true_pred, "TRUE")
statistics_ie = get_statistics(statistics_ie, df_ie, id_younger_pred, "YOUNGER")
statistics_ie = get_statistics(statistics_ie, df_ie, id_older_pred, "OLDER")
print("Statistics when wrong prediction was said in the SECOND part that was EXPLICIT")
statistics_ie'''

In [None]:
# Disabled code: the whole cell body is a string literal and does not run.  It
# calls update_ax, which is itself only present inside a string literal.
# ALL I+E
'''
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

update_ax(df_ie, ax, id_true_pred, id_true_pred_2, color_true_pred)
update_ax(df_ie, ax, id_younger_pred, id_younger_pred_2, color_younger_pred)
update_ax(df_ie, ax, id_older_pred, id_older_pred_2, color_older_pred)

plt.show()

# TRUE-TRUE
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

color_true_pred = "g"
color_younger_pred = "b"
color_older_pred = "r"

id_both_true_pred = list(set(id_true_pred).intersection(set(id_true_pred_2)))

update_ax(df_ie, ax, id_both_true_pred, id_both_true_pred, color_true_pred)

plt.show()

# YOUNGER-OLDER or OLDER-YOUNGER
# TRUE-YOUNGER or YOUNGER-TRUE
# TRUE-OLDER or OLDER-TRUE
# YOUNGER-YOUNGER or OLDER-OLDER
fig, ax = plt.subplots(1,1,figsize=(15, 15)) 

id_not_both_true_pred = list(set(id_true_pred) ^ (set(id_true_pred_2)))

update_ax(df_ie, ax, id_not_both_true_pred, id_not_both_true_pred, color_true_pred)
update_ax(df_ie, ax, id_younger_pred, id_younger_pred_2, color_younger_pred)
update_ax(df_ie, ax, id_older_pred, id_older_pred_2, color_older_pred)

plt.show()
'''

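The Kolmogorov-Smirnov test used in the cells below is `scipy.stats.kstest`, called with two samples. Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html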

Confidence level of 95%; that is, we will reject the null hypothesis in favor of the alternative if the p-value is less than 0.05.

Finding: INIT_TRUST does not follow the same distribution as FIRST_TRUST.

In [None]:
# Mean trust trajectory (INIT -> FIRST -> SECOND) of the I+E participants, one
# pair of dashed segments per combination of first-part and second-part
# prediction outcome.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))

# (outcome name, participant ids, line colour) for each part of the session.
first_part_groups = [
    ("true", id_true_pred, color_true_pred),
    ("younger", id_younger_pred, color_younger_pred),
    ("older", id_older_pred, color_older_pred),
]
second_part_groups = [
    ("true", id_true_pred_2, color_true_pred),
    ("younger", id_younger_pred_2, color_younger_pred),
    ("older", id_older_pred_2, color_older_pred),
]

# numerosity["<first>-<second>"] = number of participants in that combination.
# The nested loops visit the nine combinations in the same order as the former
# hand-written calls (true-true, true-younger, ..., older-older).
numerosity = {}
for first_name, first_ids, first_color in first_part_groups:
    for second_name, second_ids, second_color in second_part_groups:
        numerosity[first_name + "-" + second_name] = update_ax_mean(
            df_ie, ax, first_ids, second_ids, first_color, second_color
        )

plt.yticks(np.arange(50, 105, 5))
plt.grid(axis='y')
ax.set_ylabel("Mean trust score (%)")
ax.set_title("I+E: mean trust by prediction outcome in the first and second part")
ax.legend(handles=legend_elements, loc='lower left')

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(folder_plots, exist_ok=True)
plt.savefig(os.path.join(folder_plots, "ie.jpg"))

In [None]:
# Bar chart of how many subjects fall into each prediction combination,
# using the counts gathered in `numerosity` by the I+E plot cell.
courses = list(numerosity.keys())
values = list(numerosity.values())

fig = plt.figure(figsize = (13, 5))

# creating the bar plot
plt.bar(courses, values, color ='maroon',
        width = 0.4)

plt.xlabel("Predictions")
plt.ylabel("Number of subjects")
# This cell belongs to the I+E section; the title previously still read "E+I".
plt.title("Number of subjects in I+E with that conditions")
plt.show()

## ALL: both E+I and I+E

In [None]:
# Pool the E+I ids (held in the *_all variables) with the I+E ids (held in the
# unsuffixed variables) into sorted arrays of unique ids.
# Both operands are converted to lists before "+", so the operation is always a
# concatenation.  After the first run the *_all names hold numpy arrays, and
# "array + list" would be an element-wise addition instead; with the conversion
# the cell can be re-run and yields the same arrays.
id_true_pred_1_all = np.unique(list(id_true_pred_1_all) + list(id_true_pred))
id_younger_pred_1_all = np.unique(list(id_younger_pred_1_all) + list(id_younger_pred))
id_older_pred_1_all = np.unique(list(id_older_pred_1_all) + list(id_older_pred))

id_true_pred_2_all = np.unique(list(id_true_pred_2_all) + list(id_true_pred_2))
id_younger_pred_2_all = np.unique(list(id_younger_pred_2_all) + list(id_younger_pred_2))
id_older_pred_2_all = np.unique(list(id_older_pred_2_all) + list(id_older_pred_2))

In [None]:
# Two-sample KS test of INIT and SECOND trust against FIRST trust, run
# separately for each presentation order.
comparisons = (
    ("INIT vs FIRST: ", "INIT_TRUST"),
    ("SECOND vs FIRST: ", "SECOND_TRUST"),
)
for sequence_label, frame in (("E+I", df_ei), ("I+E", df_ie)):
    print(sequence_label)
    for caption, column in comparisons:
        print(caption, stats.kstest(frame[column].tolist(), frame["FIRST_TRUST"].tolist()))

In [None]:
# Same two comparisons on all participants together.
for caption, column in (("INIT vs FIRST: ", "INIT_TRUST"), ("SECOND vs FIRST: ", "SECOND_TRUST")):
    print(caption, stats.kstest(df[column].tolist(), df["FIRST_TRUST"].tolist()))

In [None]:
# Mean trust trajectory (INIT -> FIRST -> SECOND) of all participants (E+I and
# I+E pooled), one pair of dashed segments per combination of first-part and
# second-part prediction outcome.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))

# (outcome name, participant ids, line colour) for each part of the session.
first_part_groups = [
    ("true", id_true_pred_1_all, color_true_pred),
    ("younger", id_younger_pred_1_all, color_younger_pred),
    ("older", id_older_pred_1_all, color_older_pred),
]
second_part_groups = [
    ("true", id_true_pred_2_all, color_true_pred),
    ("younger", id_younger_pred_2_all, color_younger_pred),
    ("older", id_older_pred_2_all, color_older_pred),
]

# numerosity["<first>-<second>"] = number of participants in that combination.
# The nested loops visit the nine combinations in the same order as the former
# hand-written calls (true-true, true-younger, ..., older-older).
numerosity = {}
for first_name, first_ids, first_color in first_part_groups:
    for second_name, second_ids, second_color in second_part_groups:
        numerosity[first_name + "-" + second_name] = update_ax_mean(
            df, ax, first_ids, second_ids, first_color, second_color
        )

plt.yticks(np.arange(50, 105, 5))
plt.grid(axis='y')
ax.set_ylabel("Mean trust score (%)")
ax.set_title("All participants: mean trust by prediction outcome in the first and second part")
ax.legend(handles=legend_elements, loc='lower left')

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(folder_plots, exist_ok=True)
plt.savefig(os.path.join(folder_plots, "both.jpg"))