In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import numpy as np
import csv
import pandas as pd

import itertools

In [2]:
df = pd.read_csv('data/EMNLP_merged_Qualtrics_numeric_and_Prolific_allRaces_cleaned_with_all_Metrics.csv')

# Filter out yellow flags and red flags rows

column_name = 'Quality_flag'
excluded_values = ['red']  

# Create a mask to identify rows with values to be excluded
mask = df[column_name].isin(excluded_values)

# Apply the mask to exclude the rows from the data frame
df_filtered = df[~mask]

# Save the filtered data frame to a new CSV file
#df_filtered.to_csv('FairX_merged_Qualtrics_numeric_and_Prolific_allRaces_cleaned_with_5Metrics_green.csv', index=False)

In [3]:
print('df_filtered.columns:', df_filtered.columns)

# Merge the main result csv with the csv with full-text options of individual similarity features
df_indfair_text = pd.read_csv('data/EMNLP_both_races_text_IndFair.csv')

# print('df_indfair_text:', df_indfair_text)

df_filtered_w_indfair_text = pd.merge(df_filtered, df_indfair_text, left_on='PROLIFIC_PID', right_on='Prolific-ID', how='left', suffixes=(None, '_text'))



#df_filtered_w_indfair_text.drop(columns=['IndFair-1', 'IndFair-2', 'IndFair-3', 'IndFair-4', 'IndFair-5', 'IndFair-6', 'IndFair-7', 'IndFair-8', 'IndFair-9'])

print('df_filtered_w_indfair_text.head():', df_filtered_w_indfair_text.head())


df_filtered.columns: Index(['Quality_flag', 'Bias-start_1', 'Bias-start_2', 'Bias-start_3',
       'Bias-start-rationale', 'Disc-start_1', 'Disc-start_2', 'Disc-start_3',
       'Disc-start_4', 'Disc-start_5', 'Disc-start_6', 'Disc-start-rationale',
       'IndFair-1', 'IndFair-2', 'IndFair-3', 'IndFair-4', 'IndFair-5',
       'IndFair-6', 'IndFair-7', 'IndFair-8', 'IndFair-9', 'Disc-end_1',
       'Disc-end_2', 'Disc-end_3', 'Disc-end_4', 'Disc-end_5', 'Disc-end_6',
       'Disc-end-rationale', 'Bias-end_1', 'Bias-end_2', 'Bias-end_3',
       'Bias-end_4', 'Bias-end-rationale', 'Percv-work_1', 'Percv-work_2',
       'Percv-work_3', 'Percv-work_4', 'Percv-work_5', 'Percv-work_6',
       'Percv-work-rationale', 'PROLIFIC_PID', 'Asian_race',
       'Explanation_style', 'Prolific_Participant_id',
       'Prolific_Total_approvals', 'Prolific_Fluent_languages', 'Prolific_Age',
       'Prolific_Sex', 'Prolific_Ethnicity_simplified',
       'Prolific_Country_of_birth', 'Prolific_Country_of_re

In [4]:
df_filtered_w_indfair_text.to_csv('df_filtered_w_indfair_text.csv')

In [5]:
#Convert the text answers to numeric, normalized, (between 0 and 1), and equidistant values to calculate individual similarity
IndFair_1_text_to_score_map = {'Yes: online hate speech against Chinese':0/4, 
                               'Yes: online hate speech against Asian but not Chinese':1/4,
                               'Yes: online hate speech against a non-Asian race/ethnicity (such as against Black, Hispanic, etc.)':2/4,
                               'Yes: online hate speech based on a non-race sensitive attribute (such as gender, sexual orientation, etc.)':3/4,
                               'No':4/4,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_2_text_to_score_map = {'Yes: verbal hate speech against Chinese':0/4, 
                               'Yes: online hate speech against Asian but not Chinese':1/4,
                               'Yes: online hate speech against a non-Asian race/ethnicity (such as against Black, Hispanic, etc.)':2/4,
                               'Yes: online hate speech based on a non-race sensitive attribute (such as gender, sexual orientation, etc.)':3/4,
                               'No':4/4,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_3_text_to_score_map = {'Never':0/4, 
                               '1 day - 1 month':1/4,
                               '1 month - 1 year':2/4,
                               '1 year - 5 years':3/4,
                               'Over 5 years':4/4,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_4_text_to_score_map = {'Born in the USA':0/4, 
                               'Moved to the USA at an age lower than 5 years old':1/4,
                               'Moved to the USA at an age between 5 and 18 years old':2/4,
                               'Moved to the USA at an age between 18 and 30 years old':3/4,
                               'Moved to the USA at an age higher than 30 years old':4/4,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_5_text_to_score_map = {'Mainland China':0/5, 
                               'Taiwan, Hong Kong, or Macau':1/5,
                               'A Sinosphere country (Japan, North/South Korea, or Vietnam)':2/5,
                               'An East Asian or Southeast Asian country not mentioned above (such as Mongolia, Singapore, etc.)':3/5,
                               'An Asian country outside East/Southeast Asia':4/5,
                               'No Asian background':5/5,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_6_text_to_score_map = {'0':0/3, 
                               '1':1/3,
                               '2':2/3,
                               '3 or more':3/3,
                               #'Prefer not to answer':float("nan")
                               'Prefer not to answer':np.nan
                              }

IndFair_7_text_to_score_map = {'Very unnecessary':0/4, 
                               'Unnecessary':1/4,
                               'Neutral':2/4,
                               'Necessary':3/4,
                               'Very necessary':4/4
                              }

IndFair_8_text_to_score_map = {'No sanction':0/4, 
                               'Deleting the hateful content (such as specific hate tweets)':1/4,
                               'Banning the hate speech creator from the relevant social media platform':2/4,
                               'Civil damages (such as financial compensation for the victims’ mental sufferings)':3/4,
                               'Criminal punishment (such as community service, probation, jail time)':4/4
                              }

IndFair_9_text_to_score_map = {'(Almost) always':0/5, 
                               'Daily':1/5,
                               'Weekly':2/5,
                               'Monthly':3/5,
                               'Yearly':4/5,
                               '(Almost) never':5/5
                              }

In [6]:
PROLIFIC_PID_to_accuracy_dict = dict(zip(df['PROLIFIC_PID'], df['accuracy']))
PROLIFIC_PID_to_stereotype_activation_dict = dict(zip(df['PROLIFIC_PID'], df['stereotype_activation']))
PROLIFIC_PID_to_mental_discomfort_dict = dict(zip(df['PROLIFIC_PID'], df['mental_discomfort']))
PROLIFIC_PID_to_perceived_workload_dict = dict(zip(df['PROLIFIC_PID'], df['perceived_workload']))
PROLIFIC_PID_to_label_time_dict = dict(zip(df['PROLIFIC_PID'], df['label_time']))

In [7]:
df_filtered_w_indfair_text['IndFair-1_score'] = df_filtered_w_indfair_text['IndFair-1_text'].map(IndFair_1_text_to_score_map)
df_filtered_w_indfair_text['IndFair-2_score'] = df_filtered_w_indfair_text['IndFair-2_text'].map(IndFair_2_text_to_score_map)
df_filtered_w_indfair_text['IndFair-3_score'] = df_filtered_w_indfair_text['IndFair-3_text'].map(IndFair_3_text_to_score_map)
df_filtered_w_indfair_text['IndFair-4_score'] = df_filtered_w_indfair_text['IndFair-4_text'].map(IndFair_4_text_to_score_map)
df_filtered_w_indfair_text['IndFair-5_score'] = df_filtered_w_indfair_text['IndFair-5_text'].map(IndFair_5_text_to_score_map)
df_filtered_w_indfair_text['IndFair-6_score'] = df_filtered_w_indfair_text['IndFair-6_text'].map(IndFair_6_text_to_score_map)
df_filtered_w_indfair_text['IndFair-7_score'] = df_filtered_w_indfair_text['IndFair-7_text'].map(IndFair_7_text_to_score_map)
df_filtered_w_indfair_text['IndFair-8_score'] = df_filtered_w_indfair_text['IndFair-8_text'].map(IndFair_8_text_to_score_map)
df_filtered_w_indfair_text['IndFair-9_score'] = df_filtered_w_indfair_text['IndFair-9_text'].map(IndFair_9_text_to_score_map)

df_IndFair = df_filtered_w_indfair_text[['PROLIFIC_PID', 'Explanation_style', 'IndFair-1_score', 'IndFair-2_score', 'IndFair-3_score', 'IndFair-4_score',
                                                   'IndFair-5_score', 'IndFair-6_score', 'IndFair-7_score', 'IndFair-8_score', 'IndFair-9_score']]
                                                   #'accuracy', 'stereotype_activation', 'mental_discomfort', 'perceived_workload', 'label_time']]

print('df_IndFair.head():', df_IndFair.head())

df_IndFair.head():                PROLIFIC_PID Explanation_style  IndFair-1_score  \
0  53987712fdf99b68e3a45021                NE             1.00   
1  5658000ca9872d0011e1ba26                NE             1.00   
2  56bae08f30d6b30005f8537a                NE             1.00   
3  595bdc5b57e9520001f78eef                NE             1.00   
4  59c3289b8e6ea10001f7b926                NE             0.75   

   IndFair-2_score  IndFair-3_score  IndFair-4_score  IndFair-5_score  \
0              NaN             0.50             0.25              0.4   
1              1.0             0.00             0.00              1.0   
2              1.0             0.25             0.00              1.0   
3              0.0             0.25             0.00              0.2   
4              1.0             0.75             0.50              0.6   

   IndFair-6_score  IndFair-7_score  IndFair-8_score  IndFair-9_score  
0         0.333333             0.75             0.50              0.4  
1

In [8]:
#Divide the df into 3 smaller dfs by explanation style
grouped = df_IndFair.groupby('Explanation_style')
# Iterate over each group and create smaller data frames
smaller_dfs_dict = {}
for Explanation_style, group in grouped:
    #print('Explanation_style:', Explanation_style)
    smaller_dfs_dict[Explanation_style] = group
    
#print('smaller_dfs_dict:', smaller_dfs_dict)

df_NE = smaller_dfs_dict['NE']
df_SM = smaller_dfs_dict['SM']
df_CE = smaller_dfs_dict['CE']

df_NE.to_csv('df_NE.csv', index=False)
df_SM.to_csv('df_SM.csv', index=False)
df_CE.to_csv('df_CE.csv', index=False)

print('df_NE.head:', df_NE.head)


df_NE.head: <bound method NDFrame.head of                 PROLIFIC_PID Explanation_style  IndFair-1_score  \
0   53987712fdf99b68e3a45021                NE             1.00   
1   5658000ca9872d0011e1ba26                NE             1.00   
2   56bae08f30d6b30005f8537a                NE             1.00   
3   595bdc5b57e9520001f78eef                NE             1.00   
4   59c3289b8e6ea10001f7b926                NE             0.75   
..                       ...               ...              ...   
88  6018a5c0e1600b187ccb8693                NE             1.00   
89  60fcd954f5ff1656599b53c9                NE             0.50   
90  61004696895aeda4629ecd6e                NE             0.75   
91  63d13bbf2993cf5e372b7994                NE             1.00   
92  63d423b81f5e313b9958723c                NE             1.00   

    IndFair-2_score  IndFair-3_score  IndFair-4_score  IndFair-5_score  \
0               NaN             0.50             0.25              0.4   
1    

In [9]:
import numpy as np
import pandas as pd
from itertools import combinations
from scipy.spatial.distance import cdist

def find_closest_pairs(df, k=2000):
    # Convert the dataframe to a NumPy array for easy calculations
    vectors = df.to_numpy()

    # Extract the PROLIFIC_PID (index) dimension (0) and the rest of the dimensions (IndFair_dimensions)
    PROLIFIC_PIDs = vectors[:, 0]
    #print('PROLIFIC_PIDs:', PROLIFIC_PIDs)
    IndFair_dimensions = vectors[:, 2:]
    #print('IndFair_dimensions:', IndFair_dimensions)
 
    pairwise_avg_distances = []
    for i, j in combinations(range(len(IndFair_dimensions)), 2):
        #print('IndFair_dimensions[i]:', IndFair_dimensions[i])
        #print('IndFair_dimensions[j]:', IndFair_dimensions[j])
        valid_indices = ~np.logical_or(np.isnan(list(IndFair_dimensions[i])), np.isnan(list(IndFair_dimensions[j])))

        if np.any(valid_indices):
            avg_distance = np.mean(np.abs(IndFair_dimensions[i, valid_indices] - IndFair_dimensions[j, valid_indices]))
            pairwise_avg_distances.append((i, j, PROLIFIC_PIDs[i], PROLIFIC_PIDs[j], avg_distance))

    
    
    # Sort pairs by avg_distance (index 4 in tuple above) in ascending order
    pairwise_avg_distances.sort(key=lambda x: x[4])

    # Get the top k pairs
    top_k_pairs = pairwise_avg_distances[:k]
    farthest_pairs = pairwise_avg_distances[-1]
    largest_avg_distance = farthest_pairs[4]
    
    print('largest_avg_distance:', largest_avg_distance)
    
    k_closest_pairs = []

    # Print the top k pairs of vectors
    for i, (idx1, idx2, PROLIFIC_PID1, PROLIFIC_PID2, avg_distance) in enumerate(top_k_pairs, 1):
        #print(f"Pair {i}:")
        #print(f"Vector 1 (N={PROLIFIC_PID1}): {IndFair_dimensions[idx1]}")
        #print(f"Vector 2 (N={PROLIFIC_PID2}): {IndFair_dimensions[idx2]}")
        #print(f"PROLIFIC_PID 1: {PROLIFIC_PID1}")
        #print(f"PROLIFIC_PID 2: {PROLIFIC_PID2}")
        #print(f"Average Distance: {avg_distance}\n")
        
        k_closest_pairs += [(PROLIFIC_PID1, PROLIFIC_PID2, avg_distance)]
        
    return k_closest_pairs, largest_avg_distance
        
        
    
k_closest_pairs_NE, largest_avg_distance_NE = find_closest_pairs(df=df_NE)
#print('k_closest_pairs_NE:', k_closest_pairs_NE)

k_closest_pairs_SM, largest_avg_distance_SM = find_closest_pairs(df=df_SM)
#print('k_closest_pairs_SM:', k_closest_pairs_SM)

k_closest_pairs_CE, largest_avg_distance_CE = find_closest_pairs(df=df_CE)
#print('k_closest_pairs_CE:', k_closest_pairs_CE)

largest_avg_distance: 0.7574074074074075
largest_avg_distance: 0.8259259259259258
largest_avg_distance: 0.762962962962963


In [10]:
from scipy.stats import sem #standard error of the mean

#Calculate difference between any two outputs of each pair of two highly similar individuals, sum such differences across the top k pairs 
#and take this sum as a proxy for `individual unfairness' score w.r.t. that output feature
def compute_diff_output_pairs(k_closest_pairs, PROLIFIC_PID_to_output_dict): 
    output_diff_list = []
    weighted_output_diff_list = []
    for close_pair in k_closest_pairs:
        first_PROLIFIC_PID = close_pair[0]
        second_PROLIFIC_PID = close_pair[1]
        avg_distance = close_pair[2]
        first_output = PROLIFIC_PID_to_output_dict[first_PROLIFIC_PID]
        second_output = PROLIFIC_PID_to_output_dict[second_PROLIFIC_PID]
        output_diff = abs(first_output - second_output)
        #Output difference weighted by (1-avg_distance) to count the output diff of higher-similarity individuals more 
        weighted_output_diff = (1-avg_distance)*abs(first_output - second_output)
        output_diff_list += [output_diff]
        weighted_output_diff_list += [weighted_output_diff]
        
    output_diff_mean = np.mean(output_diff_list)
    output_diff_sem = sem(output_diff_list)   #standard error of the mean
    
    weighted_output_diff_mean = np.mean(weighted_output_diff_list)
    weighted_output_diff_sem = sem(weighted_output_diff_list)   #standard error of the mean
    
    #print('output_diff_list:', output_diff_list)
    #print('output_diff_mean:', output_diff_mean)
    #print('output_diff_sem:', output_diff_sem)
    
    print('weighted_output_diff_mean:', weighted_output_diff_mean)
    #print('weighted_output_diff_sem:', weighted_output_diff_sem)
    
    return output_diff_list, weighted_output_diff_list

In [11]:
import scipy.stats as stats


In [12]:
#!python -m pip install statsmodels

In [13]:
#import statsmodels.stats.weightstats.ttest_ind

In [14]:
accuracy_diff_list_NE, weighted_accuracy_diff_list_NE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_NE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_accuracy_dict)
accuracy_diff_list_SM, weighted_accuracy_diff_list_SM = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_SM, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_accuracy_dict)
accuracy_diff_list_CE, weighted_accuracy_diff_list_CE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_CE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_accuracy_dict)

#stats.ttest_ind(accuracy_diff_list_NE, accuracy_diff_list_SM)
#stats.ttest_ind(accuracy_diff_list_SM, accuracy_diff_list_CE)
#stats.ttest_ind(accuracy_diff_list_CE, accuracy_diff_list_NE)


stats.ttest_ind(weighted_accuracy_diff_list_NE, weighted_accuracy_diff_list_SM)
stats.ttest_ind(weighted_accuracy_diff_list_SM, weighted_accuracy_diff_list_CE)
stats.ttest_ind(weighted_accuracy_diff_list_CE, weighted_accuracy_diff_list_NE)

# Accuracy individual unfairness: 
# CE > SM (significant only when k = 4000)

#For k = 100 pairs:
'''
weighted_output_diff_mean: 0.12242245363538196
weighted_output_diff_mean: 0.11732060181268056
weighted_output_diff_mean: 0.11802314805359725
Ttest_indResult(statistic=0.38615920951735766, pvalue=0.6997933851720435)
Ttest_indResult(statistic=-0.05704012066617719, pvalue=0.9545707560179117)
Ttest_indResult(statistic=-0.34457989396966654, pvalue=0.730775941247083)
'''

#For k = 200 pairs:
'''
weighted_output_diff_mean: 0.11286053236062848
weighted_output_diff_mean: 0.11606514548602034
weighted_output_diff_mean: 0.11483767353387501
Ttest_indResult(statistic=-0.3454084610268495, pvalue=0.7299697998638031)
Ttest_indResult(statistic=0.12919316715699108, pvalue=0.8972700650636408)
Ttest_indResult(statistic=0.20566177064399263, pvalue=0.8371602744404645)
'''

#For k = 400 pairs:
'''
weighted_output_diff_mean: 0.11229465661020718
weighted_output_diff_mean: 0.11314466901152034
weighted_output_diff_mean: 0.11791249166159873
Ttest_indResult(statistic=-0.1344350194804932, pvalue=0.893092489489393)
Ttest_indResult(statistic=-0.7329156114650753, pvalue=0.46382512985171986)
Ttest_indResult(statistic=0.8353950533526215, pvalue=0.4037453571743087)
'''

#For k = 1000 pairs:
'''
weighted_output_diff_mean: 0.10945435954891644
weighted_output_diff_mean: 0.10761800315786277
weighted_output_diff_mean: 0.11445407843968579
Ttest_indResult(statistic=0.47663606782826035, pvalue=0.6336734166620746)
Ttest_indResult(statistic=-1.7180944380521446, pvalue=0.08593432364245103)
Ttest_indResult(statistic=1.218887327527163, pvalue=0.22303084772169346)
'''

#For k = 2000 pairs:
'''
weighted_output_diff_mean: 0.10017758487927221
weighted_output_diff_mean: 0.09722716183601801
weighted_output_diff_mean: 0.1020758625060389
Ttest_indResult(statistic=1.1640670930246828, pvalue=0.2444662840348074)
Ttest_indResult(statistic=-1.8642691913037372, pvalue=0.06235714515379035)
Ttest_indResult(statistic=0.7100150903226394, pvalue=0.4777361877563392)
'''


#For k = 4000 pairs:
'''
weighted_output_diff_mean: 0.09011246142309953
weighted_output_diff_mean: 0.0873070511940956
weighted_output_diff_mean: 0.09233395264286517
Ttest_indResult(statistic=1.7439567329841836, pvalue=0.08120505291784812)
Ttest_indResult(statistic=-3.0267984500250247, pvalue=0.0024794540846570425)
Ttest_indResult(statistic=1.3000639344050937, pvalue=0.19361652131074011)
'''




weighted_output_diff_mean: 0.10017758487927221
weighted_output_diff_mean: 0.09722716183601801
weighted_output_diff_mean: 0.1020758625060389


Ttest_indResult(statistic=1.1640670930246826, pvalue=0.2444662840348074)

Ttest_indResult(statistic=-1.8642691913037372, pvalue=0.06235714515379034)

Ttest_indResult(statistic=0.7100150903226393, pvalue=0.4777361877563391)

'\nweighted_output_diff_mean: 0.12242245363538196\nweighted_output_diff_mean: 0.11732060181268056\nweighted_output_diff_mean: 0.11802314805359725\nTtest_indResult(statistic=0.38615920951735766, pvalue=0.6997933851720435)\nTtest_indResult(statistic=-0.05704012066617719, pvalue=0.9545707560179117)\nTtest_indResult(statistic=-0.34457989396966654, pvalue=0.730775941247083)\n'

'\nweighted_output_diff_mean: 0.11286053236062848\nweighted_output_diff_mean: 0.11606514548602034\nweighted_output_diff_mean: 0.11483767353387501\nTtest_indResult(statistic=-0.3454084610268495, pvalue=0.7299697998638031)\nTtest_indResult(statistic=0.12919316715699108, pvalue=0.8972700650636408)\nTtest_indResult(statistic=0.20566177064399263, pvalue=0.8371602744404645)\n'

'\nweighted_output_diff_mean: 0.11229465661020718\nweighted_output_diff_mean: 0.11314466901152034\nweighted_output_diff_mean: 0.11791249166159873\nTtest_indResult(statistic=-0.1344350194804932, pvalue=0.893092489489393)\nTtest_indResult(statistic=-0.7329156114650753, pvalue=0.46382512985171986)\nTtest_indResult(statistic=0.8353950533526215, pvalue=0.4037453571743087)\n'

'\nweighted_output_diff_mean: 0.10945435954891644\nweighted_output_diff_mean: 0.10761800315786277\nweighted_output_diff_mean: 0.11445407843968579\nTtest_indResult(statistic=0.47663606782826035, pvalue=0.6336734166620746)\nTtest_indResult(statistic=-1.7180944380521446, pvalue=0.08593432364245103)\nTtest_indResult(statistic=1.218887327527163, pvalue=0.22303084772169346)\n'

'\nweighted_output_diff_mean: 0.10017758487927221\nweighted_output_diff_mean: 0.09722716183601801\nweighted_output_diff_mean: 0.1020758625060389\nTtest_indResult(statistic=1.1640670930246828, pvalue=0.2444662840348074)\nTtest_indResult(statistic=-1.8642691913037372, pvalue=0.06235714515379035)\nTtest_indResult(statistic=0.7100150903226394, pvalue=0.4777361877563392)\n'

'\nweighted_output_diff_mean: 0.09011246142309953\nweighted_output_diff_mean: 0.0873070511940956\nweighted_output_diff_mean: 0.09233395264286517\nTtest_indResult(statistic=1.7439567329841836, pvalue=0.08120505291784812)\nTtest_indResult(statistic=-3.0267984500250247, pvalue=0.0024794540846570425)\nTtest_indResult(statistic=1.3000639344050937, pvalue=0.19361652131074011)\n'

In [15]:
stereotype_activation_diff_list_NE, weighted_stereotype_activation_diff_list_NE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_NE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_stereotype_activation_dict)
stereotype_activation_diff_list_SM, weighted_stereotype_activation_diff_list_SM = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_SM, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_stereotype_activation_dict)
stereotype_activation_diff_list_CE, weighted_stereotype_activation_diff_list_CE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_CE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_stereotype_activation_dict)

#stats.ttest_ind(stereotype_activation_diff_list_NE, stereotype_activation_diff_list_SM)
#stats.ttest_ind(stereotype_activation_diff_list_SM, stereotype_activation_diff_list_CE)
#stats.ttest_ind(stereotype_activation_diff_list_CE, stereotype_activation_diff_list_NE)

stats.ttest_ind(weighted_stereotype_activation_diff_list_NE, weighted_stereotype_activation_diff_list_SM)
stats.ttest_ind(weighted_stereotype_activation_diff_list_SM, weighted_stereotype_activation_diff_list_CE)
stats.ttest_ind(weighted_stereotype_activation_diff_list_CE, weighted_stereotype_activation_diff_list_NE)

# Stereotype activation individual unfairness: 
# SM > NE (significant when k = 1000, 2000, 4000)
# CE > SM (significant when k = 2000, 4000)
# CE > NE (significant when k = 1000, 2000, 4000)

#For k = 100 pairs:
'''
weighted_output_diff_mean: 0.2577708333206667
weighted_output_diff_mean: 0.1597106481059375
weighted_output_diff_mean: 0.23621296291081945
Ttest_indResult(statistic=2.0388121470990037, pvalue=0.042797921214456275)
Ttest_indResult(statistic=-1.8968838836120723, pvalue=0.059298903984714574)
Ttest_indResult(statistic=-0.5170173777963251, pvalue=0.6057210742977313)
'''

#For k = 200 pairs:
'''
weighted_output_diff_mean: 0.21559143515842014
weighted_output_diff_mean: 0.1934416335435774
weighted_output_diff_mean: 0.20930324068478473
Ttest_indResult(statistic=0.6642634149818615, pvalue=0.5069063776903922)
Ttest_indResult(statistic=-0.5123112733789307, pvalue=0.6087173512662998)
Ttest_indResult(statistic=-0.22937008251693147, pvalue=0.8186991113961757)
'''

#For k = 400 pairs:
'''
weighted_output_diff_mean: 0.1945675153826337
weighted_output_diff_mean: 0.22787023253522024
weighted_output_diff_mean: 0.212732225480308
Ttest_indResult(statistic=-1.516474208805488, pvalue=0.1297955007707865)
Ttest_indResult(statistic=0.7087669270486124, pvalue=0.47867612152505234)
Ttest_indResult(statistic=1.005736955644334, pvalue=0.3148469811718952)
'''

#For k = 1000 pairs:
'''
weighted_output_diff_mean: 0.16325578698370347
weighted_output_diff_mean: 0.21198391750548073
weighted_output_diff_mean: 0.21577055771201747
Ttest_indResult(statistic=-3.9907049664871637, pvalue=6.825476394319328e-05)
Ttest_indResult(statistic=-0.29971795384094857, pvalue=0.7644234786867578)
Ttest_indResult(statistic=4.782679344222651, pvalue=1.8562213112249608e-06)
'''

#For k = 2000 pairs:
'''
weighted_output_diff_mean: 0.14826423605654213
weighted_output_diff_mean: 0.17806860114868522
weighted_output_diff_mean: 0.2170108355057328
Ttest_indResult(statistic=-3.9112618511907056, pvalue=9.333949291234249e-05)
Ttest_indResult(statistic=-4.549287393573189, pvalue=5.542441059641809e-06)
Ttest_indResult(statistic=9.015967528727973, pvalue=2.963069430725915e-19)
'''

#For k = 4000 pairs:
'''
weighted_output_diff_mean: 0.12298933251823131
weighted_output_diff_mean: 0.15668105431055399
weighted_output_diff_mean: 0.20928534224511094
Ttest_indResult(statistic=-7.081050585897358, pvalue=1.5519174810278512e-12)
Ttest_indResult(statistic=-9.268032837778028, pvalue=2.3967240836845375e-20)
Ttest_indResult(statistic=16.830225569905448, pvalue=1.7283989176164353e-62)
'''

weighted_output_diff_mean: 0.14826423605654213
weighted_output_diff_mean: 0.17806860114868522
weighted_output_diff_mean: 0.2170108355057328


Ttest_indResult(statistic=-3.911261851190705, pvalue=9.333949291234245e-05)

Ttest_indResult(statistic=-4.549287393573189, pvalue=5.54244105964181e-06)

Ttest_indResult(statistic=9.015967528727973, pvalue=2.963069430725915e-19)

'\nweighted_output_diff_mean: 0.2577708333206667\nweighted_output_diff_mean: 0.1597106481059375\nweighted_output_diff_mean: 0.23621296291081945\nTtest_indResult(statistic=2.0388121470990037, pvalue=0.042797921214456275)\nTtest_indResult(statistic=-1.8968838836120723, pvalue=0.059298903984714574)\nTtest_indResult(statistic=-0.5170173777963251, pvalue=0.6057210742977313)\n'

'\nweighted_output_diff_mean: 0.21559143515842014\nweighted_output_diff_mean: 0.1934416335435774\nweighted_output_diff_mean: 0.20930324068478473\nTtest_indResult(statistic=0.6642634149818615, pvalue=0.5069063776903922)\nTtest_indResult(statistic=-0.5123112733789307, pvalue=0.6087173512662998)\nTtest_indResult(statistic=-0.22937008251693147, pvalue=0.8186991113961757)\n'

'\nweighted_output_diff_mean: 0.1945675153826337\nweighted_output_diff_mean: 0.22787023253522024\nweighted_output_diff_mean: 0.212732225480308\nTtest_indResult(statistic=-1.516474208805488, pvalue=0.1297955007707865)\nTtest_indResult(statistic=0.7087669270486124, pvalue=0.47867612152505234)\nTtest_indResult(statistic=1.005736955644334, pvalue=0.3148469811718952)\n'

'\nweighted_output_diff_mean: 0.16325578698370347\nweighted_output_diff_mean: 0.21198391750548073\nweighted_output_diff_mean: 0.21577055771201747\nTtest_indResult(statistic=-3.9907049664871637, pvalue=6.825476394319328e-05)\nTtest_indResult(statistic=-0.29971795384094857, pvalue=0.7644234786867578)\nTtest_indResult(statistic=4.782679344222651, pvalue=1.8562213112249608e-06)\n'

'\nweighted_output_diff_mean: 0.14826423605654213\nweighted_output_diff_mean: 0.17806860114868522\nweighted_output_diff_mean: 0.2170108355057328\nTtest_indResult(statistic=-3.9112618511907056, pvalue=9.333949291234249e-05)\nTtest_indResult(statistic=-4.549287393573189, pvalue=5.542441059641809e-06)\nTtest_indResult(statistic=9.015967528727973, pvalue=2.963069430725915e-19)\n'

'\nweighted_output_diff_mean: 0.12298933251823131\nweighted_output_diff_mean: 0.15668105431055399\nweighted_output_diff_mean: 0.20928534224511094\nTtest_indResult(statistic=-7.081050585897358, pvalue=1.5519174810278512e-12)\nTtest_indResult(statistic=-9.268032837778028, pvalue=2.3967240836845375e-20)\nTtest_indResult(statistic=16.830225569905448, pvalue=1.7283989176164353e-62)\n'

In [16]:
mental_discomfort_diff_list_NE, weighted_mental_discomfort_diff_list_NE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_NE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_mental_discomfort_dict)
mental_discomfort_diff_list_SM, weighted_mental_discomfort_diff_list_SM = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_SM, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_mental_discomfort_dict)
mental_discomfort_diff_list_CE, weighted_mental_discomfort_diff_list_CE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_CE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_mental_discomfort_dict)

#stats.ttest_ind(mental_discomfort_diff_list_NE, mental_discomfort_diff_list_SM)
#stats.ttest_ind(mental_discomfort_diff_list_SM, mental_discomfort_diff_list_CE)
#stats.ttest_ind(mental_discomfort_diff_list_CE, mental_discomfort_diff_list_NE)

stats.ttest_ind(weighted_mental_discomfort_diff_list_NE, weighted_mental_discomfort_diff_list_SM)
stats.ttest_ind(weighted_mental_discomfort_diff_list_SM, weighted_mental_discomfort_diff_list_CE)
stats.ttest_ind(weighted_mental_discomfort_diff_list_CE, weighted_mental_discomfort_diff_list_NE)


# Mental discomfort individual unfairness: 
# SM < NE (significant when k = 400, 1000, 2000, 4000)
# CE > SM (significant when k = 400, 1000, 2000, 4000)
# CE < NE (significant when k = 1000, 2000, 4000)

#For k = 100 pairs:
'''
weighted_output_diff_mean: 4.655125000000001
weighted_output_diff_mean: 5.525895833333334
weighted_output_diff_mean: 5.351652777777776
Ttest_indResult(statistic=-1.0831459564462071, pvalue=0.2800608965985112)
Ttest_indResult(statistic=0.22531267332063712, pvalue=0.82196842003369)
Ttest_indResult(statistic=0.8735857194592426, pvalue=0.3834021779748411)
'''

#For k = 200 pairs:
'''
weighted_output_diff_mean: 4.780996527777778
weighted_output_diff_mean: 4.650804067460317
weighted_output_diff_mean: 5.21670138888889
Ttest_indResult(statistic=0.24782068381686428, pvalue=0.8044009535017123)
Ttest_indResult(statistic=-1.1295753836268752, pvalue=0.2593355119328024)
Ttest_indResult(statistic=0.8078145910473841, pvalue=0.4196798637234407)
'''

#For k = 400 pairs:
'''
weighted_output_diff_mean: 5.496617476851852
weighted_output_diff_mean: 4.0083146494709005
weighted_output_diff_mean: 5.282732638888889
Ttest_indResult(statistic=4.14327961822941, pvalue=3.78931374645726e-05)
Ttest_indResult(statistic=-3.688660115360122, pvalue=0.0002407242162685208)
Ttest_indResult(statistic=-0.5490648443179097, pvalue=0.5831145475500782)
'''

#For k = 1000 pairs:
'''
weighted_output_diff_mean: 5.4116379629629625
weighted_output_diff_mean: 4.006368253968254
weighted_output_diff_mean: 4.783809788359788
Ttest_indResult(statistic=6.534980936963549, pvalue=8.050850495882216e-11)
Ttest_indResult(statistic=-3.7427270106820587, pvalue=0.00018717547611519266)
Ttest_indResult(statistic=-2.72141005002426, pvalue=0.006556812390161719)
'''

#For k = 2000 pairs:
'''
weighted_output_diff_mean: 5.055785763888889
weighted_output_diff_mean: 3.9955488095238096
weighted_output_diff_mean: 4.493781233465608
Ttest_indResult(statistic=7.4406732671073, pvalue=1.2195352218170593e-13)
Ttest_indResult(statistic=-3.6449253683130878, pvalue=0.0002708698001253381)
Ttest_indResult(statistic=-3.7486238485024996, pvalue=0.00018032045532610541)
'''


#For k = 4000 pairs:
'''
weighted_output_diff_mean: 4.483446527777778
weighted_output_diff_mean: 3.667976603835979
weighted_output_diff_mean: 4.1396169229497355
Ttest_indResult(statistic=8.881236568147534, pvalue=8.060559492206256e-19)
Ttest_indResult(statistic=-5.357321742284999, pvalue=8.681561378635521e-08)
Ttest_indResult(statistic=-3.58076758951581, pvalue=0.0003446188229036143)
'''

weighted_output_diff_mean: 5.055785763888889
weighted_output_diff_mean: 3.9955488095238096
weighted_output_diff_mean: 4.493781233465608


Ttest_indResult(statistic=7.440673267107299, pvalue=1.219535221817068e-13)

Ttest_indResult(statistic=-3.6449253683130873, pvalue=0.00027086980012533857)

Ttest_indResult(statistic=-3.748623848502499, pvalue=0.00018032045532610596)

'\nweighted_output_diff_mean: 4.655125000000001\nweighted_output_diff_mean: 5.525895833333334\nweighted_output_diff_mean: 5.351652777777776\nTtest_indResult(statistic=-1.0831459564462071, pvalue=0.2800608965985112)\nTtest_indResult(statistic=0.22531267332063712, pvalue=0.82196842003369)\nTtest_indResult(statistic=0.8735857194592426, pvalue=0.3834021779748411)\n'

'\nweighted_output_diff_mean: 4.780996527777778\nweighted_output_diff_mean: 4.650804067460317\nweighted_output_diff_mean: 5.21670138888889\nTtest_indResult(statistic=0.24782068381686428, pvalue=0.8044009535017123)\nTtest_indResult(statistic=-1.1295753836268752, pvalue=0.2593355119328024)\nTtest_indResult(statistic=0.8078145910473841, pvalue=0.4196798637234407)\n'

'\nweighted_output_diff_mean: 5.496617476851852\nweighted_output_diff_mean: 4.0083146494709005\nweighted_output_diff_mean: 5.282732638888889\nTtest_indResult(statistic=4.14327961822941, pvalue=3.78931374645726e-05)\nTtest_indResult(statistic=-3.688660115360122, pvalue=0.0002407242162685208)\nTtest_indResult(statistic=-0.5490648443179097, pvalue=0.5831145475500782)\n'

'\nweighted_output_diff_mean: 5.4116379629629625\nweighted_output_diff_mean: 4.006368253968254\nweighted_output_diff_mean: 4.783809788359788\nTtest_indResult(statistic=6.534980936963549, pvalue=8.050850495882216e-11)\nTtest_indResult(statistic=-3.7427270106820587, pvalue=0.00018717547611519266)\nTtest_indResult(statistic=-2.72141005002426, pvalue=0.006556812390161719)\n'

'\nweighted_output_diff_mean: 5.055785763888889\nweighted_output_diff_mean: 3.9955488095238096\nweighted_output_diff_mean: 4.493781233465608\nTtest_indResult(statistic=7.4406732671073, pvalue=1.2195352218170593e-13)\nTtest_indResult(statistic=-3.6449253683130878, pvalue=0.0002708698001253381)\nTtest_indResult(statistic=-3.7486238485024996, pvalue=0.00018032045532610541)\n'

'\nweighted_output_diff_mean: 4.483446527777778\nweighted_output_diff_mean: 3.667976603835979\nweighted_output_diff_mean: 4.1396169229497355\nTtest_indResult(statistic=8.881236568147534, pvalue=8.060559492206256e-19)\nTtest_indResult(statistic=-5.357321742284999, pvalue=8.681561378635521e-08)\nTtest_indResult(statistic=-3.58076758951581, pvalue=0.0003446188229036143)\n'

In [17]:
perceived_workload_diff_list_NE, weighted_perceived_workload_diff_list_NE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_NE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_perceived_workload_dict)
perceived_workload_diff_list_SM, weighted_perceived_workload_diff_list_SM = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_SM, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_perceived_workload_dict)
perceived_workload_diff_list_CE, weighted_perceived_workload_diff_list_CE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_CE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_perceived_workload_dict)

#stats.ttest_ind(perceived_workload_diff_list_NE, perceived_workload_diff_list_SM)
#stats.ttest_ind(perceived_workload_diff_list_SM, perceived_workload_diff_list_CE)
#stats.ttest_ind(perceived_workload_diff_list_CE, perceived_workload_diff_list_NE)

stats.ttest_ind(weighted_perceived_workload_diff_list_NE, weighted_perceived_workload_diff_list_SM)
stats.ttest_ind(weighted_perceived_workload_diff_list_SM, weighted_perceived_workload_diff_list_CE)
stats.ttest_ind(weighted_perceived_workload_diff_list_CE, weighted_perceived_workload_diff_list_NE)

# Perceived workload individual unfairness: 
# SM > NE (significant when k = 100, 200, 400, 1000, 2000, 4000)
# CE > NE (significant when k = 100, 200, 400, 1000, 2000, 4000)

#For k = 100 pairs:
'''
weighted_output_diff_mean: 0.5706111110497917
weighted_output_diff_mean: 0.8227094907606737
weighted_output_diff_mean: 0.9733518519055834
Ttest_indResult(statistic=-3.186158698149972, pvalue=0.0016753326444008197)
Ttest_indResult(statistic=-1.4845607585644502, pvalue=0.13925046035089672)
Ttest_indResult(statistic=4.50187402005396, pvalue=1.148434950958941e-05)
'''

#For k = 200 pairs:
'''
weighted_output_diff_mean: 0.5492245369812813
weighted_output_diff_mean: 0.855410962303494
weighted_output_diff_mean: 0.9662170139455938
Ttest_indResult(statistic=-5.3878319736402736, pvalue=1.223258942032522e-07)
Ttest_indResult(statistic=-1.5222231442615386, pvalue=0.12874727282709594)
Ttest_indResult(statistic=6.568714448336779, pvalue=1.5903457108734867e-10)
'''

#For k = 400 pairs:
'''
weighted_output_diff_mean: 0.5682591627654734
weighted_output_diff_mean: 0.8411076802132739
weighted_output_diff_mean: 0.9377390460091319
Ttest_indResult(statistic=-6.899446956084667, pvalue=1.0635232189609764e-11)
Ttest_indResult(statistic=-2.0325040114887387, pvalue=0.04243330658559672)
Ttest_indResult(statistic=8.804525088583166, pvalue=8.069899258103868e-18)
'''

#For k = 1000 pairs:
'''
weighted_output_diff_mean: 0.6433807098456905
weighted_output_diff_mean: 0.8453794642745843
weighted_output_diff_mean: 0.8888008928649994
Ttest_indResult(statistic=-7.710016543906661, pvalue=1.9692458335445406e-14)
Ttest_indResult(statistic=-1.5003242351795596, pvalue=0.13368843107422554)
Ttest_indResult(statistic=9.1921542332229, pvalue=9.371967377752567e-20)
'''

#For k = 2000 pairs:
'''
weighted_output_diff_mean: 0.6797075231387575
weighted_output_diff_mean: 0.8072925953484142
weighted_output_diff_mean: 0.8310520695506516
Ttest_indResult(statistic=-6.990057580791016, pvalue=3.2055311234786664e-12)
Ttest_indResult(statistic=-1.2085145191885167, pvalue=0.2269208869965165)
Ttest_indResult(statistic=8.25283268285702, pvalue=2.0780353643993687e-16)
'''

#For k = 4000 pairs:
'''
weighted_output_diff_mean: 0.6349704764751658
weighted_output_diff_mean: 0.7323678764374242
weighted_output_diff_mean: 0.7443531690837392
Ttest_indResult(statistic=-8.039483192760208, pvalue=1.0314196747960769e-15)
Ttest_indResult(statistic=-0.9462082682795959, pvalue=0.34407095477764116)
Ttest_indResult(statistic=9.132978517206414, pvalue=8.313281879536956e-20)
'''

weighted_output_diff_mean: 0.6797075231387575
weighted_output_diff_mean: 0.8072925953484142
weighted_output_diff_mean: 0.8310520695506516


Ttest_indResult(statistic=-6.990057580791016, pvalue=3.2055311234786664e-12)

Ttest_indResult(statistic=-1.2085145191885167, pvalue=0.2269208869965165)

Ttest_indResult(statistic=8.25283268285702, pvalue=2.0780353643993687e-16)

'\nweighted_output_diff_mean: 0.5706111110497917\nweighted_output_diff_mean: 0.8227094907606737\nweighted_output_diff_mean: 0.9733518519055834\nTtest_indResult(statistic=-3.186158698149972, pvalue=0.0016753326444008197)\nTtest_indResult(statistic=-1.4845607585644502, pvalue=0.13925046035089672)\nTtest_indResult(statistic=4.50187402005396, pvalue=1.148434950958941e-05)\n'

'\nweighted_output_diff_mean: 0.5492245369812813\nweighted_output_diff_mean: 0.855410962303494\nweighted_output_diff_mean: 0.9662170139455938\nTtest_indResult(statistic=-5.3878319736402736, pvalue=1.223258942032522e-07)\nTtest_indResult(statistic=-1.5222231442615386, pvalue=0.12874727282709594)\nTtest_indResult(statistic=6.568714448336779, pvalue=1.5903457108734867e-10)\n'

'\nweighted_output_diff_mean: 0.5682591627654734\nweighted_output_diff_mean: 0.8411076802132739\nweighted_output_diff_mean: 0.9377390460091319\nTtest_indResult(statistic=-6.899446956084667, pvalue=1.0635232189609764e-11)\nTtest_indResult(statistic=-2.0325040114887387, pvalue=0.04243330658559672)\nTtest_indResult(statistic=8.804525088583166, pvalue=8.069899258103868e-18)\n'

'\nweighted_output_diff_mean: 0.6433807098456905\nweighted_output_diff_mean: 0.8453794642745843\nweighted_output_diff_mean: 0.8888008928649994\nTtest_indResult(statistic=-7.710016543906661, pvalue=1.9692458335445406e-14)\nTtest_indResult(statistic=-1.5003242351795596, pvalue=0.13368843107422554)\nTtest_indResult(statistic=9.1921542332229, pvalue=9.371967377752567e-20)\n'

'\nweighted_output_diff_mean: 0.6797075231387575\nweighted_output_diff_mean: 0.8072925953484142\nweighted_output_diff_mean: 0.8310520695506516\nTtest_indResult(statistic=-6.990057580791016, pvalue=3.2055311234786664e-12)\nTtest_indResult(statistic=-1.2085145191885167, pvalue=0.2269208869965165)\nTtest_indResult(statistic=8.25283268285702, pvalue=2.0780353643993687e-16)\n'

'\nweighted_output_diff_mean: 0.6349704764751658\nweighted_output_diff_mean: 0.7323678764374242\nweighted_output_diff_mean: 0.7443531690837392\nTtest_indResult(statistic=-8.039483192760208, pvalue=1.0314196747960769e-15)\nTtest_indResult(statistic=-0.9462082682795959, pvalue=0.34407095477764116)\nTtest_indResult(statistic=9.132978517206414, pvalue=8.313281879536956e-20)\n'

In [18]:
label_time_diff_list_NE, weighted_label_time_diff_list_NE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_NE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_label_time_dict)
label_time_diff_list_SM, weighted_label_time_diff_list_SM = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_SM, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_label_time_dict)
label_time_diff_list_CE, weighted_label_time_diff_list_CE = compute_diff_output_pairs(k_closest_pairs=k_closest_pairs_CE, PROLIFIC_PID_to_output_dict=PROLIFIC_PID_to_label_time_dict)

#stats.ttest_ind(label_time_diff_list_NE, label_time_diff_list_SM)
#stats.ttest_ind(label_time_diff_list_SM, label_time_diff_list_CE)
#stats.ttest_ind(label_time_diff_list_CE, label_time_diff_list_NE)

stats.ttest_ind(weighted_label_time_diff_list_NE, weighted_label_time_diff_list_SM)
stats.ttest_ind(weighted_label_time_diff_list_SM, weighted_label_time_diff_list_CE)
stats.ttest_ind(weighted_label_time_diff_list_CE, weighted_label_time_diff_list_NE)


# Label time individual unfairness: 
# SM > NE (significant when k = 100, 200, 400, 1000, 2000, 4000)
# CE > NE (significant when k = 100, 200, 400, 1000, 2000, 4000)

#For k = 100 pairs:
'''
weighted_output_diff_mean: 116.00078965277777
weighted_output_diff_mean: 155.62271079166666
weighted_output_diff_mean: 184.6133419513889
Ttest_indResult(statistic=-2.114062514953106, pvalue=0.03576151010740404)
Ttest_indResult(statistic=-1.178385431614034, pvalue=0.24005722931268078)
Ttest_indResult(statistic=3.3026036648825357, pvalue=0.0011365743474422783)
'''

#For k = 200 pairs:
'''
weighted_output_diff_mean: 118.74838106944443
weighted_output_diff_mean: 167.54684451785712
weighted_output_diff_mean: 161.87683057986112
Ttest_indResult(statistic=-3.652919768723226, pvalue=0.0002939469243371378)
Ttest_indResult(statistic=0.3515521357569209, pvalue=0.7253602065172884)
Ttest_indResult(statistic=3.192549326104111, pvalue=0.0015222885210858727)
'''

#For k = 400 pairs:
'''
weighted_output_diff_mean: 117.09176768518519
weighted_output_diff_mean: 169.4160534849537
weighted_output_diff_mean: 180.69123690798614
Ttest_indResult(statistic=-5.793644879357288, pvalue=9.908931356344894e-09)
Ttest_indResult(statistic=-0.978057775172034, pvalue=0.32834215582006643)
Ttest_indResult(statistic=6.344391004135578, pvalue=3.7390991456520775e-10)
'''

#For k = 1000 pairs:
'''
weighted_output_diff_mean: 104.41188085231481
weighted_output_diff_mean: 162.28681441458332
weighted_output_diff_mean: 170.82560791547618
Ttest_indResult(statistic=-10.329991402294493, pvalue=2.0950223010953718e-24)
Ttest_indResult(statistic=-1.2182549390783648, pvalue=0.22327094252640428)
Ttest_indResult(statistic=11.269204186977992, pvalue=1.331567922893053e-28)
'''

#For k = 2000 pairs:
'''
weighted_output_diff_mean: 95.09161596122684
weighted_output_diff_mean: 147.49455279604828
weighted_output_diff_mean: 157.51341821531085
Ttest_indResult(statistic=-13.319800699266953, pvalue=1.22782983440831e-39)
Ttest_indResult(statistic=-2.0634409569813097, pvalue=0.03913515923839124)
Ttest_indResult(statistic=15.76914066550379, pvalue=2.1433603927598545e-54)
'''

#For k = 4000 pairs:
'''
weighted_output_diff_mean: 84.17795434201388
weighted_output_diff_mean: 137.80743589578373
weighted_output_diff_mean: 133.61233400520007
Ttest_indResult(statistic=-19.48215068474814, pvalue=1.2530603511439413e-82)
Ttest_indResult(statistic=1.2843884509979202, pvalue=0.19904327265038285)
Ttest_indResult(statistic=19.900622214708473, pvalue=4.742413246940743e-86)
'''

weighted_output_diff_mean: 95.09161596122684
weighted_output_diff_mean: 147.49455279604828
weighted_output_diff_mean: 157.51341821531085


Ttest_indResult(statistic=-13.31980069926695, pvalue=1.22782983440831e-39)

Ttest_indResult(statistic=-2.0634409569813097, pvalue=0.03913515923839124)

Ttest_indResult(statistic=15.76914066550379, pvalue=2.1433603927598545e-54)

'\nweighted_output_diff_mean: 116.00078965277777\nweighted_output_diff_mean: 155.62271079166666\nweighted_output_diff_mean: 184.6133419513889\nTtest_indResult(statistic=-2.114062514953106, pvalue=0.03576151010740404)\nTtest_indResult(statistic=-1.178385431614034, pvalue=0.24005722931268078)\nTtest_indResult(statistic=3.3026036648825357, pvalue=0.0011365743474422783)\n'

'\nweighted_output_diff_mean: 118.74838106944443\nweighted_output_diff_mean: 167.54684451785712\nweighted_output_diff_mean: 161.87683057986112\nTtest_indResult(statistic=-3.652919768723226, pvalue=0.0002939469243371378)\nTtest_indResult(statistic=0.3515521357569209, pvalue=0.7253602065172884)\nTtest_indResult(statistic=3.192549326104111, pvalue=0.0015222885210858727)\n'

'\nweighted_output_diff_mean: 117.09176768518519\nweighted_output_diff_mean: 169.4160534849537\nweighted_output_diff_mean: 180.69123690798614\nTtest_indResult(statistic=-5.793644879357288, pvalue=9.908931356344894e-09)\nTtest_indResult(statistic=-0.978057775172034, pvalue=0.32834215582006643)\nTtest_indResult(statistic=6.344391004135578, pvalue=3.7390991456520775e-10)\n'

'\nweighted_output_diff_mean: 104.41188085231481\nweighted_output_diff_mean: 162.28681441458332\nweighted_output_diff_mean: 170.82560791547618\nTtest_indResult(statistic=-10.329991402294493, pvalue=2.0950223010953718e-24)\nTtest_indResult(statistic=-1.2182549390783648, pvalue=0.22327094252640428)\nTtest_indResult(statistic=11.269204186977992, pvalue=1.331567922893053e-28)\n'

'\nweighted_output_diff_mean: 95.09161596122684\nweighted_output_diff_mean: 147.49455279604828\nweighted_output_diff_mean: 157.51341821531085\nTtest_indResult(statistic=-13.319800699266953, pvalue=1.22782983440831e-39)\nTtest_indResult(statistic=-2.0634409569813097, pvalue=0.03913515923839124)\nTtest_indResult(statistic=15.76914066550379, pvalue=2.1433603927598545e-54)\n'

'\nweighted_output_diff_mean: 84.17795434201388\nweighted_output_diff_mean: 137.80743589578373\nweighted_output_diff_mean: 133.61233400520007\nTtest_indResult(statistic=-19.48215068474814, pvalue=1.2530603511439413e-82)\nTtest_indResult(statistic=1.2843884509979202, pvalue=0.19904327265038285)\nTtest_indResult(statistic=19.900622214708473, pvalue=4.742413246940743e-86)\n'