In [36]:
## Script for computing the split-half correlation of the STS3k dataset
# Run with python 3.9 base
# James Fodor 2024

import pandas as pd
import numpy as np
import math
from sklearn.metrics import cohen_kappa_score

from scipy import stats

In [3]:
# Load the Excel file with all participant ratings.
# A forward slash is used as the path separator: it works on Windows as well
# as POSIX systems and avoids the invalid '\S' escape sequence that a
# backslash produces inside a normal (non-raw) string literal.
data_T_all = pd.read_excel('Data-experiment/STS3k_all_participant_data.xlsx')

# Split the rows by the value of the 'Adversarial' column ('NADV' vs 'ADV')
data_T_NADV = data_T_all[data_T_all['Adversarial'] == 'NADV']
data_T_ADV = data_T_all[data_T_all['Adversarial'] == 'ADV']

# Display the full table as the cell output
data_T_all

Unnamed: 0,Main ID,Adversarial,Sentence pair,1,2,3,4,5,6,7,...,494,495,496,497,498,499,500,501,Mean,Std
0,1,NADV,A predominant concept distinguishes pain from ...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.123752,0.532998
1,2,NADV,The characteristic theme accelerates the predo...,1.0,1.0,1.0,1.0,5.0,1.0,1.0,...,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.439122,0.963809
2,3,NADV,The absurdity of society intrinsically justifi...,1.0,1.0,1.0,2.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.427146,0.927072
3,4,NADV,The purpose of sincerity anticipates repeated ...,1.0,1.0,1.0,1.0,5.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.111776,0.488879
4,5,NADV,Modern capabilities opposite cultural denotati...,2.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.241517,0.788865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,2796,NADV,Computers change quickly in the modern world.\...,,,,,,,,...,,,,,,,,,3.095238,1.476958
2796,2797,NADV,The president commences talks with the prime m...,,,,,,,,...,,,,,,,,,1.100000,0.300000
2797,2798,NADV,The children ran out of the house.\nThe dog ch...,,,,,,,,...,,,,,,,,,1.714286,1.160577
2798,2799,NADV,Potatoes grow well in most climates.\nCarrots ...,,,,,,,,...,,,,,,,,,1.238095,0.425918


In [48]:
# Function to actually perform the split-half computation
def split_half_Spearman_Brown(data, num_splits, remove_ten, seed=None):
    """Estimate split-half reliability of the ratings over random subject splits.

    For each of ``num_splits`` iterations the subject (rating) columns are
    randomly divided into two halves, each sentence pair's mean rating is
    computed within each half (ignoring missing ratings), and the two vectors
    of means are compared with a Pearson correlation and a linearly weighted
    Cohen's kappa. The means of both statistics over all splits are then
    passed through the Spearman-Brown formula ``2r / (1 + r)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose rating columns are everything except the first three and
        the last two columns (selected with ``iloc[:, 3:-2]``).
    num_splits : int
        Number of random split-half iterations.
    remove_ten : bool
        If true, the first 10 rows (sentence pairs) are excluded.
    seed : int or None, optional
        Seed for a private random generator so that the splits are
        reproducible. With the default ``None`` the global ``np.random``
        state is used, exactly as before.

    Returns
    -------
    tuple
        ``(corrected_correlation, corrected_kappa, split_half_correls,
        cohens_kappas)`` where the last two items are the per-split lists.

    Notes
    -----
    Sentence pairs that received no rating in one of the two halves are left
    out of that split's correlation. Previously a single such pair made the
    whole correlation NaN, so the split was silently dropped from the mean
    and the NaN leaked into the returned list.
    """
    # Prepare the rating data
    data_ratings = data.iloc[:, 3:-2].values
    if remove_ten:
        data_ratings = data_ratings[10:]  # Exclude the first 10 pairs
    data_ratings = data_ratings.astype(float)  # Make sure the ratings are floats

    # Check the dimensions
    num_sentences = data_ratings.shape[0]
    print('Number of the sentences =', num_sentences)
    num_subjects = data_ratings.shape[1]
    print('Number of the subjects =', num_subjects)

    # Both the np.random module and a RandomState instance provide .permutation
    rng = np.random if seed is None else np.random.RandomState(seed)

    def half_means(subject_idx):
        # Mean rating per sentence pair over the given subjects, ignoring NaN.
        # Pairs with no rating at all in this half get NaN (computed via
        # explicit counts so that no "Mean of empty slice" warning is raised).
        half = data_ratings[:, subject_idx]
        rated = ~np.isnan(half)
        n_rated = rated.sum(axis=1)
        totals = np.where(rated, half, 0.0).sum(axis=1)
        means = np.full(half.shape[0], np.nan)
        np.divide(totals, n_rated, out=means, where=n_rated > 0)
        return means

    # Compute split-half reliability with n different splits
    split_half_correls = []
    cohens_kappas = []
    half_size = num_subjects // 2

    for _ in range(num_splits):
        # Random split of the subjects
        idx_subjects_random = rng.permutation(num_subjects)
        data_mean_group1 = half_means(idx_subjects_random[:half_size])
        data_mean_group2 = half_means(idx_subjects_random[half_size:])

        # Compute cohen's kappa on the means rounded up to integer categories.
        # NOTE(review): pairs without ratings in a half are mapped to an extra
        # category 0 (behaviour kept from the original) - confirm this is intended.
        data_round_group1 = np.where(np.isnan(data_mean_group1), 0, np.ceil(data_mean_group1)).astype(int)
        data_round_group2 = np.where(np.isnan(data_mean_group2), 0, np.ceil(data_mean_group2)).astype(int)
        cohens_kappa = cohen_kappa_score(data_round_group1, data_round_group2, weights='linear')
        cohens_kappas.append(cohens_kappa)

        # Correlation between groups 1 and 2, restricted to the pairs that have
        # a mean in both halves. The (x - 1) / 6 normalization is linear and so
        # does not change the Pearson correlation
        # (presumably it maps a 1-7 rating scale onto [0, 1] - TODO confirm).
        rated_in_both = ~(np.isnan(data_mean_group1) | np.isnan(data_mean_group2))
        if rated_in_both.sum() >= 2:
            data_mean_norm_group1 = (data_mean_group1[rated_in_both] - 1) / 6
            data_mean_norm_group2 = (data_mean_group2[rated_in_both] - 1) / 6
            correl = np.corrcoef(data_mean_norm_group1, data_mean_norm_group2)[0, 1]
        else:
            correl = np.nan  # A correlation needs at least two pairs
        split_half_correls.append(correl)

    # Apply Spearman–Brown prediction formula
    mean_split_half_correl = np.nanmean(split_half_correls)
    corrected_mean_split_half_correl = 2*mean_split_half_correl/(1+mean_split_half_correl)
    print('Split-half correlation: {:.3f}'.format(corrected_mean_split_half_correl))

    # NOTE(review): the same correction is applied to the mean kappa, as in the
    # original code - confirm that this is methodologically intended.
    mean_cohens_kappa = np.nanmean(cohens_kappas)
    corrected_mean_cohens_kappa = 2*mean_cohens_kappa/(1+mean_cohens_kappa)
    print('Cohens kappa: {:.3f}'.format(corrected_mean_cohens_kappa))

    return corrected_mean_split_half_correl, corrected_mean_cohens_kappa, split_half_correls, cohens_kappas

In [49]:
# Run the split-half analysis on three subsets of the data: all pairs, the
# NADV pairs and the ADV pairs (100 random splits each). The first 10 pairs
# are excluded for the full and NADV sets, but not for the ADV set.
all_msh, all_cohens, all_correls, all_cohens_kappas = split_half_Spearman_Brown(
    data_T_all, num_splits=100, remove_ten=True
)
NADV_msh, NADV_cohens, NADV_correls, NADV_kappas = split_half_Spearman_Brown(
    data_T_NADV, num_splits=100, remove_ten=True
)
ADV_msh, ADV_cohens, ADV_correls, ADV_kappas = split_half_Spearman_Brown(
    data_T_ADV, num_splits=100, remove_ten=False
)

Number of the sentences = 2790
Number of the subjects = 501
Split-half correlation: 0.953
Cohens kappa: 0.832
Number of the sentences = 1054
Number of the subjects = 501
Split-half correlation: 0.950
Cohens kappa: 0.825
Number of the sentences = 1736
Number of the subjects = 501


  tmp_data_mean = np.nanmean(row)


Split-half correlation: 0.939
Cohens kappa: 0.804


In [77]:
# Compute t-statistic for the difference between the NADV and ADV per-split
# correlations (independent two-sample t-test).
# nan_policy='omit': a split whose correlation is NaN (e.g. a sentence pair
# without any rating in one half) would otherwise propagate and turn both the
# t-statistic and the p-value into NaN.
t_statistic, p_value = stats.ttest_ind(NADV_correls, ADV_correls, nan_policy='omit')
print('t-statistic:', t_statistic)
print('p-value:', p_value)

t-statistic: 35.28301545387555
p-value: 2.457508755211328e-87


In [130]:
## Resampling analysis for ONLY_MOD sentence pairs
# Each list holds the row positions (used below as 0-based positional indices
# into the rating matrix) of the sentence pairs belonging to one category.
# NOTE(review): if these numbers are 'Main ID' values rather than row
# positions they are off by one (row 0 has Main ID 1 in the table) - confirm.
places = [515,520,531,544,552,560,568,580,592,630,638,656,665,676,689,695,699,715,721,726,738,743,747,755,759,766,771,786,791,799]
times = [514,516,537,543,549,565,569,573,577,581,588,604,605,608,614,641,645,666,670,675,684,701,711,716,720,725,734,739,754,762,783,785]
manner = [555,564,599,610,619,625,636,659,681,685,710,730,742,758,773,787,805]
IOBJ = [511,521,528,533,538,542,548,553,586,591,600,609,620,631,696,706,736,748,767,777,782]
SUBJ_adj = [512,517,522,526,529,535,540,546,550,556,561,566,571,578,582,585,587,593,598,601,606,612,617,621,626,632,637,642,647,651,652,657,661,667,672,677,682,690,692,700,702,708,719,724,727,731,732,735,740,746,752,756,760,763,769,772,778,784,789,792,796,802,804,808]
DOBJ_adj = [524,530,536,539,547,554,558,563,567,572,574,576,583,590,594,597,603,611,616,624,629,634,639,646,648,653,662,668,674,678,683,688,693,697,704,709,713,718,722,741,749,751,757,770,776,781,790,794,797,803,809]
IOBJ_adj = [518,559,575,635,643,655,658,664,671,679,714,717,723,765,793]
passive = [525,534,584,595,615,623,628,660,691,705,744,806]

# Rating matrix: all columns except the first three and the last two,
# cast to float as in split_half_Spearman_Brown
data_ratings = data_T_all.iloc[:, 3:-2].values.astype(float)

# Take the number of subjects from the data instead of hard-coding it, so the
# resampling stays correct if the set of rating columns changes
num_subjects = data_ratings.shape[1]
num_resamples = 100
# Value the resampled (1 - normalized mean rating) scores are tested against.
# NOTE(review): the origin of 0.22 is not visible in this notebook - document it.
reference_value = 0.22

for subset in [places, times, manner, IOBJ, SUBJ_adj, DOBJ_adj, IOBJ_adj, passive]:
    resampled_means = []
    for resample_idx in range(num_resamples):
        # Bootstrap: draw subjects with replacement
        set_of_subjects = np.random.choice(np.arange(num_subjects), size=num_subjects, replace=True)
        # Mean rating of every pair in the subset over the resampled subjects,
        # normalized with (x - 1) / 6
        mean_subset = (np.nanmean(data_ratings[np.ix_(subset, set_of_subjects)], axis=1)-1)/6
        resampled_means.append(mean_subset)

    # One-sample t-test over all (resample, pair) values pooled together.
    # NOTE(review): pooling treats values from overlapping resamples as
    # independent observations, which inflates the degrees of freedom; testing
    # the per-resample means (np.mean(resampled_means, axis=1)) is the alternative.
    t_statistic, p_value = stats.ttest_1samp(1-np.array(resampled_means).flatten(), reference_value)
    print(1-np.mean(resampled_means))
    print('t-statistic:', t_statistic)
    print('p-value:', p_value)

0.18035074007553298
t-statistic: -32.310958004698584
p-value: 8.536387047157436e-197
0.2192170476165174
t-statistic: -0.49113368370383176
p-value: 0.6233655900150772
0.22205916583562024
t-statistic: 1.379506868394236
p-value: 0.16792006515286023
0.2581748432732137
t-statistic: 13.020268002854674
p-value: 2.513688017178855e-37
0.24070054247790307
t-statistic: 17.533050198598207
p-value: 2.9370972801420706e-67
0.21510000237793014
t-statistic: -3.2627919155559084
p-value: 0.0011104707936341511
0.245185392972449
t-statistic: 5.5939997889199615
p-value: 2.633516211089287e-08
0.13620257779215839
t-statistic: -20.14760571933381
p-value: 5.516960271691878e-78
