In [4]:
import xlsxwriter
import os
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats as stats
import numpy as np

def _print_model_stats(label, frame):
    """Print ``label`` followed by n, mean confidence score and mean JAS of ``frame``."""
    print(label)
    # count() ignores missing values, so n is the number of rows with a score.
    print(frame['confidenceScore_EstAI'].count())
    print(frame['confidenceScore_EstAI'].mean())
    print(frame['JAS'].mean())


def descriptiveStatsModel2to3(path: str = './data.xlsx',
                              error_threshold: float = 5,
                              congruence_threshold: float = 15) -> None:
    """Print descriptive statistics for the Model 2 and Model 3 subsamples.

    The sheet is read from ``path`` and reduced to the samples in which both
    the baseline estimate and the AI-assisted estimate are "incorrect", i.e.
    their absolute distance to the ground truth (GT) exceeds
    ``error_threshold``.  From these samples two subsets are derived:

    * Model 2 (congruent AI prediction): the AI prediction is itself more
      than ``error_threshold`` away from the GT, the signed distances
      ``EstB2GT``, ``PredAI2GT`` and ``EstAI2GT`` all share the same sign
      (consistent over-/underestimation), and the prediction lies within
      ``congruence_threshold`` of the baseline estimate.
    * Model 3 (incongruent AI prediction): the prediction lies more than
      ``congruence_threshold`` away from the baseline estimate.  No other
      filter is applied to this subset.

    For each subset the sample count, the mean ``confidenceScore_EstAI`` and
    the mean ``JAS`` are printed.  Before that, the bin edges of a
    three-way equal-frequency split of ``ABS_EstAI2GT`` and the median of
    ``ABS_PredAI2EstB`` among the incorrect samples are printed.

    Args:
        path: Excel file to read.  Defaults to the originally hard-coded
            ``'./data.xlsx'``.
        error_threshold: Distance to the GT above which an estimate or
            prediction counts as incorrect.  The default of 5 corresponds to
            the upper edge of the first bin reported for the original data
            (Q1 = correct 0-5%, Q2 = minor error 6-12%, Q3 = severe error
            13-79%).
        congruence_threshold: Maximum absolute distance between AI prediction
            and baseline estimate for the prediction to count as congruent.
            The default of 15 is intended to be the median split; compare it
            with the median printed by this function when the data change.

    Returns:
        None.  All results are written to standard output.
    """
    # read data from excel sheet
    df = pd.read_excel(path)

    # Equal-frequency bins of the AI-assisted error.  q=3 yields three bins
    # (terciles); the "Quartile" wording of the printed label is kept so the
    # output stays identical.  Only the bin edges are needed here.
    _, equally_sized_bins = pd.qcut(
        df['ABS_EstAI2GT'], q=3, labels=["Q1", "Q2", "Q3"], retbins=True
    )
    print("Equally Sized Quartile Thresholds:", equally_sized_bins)

    # Keep only incorrect assessments: baseline AND AI-assisted estimate are
    # both further than the error threshold from the GT.
    incorrect = df[
        (df.ABS_EstB2GT > error_threshold) & (df.ABS_EstAI2GT > error_threshold)
    ]

    # Median distance between AI prediction and baseline estimate; this is
    # the value the congruence threshold is meant to reflect.
    print(np.median(incorrect["ABS_PredAI2EstB"]))

    # A positive product means the two signed distances point the same way.
    same_direction = (
        ((incorrect.PredAI2GT * incorrect.EstB2GT) > 0)
        & ((incorrect.EstB2GT * incorrect.EstAI2GT) > 0)
        & ((incorrect.PredAI2GT * incorrect.EstAI2GT) > 0)
    )
    distance_to_baseline = incorrect.PredAI2EstB.abs()

    # Model2: incorrect, same-direction AI prediction close to the baseline
    congruent = incorrect[
        (incorrect.ABS_PredAI2GT > error_threshold)
        & same_direction
        & (distance_to_baseline <= congruence_threshold)
    ]

    # Model3: AI prediction far away from the baseline estimate
    incongruent = incorrect[distance_to_baseline > congruence_threshold]

    _print_model_stats("Model 2", congruent)
    _print_model_stats("Model 3", incongruent)

def GeneraldescriptiveStats(path: str = './data.xlsx') -> None:
    """Print sample count, mean confidence score and mean JAS overall and per time-pressure group.

    The sheet is read from ``path`` and three groups are reported in order:
    all samples, samples without time pressure (``TP == 0``) and samples with
    time pressure (``TP == 1``).  For each group a heading is printed,
    followed by the number of non-missing ``confidenceScore_EstAI`` values,
    the mean of ``confidenceScore_EstAI`` and the mean of ``JAS``.

    Args:
        path: Excel file to read.  Defaults to the originally hard-coded
            ``'./data.xlsx'``.

    Returns:
        None.  All results are written to standard output.
    """
    # read data from excel sheet
    df = pd.read_excel(path)

    # presence of time pressure during AI-aided assessment is dummy coded as:
    # 0 = without time pressure, 1 = with time pressure.
    # None selects every sample regardless of the TP value.
    groups = (
        ("All samples", None),
        ("No time pressure during AI-assisted assessment", 0),
        ("Time pressure during AI-assisted assessment", 1),
    )

    for heading, tp_value in groups:
        # Build each subset only when it is reported, so the groups are
        # processed strictly one after another.
        subset = df if tp_value is None else df[df.TP == tp_value]
        print(heading)
        print(subset['confidenceScore_EstAI'].count())
        print(subset['confidenceScore_EstAI'].mean())
        print(subset['JAS'].mean())

    
# Run both reports in order: the Model 2/3 subsample statistics first,
# then the overall and per-time-pressure statistics.
for report in (descriptiveStatsModel2to3, GeneraldescriptiveStats):
    report()

Equally Sized Quartile Thresholds: [ 0.  5. 12. 79.]
15.0
Model 4
78
3.871794871794872
0.5547859911912774
Model 5
139
3.2446043165467624
0.49369275004628316
All samples
560
3.6375
0.5205759909823003
No time pressure during AI-assisted assessment
280
3.65
0.489440819111538
Time pressure during AI-assisted assessment
280
3.625
0.5517111628530629
