In [68]:
import pandas as pd
import os

from scipy.stats import shapiro, ttest_ind, mannwhitneyu

In [69]:
# Locate the input CSV relative to the *parent* of the current working
# directory, so the notebook must be launched from a sub-folder that sits
# next to the 'csv files' directory.
base = os.path.split(os.getcwd())[0]
stats_path = os.path.join(base, 'csv files', 'For_Stats.csv')

# Load the full table used by every statistical test below.
stats_df = pd.read_csv(filepath_or_buffer=stats_path)

In [70]:
# Split the per-judge GOE scores into two groups by the `is_same` flag
# (presumably 1 when judge and skater share a nation -- confirm against the
# code that builds For_Stats.csv).
same = stats_df.loc[stats_df['is_same'].eq(1), 'judge_goe']
not_same = stats_df.loc[stats_df['is_same'].eq(0), 'judge_goe']

In [71]:
# Group sizes; reused later to derive the complementary U statistic.
nx = len(same)
ny = len(not_same)
print(f"Sample sizes: {nx}, {ny}")

Sample sizes: 16088, 193531


In [72]:
# Draw a fixed-seed random subsample of 5000 scores from each group for the
# normality check (presumably capped at 5000 because scipy's Shapiro-Wilk
# p-value is documented as unreliable above that size -- confirm).
# Note the naming: sample_0 comes from `same` (is_same == 1) and
# sample_1 comes from `not_same` (is_same == 0).
sample_0 = same.sample(n=5000, random_state=42)
sample_1 = not_same.sample(n=5000, random_state=42)

In [73]:
# Shapiro-Wilk normality test on each subsample, evaluated in order:
# (stat_0, p_0) belongs to sample_0 and (stat_1, p_1) to sample_1.
(stat_0, p_0), (stat_1, p_1) = map(shapiro, (sample_0, sample_1))

In [74]:
# Report the Shapiro-Wilk verdict for each group at the 0.05 significance level.
#
# Two fixes relative to the previous version of this cell:
#   * each line now tests its *own* p-value (the second line used to reuse p_0);
#   * labels now match the data: p_0 was computed on sample_0, which is drawn
#     from `same` (is_same == 1), and p_1 on sample_1, drawn from `not_same`
#     (is_same == 0).
print(f"Group is_same=0: p={p_1:.5f}, {'Normal Distribution' if p_1 > 0.05 else 'Not Normal Distribution'}")
print(f"Group is_same=1: p={p_0:.5f}, {'Normal Distribution' if p_0 > 0.05 else 'Not Normal Distribution'}")

Group is_same=0: p=0.00000, Not Normal Distribution
Group is_same=1: p=0.00000, Not Normal Distribution


In [75]:
# Two-sided Mann-Whitney U test on the *full* groups (not the subsamples),
# used because the normality check above rejected normality for both groups.
U1, p = mannwhitneyu(
    x=same,
    y=not_same,
    alternative="two-sided",
)

# The two U statistics always sum to nx * ny, so the complementary statistic
# follows directly from the one scipy returns.
U2 = (nx * ny) - U1

In [76]:
# Show both U statistics and the p-value; each message starts with its own
# blank line, and the two messages are emitted on consecutive lines.
print(
    f"\nMann-Whitney U-Test: U1_stat={U1:.4f}, p={p}",
    f"\nMann-Whitney U-Test: U2_stat={U2:.4f}",
    sep="\n",
)


Mann-Whitney U-Test: U1_stat=1706749269.0000, p=7.594038389084582e-95

Mann-Whitney U-Test: U2_stat=1406777459.0000


In [77]:
# Display the loaded table as the cell's rich output for a visual sanity check
# (pandas truncates the rendering to the first and last rows/columns).
stats_df

Unnamed: 0,rank,name,nation,startnr,total,tech,pcs,deductions,competition,element,...,panel_median,judge_id,judge_goe,judge_nation,is_same,judge_name,goe_dist,higher,lower,pt_bias
0,1,Michal BREZINA,CZE,26,89.77,48.05,41.72,0.0,ec2020SEG001OF,4S+2T,...,2.0,Judge No.1,1.0,GEO,0,Salome CHIGOGIDZE,1.0,0,1,0
1,1,Michal BREZINA,CZE,26,89.77,48.05,41.72,0.0,ec2020SEG001OF,3F,...,3.0,Judge No.1,2.0,GEO,0,Salome CHIGOGIDZE,1.0,0,1,0
2,1,Michal BREZINA,CZE,26,89.77,48.05,41.72,0.0,ec2020SEG001OF,FSSp4,...,3.0,Judge No.1,3.0,GEO,0,Salome CHIGOGIDZE,0.0,0,0,0
3,1,Michal BREZINA,CZE,26,89.77,48.05,41.72,0.0,ec2020SEG001OF,3A,...,3.0,Judge No.1,3.0,GEO,0,Salome CHIGOGIDZE,0.0,0,0,0
4,1,Michal BREZINA,CZE,26,89.77,48.05,41.72,0.0,ec2020SEG001OF,StSq4,...,4.0,Judge No.1,2.0,GEO,0,Salome CHIGOGIDZE,2.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209614,24,Meda VARIAKOJYTE,LTU,1,88.83,48.22,40.61,0.0,wc2025SEG004OF,2A+2T,...,0.0,Judge No.9,0.0,FRA,0,Florence VUYLSTEKER,0.0,0,0,0
209615,24,Meda VARIAKOJYTE,LTU,1,88.83,48.22,40.61,0.0,wc2025SEG004OF,3Sq+2T,...,-5.0,Judge No.9,-5.0,FRA,0,Florence VUYLSTEKER,0.0,0,0,0
209616,24,Meda VARIAKOJYTE,LTU,1,88.83,48.22,40.61,0.0,wc2025SEG004OF,LSp2,...,1.0,Judge No.9,-1.0,FRA,0,Florence VUYLSTEKER,2.0,0,1,0
209617,24,Meda VARIAKOJYTE,LTU,1,88.83,48.22,40.61,0.0,wc2025SEG004OF,StSq3,...,0.0,Judge No.9,-2.0,FRA,0,Florence VUYLSTEKER,2.0,0,1,0
