In [2]:
import pandas as pd
from scipy.stats import pearsonr
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [3]:
# Load the subjective evaluation results from subjective_results.csv
# and preview the first five rows.
subjective_results = pd.read_csv(filepath_or_buffer='subjective_results.csv')
subjective_results.head(n=5)


Unnamed: 0,participant_id,age,gender_participant,country,province,education,headphone_brand,stimuli,score,stimuli_group,stimuli_service,gender_stimuli,dialect
0,ymxfxn696we9rp1tnnub3f,26,P,AR,Buenos Aires,5,samsung,E/E2/arf_00610_00913913795.wav,5.0,E2,open_srl_es_ar_female_2,F,ARG
1,ymxfxn696we9rp1tnnub3f,26,P,AR,Buenos Aires,5,samsung,E/E2/arf_00610_00546762557.wav,5.0,E2,open_srl_es_ar_female_2,F,ARG
2,9fig36z1cml41m265ckifw,25,M,AR,Buenos Aires,4,Sony,E/E2/arf_00610_00621699750.wav,5.0,E2,open_srl_es_ar_female_2,F,ARG
3,vj735xlt2yj805wyn5rimq,59,F,AR,Buenos Aires,2,,E/E2/arf_00610_00912390302.wav,4.0,E2,open_srl_es_ar_female_2,F,ARG
4,vj735xlt2yj805wyn5rimq,59,F,AR,Buenos Aires,2,,E/E2/arf_00610_01772510223.wav,4.0,E2,open_srl_es_ar_female_2,F,ARG


# Gender preference analysis

In [4]:
# Integer code assigned to each gender label; used to encode the
# gender_participant and gender_stimuli columns numerically.
gender_mapping = dict(M=0, F=1, P=2, X=3)

In [5]:
# Encode participant and stimulus gender as the integer codes in `gender_mapping`.
# Series.map turns every value that is not a key of the mapping into NaN, so mapping
# an already-encoded column a second time would wipe it out. Columns that are already
# numeric are therefore left untouched, which makes this cell safe to re-run.
for gender_column in ('gender_participant', 'gender_stimuli'):
    if not pd.api.types.is_numeric_dtype(subjective_results[gender_column]):
        subjective_results[gender_column] = subjective_results[gender_column].map(gender_mapping)

# Keep only the rows where both genders have a code: a missing or unmapped label
# is NaN at this point and would otherwise propagate into the correlation result.
gender_codes = subjective_results[['gender_participant', 'gender_stimuli']].dropna()

# NOTE(review): the codes are arbitrary labels for nominal categories, so the Pearson
# coefficient depends on the chosen coding; a chi-square test of independence on a
# crosstab of the two columns may be the more appropriate test -- confirm.
# Calculate Pearson correlation coefficient and p-value
correlation, p_value = pearsonr(gender_codes['gender_participant'], gender_codes['gender_stimuli'])

# Display results
print(f"Pearson Correlation Coefficient: {correlation}")
print(f"P-value: {p_value}")

# Check if the correlation is statistically significant (common threshold is 0.05)
if p_value < 0.05:
    print("The correlation is statistically significant.")
else:
    print("There is no statistically significant correlation.")

Pearson Correlation Coefficient: -0.01864310931614816
P-value: 0.18568660106658505
There is no statistically significant correlation.


# Education analysis

In [6]:
# Regress score on education with the formula interface, then print the
# Type II ANOVA table for the fitted model.
# NOTE(review): education enters the formula as a single numeric term (the recorded
# table shows df = 1.0); wrap it as C(education) if the levels should be compared
# as categories -- confirm intent.
education_ols = ols(formula='score ~ education', data=subjective_results)
model = education_ols.fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)


                sum_sq      df         F    PR(>F)
education     0.262773     1.0  0.150944  0.697651
Residual   8772.212926  5039.0       NaN       NaN


In [7]:
# Fit the score ~ education regression and print the full OLS summary
# (coefficients, standard errors and fit statistics).
model = ols(formula='score ~ education', data=subjective_results).fit()
summary = model.summary()
print(summary)


                            OLS Regression Results                            
Dep. Variable:                  score   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.1509
Date:                Thu, 18 Jan 2024   Prob (F-statistic):              0.698
Time:                        23:39:29   Log-Likelihood:                -8549.2
No. Observations:                5041   AIC:                         1.710e+04
Df Residuals:                    5039   BIC:                         1.712e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.7074      0.046     58.492      0.0

# Age analysis

In [8]:
# Order the rows of subjective_results by participant age (ascending, in place)
# and preview the five youngest entries.
# NOTE(review): the recorded output of this cell shows rows with age 2 and
# headphone_brand 'test', which looks like test data -- confirm whether such rows
# should be excluded before any analysis.
subjective_results.sort_values(by='age', inplace=True)

subjective_results.head(n=5)

Unnamed: 0,participant_id,age,gender_participant,country,province,education,headphone_brand,stimuli,score,stimuli_group,stimuli_service,gender_stimuli,dialect
1025,qutvhu57mnvi6fizy1t3,2,1,CO,,1,test,E/E4/arm_08784_00640197534.wav,5.0,E4,open_srl_es_ar_male_1,0,ARG
2121,qutvhu57mnvi6fizy1t3,2,1,CO,,1,test,C/C2/fiona2_65.wav,3.0,C2,PollyN_Fiona,1,CAST
929,qutvhu57mnvi6fizy1t3,2,1,CO,,1,test,A/A5/tts_dev_thomas_19.wav,1.0,A5,tts_dewhitte,1,ES
2122,qutvhu57mnvi6fizy1t3,2,1,CO,,1,test,C/C2/fiona2_86.wav,4.0,C2,PollyN_Fiona,1,CAST
17,qutvhu57mnvi6fizy1t3,2,1,CO,,1,test,E/E2/arf_00610_00848604729.wav,5.0,E2,open_srl_es_ar_female_2,1,ARG


# Average score by group

In [9]:
# Mean score for every stimuli_group, turned back into a flat two-column frame
# (stimuli_group, score).
# NOTE(review): only the group key and the mean score are produced; no stimulus or
# service name column is carried along. Add one to the grouping keys if it is
# wanted in the output file -- confirm.
mean_score_by_group = subjective_results.groupby('stimuli_group')['score'].mean()
average_scores = mean_score_by_group.reset_index()

# Write the per-group averages to average_scores.csv without the row index
average_scores.to_csv('average_scores.csv', index=False)