In [1]:
import numpy as np
import pandas as pd
from confidence_intervals import evaluate_with_conf_int
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, root_mean_squared_error


densemos = pd.read_csv('targets_outputs.csv')
densemos_960 = pd.read_csv('targets_outputs_960.csv')
test_nisqa = pd.read_csv('test_nisqa_merged.csv')
NISQA_finetuned = pd.read_csv('NISQA_custom_results.csv')
NISQA_finetuned_paired = pd.merge(test_nisqa, NISQA_finetuned, on='stimuli_y')
nisqa_v1 = pd.read_csv('NISQA_v1_results.csv')

#get speaker column for NISQA_v1
nisqa_v1["speaker"] = nisqa_v1["stimuli"].apply(lambda x: x.split('/')[-2])

In [2]:
nisqa_v1.to_csv("delete.csv")

In [3]:
nisqa_v1_targets = nisqa_v1['mos_pred']
nisqa_v1_outputs = nisqa_v1['mos']
nisqa_v1_speakers = nisqa_v1['speaker'] 


nisqa_targets = NISQA_finetuned_paired['mos_pred']
nisqa_outputs = NISQA_finetuned_paired['mos_y']
nisqa_speakers = NISQA_finetuned_paired['speaker']

targets = densemos['target']
outputs = densemos['output']
speakers = densemos['condition']

targets_960 = densemos_960['target']
outputs_960 = densemos_960['output']
speakers_960 = densemos_960['condition']

In [4]:
nisqa_v1_pcc_ci = evaluate_with_conf_int(nisqa_v1_targets, lambda x, y: pearsonr(x, y)[0], nisqa_v1_outputs, num_bootstraps=1000, conditions=nisqa_v1_speakers)
nisqa_v1_pcc_ci


(0.7061387032981716, (-0.0011534038067503136, 0.8344992718990856))

In [13]:
nisqa_pcc_ci = evaluate_with_conf_int(nisqa_targets, lambda x, y: pearsonr(x, y)[0], nisqa_outputs, conditions=nisqa_speakers)
nisqa_pcc_ci


(0.727660349867634, (0.021844197763322127, 0.8731473070912424))

In [11]:
nisqa_v1_pcc_ci = evaluate_with_conf_int(nisqa_v1_targets, lambda x, y: pearsonr(x, y)[0], nisqa_v1_outputs)
nisqa_v1_mae_ci = evaluate_with_conf_int(nisqa_v1_targets, mean_absolute_error, nisqa_v1_outputs, conditions=nisqa_v1_speakers)
nisqa_v1_rmse_ci = evaluate_with_conf_int(nisqa_v1_targets, root_mean_squared_error, nisqa_v1_outputs, conditions=nisqa_v1_speakers)

In [6]:
nisqa_pcc_ci = evaluate_with_conf_int(nisqa_targets, lambda x, y: pearsonr(x, y)[0], nisqa_outputs)
nisqa_mae_ci = evaluate_with_conf_int(nisqa_targets, mean_absolute_error, nisqa_outputs, conditions=nisqa_speakers)
nisqa_rmse_ci = evaluate_with_conf_int(nisqa_targets, root_mean_squared_error, nisqa_outputs, conditions=nisqa_speakers)

In [7]:
densemos_pcc_ci = evaluate_with_conf_int(targets, lambda x, y: pearsonr(x, y)[0], outputs, conditions=speakers)
densemos_mae_ci = evaluate_with_conf_int(targets, mean_absolute_error, outputs, conditions=speakers)
densemos_rmse_ci = evaluate_with_conf_int(targets, root_mean_squared_error, outputs, conditions=speakers)

densemos_960_pcc_ci = evaluate_with_conf_int(targets_960, lambda x, y: pearsonr(x, y)[0], outputs_960, conditions=speakers_960)
densemos_960_mae_ci = evaluate_with_conf_int(targets_960, mean_absolute_error, outputs_960, conditions=speakers_960)
densemos_960_rmse_ci = evaluate_with_conf_int(targets_960, root_mean_squared_error, outputs_960, conditions=speakers_960)

# CI Results

In [9]:
# print ci for each model and each metric
print("NISQA v1 PCC CI: ", (round(nisqa_v1_pcc_ci[0], 2), (round(nisqa_v1_pcc_ci[1][0], 2), round(nisqa_v1_pcc_ci[1][1], 2))))
print("NISQA v1 MAE CI: ", (round(nisqa_v1_mae_ci[0], 2), (round(nisqa_v1_mae_ci[1][0], 2), round(nisqa_v1_mae_ci[1][1], 2))))
print("NISQA v1 RMSE CI: ", (round(nisqa_v1_rmse_ci[0], 2), (round(nisqa_v1_rmse_ci[1][0], 2), round(nisqa_v1_rmse_ci[1][1], 2))))
print()
print("NISQA finetuned PCC CI: ", (round(nisqa_pcc_ci[0], 2), (round(nisqa_pcc_ci[1][0], 2), round(nisqa_pcc_ci[1][1], 2))))
print("NISQA finetuned MAE CI: ", (round(nisqa_mae_ci[0], 2), (round(nisqa_mae_ci[1][0], 2), round(nisqa_mae_ci[1][1], 2))))
print("NISQA finetuned RMSE CI: ", (round(nisqa_rmse_ci[0], 2), (round(nisqa_rmse_ci[1][0], 2), round(nisqa_rmse_ci[1][1], 2))))
print()
print("DenseMOS PCC CI: ", (round(densemos_pcc_ci[0], 2), (round(densemos_pcc_ci[1][0], 2), round(densemos_pcc_ci[1][1], 2))))
print("DenseMOS MAE CI: ", (round(densemos_mae_ci[0], 2), (round(densemos_mae_ci[1][0], 2), round(densemos_mae_ci[1][1], 2))))
print("DenseMOS RMSE CI: ", (round(densemos_rmse_ci[0], 2), (round(densemos_rmse_ci[1][0], 2), round(densemos_rmse_ci[1][1], 2))))
print()
print("DenseMOS 960 PCC CI: ", (round(densemos_960_pcc_ci[0], 2), (round(densemos_960_pcc_ci[1][0], 2), round(densemos_960_pcc_ci[1][1], 2))))
print("DenseMOS 960 MAE CI: ", (round(densemos_960_mae_ci[0], 2), (round(densemos_960_mae_ci[1][0], 2), round(densemos_960_mae_ci[1][1], 2))))
print("DenseMOS 960 RMSE CI: ", (round(densemos_960_rmse_ci[0], 2), (round(densemos_960_rmse_ci[1][0], 2), round(densemos_960_rmse_ci[1][1], 2))))

NISQA v1 PCC CI:  (0.71, (0.65, 0.75))
NISQA v1 MAE CI:  (0.99, (0.8, 1.22))
NISQA v1 RMSE CI:  (1.19, (0.95, 1.42))

NISQA finetuned PCC CI:  (0.73, (0.67, 0.78))
NISQA finetuned MAE CI:  (0.81, (0.6, 1.02))
NISQA finetuned RMSE CI:  (1.06, (0.79, 1.29))

DenseMOS PCC CI:  (0.62, (0.4, 0.75))
DenseMOS MAE CI:  (0.81, (0.69, 0.93))
DenseMOS RMSE CI:  (1.07, (0.92, 1.22))

DenseMOS 960 PCC CI:  (0.6, (0.38, 0.73))
DenseMOS 960 MAE CI:  (0.8, (0.69, 0.93))
DenseMOS 960 RMSE CI:  (1.07, (0.94, 1.23))
