# Evaluate Naturalness Survey

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
import seaborn as sns 

In [2]:
# Load the raw results export for the survey batch (relative path).
df = pd.read_csv(filepath_or_buffer="Batch_4365949_batch_results_2.csv")

## Convert rating to numbers

In [3]:
# Answer labels offered by the survey, paired with the numeric rating each one
# stands for (5 = most natural, 1 = least natural).
label_to_num = dict(
    (
        ("Excellent - Completely natural speech", 5),
        ("Good - Mostly natural speech", 4),
        ("Fair - Equally natural and unnatural speech", 3),
        ("Poor - Mostly unnatural speech", 2),
        ("Bad - Completely unnatural speech", 1),
    )
)

In [4]:
# Work on a copy so the frame loaded from disk stays untouched.
df_copy = df.copy()

# Translate every answer label into its numeric rating. The lookup goes through
# the dict's item access, so a label missing from label_to_num raises KeyError
# instead of being silently turned into a missing value.
answer_labels = df_copy["Answer.audio-naturalness.label"]
df_copy["naturalness"] = answer_labels.apply(label_to_num.__getitem__)

## Separate our results from Face2Speech results

In [11]:
# Rows are attributed to a system by the repository path inside the audio URL.
audio_urls = df_copy["Input.audio_url"]
is_ours = audio_urls.str.contains("bjoernpl/ThesisSurveyFiles/blob/main/ours/")
is_theirs = audio_urls.str.contains("DeNA/Face2Speech//blob/master/docs")

ours = df_copy[is_ours]
theirs = df_copy[is_theirs]

# Number of answers per rating for each system, side by side and ordered by rating.
rating_counts = {
    "ours": ours["naturalness"].value_counts(),
    "theirs": theirs["naturalness"].value_counts(),
}
both = pd.DataFrame(rating_counts).sort_index()
both

Unnamed: 0,ours,theirs
1,9,23
2,41,78
3,181,186
4,295,255
5,87,75


## Calculate Mean and 95% confidence interval

In [16]:
# Mean naturalness rating of our system with a two-sided 95% Student-t
# confidence interval around it.
m = ours["naturalness"].mean()

# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence` and later removed `alpha`, so `alpha=0.95` raises a
# TypeError on current releases while the positional form runs on old and new.
l, t = st.t.interval(
    0.95,
    df=len(ours) - 1,
    loc=m,
    scale=st.sem(ours["naturalness"]),
)  # l = lower bound, t = upper bound

# The interval is symmetric about the mean, so m - l is its half-width.
print(f"Mean naturalness rating: {float(m):.2f} +- {float(m-l):.2f}")

Mean naturalness rating: 3.67 +- 0.07


In [15]:
# Mean naturalness rating of the Face2Speech samples with a two-sided 95%
# Student-t confidence interval around it.
m = theirs["naturalness"].mean()

# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence` and later removed `alpha`, so `alpha=0.95` raises a
# TypeError on current releases while the positional form runs on old and new.
l, t = st.t.interval(
    0.95,
    df=len(theirs) - 1,
    loc=m,
    scale=st.sem(theirs["naturalness"]),
)  # l = lower bound, t = upper bound

# The interval is symmetric about the mean, so m - l is its half-width.
print(f"Mean naturalness rating: {float(m):.2f} +- {float(m-l):.2f}")

Mean naturalness rating: 3.46 +- 0.08
