In [22]:
require(forcats)

In [23]:
# Read the participant data CSV.
data <- read.csv("~/research_data/mozilla_voice_comparison/ppt_data.csv")

# Re-create every factor column so that levels with no observations are
# dropped; non-factor columns pass through unchanged.
data <- as.data.frame(lapply(data, function(column) {
  if (is.factor(column)) {
    factor(column)
  } else {
    column
  }
}))

# Speed rating as an ordered factor, running from "Much too fast" to
# "Much too slow".
data$rate.speed <- factor(
  data$rate.speed,
  levels = c("Much too fast", "Too fast", "Just right", "Too slow", "Much too slow"),
  ordered = TRUE
)

# Three-level version: the two "fast" levels and the two "slow" levels are
# merged by assigning them identical labels.
data$rate.speed.simple <- data$rate.speed
levels(data$rate.speed.simple) <- c("Too fast", "Too fast", "Just right", "Too slow", "Too slow")

# Experience rating as an ordered factor, running from "Excellent" to
# "Very poor".
data$rate.experience <- factor(
  data$rate.experience,
  levels = c("Excellent", "Good", "OK", "Poor", "Very poor"),
  ordered = TRUE
)

# Three-level version: top two levels become "Good", bottom two become "Poor".
data$rate.experience.simple <- data$rate.experience
levels(data$rate.experience.simple) <- c("Good", "Good", "OK", "Poor", "Poor")

# Numeric opinion score with the level order reversed, so the first level
# ("Excellent") scores highest and the last level ("Very poor") scores 1.
data$mean.opinion <- (nlevels(data$rate.experience) + 1) - as.numeric(data$rate.experience)

In [24]:
# Per-voice distribution of the simplified experience rating: row-wise
# proportions, rounded to three decimals and expressed as percentages.
experience <- table(data$voice, data$rate.experience.simple)
experience <- prop.table(experience, margin = 1)
experience <- as.data.frame.matrix(round(experience, digits = 3) * 100)

# Attach the per-voice median of rate.again. The percentage table is joined on
# its row names (by.y = 0), which carry the voice labels from table().
experience <- merge(
  aggregate(rate.again ~ voice, data = data, FUN = "median"),
  experience,
  by.x = "voice",
  by.y = 0
)

# Attach the per-voice mean of the numeric opinion score.
experience <- merge(
  aggregate(mean.opinion ~ voice, data = data, FUN = "mean"),
  experience,
  by = "voice"
)

# Rescale both summaries: mean opinion as a fraction of 5 (rounded to three
# decimals) and rate.again divided by 10.
# NOTE(review): presumably rate.again is on a 0-10 scale -- confirm.
experience$mean.opinion <- round(experience$mean.opinion / 5, digits = 3)
experience$rate.again <- experience$rate.again / 10

In [25]:
# Keep only the reported columns (dropping the "OK" share) and give them
# display names, in the same order as `include`.
include <- c("voice", "mean.opinion", "rate.again", "Good", "Poor")
experience <- experience[include]
header <- c("Voice", "MPS", "Repeat", "% pos", "% neg")
colnames(experience) <- header

In [26]:
# Compare how each voice ranks on "% pos" with how it ranks on MPS.
# Inputs are negated so that rank 1 is the largest value.
# base::rank is spelled out because the working data frame is itself named
# `rank`.
rank <- experience[order(-experience$`% pos`), ]
rank$pos_rank <- base::rank(-rank$`% pos`)

# Re-sort by MPS using the frame's own column. Ordering indices computed on
# `experience` refer to a different row order and must not be used to index
# `rank`.
rank <- rank[order(-rank$MPS), ]
rank$mos_rank <- base::rank(-rank$MPS)

# Positive difference: the voice ranks worse (larger rank number) on "% pos"
# than on MPS.
rank$rank_diff <- rank$pos_rank - rank$mos_rank

# Attach the difference to the summary table and show the largest
# differences first.
result <- merge(experience, rank[c("Voice", "rank_diff")], by = "Voice")
result[order(-result$rank_diff), ]

Unnamed: 0_level_0,Voice,MPS,Repeat,% pos,% neg,rank_diff
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
19,Voicery Nichole,0.691,0.6,43.9,8.8,6
8,Judy GL1,0.704,0.3,48.2,14.3,4
20,Windows Female,0.7,0.5,47.8,15.2,4
11,Judy Wave2,0.737,0.6,54.9,7.8,3
14,Mozilla TTS:Nancy2,0.678,0.2,34.1,9.8,3
5,Google A,0.682,0.4,47.1,15.7,1
1,.Abe,0.78,0.7,75.5,4.1,0
3,.Jofish,0.847,0.8,89.1,1.8,0
4,Android UK Male,0.583,0.1,29.2,41.7,0
6,Google C,0.736,0.7,59.6,8.5,0


In [28]:
# Rank the voices by MPS (rank 1 = highest MPS; the input is negated), then
# display the table ordered by voice.
rank <- experience[order(-experience[["MPS"]]), ]
rank[["rank"]] <- rank(-rank[["MPS"]])
rank[order(rank[["Voice"]]), ]

Voice,MPS,Repeat,% pos,% neg,rank
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
.Abe,0.78,0.7,75.5,4.1,2
.Janice,0.686,0.3,49.0,12.2,13
.Jofish,0.847,0.8,89.1,1.8,1
Android UK Male,0.583,0.1,29.2,41.7,20
Google A,0.682,0.4,47.1,15.7,15
Google C,0.736,0.7,59.6,8.5,5
iOS Female,0.557,0.0,21.6,37.3,21
Judy GL1,0.704,0.3,48.2,14.3,10
Judy GL2,0.668,0.3,52.0,20.0,17
Judy Wave1,0.776,0.7,68.3,12.2,3


In [8]:
# One-way ANOVA of the numeric opinion score across voices.
# The formula refers to the columns as data$..., so the term in the printed
# table is labelled with that literal text.
summary(aov(data$mean.opinion ~ data$voice))

              Df Sum Sq Mean Sq F value Pr(>F)    
data$voice    21  113.7   5.414   6.163 <2e-16 ***
Residuals   1068  938.2   0.878                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [10]:
# Kruskal-Wallis rank sum test of the ordered experience rating across voices
# (a rank-based counterpart to the ANOVA on the numeric opinion score).
kruskal.test(rate.experience ~ voice, data = data)


	Kruskal-Wallis rank sum test

data:  rate.experience by voice
Kruskal-Wallis chi-squared = 113.06, df = 21, p-value = 1.316e-14


In [11]:
# Paired Wilcoxon signed rank test between the per-voice "% pos" and MPS columns.
# NOTE(review): "% pos" was built as a percentage (proportion * 100) while MPS
# is the mean opinion divided by 5, so the two columns are on different
# scales. The paired differences may then all share one sign, which would make
# the test uninformative -- confirm the intended comparison (e.g. put both on
# the same scale, or compare ranks / use a rank correlation).
wilcox.test(experience$`% pos`, experience$MPS, paired = TRUE)


	Wilcoxon signed rank test

data:  experience$`% pos` and experience$MPS
V = 231, p-value = 9.537e-07
alternative hypothesis: true location shift is not equal to 0


In [12]:
# Paired Wilcoxon signed rank test between the per-voice "% neg" and MPS columns.
# NOTE(review): "% neg" was built as a percentage (proportion * 100) while MPS
# is the mean opinion divided by 5, so the two columns are on different
# scales. The paired differences may then all share one sign, which would make
# the test uninformative -- confirm the intended comparison (e.g. put both on
# the same scale, or compare ranks / use a rank correlation).
wilcox.test(experience$`% neg`, experience$MPS, paired = TRUE)


	Wilcoxon signed rank test

data:  experience$`% neg` and experience$MPS
V = 231, p-value = 9.537e-07
alternative hypothesis: true location shift is not equal to 0
