# Task 3: Do Video Quality, Video Fragmentation, Video Unclearness, and Video Discontinuity build the same general construct? What is their internal consistency? If not, which combination does?

In [1]:
# install.packages('dplyr')      # processing 
# install.packages('readxl')     # file reading
# install.packages('psych')      # Cronbach Alpha for internal consistency

In [2]:
library(dplyr)      # data frame processing (select, group_by, summarise, pipe)
library(readxl)     # reading in the Excel data set (read_excel)
library(psych)      # Cronbach's alpha for internal consistency
options(warn=-1)    # NOTE(review): a negative warn level silences ALL warnings for the session, including deprecation notices


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



# Wrapper functions and global mean decision

In [3]:
# Global switch for how multiple answers by the same participant are handled.
# Each participant (PID) can occur several times in the data set.
# TRUE:  get_initial_data() averages VQ, VF, VU and VD per participant first
# FALSE: every row of the data set is used as is

use_mean_for_cronbach <- FALSE

In [4]:
# Maps a Cronbach's alpha value to its verbal internal consistency rating.
#
# Rating bands taken from: Quality & Usability Seminar, TU Berlin,
# slides for reliability, December 2019
#
# alpha:   a single Cronbach's alpha value
# returns: one of 'unacceptable', 'poor', 'questionable', 'acceptable',
#          'good' or 'excellent'

get_internal_consistency_label <- function(alpha) {
    # exclusive upper bound of each band, paired with the band's label
    upper_bounds <- c(0.5, 0.6, 0.7, 0.8, 0.9)
    band_labels <- c('unacceptable', 'poor', 'questionable', 'acceptable', 'good')

    # bounds are ascending, so the first bound above alpha decides the label
    for (i in seq_along(upper_bounds)) {
        if (alpha < upper_bounds[i]) {
            return(band_labels[i])
        }
    }

    # 0.9 and above
    'excellent'
}

In [5]:
# Reads the data set from an Excel file and returns only the four rating
# scale columns VQ, VF, VU and VD.
#
# path:     Excel file to read; defaults to the data set used in this notebook
# use_mean: if TRUE, the ratings are first averaged per participant (PID), so
#           every participant contributes exactly one row; defaults to the
#           global switch use_mean_for_cronbach
# returns:  data frame with the columns VQ, VF, VU, VD

get_initial_data <- function(path = 'datasets/DB01_gaming_video_quality_dataset.xlsx',
                             use_mean = use_mean_for_cronbach) {
    data <- read_excel(path)
    if (use_mean) {
        # the per-participant means keep the original scale names, so no
        # separate rename step is needed afterwards
        data <- data %>%
                    group_by(PID) %>%
                    summarise(VQ = mean(VQ),
                              VF = mean(VF),
                              VU = mean(VU),
                              VD = mean(VD))
    }
    data %>% select(VQ, VF, VU, VD)
}

In [6]:
# Returns the data frame without the scales passed in exclude_cols, i.e.
# without the scales that hinder reliability.
#
# data:         data frame holding one column per scale
# exclude_cols: character vector of column names to drop; NULL (the default)
#               or an empty vector returns data unchanged
# returns:      data with the requested columns removed

get_updated_data <- function (data, exclude_cols = NULL) {
    # a NULL default already covers the "argument not supplied" case
    if (is.null(exclude_cols) || length(exclude_cols) == 0) {
        return(data)
    }
    # all_of() marks exclude_cols as an external vector of column names;
    # passing the bare variable to select() is deprecated and would be
    # ambiguous if data had a column called "exclude_cols"
    data %>% select(-all_of(exclude_cols))
}

In [7]:
# Returns the Cronbach's alpha values that would result if each scale were
# removed from the data set, one scale at a time.
#
# input:   data frame with one column per scale
# returns: data frame with one row per scale and the single column
#          Cronbach_If_Scale_Dropped

print_results_if_scale_removed <- function(input) {
    # one reliability analysis is enough; alpha.drop (the second component of
    # the psych::alpha() result) holds the statistics with each item removed
    reliability <- psych::alpha(input)

    reliability$alpha.drop %>%
        select(raw_alpha) %>%
        rename(Cronbach_If_Scale_Dropped = raw_alpha)
}

In [8]:
# Returns a formatted summary of the current Cronbach's alpha value together
# with its verbal internal consistency rating.
#
# input:   data frame with one column per scale
# returns: a character string naming the scales, the alpha value rounded to
#          5 digits and the rating; if input has only one column, input
#          itself is returned unchanged and no alpha is computed

print_current_results <- function(input) {
    if (ncol(input) == 1) {
        return(input)
    }

    # run the reliability analysis once and reuse the result
    reliability <- psych::alpha(input)

    # extract the alpha as a plain number instead of a 1x1 data frame
    current_alpha <- round(reliability$total$raw_alpha, digits = 5)

    paste0('Current Cronbach alpha (',
           paste(rownames(reliability$alpha.drop), collapse = ', '),
           ') : ', current_alpha, ' => ',
           get_internal_consistency_label(current_alpha),
           ' internal consistency')
}

# Reading in initial data set with VQ, VF, VU & VD scales + internal consistency results

In [9]:
# Load the rating scales (VQ, VF, VU, VD) and print the initial data set.
data <- get_initial_data()

# exclude_cols = NULL drops nothing, so the analysis starts with all scales
current_data <- get_updated_data(data, exclude_cols = NULL)
current_data

VQ,VF,VU,VD
1.4,1.3,1.6,5.7
1.2,1.2,4.6,6.3
2.5,2.3,2.8,4.4
2.0,3.0,2.0,4.8
2.4,3.0,2.0,5.5
2.0,2.2,2.0,4.0
2.0,2.0,2.0,6.0
1.5,1.2,1.2,5.6
2.4,2.3,2.6,5.3
2.3,2.8,2.0,5.7


In [10]:
# Print the Cronbach's alpha of the current scales, followed by the alpha
# that would result from dropping each scale individually.

print_current_results(current_data)
print_results_if_scale_removed(current_data)

Unnamed: 0,Cronbach_If_Scale_Dropped
VQ,0.5253684
VF,0.6560034
VU,0.6500011
VD,0.829821


# Remove the scale whose removal raises Cronbach's alpha the most, then recompute the internal consistency results

In [11]:
# Drop VD: in the "if scale dropped" table of the previous cell, removing VD
# gives the highest Cronbach's alpha (0.8298).

current_data <- get_updated_data(current_data, exclude_cols = c('VD'))
current_data

VQ,VF,VU
1.4,1.3,1.6
1.2,1.2,4.6
2.5,2.3,2.8
2.0,3.0,2.0
2.4,3.0,2.0
2.0,2.2,2.0
2.0,2.0,2.0
1.5,1.2,1.2
2.4,2.3,2.6
2.3,2.8,2.0


In [12]:
# Print the Cronbach's alpha of the remaining scales, followed by the alpha
# that would result from dropping each of them individually.
print_current_results(current_data)
print_results_if_scale_removed(current_data)

Unnamed: 0,Cronbach_If_Scale_Dropped
VQ,0.6172722
VF,0.8502522
VU,0.8247029


# Repeat scale removal to increase internal consistency

In [13]:
# Drop VF: in the "if scale dropped" table of the previous cell, removing VF
# gives the highest Cronbach's alpha (0.8503).

current_data <- get_updated_data(current_data, exclude_cols = c('VF'))
current_data

VQ,VU
1.4,1.6
1.2,4.6
2.5,2.8
2.0,2.0
2.4,2.0
2.0,2.0
2.0,2.0
1.5,1.2
2.4,2.6
2.3,2.0


In [14]:
# Print the Cronbach's alpha of the remaining scales, followed by the alpha
# that would result from dropping each of them individually.

print_current_results(current_data)
print_results_if_scale_removed(current_data)

Unnamed: 0,Cronbach_If_Scale_Dropped
VQ,0.7460601
VU,0.5566057


#### Note: Removing any other attribute from the _current_ data set would diminish our reliability!

# Just to be 100% certain: check if greedy approach yields greatest internal consistency

In [15]:
# Exhaustive check: print the internal consistency of every scale combination
# that keeps at least two scales.
# Note: Cronbach's alpha is neither computable nor meaningful with only one
# item left, so at most (number of scales - 2) scales are dropped at once.
# The combinations are generated instead of being listed by hand, so none can
# be forgotten and the check adapts to the number of scales in the data set.

scales <- names(data)
max_dropped <- max(0L, length(scales) - 2L)

# NULL stands for "drop nothing"; it is followed by every set of 1, 2, ...
# scales to drop
exclusion_sets <- c(
    list(NULL),
    unlist(lapply(seq_len(max_dropped),
                  function(k) combn(scales, k, simplify = FALSE)),
           recursive = FALSE)
)

for (excluded in exclusion_sets) {
    # values are not auto-displayed inside a loop, hence the explicit print()
    print(print_current_results(get_updated_data(data, exclude_cols = excluded)))
}

#### => Previously calculated scale combination of VQ, VU has greatest internal consistency: greedy approach indeed led to desired result!