# Data processor

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import itertools
from pcgsepy.stats.tests import anova, THRESHOLD_PVALUE
from pcgsepy.stats.tests import kruskal_wallis
from pcgsepy.stats.plots import plot_rankings
from pcgsepy.stats.plots import plot_scores

In [None]:
# Study dimensions: n_volunteers caps the y axis of the questionnaire bar
# charts; n_questions x n_categories is the shape of the answer-count table.
n_volunteers, n_questions, n_categories = 10, 5, 4

# Subplot key -> question title. The keys are the letters used in the
# subplot mosaic of the questionnaire figure.
questions_place = {
    'A': 'Solution satisfaction',
    'B': 'System efficiency',
    'C': 'User-friendliness',
    'D': 'Runtime response',
    'E': 'User fatigue',
}

# Tick labels for the answer categories.
# NOTE(review): the leading empty string looks like a placeholder for a tick
# left of the first bar -- confirm against the plotting cell.
scale = [
    '',
    'Poor',
    'Fair',
    'Good',
    'Excellent',
]

# Display names of the compared samples, in the same order as the score rows.
samples_name = [
    'random-emitter',
    'preference-matrix-emitter',
    'contextual-bandit-emitter',
]

## Load and preprocess data

In [None]:
# Input files: one row per respondent in the questionnaire file (one column
# per question); each row of the scores file becomes one sample.
questionnaire_data = 'dummy-questionnaire.csv'
scores_data = 'dummy-scores.csv'

# ndmin=2 keeps both tables two-dimensional even when a file holds a single
# row, so iterating over them always yields whole rows rather than scalars.
questionnaire_responses = np.loadtxt(questionnaire_data, delimiter=',', dtype=np.uint8, ndmin=2)
scores_responses = np.loadtxt(scores_data, delimiter=',', dtype=np.uint8, ndmin=2)

# Answers are used as 1-based category indices. Reject anything outside
# 1..n_categories up front: an answer of 0 would otherwise wrap around on
# the unsigned subtraction (or silently land in the last category, depending
# on the NumPy version) instead of being reported.
out_of_range = (questionnaire_responses < 1) | (questionnaire_responses > n_categories)
if out_of_range.any():
    raise ValueError(
        f'questionnaire answers must be between 1 and {n_categories}; '
        f'found {np.unique(questionnaire_responses[out_of_range]).tolist()}'
    )

# responses[q][c] is the number of respondents who gave answer c + 1 to
# question q. Counting in a signed, platform-sized integer avoids the
# wrap-around a uint8 counter would hit after 255 identical answers.
answer_indices = questionnaire_responses.astype(np.intp) - 1
responses = np.zeros(shape=(n_questions, n_categories), dtype=np.intp)
for question_idx, answers in enumerate(answer_indices.T):
    responses[question_idx] = np.bincount(answers, minlength=n_categories)

# One 1-D array of scores per sample, in file row order.
samples = list(scores_responses)

## Questionnaire plots

In [None]:
# One bar chart per question, arranged on a 3x2 grid; the '.' in the mosaic
# leaves the last slot empty.
axd = plt.figure(constrained_layout=True).subplot_mosaic(
    """
    AB
    CD
    E.
    """
)

# Bars sit at x = 0..n_categories-1. Pin the ticks to those positions before
# labelling them: set_xticklabels on its own attaches labels to whatever
# ticks the automatic locator happens to choose, which can misalign them.
category_positions = np.arange(n_categories)
# `scale` may carry an empty placeholder entry that only existed to pad the
# automatic ticks; drop it so there is exactly one label per bar.
category_labels = [label for label in scale if label]

for i, (plot_idx, question_title) in enumerate(questions_place.items()):
    ax = axd[plot_idx]
    ax.bar(category_positions, responses[i], 1, color='lightblue', alpha=0.75)
    ax.set_xticks(category_positions)
    ax.set_xticklabels(category_labels)
    # Integer y ticks from 0 up to the number of volunteers.
    ax.set_yticks(np.arange(0, n_volunteers + 1, 1))
    ax.set_title(question_title)
    ax.grid()

# plt.suptitle('Questionnaire responses distribution')

plt.show()

In [None]:
# Column-wise mean of the raw answers: one average per question.
avg_response = questionnaire_responses.mean(axis=0)

# One bar per question, labelled with the question titles.
question_titles = list(questions_place.values())
plt.bar(question_titles, avg_response)
plt.ylim(0, n_categories)
plt.xticks(rotation=45)
# plt.title('Average category score')
plt.show()

## Shapiro-Wilk test

In [None]:
from pcgsepy.stats.tests import shapiro_wilk

print('## SHAPIRO-WILK TEST ##')

# One (statistic, p-value) pair per sample, reported under the sample's name.
shapiro_test = shapiro_wilk(samples=samples)

for name, result in zip(samples_name, shapiro_test):
    stat, pvalue = result
    print(f'Result score for {name}:')
    print(f'\tStatistic: {stat}')
    print(f'\tp-value: {pvalue}')

## One-way ANOVA test

In [None]:
print('## ONE-WAY ANOVA TEST ##')

anova_test = anova(samples=samples)

# The first entry is reported as the overall result.
overall_stat, overall_pvalue = anova_test[0]

print('Overall score:')
print(f'\tStatistic: {overall_stat}')
print(f'\tp-value: {overall_pvalue}')

# The remaining entries are only reported when the overall p-value is below
# the threshold; they are labelled with the sample pairs in combination order.
# NOTE(review): presumably these are the pairwise comparisons -- confirm
# against pcgsepy.stats.tests.anova.
if overall_pvalue < THRESHOLD_PVALUE:
    sample_pairs = itertools.combinations(samples_name, 2)
    for pair, (stat, pvalue) in zip(sample_pairs, anova_test[1:]):
        print(f'Result score for {" x ".join(pair)}:')
        print(f'\tStatistic: {stat}')
        print(f'\tp-value: {pvalue}')

## Kruskal-Wallis H-test

In [None]:
print('## KRUSKAL-WALLIS TEST ##')

kruskal_wallis_test = kruskal_wallis(samples=samples)

# The first entry is reported as the overall result.
overall_stat, overall_pvalue = kruskal_wallis_test[0]

print('Overall score:')
print(f'\tStatistic: {overall_stat}')
print(f'\tp-value: {overall_pvalue}')

# The remaining entries are only reported when the overall p-value is below
# the threshold; they are labelled with the sample pairs in combination order.
# NOTE(review): presumably these are the pairwise comparisons -- confirm
# against pcgsepy.stats.tests.kruskal_wallis.
if overall_pvalue < THRESHOLD_PVALUE:
    sample_pairs = itertools.combinations(samples_name, 2)
    for pair, (stat, pvalue) in zip(sample_pairs, kruskal_wallis_test[1:]):
        print(f'Result score for {" x ".join(pair)}:')
        print(f'\tStatistic: {stat}')
        print(f'\tp-value: {pvalue}')

## Ranking plots

In [None]:
# Ranking plot: labels are passed from lowest to highest place.
ranking_labels = ['3rd place', '2nd place', '1st place']
plot_rankings(samples=samples, labels=ranking_labels, names=samples_name, title='')

# Score plot.
# NOTE(review): the mapping presumably converts a score of 1/2/3 into the
# weight plotted for it -- confirm against pcgsepy.stats.plots.plot_scores.
score_weights = {1: 1, 2: 0.5, 3: 0.25}
plot_scores(samples=samples, names=samples_name, score_to_value=score_weights, title='')