# Questionnaire processing

In [None]:
import csv
import json
import numpy as np
import os
import random

In [None]:
# Input locations: the questionnaire export and the directory holding the
# files that were uploaded together with the answers.
questionnaire_csv = './Space Engineers AI Spaceship Generator Questionnaire.csv'
file_uploads_dir = './Space Engineers AI Spaceship Generator Questionnaire (File responses)'
metrics_dir = os.path.join(file_uploads_dir, 'metrics')
comparator_dir = os.path.join(file_uploads_dir, 'comparator')

# Subplot label -> title of the questionnaire category drawn in that subplot.
questions_place = dict(
    A='Solution satisfaction',
    B='System efficiency',
    C='Runtime response',
    D='User fatigue',
)

# Answer labels indexed by numeric answer; index 0 is an empty placeholder.
scale = ['', 'Poor', 'Fair', 'Good', 'Excellent']

# Names of the compared emitters; list positions are used as indices elsewhere.
samples_name = ['Human', 'Random', 'Greedy', 'Contextual Bandit']

# Every quantity collected per emitter (one list entry per loaded volunteer).
experiment_fields = (
    'solution_satisfaction',
    'system_efficiency',
    'runtime_response',
    'user_fatigue',
    'time_elapsed_emitter',
    'n_interactions',
    'avg_complexity',
    'n_solutions_feas',
    'n_solutions_infeas',
    'scores',
)

# emitter name -> field name -> list of collected values (each list is distinct).
experiments = {
    emitter: {field: [] for field in experiment_fields}
    for emitter in samples_name
}

# Answers that are not tied to a specific emitter.
user_friendliness = []
feedbacks = []
n_volunteers = 0

In [None]:
# Switch for the anonymizing cell: when True, the uploaded files are renamed
# on disk so that their names no longer carry the uploader's name.
needs_anonymizing = False

In [None]:
def order_by_rng(rng_seed):
    """Return the emitter names shuffled deterministically from `rng_seed`.

    A private `random.Random` instance is used, so the process-wide `random`
    state is left untouched. The resulting permutation is the same one that
    `random.seed(rng_seed)` followed by `random.shuffle(...)` produces,
    because the module-level functions are bound methods of a `Random`
    instance.

    Args:
        rng_seed: Seed of the shuffle (the participant ID in this notebook).

    Returns:
        A new list with the entries of `samples_name` in shuffled order;
        `samples_name` itself is not modified.
    """
    my_emitterslist = samples_name.copy()
    random.Random(rng_seed).shuffle(my_emitterslist)
    return my_emitterslist

## Anonymize file uploads

The files in `file_uploads_dir` hold the metrics and the configuration rankings, but their filenames also contain the name of the uploader.

In [None]:
if needs_anonymizing:
    # Presumably each upload is named '<stem> - <uploader name>[.ext]'
    # (verify against the form export). Everything from the first ' - ' on is
    # dropped and the directory-specific extension is re-attached.
    for subdir, ext in zip([metrics_dir, comparator_dir], ['', '.json']):
        for f in os.listdir(subdir):
            prefix, separator, _ = f.partition(' - ')
            if not separator:
                # No uploader suffix (e.g. already anonymized): renaming would
                # only append `ext` a second time, so leave the file alone.
                continue
            os.rename(os.path.join(subdir, f),
                      os.path.join(subdir, f'{prefix}{ext}'))

In [None]:
# files = os.listdir(comparator_dir)
# for f in files:
#     with open(os.path.join(comparator_dir, f), 'r') as fin:
#         content = fin.read()
#     with open(os.path.join(comparator_dir, f), 'w') as fout:
#         content = content.replace('\'', '\"')
#         fout.write(content)

## Load the `csv` file

In [None]:
# Read the questionnaire row by row and collect, per emitter, the category
# answers, the comparator result and the averaged usage metrics.
with open(questionnaire_csv, newline='', encoding='utf8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')

    for row in reader:
        rng_seed = int(row['Please insert your ID here'])

        # Only the two uploads can be missing, so only they are guarded;
        # a row without both files is reported and skipped entirely.
        try:
            with open(os.path.join(comparator_dir, f'{rng_seed}_res.json'), 'r') as f:
                scores = json.load(f)

            with open(os.path.join(metrics_dir, f'user_metrics_{rng_seed}')) as f:
                metrics = json.load(f)
        except FileNotFoundError:
            print('Skipped', rng_seed)
            continue

        # Questionnaire columns are numbered (1)..(4); the seed-driven shuffle
        # tells which emitter each number refers to for this participant.
        experiments_order = order_by_rng(rng_seed=rng_seed)
        for i, v in enumerate(experiments_order):
            experiments[v]['solution_satisfaction'].append(int(row[f'({i + 1}) Solution satisfaction']))
            # NOTE(review): 'system_efficiency' is read from the 'System variety' column — confirm.
            experiments[v]['system_efficiency'].append(int(row[f'({i + 1}) System variety']))
            experiments[v]['runtime_response'].append(int(row[f'({i + 1}) Runtime response']))
            experiments[v]['user_fatigue'].append(int(row[f'({i + 1}) Fatigue']))
            experiments[v]['scores'].append(int(scores[v]))

            # Metrics are indexed by the emitter's position in `samples_name`:
            # directly for lists, via the stringified index for dicts.
            k = samples_name.index(v)
            for metric, ms in metrics.items():
                m = ms[k] if isinstance(ms, list) else ms.get(str(k))
                avg_m = np.average(m)
                experiments[v][metric].append(avg_m)

        user_friendliness.append(int(row['Ease of use']))
        feedbacks.append(row['Please provide any additional feedback here'])

        n_volunteers += 1


In [None]:
import numpy as np

# Print the answers that are not tied to a specific emitter.
print('### Experiments feedback ###')
print(f'Average user-friendliness: {np.mean(user_friendliness)}')
print('Additional feedbacks: ')
# Empty free-text answers are left out.
for comment in filter(None, feedbacks):
    print(comment)
print('###        -----        ###')

In [None]:
# Number of questionnaire rows that were loaded; rows whose upload files are
# missing were skipped and are not counted.
n_volunteers

In [None]:
# One array of comparator results per emitter, ordered like `samples_name`.
samples = [np.asarray(experiments[emitter]['scores']) for emitter in samples_name]

## Plots

In [None]:
from pcgsepy.setup_utils import setup_matplotlib

# NOTE(review): presumably applies the project's global matplotlib settings —
# confirm the meaning of both flags in pcgsepy.setup_utils.
setup_matplotlib(larger_fonts=True, type3_fix=False)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Keys under which the four questionnaire categories are stored per emitter.
feedback_keys = [
    'solution_satisfaction',
    'system_efficiency',
    'runtime_response',
    'user_fatigue',
]

### Feedback per experiment

In [None]:
# Answer histogram: user_feedback[emitter][category][answer] -> number of answers.
user_feedback = {v: {k: {i: 0 for i in range(len(scale))} for k in feedback_keys} for v in samples_name}

for sample in samples_name:
    for k in feedback_keys:
        for v in experiments[sample][k]:
            user_feedback[sample][k][v] += 1

# Bars are drawn for answers 1..len(scale)-1; index 0 of `scale` is a placeholder.
score_values = list(range(1, len(scale)))
# `n_volunteers // 5` is 0 for fewer than 5 volunteers, and np.arange rejects
# a zero step, so the tick spacing is clamped to at least 1.
ytick_step = max(1, n_volunteers // 5)

for sample in samples_name:
    axd = plt.figure(constrained_layout=True).subplot_mosaic(
        """
        AB
        CD
        """
    )

    for plot_idx, metric in questions_place.items():
        ax = axd[plot_idx]
        # Category titles map onto the `experiments` keys by lower-casing and
        # replacing spaces with underscores.
        counts = user_feedback[sample][metric.replace(' ', '_').lower()]
        ax.bar(score_values, [counts[j] for j in score_values], 1, color='lightblue', alpha=0.75)
        # Pin the tick positions before labelling them; otherwise the labels
        # are attached to whatever ticks matplotlib happens to pick.
        ax.set_xticks(score_values)
        ax.set_xticklabels(scale[1:])
        ax.set_yticks(np.arange(0, n_volunteers + 1, ytick_step))
        ax.set_title(metric)
        ax.grid()

    # plt.suptitle(f'Questionnaire responses distribution ({sample} emitter)')

    plt.savefig(f'./plots/questionnaire-plots-{sample.replace(" ", "_").lower()}')

    plt.show()

### Average feedback

In [None]:
# Change the default x tick label size to 14 for the figures drawn afterwards.
plt.rc('xtick', labelsize=14)

In [None]:
# Mean answer per emitter and questionnaire category.
avg_response = {v: {k: np.mean(experiments[v][k]) for k in feedback_keys} for v in samples_name}

for sample in samples_name:
    plt.figure(figsize=(10, 4))
    plt.bar(questions_place.values(), [avg_response[sample][k] for k in feedback_keys])
    # The axis must reach the highest answer on the scale. The number of
    # categories only happened to be the same value, so derive it from `scale`.
    plt.ylim(0, len(scale) - 1)
    # plt.xticks(rotation = 45)
    # plt.title(f'Average category score ({sample} emitter)')
    plt.savefig(f'./plots/avg-category-score-{sample.replace(" ", "_").lower()}')
    plt.show()

### Rankings

In [None]:
# Change the default x tick label size to 16 for the figures drawn afterwards.
plt.rc('xtick', labelsize=16) 

In [None]:
from pcgsepy.stats.plots import plot_rankings
from pcgsepy.stats.plots import plot_scores

# Emitter names as shown in the figures ('Contextual Bandit' is relabelled).
display_names = [name.replace('Contextual Bandit', 'Cont. Param.') for name in samples_name]

# NOTE(review): both helpers come from pcgsepy.stats.plots; what exactly they
# draw is not visible here — confirm there.
plot_rankings(
    samples=samples,
    labels=['1st place', '2nd place', '3rd place', '4th place'],
    names=display_names,
    title='',
    filename='./plots/emitters-rankings',
)

plot_scores(
    samples=samples,
    names=display_names,
    score_to_value={1: 4, 2: 3, 3: 2, 4: 1},
    title='',
    filename='./plots/emitters-score',
)

In [None]:
# Display the per-emitter arrays of comparator results.
samples

In [None]:
# For every emitter, print how often each of the values 1-4 occurs in its results.
for name, sample in zip(samples_name, samples):
    print(name)
    for place in (1, 2, 3, 4):
        occurrences = np.sum([1 if x == place else 0 for x in sample])
        print(f'#{place}: {occurrences}')

### Metrics

In [None]:
# Mean 'time_elapsed_emitter' per emitter, ordered like `samples_name`.
avg_time_elapsed_emitter = [
    np.mean(experiments[emitter]['time_elapsed_emitter'])
    for emitter in samples_name
]

# Bar chart whose y axis ends exactly at the tallest bar.
plt.bar(display_names, avg_time_elapsed_emitter)
plt.ylim(bottom=0, top=max(avg_time_elapsed_emitter))
plt.savefig('./plots/avg-time-elapsed')
plt.show()

In [None]:
# Mean 'n_interactions' per emitter, ordered like `samples_name`.
avg_n_interactions = [
    np.mean(experiments[emitter]['n_interactions'])
    for emitter in samples_name
]

# Bar chart whose y axis ends exactly at the tallest bar.
plt.bar(display_names, avg_n_interactions)
plt.ylim(bottom=0, top=max(avg_n_interactions))
plt.savefig('./plots/avg_n_interactions')
plt.show()

In [None]:
# Mean 'avg_complexity' per emitter, ordered like `samples_name`.
avg_complexity = [
    np.mean(experiments[emitter]['avg_complexity'])
    for emitter in samples_name
]

# Bar chart whose y axis ends exactly at the tallest bar.
plt.bar(display_names, avg_complexity)
plt.ylim(bottom=0, top=max(avg_complexity))
plt.savefig('./plots/avg_complexity')
plt.show()

In [None]:
# Mean number of feasible / infeasible solutions per emitter.
n_solutions_feas = [
    np.mean(experiments[emitter]['n_solutions_feas'])
    for emitter in samples_name
]
n_solutions_infeas = [
    np.mean(experiments[emitter]['n_solutions_infeas'])
    for emitter in samples_name
]

# Total per emitter: feasible mean plus infeasible mean.
n_solutions = [
    feasible + infeasible
    for feasible, infeasible in zip(n_solutions_feas, n_solutions_infeas)
]

# Bar chart whose y axis ends exactly at the tallest bar.
plt.bar(display_names, n_solutions)
plt.ylim(bottom=0, top=max(n_solutions))
plt.savefig('./plots/n_solutions')
plt.show()

## LaTeX tables

In [None]:
# Fixed opening of the LaTeX table (column layout and the two header rows).
table_header = """
\\begin{table}[!t]
        \\centering
        \\resizebox{.5\\textwidth}{!}{%
        \\begin{tabular}{|l|l|cccc|c|}
        \\hline
        \\multicolumn{1}{|c|}{\\multirow{2}{*}{\\textbf{Emitter}}} & \\multirow{2}{*}{Metric} & \\multicolumn{4}{c|}{Score} & \\multirow{2}{*}{Rank} \\\\
        \\multicolumn{1}{|c|}{} &  & Poor & Fair & Good & Excellent &  \\\\ \\hline
"""

# Filled with one rendered copy of `table_body_module` per emitter.
table_body = []

# Four-row template for one emitter. The upper-case tokens are placeholders:
# EMITTERNAME, and per category (SS, SE, RR, UF) one token per answer label
# plus a ...RANK token.
table_body_module = """
        \\multirow{4}{*}{EMITTERNAME} & Solution Satisfaction & SSPOOR & SSFAIR & SSGOOD & SSEXCELLENT & SSRANK \\\\
         & System Efficiency & SEPOOR & SEFAIR & SEGOOD & SEEXCELLENT & SERANK \\\\
         & Runtime Response & RRPOOR & RRFAIR & RRGOOD & RREXCELLENT & RRRANK \\\\
         & User fatigue & UFPOOR & UFFAIR & UFGOOD & UFEXCELLENT & UFRANK \\\\ \\hline
"""

# Closing of the table. Only the 'N' in 'N candidates' is a placeholder, so the
# replacement is anchored to that phrase instead of rewriting every capital N
# that the footer might contain.
table_footer = """
        \\end{tabular}%
        }
        \\caption{Questionnaire results obtained with N candidates using the different emitters.}
        \\label{tab:questionnaire-res}
    \\end{table}
""".replace('N candidates', f'{n_volunteers} candidates')

In [None]:
# Ranks are filled in later; start with one empty dict per emitter.
feedback_ranks = {sample: {} for sample in samples_name}

# Weighted total per emitter and category: sum over answers of
# (answer value * number of times it was given).
feedback_scores = {
    sample: {
        k: np.sum(np.multiply(list(user_feedback[sample][k].keys()),
                              list(user_feedback[sample][k].values())))
        for k in feedback_keys
    }
    for sample in samples_name
}

In [None]:
# Rank the emitters within each category: highest weighted total gets rank 1.
# Ties keep the `samples_name` order (stable sort), so tied emitters still
# receive consecutive, distinct ranks.
for k in feedback_keys:
    emitters = sorted(samples_name, key=lambda x: feedback_scores[x][k], reverse=True)
    for rank, sample in enumerate(emitters, start=1):
        feedback_ranks[sample][k] = rank

In [None]:
# Placeholder prefix used in `table_body_module` -> feedback category.
placeholder_prefixes = (
    ('SS', 'solution_satisfaction'),
    ('SE', 'system_efficiency'),
    ('RR', 'runtime_response'),
    ('UF', 'user_fatigue'),
)
# Placeholder suffix -> numeric answer whose count fills that cell.
placeholder_answers = (('POOR', 1), ('FAIR', 2), ('GOOD', 3), ('EXCELLENT', 4))

# Render the four-row template once per emitter.
for sample in samples_name:
    emitter_table = table_body_module.replace('EMITTERNAME', sample)

    for prefix, category in placeholder_prefixes:
        for suffix, answer in placeholder_answers:
            emitter_table = emitter_table.replace(
                prefix + suffix, str(user_feedback[sample][category][answer]))
        emitter_table = emitter_table.replace(
            prefix + 'RANK', str(feedback_ranks[sample][category]))

    table_body.append(emitter_table)

In [None]:
# Collapse the per-emitter rows into one string (this rebinds `table_body`
# from a list to a str) and print header, body and footer one after another.
table_body = '\n'.join(table_body)
print(table_header, table_body, table_footer, sep='\n')

## Tests

### Shapiro-Wilk

In [None]:
from pcgsepy.stats.tests import shapiro_wilk

print('## SHAPIRO-WILK TEST ##')

# NOTE(review): assumes shapiro_wilk yields one (statistic, p-value) pair per
# sample, in the order of `samples` — confirm in pcgsepy.stats.tests.
shapiro_test = shapiro_wilk(samples=samples)

for name, (stat, pvalue) in zip(samples_name, shapiro_test):
    print(f'Result score for {name}:\n\tStatistic: {stat}\n\tp-value: {pvalue}')

### One-way ANOVA

In [None]:
import itertools
from pcgsepy.stats.tests import anova, THRESHOLD_PVALUE

print('## ONE-WAY ANOVA TEST ##')

# NOTE(review): assumes anova returns the overall result first, followed by one
# result per emitter pair in itertools.combinations order — confirm in
# pcgsepy.stats.tests.
anova_test = anova(samples=samples)

overall_stat, overall_pvalue = anova_test[0]

print(f'Overall score:\n\tStatistic: {overall_stat}\n\tp-value: {overall_pvalue}')

# Pairwise results are only reported when the overall test is below the threshold.
if overall_pvalue < THRESHOLD_PVALUE:
    emitter_pairs = itertools.combinations(samples_name, 2)
    for (stat, pvalue), pair in zip(anova_test[1:], emitter_pairs):
        print(f'Result score for {" x ".join(pair)}:\n\tStatistic: {stat}\n\tp-value: {pvalue}')

### Kruskal-Wallis

In [None]:
import itertools
# THRESHOLD_PVALUE is imported here as well, so this cell no longer depends on
# the ANOVA cell having been run first.
from pcgsepy.stats.tests import THRESHOLD_PVALUE, kruskal_wallis

print('## KRUSKAL-WALLIS TEST ##')

# NOTE(review): assumes kruskal_wallis returns the overall result first,
# followed by one result per emitter pair in itertools.combinations order —
# confirm in pcgsepy.stats.tests.
kruskal_wallis_test = kruskal_wallis(samples=samples)

overall_stat, overall_pvalue = kruskal_wallis_test[0]

print('Overall score:')
print(f'\tStatistic: {overall_stat}')
print(f'\tp-value: {overall_pvalue}')

# Pairwise results are only reported when the overall test is below the threshold.
if overall_pvalue < THRESHOLD_PVALUE:
    for (stat, pvalue), name in zip(kruskal_wallis_test[1:], itertools.combinations(samples_name, 2)):
        print(f'Result score for {" x ".join(name)}:')
        print(f'\tStatistic: {stat}')
        print(f'\tp-value: {pvalue}')

### T-test

In [None]:
from scipy.stats import ttest_ind

# Elsewhere in this notebook a result of 1 is treated as first place (it is
# counted as '#1' and mapped to the highest value, 4, for the score plot), so
# first places are the entries equal to 1, not 4.
FIRST_PLACE = 1

# Per emitter: 1 where the volunteer put that emitter first, 0 otherwise.
first_places = [
    [1 if elem == FIRST_PLACE else 0 for elem in sample]
    for sample in samples
]

# Compare how often 'Random' and 'Contextual Bandit' were placed first;
# `samples` is ordered like `samples_name`, so look the positions up by name.
random_idx = samples_name.index('Random')
bandit_idx = samples_name.index('Contextual Bandit')

ttest_ind(first_places[random_idx], first_places[bandit_idx])