In [2]:
# Enable IPython's autoreload extension in mode 2 for this session.
%load_ext autoreload
%autoreload 2

In [119]:
import itertools
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import natsort
import pandas as pd
from matplotlib.ticker import MaxNLocator
from pandas import DataFrame

from gps_accuracy.gps_accuracy import GpxEvaluator
from gps_accuracy.gps_accuracy import GpxResult

def parse_csv(path_to_csv: Path) -> DataFrame:
    """Load the CSV file at ``path_to_csv`` and return it as a DataFrame.

    The file is opened in text mode with the platform default encoding and
    handed to ``pandas.read_csv`` with default options.
    """
    with open(path_to_csv) as handle:
        frame = pd.read_csv(handle)
    return frame

class InputType(Enum):
    """Kind of input device used to record a track.

    Member names are looked up by name from recorded-track file names
    (``InputType[...]``), so they must match the file naming exactly.
    """
    Touch = 1
    # "Tangible - ..." questionnaire answers are mapped to this member.
    TUI = 2
    
class Metaphor(Enum):
    """Interaction metaphor used together with an ``InputType``.

    Member names are looked up by name from recorded-track file names
    (``Metaphor[...]``), so they must match the file naming exactly.
    """
    Gesture = 1
    Joystick = 2
    Car =  3

class InputCombination(Enum):
    """The four supported pairings of an ``InputType`` with a ``Metaphor``."""
    TouchGesture = 1
    TouchJoystick = 2
    TuiJoystick = 3
    TuiCar = 4

    def __repr__(self):
        # Represent a member by its bare name, e.g. ``TouchGesture``.
        return self.name

    @staticmethod
    def build(input_type: InputType, metaphor: Metaphor):
        """Return the member for ``input_type`` + ``metaphor``.

        Only Touch/Gesture, Touch/Joystick, TUI/Joystick and TUI/Car are
        supported; any other pairing yields ``None``.
        """
        # Kept inside the method: a class-level table would be turned into
        # an enum member by ``Enum``.
        pairings = (
            (InputType.Touch, Metaphor.Gesture, InputCombination.TouchGesture),
            (InputType.Touch, Metaphor.Joystick, InputCombination.TouchJoystick),
            (InputType.TUI, Metaphor.Joystick, InputCombination.TuiJoystick),
            (InputType.TUI, Metaphor.Car, InputCombination.TuiCar),
        )
        for known_type, known_metaphor, combination in pairings:
            if input_type == known_type and metaphor == known_metaphor:
                return combination
        return None
            
class RankCategory(Enum):
    """Questionnaire categories in which participants rated the inputs.

    The member names double as column names of the questionnaire data frame.
    """
    Fastest = 1
    MostAccurate = 2
    Ranking = 3

class InputFilter(Enum):
    """Grouping level for track results.

    The member names match the ``InputAll`` / ``InputCategorized`` columns of
    the track data frame and are used as the pivot index when printing results.
    """
    InputAll = 1
    InputCategorized = 2
    
class ResultParam(Enum):
    """Measured quantities of an evaluated track.

    The member names are used as column names of the track data frame.
    """
    Time = 1
    MeanError = 2
    MedianError = 3
    PercentileError = 4
    Distance = 5
    DeltaDistance = 6
    ZoomMin = 7
    ZoomMax = 8
    ZoomMean = 9
    ZoomChange = 10

@dataclass
class UsabilityPerType:
    """Usability answers a participant gave for one input combination."""
    # The input combination the answers refer to.
    input_combination: InputCombination
    # Maps the question text (the part before any "[") to the given answer.
    answers: Dict[str, str]
    
def parse_input_answer(answer: str) -> Optional[InputCombination]:
    """Translate a questionnaire answer label into an ``InputCombination``.

    Recognised labels are "Touch - Geste" / "Touch - Gesten",
    "Touch - Joystick", "Tangible - Joystick" and "Tangible - Auto".

    Returns:
        The matching ``InputCombination`` member, or ``None`` when the label
        is not one of the recognised strings.
    """
    # Each recognised label set with the combination it stands for.
    known_answers = (
        (("Touch - Geste", "Touch - Gesten"), InputCombination.TouchGesture),
        (("Touch - Joystick",), InputCombination.TouchJoystick),
        (("Tangible - Joystick",), InputCombination.TuiJoystick),
        (("Tangible - Auto",), InputCombination.TuiCar),
    )
    for labels, combination in known_answers:
        if answer in labels:
            return combination
    return None
        
@dataclass
class QuestionnaireResult:
    """Parsed answers of one questionnaire participant (one row of the CSV).

    All answers are read by fixed column position.
    NOTE(review): the positions presumably mirror the column layout of the
    questionnaire export -- verify against the CSV when the form changes.
    """
    user_id: int
    age: int
    # Input combinations in the order the participant worked through them.
    sequence: List[InputCombination]
    usage_frequency: Dict[str, str]
    usabilities: List[UsabilityPerType]
    # Keyed "Track 1".."Track 3".
    fastest: Dict[str, InputCombination]
    most_accurate: Dict[str, InputCombination]
    # Keyed "Platz 1".."Platz 4".
    ranking: Dict[str, InputCombination]

    def __init__(self, answers):
        """Extract all fields from ``answers``, a positionally indexed row (pandas Series)."""
        self.questions_per_input = 8
        # Position of the first usability question of each input combination.
        self.start_indices = {
            "TouchGesture": 10,
            "TouchJoystick": 19,
            "TuiCar": 28,
            "TuiJoystick": 37,
        }
        self.user_id = answers.iloc[1]
        self.age = answers.iloc[2]
        self.sequence = self._get_sequence(answers)
        self.usage_frequency = self._get_usage_frequency(answers)
        self.usabilities = self._get_usabilities(answers)
        self.fastest = self._get_fastest(answers)
        self.most_accurate = self._get_most_accurate(answers)
        self.ranking = self._get_ranking(answers)

    def _get_sequence(self, answers):
        """Follow the chain of "next input" answers until "Fertig" is reached.

        Raises:
            ValueError: if an answer in the chain is not a recognised input label.
        """
        sequence = []
        next_input = answers.iloc[9]
        while next_input != "Fertig":
            parsed_answer = parse_input_answer(next_input)
            if parsed_answer is None:
                # Fail with a clear message instead of an AttributeError on None.name.
                raise ValueError(f"Unrecognized input answer in sequence: {next_input!r}")
            sequence.append(parsed_answer)
            # The "next input" answer sits right after this input's question block.
            next_index = self.start_indices[parsed_answer.name] + self.questions_per_input
            next_input = answers.iloc[next_index]
        return sequence

    def _get_usage_frequency(self, answers) -> Dict[str, str]:
        """Return the usage-frequency answers keyed by device/technology name."""
        return {
            "Smartphone": answers.iloc[3],
            "Tablet": answers.iloc[4],
            "Multitouch-Tisch": answers.iloc[5],
            "Tangibles": answers.iloc[6],
            "Videospiele": answers.iloc[7],
        }

    def _get_usabilities(self, answers) -> List[UsabilityPerType]:
        """Collect the usability answers for every input combination with a known start index."""
        usabilities = []
        for input_combination in InputCombination:
            start_index = self.start_indices.get(input_combination.name)
            if start_index is None:
                continue

            _answers = {}
            for index in range(start_index, start_index + self.questions_per_input):
                question = answers.index[index]
                # Drop the "[...]" suffix so the same question shares one key across inputs.
                _answers[question.split("[")[0].strip()] = answers[question]
            usabilities.append(UsabilityPerType(input_combination, _answers))
        return usabilities

    def _parse_choices(self, answers, first_index: int, count: int, label: str):
        """Parse ``count`` consecutive answers into ``{"<label> 1": ..., "<label> 2": ...}``."""
        return {
            f"{label} {position}": parse_input_answer(answers.iloc[index])
            for position, index in enumerate(range(first_index, first_index + count), start=1)
        }

    def _get_fastest(self, answers):
        """Return the input judged fastest for each of the three tracks."""
        return self._parse_choices(answers, 46, 3, "Track")

    def _get_most_accurate(self, answers):
        """Return the input judged most accurate for each of the three tracks."""
        return self._parse_choices(answers, 49, 3, "Track")

    def _get_ranking(self, answers):
        """Return the overall ranking of the four inputs, keyed by place."""
        return self._parse_choices(answers, 52, 4, "Platz")
    
@dataclass
class QuestionnaireRepository:
    """Loads the questionnaire CSV and exposes the parsed results.

    ``results`` holds one ``QuestionnaireResult`` per CSV row and
    ``data_frame`` a summary table built from those results.
    """
    results: List[QuestionnaireResult]

    def __init__(self, path_to_csv: Path = Path('questionnaire_results/Fragebogen Masterarbeit.csv')):
        """Parse the questionnaire at ``path_to_csv`` (defaults to the study's export file)."""
        raw_frame: DataFrame = parse_csv(path_to_csv)
        self.results = self.parse_data_frame(raw_frame)
        data = {
            'UserId': [result.user_id for result in self.results],
            'Age': [result.age for result in self.results],
            'Sequence': [result.sequence for result in self.results],
            RankCategory.Fastest.name: [result.fastest for result in self.results],
            RankCategory.MostAccurate.name: [result.most_accurate for result in self.results],
            RankCategory.Ranking.name: [result.ranking for result in self.results],
            'UsageFrequency': [result.usage_frequency for result in self.results],
        }
        self.data_frame: DataFrame = pd.DataFrame(data)

    def parse_data_frame(self, data_frame: DataFrame) -> List[QuestionnaireResult]:
        """Turn every row of the raw questionnaire frame into a ``QuestionnaireResult``."""
        return [QuestionnaireResult(row) for _, row in data_frame.iterrows()]

    def get_by_user(self, user_id: int) -> QuestionnaireResult:
        """Return the first result whose ``user_id`` equals ``user_id``.

        Raises:
            IndexError: if no result exists for that user.
        """
        for result in self.results:
            if result.user_id == user_id:
                return result
        raise IndexError(f"No questionnaire result for user id {user_id!r}")
    
    
class QuestionnairePlotter:
    """Prints and plots the parsed questionnaire results."""

    def __init__(self):
        self.repo = QuestionnaireRepository()

    def summary(self):
        """Return the questionnaire summary table as a pandas Styler."""
        return self.repo.data_frame.style.format()

    def print_sequence(self):
        """Print, per participant, the order in which the inputs were used."""
        # The results live on the repository; the plotter has no ``results`` attribute.
        for result in self.repo.results:
            print(f"UserId: {result.user_id} -> {result.sequence}")

    def plot_rankings(self, category: "RankCategory"):
        """Draw one bar chart per key of ``category`` showing how often each input was chosen.

        Does nothing when the category column holds no entries.
        """
        key_value_counts = defaultdict(lambda: defaultdict(int))
        for d in self.repo.data_frame[category.name]:
            for key, value in d.items():
                key_value_counts[key][value] += 1

        num_keys = len(key_value_counts)
        if num_keys == 0:
            return
        # squeeze=False always yields a 2-D array of axes, so a single
        # subplot is handled the same way as several.
        fig, axes = plt.subplots(nrows=num_keys, figsize=(8, 4 * num_keys), squeeze=False)
        for ax, (key, value_counts) in zip(axes[:, 0], key_value_counts.items()):
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))
            x_values = [e.name for e in value_counts]
            y_values = list(value_counts.values())
            ax.bar(x_values, y_values)
            ax.set_title(f"{key}")
            ax.set_ylabel("Frequency")


################################################################


@dataclass
class ReferenceTrack:
    """A reference track file; its id is the integer file name without extension."""
    track_id: int
    file: Path

    def __init__(self, file_path: Path):
        # e.g. ".../3.gpx" -> track id 3; a non-numeric stem raises ValueError.
        parsed_id = int(file_path.stem)
        self.track_id = parsed_id
        self.file = file_path

@dataclass
class RecordedTrack:
    """A recorded track file plus the metadata encoded in its file name.

    The file stem is split on "_" and read as
    ``<user_id>_<track_id>_<InputType name>_<Metaphor name>``.
    """
    track_id: int
    user_id: int
    input_type: InputType
    metaphor: Metaphor
    file: Path
    # None until evaluate() has been called.
    result: Optional[GpxResult]

    def __init__(self, file_path: Path):
        """Parse the metadata out of ``file_path``'s stem.

        Raises:
            ValueError: if the user or track id part is not an integer.
            IndexError: if the stem has fewer than four "_"-separated parts.
            KeyError: if the input type or metaphor name is unknown.
        """
        file_name = file_path.stem
        parts = file_name.split("_")
        self.user_id: int = int(parts[0])
        self.track_id: int = int(parts[1])
        self.input_type: InputType = InputType[parts[2]]
        self.metaphor: Metaphor = Metaphor[parts[3]]
        self.file: Path = file_path
        # Set up front so the dataclass-generated __repr__/__eq__ work
        # on a track that has not been evaluated yet.
        self.result = None

    def evaluate(self, reference_track: ReferenceTrack):
        """Evaluate this recording against ``reference_track`` and store the outcome in ``result``."""
        evaluator = GpxEvaluator(reference_track.file, self.file)
        self.result = evaluator.evaluate()
              
@dataclass
class TrackRepository:
    """Loads, evaluates and tabulates all recorded tracks.

    Reference tracks are read from ``reference_tracks/`` and recordings from
    ``recorded_tracks/`` (both relative to the working directory). Every
    recording is evaluated against the reference track with the same id, and
    the results are collected in ``data_frame``.
    """
    reference_tracks: dict
    recorded_tracks: List[RecordedTrack]

    def __init__(self, user_ids: List[int] = None):
        """Build the repository.

        ``user_ids`` limits ``data_frame`` to those users; ``None`` or an
        empty list keeps every recorded track.
        """
        # Reference tracks, keyed by track id.
        self.reference_tracks = {}
        for entry in Path("reference_tracks").iterdir():
            if entry.is_file():
                reference = ReferenceTrack(entry)
                self.reference_tracks[reference.track_id] = reference

        # Recorded tracks in natural file-name order.
        recorded_files = [entry for entry in Path("recorded_tracks").iterdir() if entry.is_file()]
        self.recorded_track_pathes = natsort.natsorted(recorded_files)
        self.recorded_tracks = [RecordedTrack(path) for path in self.recorded_track_pathes]
        self._evaluate()

        self.question_repo = QuestionnaireRepository()

        if user_ids:
            selected = list(itertools.chain.from_iterable(self.get_by_user(uid) for uid in user_ids))
        else:
            selected = self.get_all()
        self.data_frame = self._to_data_frame(selected)

    def _to_data_frame(self, tracks):
        """Tabulate ``tracks``: one row per track, one column per identifier / result value."""
        columns = {
            'UserId': [track.user_id for track in tracks],
            'Track': [track.track_id for track in tracks],
            'InputAll': [f"{track.input_type.name}_{track.metaphor.name}" for track in tracks],
            'InputCategorized': [track.input_type.name for track in tracks],
        }
        # Column name (ResultParam) -> attribute of the track's result object.
        result_attributes = (
            (ResultParam.Time, "time"),
            (ResultParam.MeanError, "error_mean"),
            (ResultParam.MedianError, "error_median"),
            (ResultParam.PercentileError, "error_percentile"),
            (ResultParam.Distance, "distance"),
            (ResultParam.DeltaDistance, "delta_distance"),
            (ResultParam.ZoomMin, "zoom_min"),
            (ResultParam.ZoomMax, "zoom_max"),
            (ResultParam.ZoomMean, "zoom_mean"),
            (ResultParam.ZoomChange, "zoom_change"),
        )
        for param, attribute in result_attributes:
            columns[param.name] = [getattr(track.result, attribute) for track in tracks]
        return pd.DataFrame(columns)

    def _evaluate(self):
        """Evaluate every recording against the reference track with the same id."""
        for recorded in self.recorded_tracks:
            recorded.evaluate(self.reference_tracks[recorded.track_id])

    def get_recorded_pathes(self) -> List[Path]:
        """Return the naturally sorted paths of all recorded track files."""
        return self.recorded_track_pathes

    def get_by_track(self, track_id: int) -> List[RecordedTrack]:
        """Return all recordings of the track with id ``track_id``."""
        return [recorded for recorded in self.recorded_tracks if recorded.track_id == track_id]

    def get_by_user(self, user_id: int) -> List[RecordedTrack]:
        """Return all recordings made by user ``user_id``."""
        return [recorded for recorded in self.recorded_tracks if recorded.user_id == user_id]

    def get_by_input_type(self, input_type: InputType) -> List[RecordedTrack]:
        """Return all recordings made with ``input_type``."""
        return [recorded for recorded in self.recorded_tracks if recorded.input_type == input_type]

    def get_by_metaphor(self, metaphor: Metaphor) -> List[RecordedTrack]:
        """Return all recordings made with ``metaphor``."""
        return [recorded for recorded in self.recorded_tracks if recorded.metaphor == metaphor]

    def get_all(self) -> List[RecordedTrack]:
        """Return every recorded track."""
        return self.recorded_tracks

    def get_min_by_input(self, param: ResultParam):
        """Return, per user and track, the row (UserId, Track, InputAll) with the smallest ``param``."""
        per_user_and_track = self.data_frame.groupby(["UserId", "Track"])
        lowest_rows = per_user_and_track[param.name].idxmin()
        lowest = self.data_frame.loc[lowest_rows]
        return lowest[["UserId","Track", "InputAll"]]

    def get_max_by_input(self, param: ResultParam):
        """Return, per user (across all tracks), the row (UserId, InputAll) with the largest ``param``."""
        per_user = self.data_frame.groupby("UserId")
        highest_rows = per_user[param.name].idxmax()
        highest = self.data_frame.loc[highest_rows]
        return highest[["UserId", "InputAll"]]
    

class TrackResultPlotter:
    """Tabulates and plots evaluated track results and compares them with the questionnaire."""

    def __init__(self, user_ids: Optional[List[int]] = None):
        """Load track results (optionally restricted to ``user_ids``) and the questionnaire."""
        self.track_repo = TrackRepository(user_ids)
        self.question_repo = QuestionnaireRepository()

    def summary(self):
        """Return the full track result table as a Styler with two decimals."""
        return self.track_repo.data_frame.style.format(precision=2)

    def print_result(self, result_param: ResultParam, input_filter: InputFilter, aggfunc: str, min: float = None, max: float = None, plot=False, color=False):
        """Aggregate ``result_param`` per input (rows) and track (columns).

        Args:
            result_param: the result column to aggregate.
            input_filter: the input column used as the row index.
            aggfunc: name of the pandas aggregation function, e.g. ``'mean'``.
            min, max: lower / upper bound of the colour gradient (only with ``color``).
            plot: additionally draw the table as a bar chart.
            color: shade the cells with a red background gradient.

        Returns:
            A pandas Styler of the pivot table, formatted with two decimals.
        """
        table = self.track_repo.data_frame.pivot_table(
            index=input_filter.name,
            columns="Track",
            values=result_param.name,
            aggfunc=[aggfunc],
            sort=False,
        )
        if plot:
            # Draw into an explicit axes; a bare plt.figure() followed by
            # table.plot.bar() leaves an empty extra figure behind.
            _, ax = plt.subplots()
            table.plot.bar(ax=ax)
        style = table.style
        if color:
            style = style.background_gradient(axis=0, cmap='Reds', vmin=min, vmax=max)
        return style.format(precision=2)

    def _best_input_lookup(self, param: ResultParam) -> Dict[Tuple[int, int], str]:
        """Map ``(user_id, track_id)`` to the input with the smallest ``param`` value."""
        best = self.track_repo.get_min_by_input(param)
        return {
            (user_id, track_id): input_name
            for user_id, track_id, input_name in zip(
                best["UserId"].tolist(), best["Track"].tolist(), best["InputAll"].tolist()
            )
        }

    def compare_with_questionnaire(self):
        """Tabulate each participant's own estimates next to the measured best input.

        One row per questionnaire participant who also has recorded tracks. For
        tracks 1-3 the table shows the input the participant believed to be
        fastest / most accurate next to the input with the lowest measured time
        / mean error. Measured values are looked up by user id and track id, so
        rows stay aligned regardless of row order; a missing measurement is
        shown as ``None``.
        """
        user_ids = self.track_repo.data_frame["UserId"].unique().tolist()
        fastest_measured = self._best_input_lookup(ResultParam.Time)
        most_accurate_measured = self._best_input_lookup(ResultParam.MeanError)
        results = [result for result in self.question_repo.results if result.user_id in user_ids]

        comparison = {
            "UserId": [result.user_id for result in results],
            # A plain list, so the cell does not render as "dict_values([...])".
            "Ranking": [list(result.ranking.values()) for result in results],
        }
        for track_id in (1, 2, 3):
            label = f"Track {track_id}"
            comparison[f"EstimatedFastestTrack{track_id}"] = [
                result.fastest[label].name for result in results
            ]
            comparison[f"ActualFastestTrack{track_id}"] = [
                fastest_measured.get((result.user_id, track_id)) for result in results
            ]
            comparison[f"EstimatedMostAccurateTrack{track_id}"] = [
                result.most_accurate[label].name for result in results
            ]
            comparison[f"ActualMostAccurateTrack{track_id}"] = [
                most_accurate_measured.get((result.user_id, track_id)) for result in results
            ]
        return pd.DataFrame(comparison).style.format()


    

In [120]:
# Build the plotter without a user filter, so every recorded track is included.
track_plotter = TrackResultPlotter()
# track_plotter.summary()


In [121]:
# Show each participant's questionnaire estimates next to the measured best input per track.
track_plotter.compare_with_questionnaire()

Unnamed: 0,UserId,Ranking,EstimatedFastestTrack1,ActualFastestTrack1,EstimatedMostAccurateTrack1,ActualMostAccurateTrack1,EstimatedFastestTrack2,ActualFastestTrack2,EstimatedMostAccurateTrack2,ActualMostAccurateTrack2,EstimatedFastestTrack3,ActualFastestTrack3,EstimatedMostAccurateTrack3,ActualMostAccurateTrack3
0,1,"dict_values([TuiJoystick, TuiCar, TouchGesture, TouchJoystick])",TuiJoystick,TUI_Joystick,TuiCar,Touch_Gesture,TuiCar,Touch_Joystick,TuiJoystick,Touch_Gesture,TuiCar,Touch_Joystick,TouchGesture,Touch_Gesture
1,2,"dict_values([TuiJoystick, TuiCar, TouchJoystick, TouchGesture])",TuiJoystick,TUI_Joystick,TuiJoystick,TUI_Car,TouchJoystick,TUI_Joystick,TuiJoystick,Touch_Gesture,TuiCar,Touch_Joystick,TuiCar,TUI_Car
2,3,"dict_values([TuiCar, TouchGesture, TuiJoystick, TouchJoystick])",TuiJoystick,TUI_Joystick,TuiCar,Touch_Gesture,TuiCar,Touch_Joystick,TuiCar,TUI_Car,TuiCar,Touch_Gesture,TuiCar,Touch_Gesture
3,4,"dict_values([TuiCar, TuiJoystick, TouchJoystick, TouchGesture])",TuiCar,Touch_Joystick,TuiCar,TUI_Car,TuiCar,Touch_Joystick,TuiCar,Touch_Gesture,TuiCar,Touch_Gesture,TuiCar,TUI_Car
4,5,"dict_values([TuiCar, TouchJoystick, TuiJoystick, TouchGesture])",TouchJoystick,Touch_Joystick,TuiCar,Touch_Gesture,TouchJoystick,Touch_Joystick,TuiJoystick,TUI_Car,TouchJoystick,Touch_Joystick,TuiJoystick,TUI_Car
5,6,"dict_values([TuiJoystick, TuiCar, TouchGesture, TouchJoystick])",TuiJoystick,Touch_Gesture,TuiJoystick,Touch_Gesture,TuiJoystick,Touch_Joystick,TuiCar,Touch_Gesture,TuiCar,TUI_Joystick,TuiJoystick,Touch_Gesture
6,7,"dict_values([TouchGesture, TuiCar, TuiJoystick, TouchJoystick])",TuiJoystick,Touch_Joystick,TouchGesture,Touch_Gesture,TuiJoystick,Touch_Joystick,TouchGesture,TUI_Car,TuiCar,Touch_Gesture,TuiCar,Touch_Gesture
7,8,"dict_values([TuiJoystick, TouchJoystick, TuiCar, TouchGesture])",TuiJoystick,TUI_Car,TouchGesture,Touch_Gesture,TuiJoystick,TUI_Car,TouchJoystick,Touch_Gesture,TuiCar,TUI_Car,TuiJoystick,Touch_Gesture
8,9,"dict_values([TuiCar, TouchJoystick, TuiJoystick, TouchGesture])",TouchJoystick,TUI_Joystick,TuiCar,TUI_Car,TouchJoystick,TUI_Joystick,TuiCar,Touch_Joystick,TouchJoystick,Touch_Gesture,TouchGesture,Touch_Joystick
9,11,"dict_values([TuiJoystick, TuiCar, TouchJoystick, TouchGesture])",TouchJoystick,Touch_Joystick,TuiJoystick,TUI_Joystick,TuiJoystick,Touch_Joystick,TuiJoystick,Touch_Joystick,TouchJoystick,Touch_Joystick,TuiJoystick,TUI_Car


In [None]:
# Average MeanError per input combination (rows) and track (columns), colour-shaded, no bar chart.
track_plotter.print_result(ResultParam.MeanError, InputFilter.InputAll, aggfunc='mean', color=True, plot=False)

Unnamed: 0_level_0,mean,mean,mean
Track,1,2,3
InputAll,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
TUI_Car,1.16,1.01,1.24
TUI_Joystick,1.85,1.27,1.49
Touch_Gesture,0.76,0.84,0.86
Touch_Joystick,1.09,1.13,1.49


In [None]:
# Average MedianError per input combination (rows) and track (columns), colour-shaded, no bar chart.
track_plotter.print_result(ResultParam.MedianError, InputFilter.InputAll, aggfunc='mean', color=True, plot=False)

Unnamed: 0_level_0,mean,mean,mean
Track,1,2,3
InputAll,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
TUI_Car,0.84,0.77,0.85
TUI_Joystick,1.05,0.86,0.91
Touch_Gesture,0.62,0.69,0.69
Touch_Joystick,0.55,0.77,1.04


In [None]:
# Average DeltaDistance per input combination (rows) and track (columns), colour-shaded, no bar chart.
track_plotter.print_result(ResultParam.DeltaDistance, InputFilter.InputAll, aggfunc='mean', color=True, plot=False)

Unnamed: 0_level_0,mean,mean,mean
Track,1,2,3
InputAll,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
TUI_Car,3.28,3.69,-16.83
TUI_Joystick,32.7,23.09,711.86
Touch_Gesture,33.74,42.98,15.68
Touch_Joystick,24.27,28.66,21.88


In [None]:
# Average Time per input combination (rows) and track (columns), colour-shaded, no bar chart.
track_plotter.print_result(ResultParam.Time, InputFilter.InputAll, aggfunc='mean', color=True, plot=False)

Unnamed: 0_level_0,mean,mean,mean
Track,1,2,3
InputAll,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
TUI_Car,103.64,120.18,94.2
TUI_Joystick,88.45,100.73,85.09
Touch_Gesture,110.55,120.73,59.91
Touch_Joystick,81.55,84.82,73.09
