# Maze analysis

1. Place the RAW CSV file in the same folder as this notebook and rename it to "maze_data_raw.csv"
2. Run all the code cells of this notebook (Commands -> Run All Cells)
3. You will find a newly generated "maze_analysis.csv" file in the folder.

## Constants

In [14]:
import csv
import math
import pandas as pd

In [15]:
"""monsters sets"""

# Vocabulary of feature values. MonsterQuestion decides whether a question is
# about a shape, a color or a pattern by checking which of these three lists
# contains the question text.
SHAPES = ["square", "circle", "triangle", "house", "cloud", "heart", "octagon", "star"]
COLORS = ["blue", "green", "red", "yellow", "pink", "purple", "white", "orange"]
PATTERNS = ["solid", "dots", "stripes", "hearts", "crosses", "ticks", "diamonds", "checked"]

# The monster sets a game can be played with; every set holds 8 monsters.
# each Monster is [shape, color, pattern]
# The list is indexed with a zero-based set id: RecordAnalyzer subtracts 1 from
# the "cardgame_set_id" column to get the index, and always uses index 0 for
# the maze.
MONSTERS_SETS = [
    # index 0 (cardgame_set_id 1) - also the set used for the maze
    [
        ["circle", "green", "crosses"],
        ["circle", "green", "checked"],
        ["square", "blue", "solid"],
        ["square", "blue", "ticks"],
        ["star", "blue", "diamonds"],
        ["star", "blue", "stripes"],
        ["triangle", "green", "hearts"],
        ["triangle", "green", "dots"]
    ],
    # index 1 (cardgame_set_id 2)
    [
        ["heart", "blue", "dots"],
        ["house", "blue", "dots"],
        ["circle", "blue", "solid"],
        ["cloud", "blue", "solid"],
        ["square", "green", "hearts"],
        ["triangle", "green", "hearts"],
        ["octagon", "green", "stripes"],
        ["star", "green", "stripes"]
    ],
    # index 2 (cardgame_set_id 3)
    [
        ["circle", "blue", "hearts"],
        ["circle", "blue", "checked"],
        ["circle", "green", "crosses"],
        ["circle", "green", "ticks"],
        ["square", "red", "stripes"],
        ["square", "red", "diamonds"],
        ["square", "yellow", "solid"],
        ["square", "yellow", "dots"]
    ],
    # index 3 (cardgame_set_id 4)
    [
        ["square", "white", "dots"],
        ["square", "pink", "dots"],
        ["square", "blue", "solid"],
        ["square", "yellow", "solid"],
        ["circle", "orange", "hearts"],
        ["circle", "purple", "hearts"],
        ["circle", "red", "stripes"],
        ["circle", "green", "stripes"]
    ],
    # index 4 (cardgame_set_id 5)
    [
        ["cloud", "blue", "dots"],
        ["octagon", "blue", "dots"],
        ["star", "green", "dots"],
        ["triangle", "green", "dots"],
        ["circle", "red", "solid"],
        ["house", "red", "solid"],
        ["square", "yellow", "solid"],
        ["heart", "yellow", "solid"]
    ],
    # index 5 (cardgame_set_id 6)
    [
        ["circle", "pink", "solid"],
        ["circle", "green", "solid"],
        ["square", "orange", "solid"],
        ["square", "blue", "solid"],
        ["triangle", "white", "dots"],
        ["triangle", "purple", "dots"],
        ["star", "red", "dots"],
        ["star", "yellow", "dots"]
    ]
]

# Maps the question id stored in the "maze_q_<n>" columns to the feature value
# the question asks about. In this table ids 1x are colors, 2x are shapes and
# 3x are patterns. RecordAnalyzer stops reading a row's maze questions at the
# first id that is not listed here.
MAZE_QUESTIONS = {
    "11": "blue",
    "12": "green",
    "21": "square",
    "22": "star",
    "23": "circle",
    "24": "triangle",
    "31": "solid",
    "32": "ticks",
    "33": "diamonds",
    "34": "stripes",
    "35": "crosses",
    "36": "checked",
    "37": "hearts",
    "38": "dots"
}

# Maps the id stored in the "maze_target" column to the target monster, given
# as [shape, color, pattern]. All eight targets are the monsters of
# MONSTERS_SETS[0], the set the maze analysis is run against.
MAZE_TARGETS = {
    "31": ["square", "blue", "solid"],
    "32": ["square", "blue", "ticks"],
    "33": ["star", "blue", "diamonds"],
    "34": ["star", "blue", "stripes"],
    "35": ["circle", "green", "crosses"],
    "36": ["circle", "green", "checked"],
    "37": ["triangle", "green", "hearts"],
    "38": ["triangle", "green", "dots"],

    # replica for differently coded files: the same eight targets keyed
    # "1".."8" instead of "31".."38"
    "1": ["square", "blue", "solid"],
    "2": ["square", "blue", "ticks"],
    "3": ["star", "blue", "diamonds"],
    "4": ["star", "blue", "stripes"],
    "5": ["circle", "green", "crosses"],
    "6": ["circle", "green", "checked"],
    "7": ["triangle", "green", "hearts"],
    "8": ["triangle", "green", "dots"]
}

## Models

In [16]:
"""models"""

class Monster:
    """A monster described by three features: shape, color and pattern.

    ``is_target`` marks the monster that has to be identified in a game. It
    is not part of the monster's identity: two monsters are equal as soon as
    their three features are equal.
    """

    def __init__(self, shape: str, color: str, pattern: str, is_target: bool = False):
        self.shape, self.color, self.pattern = shape, color, pattern
        self.is_target = is_target

    def has_shape(self, shape: str):
        """Return True when the monster's shape is ``shape``."""
        return self.shape == shape

    def has_color(self, color: str):
        """Return True when the monster's color is ``color``."""
        return self.color == color

    def has_pattern(self, pattern: str):
        """Return True when the monster's pattern is ``pattern``."""
        return self.pattern == pattern

    def set_as_target(self):
        """Flag this monster as the target of the game."""
        self.is_target = True

    def __eq__(self, other):
        # Anything that is not a Monster is simply "not equal".
        if not isinstance(other, Monster):
            return False
        return (
            self.has_shape(other.shape)
            and self.has_color(other.color)
            and self.has_pattern(other.pattern)
        )

    def __repr__(self):
        return f"<Monster shape:{self.shape} color:{self.color} pattern:{self.pattern}>"

    def __str__(self):
        return repr(self)


class MonsterQuestion:
    """A yes/no question about one feature value, e.g. "is it blue?".

    Attributes:
        feature: "shape", "color" or "pattern" depending on which of the
            SHAPES / COLORS / PATTERNS lists contains the question text;
            None when the text is in none of them.
        value: the feature value asked about (the question text itself).
        infogain: information gain of the question, set through set_infogain.
        y_split: number of monsters answering "yes", set through set_infogain.
        n_split: number of monsters answering "no", set through set_infogain.
    """

    def __init__(self, question_text: str):
        if question_text in SHAPES:
            self.feature = "shape"
        elif question_text in COLORS:
            self.feature = "color"
        elif question_text in PATTERNS:
            self.feature = "pattern"
        else:
            # Best effort: report the problem and carry on. The attribute must
            # still exist, otherwise every later is_about_*() / repr() call on
            # this question dies with an AttributeError.
            self.feature = None
            print("ERROR: could not determine question type of: " + str(question_text))
        self.value = question_text
        self.infogain = 0.00
        self.y_split = 0
        self.n_split = 0

    def is_about_shape(self):
        """Return True when the question asks about a shape."""
        return self.feature == "shape"

    def is_about_color(self):
        """Return True when the question asks about a color."""
        return self.feature == "color"

    def is_about_pattern(self):
        """Return True when the question asks about a pattern."""
        return self.feature == "pattern"

    def set_infogain(self, value, y_split, n_split):
        """Store the question's infogain and the yes/no split it produced."""
        self.infogain = value
        self.y_split = y_split
        self.n_split = n_split

    def __repr__(self):
        return "<MonsterQuestion feature:%s value:%s ig:%s>" \
            % (self.feature, self.value, self.infogain)

    def __str__(self):
        return self.__repr__()


class MonstersSpace:
    """Hypothesis space of one game: the monsters that can still be the target.

    The space starts as the monsters of one entry of MONSTERS_SETS and shrinks
    each time a question is asked, according to the answer the target monster
    gives to that question.
    """

    def __init__(self, set_id: int, target_monster: Monster):
        """Build the space from MONSTERS_SETS[set_id].

        The monster of the set that equals ``target_monster`` (same shape,
        color and pattern) is flagged as the target.
        """
        self.target_monster = target_monster
        self.space = []
        for shape, color, pattern in MONSTERS_SETS[set_id]:
            monster = Monster(shape, color, pattern)
            if monster == self.target_monster:
                monster.set_as_target()
            self.space.append(monster)

    @staticmethod
    def _matches(question: MonsterQuestion, monster: Monster):
        """Tell whether ``monster`` has the feature value ``question`` asks about.

        Returns True or False for a shape, color or pattern question, and None
        for a question of no known type, so that such a question neither
        matches nor eliminates any monster.
        """
        if question.is_about_shape():
            return monster.has_shape(question.value)
        if question.is_about_color():
            return monster.has_color(question.value)
        if question.is_about_pattern():
            return monster.has_pattern(question.value)
        return None

    def ask_question(self, question: MonsterQuestion):
        """Ask a question and shrink the space according to the answer.

        Returns the tuple (infogain, y_split, n_split) computed on the space
        as it was *before* the question eliminated any monster.
        """
        answer = self.evaluate_question(question)
        infogain, y_split, n_split = self.calculate_question_infogain(question)
        if answer:
            self.delete_question_not_targets(question)
        else:
            self.delete_question_yes_targets(question)
        return infogain, y_split, n_split

    def evaluate_question(self, question: MonsterQuestion):
        """Return the target monster's answer to the question (True = yes)."""
        return self._matches(question, self.target_monster) is True

    def calculate_question_infogain(self, question: MonsterQuestion):
        """Compute the information gain of a question on the current space.

        Every remaining monster is treated as equally likely, so the entropy
        of a group of n monsters is log2(n). The gain is the prior entropy
        minus the size-weighted entropy of the "yes" and "no" groups.

        Returns (infogain rounded to 4 decimals, n_yes, n_no). An empty space
        yields (0.0, 0, 0).
        """
        n_prior = len(self.space)
        if n_prior <= 0:
            # Must be a triple: ask_question unpacks three values, so a bare
            # 0.0 here would raise a TypeError.
            return 0.0, 0, 0
        n_yes = self.count_question_yes_targets(question)
        # n_no is by definition the complement of n_yes, so the two always
        # add up to n_prior.
        n_no = self.count_question_no_targets(question)

        h_prior = math.log2(n_prior)
        h_post_no = 0 if n_no == 0 else (n_no / n_prior) * math.log2(n_no)
        h_post_yes = 0 if n_yes == 0 else (n_yes / n_prior) * math.log2(n_yes)
        h_post = h_post_no + h_post_yes

        return round(h_prior - h_post, 4), n_yes, n_no

    def count_question_yes_targets(self, question: MonsterQuestion):
        """Count the monsters in the space that answer "yes" to the question."""
        return sum(1 for monster in self.space if self._matches(question, monster))

    def count_question_no_targets(self, question: MonsterQuestion):
        """Count the monsters in the space that do not answer "yes"."""
        return len(self.space) - self.count_question_yes_targets(question)

    def delete_question_not_targets(self, question: MonsterQuestion):
        """Remove the monsters that answer "no" (used after a "yes" answer)."""
        # Slice assignment filters in place, so self.space stays the same list.
        self.space[:] = [
            monster for monster in self.space
            if self._matches(question, monster) is not False
        ]

    def delete_question_yes_targets(self, question: MonsterQuestion):
        """Remove the monsters that answer "yes" (used after a "no" answer)."""
        self.space[:] = [
            monster for monster in self.space
            if self._matches(question, monster) is not True
        ]

## Analysis

In [27]:
"""Maze analysis script"""

# Constants
# Prefix of the exported file name: results go to EXP_NAME + "_analysis.csv".
EXP_NAME = "maze"
# Input CSV columns that are copied unchanged into every output record. Rows
# whose participant id is empty are skipped when the input is read.
UID_COLUMN_NAME = "participant_id"
AGE_COLUMN_NAME = "participant_age"


class RecordAnalyzer:
    """Rates the questions one participant asked in the card game and the maze.

    ``row`` is one record of the input CSV as a dict. The columns read are
    "cardgame_set_id", "cardgame_target_shape", "cardgame_target_color",
    "cardgame_target_pattern", "cardgame_q_<n>", "maze_target", "maze_q_<n>"
    and the participant id / age columns.

    Constructing the object runs the whole analysis; the results are then
    available through get_record_as_dictionary().
    """

    # Question columns read from the input: <game>_q_1 .. <game>_q_14.
    _INPUT_QUESTION_SLOTS = 14
    # Question column groups written to the output: <game>_q_1_* .. <game>_q_15_*.
    # NOTE(review): one more slot than is read, so slot 15 is always empty.
    # Kept as is so that the layout of the exported file does not change.
    _OUTPUT_QUESTION_SLOTS = 15

    def __init__(self, row):
        """Analyze one input row.

        Raises:
            ValueError: when the "maze_target" value is not a key of
                MAZE_TARGETS.
        """
        self.data = row

        # analyze cardgame questions (the set id is 1-based in the file)
        set_id = int(self.data["cardgame_set_id"]) - 1
        target_monster = Monster(
            self.data["cardgame_target_shape"],
            self.data["cardgame_target_color"],
            self.data["cardgame_target_pattern"]
        )
        self.space = MonstersSpace(set_id, target_monster)
        self.cardgame_questions = []
        self.rate_cardgame_questions()

        # analyze maze questions
        # the maze analysis is emulated like it was a cardgame with set_id 0
        maze_target_id = self.data["maze_target"]
        maze_target_raw = MAZE_TARGETS.get(str(maze_target_id))
        if maze_target_raw is None:
            print("ERROR: could not parse maze target: " + str(maze_target_id))
            print(self.data)
            # Fail with an explicit message instead of the TypeError that
            # indexing None would raise a few lines further down.
            raise ValueError("could not parse maze target: " + str(maze_target_id))
        maze_target = Monster(
            maze_target_raw[0],
            maze_target_raw[1],
            maze_target_raw[2]
        )
        self.maze_space = MonstersSpace(0, maze_target)
        self.maze_questions = []
        self.rate_maze_questions()

    @staticmethod
    def _ask(space, question_text):
        """Ask ``question_text`` in ``space`` and return the rated question."""
        question = MonsterQuestion(question_text)
        infogain, y_split, n_split = space.ask_question(question)
        question.set_infogain(infogain, y_split, n_split)
        return question

    @staticmethod
    def _summarize_infogains(questions):
        """Summarize the infogains of a list of rated questions.

        Returns the tuple (mean infogain, number of questions with infogain
        > 0, mean infogain over those questions only, mean infogain with the
        last question left out). Every value is 0 when it has no question to
        be computed from, including when ``questions`` is empty.
        """
        count = len(questions)
        if count == 0:
            # No question asked: avoid dividing by zero and indexing [-1].
            return 0, 0, 0, 0
        gains = [question.infogain for question in questions]
        total_ig = sum(gains)
        non_zero_gains = [gain for gain in gains if gain > 0]
        mean_ig_non_zero = sum(non_zero_gains) / len(non_zero_gains) if non_zero_gains else 0
        mean_ig_last_excluded = 0 if count == 1 else (total_ig - gains[-1]) / (count - 1)
        return total_ig / count, len(non_zero_gains), mean_ig_non_zero, mean_ig_last_excluded

    def rate_cardgame_questions(self):
        """Rate the "cardgame_q_<n>" questions and compute their mean infogains.

        Reading stops at the first missing or empty question column.
        """
        for i in range(1, self._INPUT_QUESTION_SLOTS + 1):
            question_text = self.data.get("cardgame_q_" + str(i))
            if question_text is None or question_text == "":
                break
            self.cardgame_questions.append(self._ask(self.space, question_text))

        (self.cardgame_mean_ig,
         self.cardgame_nonzero_questions_count,
         self.cardgame_mean_ig_non_zero,
         self.cardgame_mean_ig_last_excluded) = self._summarize_infogains(self.cardgame_questions)

    def rate_maze_questions(self):
        """Rate the "maze_q_<n>" questions and compute their mean infogains.

        The columns hold question ids that are translated through
        MAZE_QUESTIONS. Reading stops at the first missing or empty column
        and at the first id that MAZE_QUESTIONS does not know.
        """
        for i in range(1, self._INPUT_QUESTION_SLOTS + 1):
            question_id = self.data.get("maze_q_" + str(i))
            if question_id is None or question_id == "":
                break
            question_text = MAZE_QUESTIONS.get(str(question_id))
            if question_text is None or question_text == "":
                break
            self.maze_questions.append(self._ask(self.maze_space, question_text))

        (self.maze_mean_ig,
         self.maze_nonzero_questions_count,
         self.maze_mean_ig_non_zero,
         self.maze_mean_ig_last_excluded) = self._summarize_infogains(self.maze_questions)

    def _add_question_columns(self, result, prefix, questions):
        """Add the per-question infogain / split columns of one game to ``result``.

        Slots beyond the questions actually asked are filled with None.
        """
        for i in range(self._OUTPUT_QUESTION_SLOTS):
            question = questions[i] if i < len(questions) else None
            column = prefix + "_q_" + str(i + 1)
            result[column + "_ig"] = None if question is None else question.infogain
            result[column + "_y_split"] = None if question is None else question.y_split
            result[column + "_n_split"] = None if question is None else question.n_split

    def get_record_as_dictionary(self):
        """Format the analysis of the record as a flat dictionary (one output row)."""
        result = {}
        result[UID_COLUMN_NAME] = self.data[UID_COLUMN_NAME]
        result[AGE_COLUMN_NAME] = self.data[AGE_COLUMN_NAME]

        # The plain mean infogain and the mean with the last question excluded
        # are computed as well but are not exported.
        # NOTE(review): the "*_zero_questions_count" columns hold the number
        # of questions with a NON-zero infogain. Either the column name or the
        # value looks wrong; left untouched so that the exported data does not
        # change silently - confirm what downstream analysis expects.
        result["cardgame_zero_questions_count"] = self.cardgame_nonzero_questions_count
        result["cardgame_mean_ig_zeros_excluded"] = round(self.cardgame_mean_ig_non_zero, 4)
        result["maze_zero_questions_count"] = self.maze_nonzero_questions_count
        result["maze_mean_ig_zeros_excluded"] = round(self.maze_mean_ig_non_zero, 4)

        # single-question IGs, card game first, then maze
        self._add_question_columns(result, "cardgame", self.cardgame_questions)
        self._add_question_columns(result, "maze", self.maze_questions)

        return result

    def __str__(self):
        return self.data[UID_COLUMN_NAME]


class App:
    """Script main class: reads the raw CSV, analyzes it, exports the results.

    Instantiating the class runs the whole pipeline: every row of
    'maze_data_raw.csv' that has a participant id is turned into a
    RecordAnalyzer, and the results are written to EXP_NAME + '_analysis.csv'.
    The analyzed records are kept in the instance attribute ``records``.
    """

    # App properties
    args = None

    def __init__(self):
        # One list per instance. A class-level list would be shared by every
        # App object, so running App() a second time (e.g. re-running the
        # notebook cell) would export all the records twice.
        self.records = []
        self.read_csv_file()
        self.export_csv_file()

    def read_csv_file(self):
        """Read the input csv file, analyzing every row that has a participant id."""
        # newline='' lets the csv module do its own newline handling, as its
        # documentation requires for files passed to a reader.
        with open('maze_data_raw.csv', newline='') as csv_file:
            for row in csv.DictReader(csv_file):
                uid = row[UID_COLUMN_NAME]
                if uid != "" and uid is not None:
                    self.read_csv_row(row)

    def read_csv_row(self, row):
        """Analyze one row of the input csv file and store the result."""
        self.records.append(RecordAnalyzer(row))

    def export_csv_file(self):
        """Export the analysis of every record to the output csv file."""
        results = [record.get_record_as_dictionary() for record in self.records]

        # save the csv
        df_records = pd.DataFrame(results)
        df_records.to_csv(EXP_NAME + '_analysis.csv')

## Run analysis

In [28]:
# Run this cell (or the whole notebook) to perform the analysis: creating the
# App object reads the raw CSV and writes the analysis CSV.
App()
print(f"{EXP_NAME}_analysis.csv saved.")

maze_analysis.csv saved.
