In [None]:
import os

import pandas as pd
import matplotlib.pyplot as plt

# Data cleaning

In [None]:
# Folder whose .csv files are filtered by clean() in the loop below.
directory = 'asset/full_benchmark'

def clean(in_file, ou_file, expected_commas=12):
    """
    Copy the well-formed lines of ``in_file`` into ``ou_file``.

    A line is considered well formed when it contains exactly
    ``expected_commas`` commas; every other line is dropped. The check is a
    plain character count, so commas inside quoted fields are counted too.

    Parameters
    ----------
    in_file : str
        Path of the text/CSV file to read.
    ou_file : str
        Path of the file to write. It is created, or overwritten if it
        already exists.
    expected_commas : int, optional
        Number of commas a line must contain to be kept. The default of 12
        corresponds to 13 comma-separated fields.

    Returns
    -------
    None
    """
    # Both files are opened in one `with` so they are closed together, even
    # if reading or writing fails part-way through.
    with open(in_file, "r") as f_in, open(ou_file, "w") as f_out:
        f_out.writelines(
            line for line in f_in if line.count(",") == expected_commas
        )

# The cleaned copies go to asset/clean_benchmark; create the folder first so
# a fresh checkout does not fail with FileNotFoundError on the first write.
os.makedirs('asset/clean_benchmark', exist_ok=True)

# Clean every .csv file of the raw folder; other files are ignored.
for file in os.listdir(directory):
    if file.endswith(".csv"):
        clean(os.path.join(directory, file), os.path.join('asset/clean_benchmark', file))

In [None]:
# Read every cleaned benchmark CSV from asset/clean_benchmark and stack the
# rows into a single dataframe, `df`.

# Columns the analysis expects; they always come first in `df`, in this order.
BENCHMARK_COLUMNS = [
    'instance','runnable','model','cumulative','bias',
    'use_first_maze','maze','win','steps','episodes',
    'first_win','convergence_count','cumulative_reward'
]

directory = 'asset/clean_benchmark'

# Collect the per-file frames and concatenate once at the end. Growing `df`
# with pd.concat inside the loop copies all previous rows on every iteration,
# and seeding it with an empty object-dtype frame can degrade column dtypes.
frames = []
for file in sorted(os.listdir(directory)):  # sorted -> reproducible row order
    if file.endswith(".csv"):
        print(file)
        df_loop = pd.read_csv(os.path.join(directory, file))
        frames.append(df_loop)

if frames:
    df = pd.concat(frames, ignore_index=True)
    # Expected columns first (filled with NaN when no file provided them),
    # followed by any additional columns found in the files.
    extra_columns = [c for c in df.columns if c not in BENCHMARK_COLUMNS]
    df = df.reindex(columns=BENCHMARK_COLUMNS + extra_columns)
else:
    # No CSV found: keep an empty frame that still has the expected columns.
    df = pd.DataFrame(columns=BENCHMARK_COLUMNS)

In [None]:
# Preview the first rows of the combined dataframe.
df.head()

In [None]:
# Show the distinct values found in the 'model' column.
print(df['model'].unique())

In [None]:
# Replace each maze label by the integer found after its first underscore
# (the second "_"-separated token).
# The conversion is skipped when the column is already numeric, so running
# this cell a second time no longer fails on `int.split`.
if not pd.api.types.is_numeric_dtype(df["maze"]):
    df["maze"] = df["maze"].apply(lambda x: int(x.split("_")[1]))
df.head()

# Parametrization

In [None]:
# Lookup tables used to parametrize the analysis

# 0. Canonical model labels that the dataset spellings are translated to
Q_LEARNING = "Q-Learning"
C45 = "C45"
PDT = "PDT"
NAIVE_BAYES = "NaiveBayes"
RANDOM_FOREST = "RandomForest"
NEURAL_NETWORK = "NeuralNetwork"
KNN = "kNN"
MANUAL = "Manual"

# 1. Model names: every known spelling -> canonical label.
#    The spellings are grouped per canonical label and flattened into one dict.
# NOTE(review): "nan" is a string key; presumably it covers rows whose model
# is missing once the value has been converted to text — confirm.
_MODEL_ALIASES = (
    (Q_LEARNING, ("Q-Learning", "QLearning", "Qlearning", "nan")),
    (C45, ("C4.5", "C45", "C45WekaModel")),
    (PDT, ("PDT", "PDTWekaModel")),
    (NAIVE_BAYES, ("Naive Bayes", "NaiveBayesWekaModel")),
    (RANDOM_FOREST, ("Random Forest",)),
    (NEURAL_NETWORK, ("Neural Network",)),
    (KNN, ("kNN",)),
    (MANUAL, ("Manual", "ManualModel")),
)
MODEL_NAME = {
    alias: canonical
    for canonical, aliases in _MODEL_ALIASES
    for alias in aliases
}


# 2. Colors: canonical label -> matplotlib color name
MODEL_COLOR = dict([
    (Q_LEARNING, "tab:blue"),
    (C45, "tab:orange"),
    (PDT, "tab:red"),
    (NAIVE_BAYES, "tab:green"),
    (RANDOM_FOREST, "tab:gray"),
    (NEURAL_NETWORK, "tab:cyan"),
    (KNN, "tab:brown"),
    (MANUAL, "tab:purple"),
])

# 3. Extra plotting parameters (currently empty; the entry below is disabled)
PLOT_PARAM = {
    # CUMULATIVE: {
    #     "alpha": 0.5,
    # },
}


# Plotting

In [None]:
def plot_comparison(data, column, title, save=None):
    """
    Plot a comparison of the given column for each model in a bar plot.

    The rows of ``data`` are grouped by ``maze`` and ``model``; the mean of
    ``column`` in each group is drawn as one bar, with one cluster of bars
    per maze.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to plot. Must contain the columns ``maze``, ``model`` and
        ``column``.
    column : str
        Name of the column to average; also used as the y-axis label.
    title : str
        Title of the figure.
    save : str, optional
        Path where the figure is written (300 dpi). Nothing is saved when
        it is None.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When ``data`` has no
        rows, a message is printed and nothing is drawn or saved.
    """
    # An empty selection has no numeric data to draw and would otherwise fail
    # inside pandas' plotting code with an unhelpful error.
    if data.empty:
        print(f"Nothing to plot for '{title}': the selection is empty.")
        return

    mean_per_maze = data.groupby(["maze", "model"])[column].mean().unstack()

    # Draw on an explicit figure so that saving and closing act on exactly
    # this figure instead of whatever pyplot considers "current".
    fig, ax = plt.subplots(figsize=(10, 5))
    mean_per_maze.plot.bar(
        ax=ax,
        width=0.5,
        title=title,
        xlabel="maze",
        ylabel=column,
    )
    if save is not None:
        fig.savefig(save, dpi=300)
    plt.show()
    # Release the figure; repeated calls would otherwise accumulate open figures.
    plt.close(fig)


## Plot first win

In [None]:
# First win of each model, drawn as a bar plot with one group of bars per maze

# Experiment parameters to choose from

# 1. Previous knowledge: cumulative, use_first_maze, use_previous_maze (when False/False)
previous_knowledge = dict(
    cumulative=[True, False, False],
    use_first_maze=[False, True, False],
)

# 2. Bias
biases = [0, -0.02]

# 0. Disabled draft: one first-win plot per (bias, previous knowledge) combination
# for bias in biases:
#     for i in range(3):
#         print(bias, i)
#
#         knowledge = "cumulative" if previous_knowledge["cumulative"][i] else None
#         knowledge = "use_first_maze" if previous_knowledge["use_first_maze"][i] else knowledge
#         knowledge = "previous_maze" if knowledge is None else knowledge
#
#         df_plot = df[
#             (df["first_win"] == True)
#             & (df["bias"] == bias)
#             & (df["cumulative"] == previous_knowledge["cumulative"][i])
#             & (df["use_first_maze"] == previous_knowledge["use_first_maze"][i])
#         ]
#
#         print(df_plot.head())
#
#         if df_plot.empty:
#             continue
#
#         plot_comparison(
#             df_plot,
#             "episodes",
#             f"First win (bias={bias}, knowledge={knowledge})",
#             save=f"asset/first_win_{bias}_{knowledge}.png",
#         )

# Keep only the first-win rows recorded with neither kind of previous knowledge.
# NOTE(review): the bias used here (-0.2) is not one of the values listed in
# `biases` (0 and -0.02) — confirm which value is intended.
df_first_win = df.loc[
    df['first_win'].eq(True)
    & df['bias'].eq(-0.2)
    & df['cumulative'].eq(False)
    & df['use_first_maze'].eq(False)
]

print(df_first_win.head())

# 1. Mean number of episodes per maze and model
plot_comparison(
    data=df_first_win,
    column="episodes",
    title="First win",
    save="asset/first_win.png",
)

## Plot cumulative reward