In [None]:
import os

import pandas as pd
import matplotlib.pyplot as plt

# Data cleaning

In [None]:
# Folder that the loop below scans for raw benchmark ".csv" files
directory = 'asset/full_benchmark'

def clean(in_file, ou_file, n_separators=12):
    """
    Copy the well-formed lines of ``in_file`` into ``ou_file``.

    A line is kept only when it contains exactly ``n_separators`` commas;
    every other line is dropped. Kept lines are written unchanged and in
    their original order.

    Parameters
    ----------
    in_file : str
        Path of the text/CSV file to read.
    ou_file : str
        Path of the file to write. It is overwritten if it already exists,
        and its parent folder is created when missing.
    n_separators : int, default 12
        Number of commas a line must contain to be kept.
    """
    with open(in_file, "r") as f_in:
        # Opening a file for writing inside a missing folder raises
        # FileNotFoundError, so create the folder first. dirname() is ""
        # for a bare file name, in which case there is nothing to create.
        out_dir = os.path.dirname(ou_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(ou_file, "w") as f_out:
            f_out.writelines(
                line for line in f_in if line.count(",") == n_separators
            )

# Clean every raw benchmark CSV into asset/clean_benchmark, keeping the file
# name. The target folder is created first: writing into a missing folder
# raises FileNotFoundError on a fresh checkout.
os.makedirs('asset/clean_benchmark', exist_ok=True)

for file in os.listdir(directory):
    if file.endswith(".csv"):
        clean(os.path.join(directory, file), os.path.join('asset/clean_benchmark', file))

In [None]:
# Iterate over the files in asset/clean_benchmark, read each one into a
# dataframe and combine them into a single frame `df`.

# Columns expected in the benchmark files; they come first in `df`, in this order.
BENCHMARK_COLUMNS = [
    'instance','runnable','model','cumulative','bias',
    'use_first_maze','maze','win','steps','episodes',
    'first_win','convergence_count','cumulative_reward'
]

directory = 'asset/clean_benchmark'

# Read every file once and concatenate a single time. Growing `df` with
# pd.concat inside the loop re-copies all previous rows on each iteration,
# and concatenating onto an empty frame is deprecated in recent pandas.
# sorted() makes the row order independent of the file-system listing order.
frames = []
for file in sorted(os.listdir(directory)):
    if file.endswith(".csv"):
        print(file)
        frames.append(pd.read_csv(os.path.join(directory, file)))

if frames:
    df = pd.concat(frames, ignore_index=True)
    # Expected columns first, followed by any extra column found in the files;
    # an expected column missing from every file shows up as all-NaN.
    extra_columns = [name for name in df.columns if name not in BENCHMARK_COLUMNS]
    df = df.reindex(columns=BENCHMARK_COLUMNS + extra_columns)
else:
    # No CSV file found: keep an empty frame with the expected columns
    df = pd.DataFrame(columns=BENCHMARK_COLUMNS)

In [None]:
# Preview the first rows of the combined benchmark frame
df.head()

In [None]:
# List the distinct values found in the "model" column
print(df['model'].unique())

In [None]:
def _maze_number(value):
    """
    Return the integer maze index for a raw maze label.

    String labels are split on "_" and the second token is converted to int.
    Any other value is assumed to be an already-converted index and is simply
    cast to int, so re-running this cell does not fail on the converted column.
    """
    if isinstance(value, str):
        return int(value.split("_")[1])
    return int(value)


# Keep only the numeric part of the maze label (second "_"-separated token)
df["maze"] = df["maze"].apply(_maze_number)

# Replace N/A with Q-Learning
df["model"] = df["model"].fillna("Q-Learning")

print(df['model'].unique())

df.head()

# Parametrization

In [None]:
# Dictionaries for parametrization

# 1. List for model names
# Each list groups alternative spellings of one model name.
# NOTE(review): "C45Model" and "NaiveBayesModel" appear in NEW_MODELS below
# but not in the C45 / NAIVE_BAYES lists - confirm whether that is intended.
Q_LEARNING = ["Q-Learning", "Qlearning", "QLearning"]
C45 = ["C45", "C4.5", "C45WekaModel"]
PDT = ["PDTWekaModel"]
NAIVE_BAYES = ["NaiveBayes", "Naive Bayes", "NaiveBayesWekaModel"]
RANDOM_FOREST = ["RandomForest", "Random Forest", "RandomForestModel"]
NEURAL_NETWORK = ["NeuralNetwork", "Neural Network", "NeuralNetworkModel"]
KNN = ["kNN", "kNNModel"]
MANUAL = ["Manual", "ManualModel"]

# Model groups; the plotting cells select rows with df["model"].isin(<group>)
WEKA_MODELS = ["C45WekaModel", "PDTWekaModel", "NaiveBayesWekaModel"]
NEW_MODELS = ["C45Model", "NaiveBayesModel", "NeuralNetworkModel", "RandomForestModel", "kNNModel"]

# 2. Dictionary of colors for each model
# NOTE(review): disabled. Enabled as-is it would raise TypeError, because the
# keys are the lists defined above and lists are unhashable.
# MODEL_COLOR = {
#     Q_LEARNING: "tab:blue",
#     C45: "tab:orange",
#     PDT: "tab:red",
#     NAIVE_BAYES: "tab:green",
#     RANDOM_FOREST: "tab:gray",
#     NEURAL_NETWORK: "tab:cyan",
#     KNN: "tab:brown",
#     MANUAL: "tab:purple",
# }

# 3. Dictionary of parametrization for plotting
# Currently empty: its only entry is commented out.
PLOT_PARAM = {
    # CUMULATIVE: {
    #     "alpha": 0.5,
    # },
}


# Plotting

In [None]:
# First-win rows whose runnable is "RunnableQLearner"; plot_comparison() appends
# them to the plotted data when called with q_learner=True.
data_q_learner = df[(df["runnable"] == "RunnableQLearner") & (df["first_win"] == True)]

def plot_comparison(data, column, title, save=None, q_learner=True):
    """
    Plot the mean of ``column`` per maze as a grouped bar chart, one bar per model.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to plot; must provide the "maze" and "model" columns and ``column``.
    column : str
        Name of the column whose mean per (maze, model) is plotted.
    title : str
        Title of the figure.
    save : str, optional
        Path the figure is written to (300 dpi). Missing parent folders are
        created. Nothing is saved when None.
    q_learner : bool, default True
        When True, the rows of the module-level ``data_q_learner`` frame are
        appended to ``data`` before aggregating.

    Returns
    -------
    None
        The figure is shown (and optionally saved), then closed. If there is
        nothing to aggregate, a message is printed and no figure is produced.
    """
    data_compare = pd.concat([data, data_q_learner]) if q_learner else data

    # One row per maze, one column per model
    mean_by_model = data_compare.groupby(["maze", "model"])[column].mean().unstack()

    # pandas raises an unhelpful error when asked to plot an empty frame
    if mean_by_model.empty:
        print(f"Nothing to plot for: {title}")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    mean_by_model.plot.bar(
        ax=ax,
        width=0.5,
        title=title,
        xlabel="maze",
        ylabel=column,
    )

    if save is not None:
        # savefig does not create folders; dirname() is "" for a bare file name
        save_dir = os.path.dirname(save)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        fig.savefig(save, dpi=300)

    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate figures
    plt.close(fig)


## Plot first win

In [None]:
# First-win rows for bias 0 with cumulative=True and use_first_maze=False
is_first_win = df['first_win'] == True
is_unbiased = df['bias'] == 0
is_cumulative = df['cumulative'] == True
skips_first_maze = df['use_first_maze'] == False

df_first_win = df[is_first_win & is_unbiased & is_cumulative & skips_first_maze]

# Keep only the models listed in NEW_MODELS, then plot them next to the Q-learner rows
new_models_first_win = df_first_win[df_first_win["model"].isin(NEW_MODELS)]

plot_comparison(
    new_models_first_win,
    "episodes",
    "First win",
    save="asset/images/first_win/first_win_new.png",
    q_learner=True,
)


In [None]:
# Plot first win of each model in a bar plot, separated per maze, for every
# combination of model group, bias and previous-knowledge scenario.

# List of experiment parameters to choose from

# 1. Previous knowledge: cumulative, use_first_maze, use_previous_maze (when False/False)
# The two lists are parallel: position i of each list describes one scenario.
previous_knowledge = {
    "cumulative": [True, False, False],
    "use_first_maze": [False, True, False],
}

# 2. Bias
BIAS = [0, -0.2]

# 3. Models
MODELS = {
    "new_models": NEW_MODELS,
    "weka_models": WEKA_MODELS,
}

# 0. Recover only the first win of each model
for models, model_names in MODELS.items():
    for bias in BIAS:
        # zip walks the parallel lists together instead of a hard-coded range(3)
        for cumulative_flag, first_maze_flag in zip(
            previous_knowledge["cumulative"], previous_knowledge["use_first_maze"]
        ):
            print(bias, cumulative_flag, first_maze_flag)

            # Scenario label used in the title and in the file name
            if first_maze_flag:
                knowledge = "use_first_maze"
            elif cumulative_flag:
                knowledge = "cumulative"
            else:
                knowledge = "previous_maze"

            df_plot = df[
                (df["first_win"] == True)
                & (df["bias"] == bias)
                & (df["cumulative"] == cumulative_flag)
                & (df["use_first_maze"] == first_maze_flag)
            ]

            print(df_plot.head())

            # Test for emptiness AFTER selecting the model group: the scenario
            # may contain rows for other models only, and plotting it would
            # produce a figure showing nothing but the Q-learner rows.
            df_models = df_plot[df_plot["model"].isin(model_names)]
            if df_models.empty:
                continue

            plot_comparison(
                df_models,
                "episodes",
                f"First win (model={models}, bias={bias}, knowledge={knowledge})",
                save=f"asset/images/first_win/first_win_{models}_{knowledge}_{bias}.png",
            )


In [None]:
# Every bias value present in the data, in ascending order
BIAS = sorted(df["bias"].unique())

cumulative = True
use_first_maze = False

# Scenario label used in the titles and file names
if use_first_maze:
    knowledge = "use_first_maze"
elif cumulative:
    knowledge = "cumulative"
else:
    knowledge = "previous_maze"

for bias in BIAS:
    # First-win rows of the selected scenario for this bias value
    scenario_mask = (
        (df['first_win'] == True)
        & (df['bias'] == bias)
        & (df['cumulative'] == cumulative)
        & (df['use_first_maze'] == use_first_maze)
    )
    df_bias = df[scenario_mask]

    # Only the RandomForestModel rows are compared against the Q-learner rows
    random_forest_rows = df_bias[df_bias["model"].isin(["RandomForestModel"])]

    plot_comparison(
        random_forest_rows,
        "episodes",
        f"First win (model=RandomForestModel, bias={bias}, knowledge={knowledge})",
        save=f"asset/images/bias/first_win_random_forest_{knowledge}_bias_{bias}.png",
        q_learner=True,
    )

## First, second, third win

In [None]:
# NaiveBayesModel rows whose win counter is at most 3, for bias 0 with
# cumulative=True and use_first_maze=False
is_early_win = df['win'] <= 3
is_baseline = (
    (df['bias'] == 0)
    & (df['cumulative'] == True)
    & (df['use_first_maze'] == False)
)
is_naive_bayes = df['model'].isin(["NaiveBayesModel"])

df_win = df[is_early_win & is_baseline & is_naive_bayes]

# Mean number of episodes per maze, one bar per win number
episodes_by_win = df_win.groupby(["maze", "win"])["episodes"].mean().unstack()
episodes_by_win.plot.bar(
    figsize=(10, 5),
    width=0.5,
    title="First 3 Wins",
    xlabel="maze",
    ylabel="episodes",
)

plt.show()

## Plot cumulative reward