In [None]:
import pandas as pd

import emission.core.get_database as edb
import emission.core.wrapper.entry as ecwe
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.storage.decorations.trip_queries as esdt
import emission.storage.decorations.timeline as esdl
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
from uuid import UUID

import matplotlib.pyplot as plt
import itertools

%matplotlib inline

In [None]:
# Default text styling applied to every figure drawn after this cell.
# 'sans-serif' is one of matplotlib's generic font families. The previous value,
# 'normal', is not a font family name: matplotlib could not resolve it and only
# emitted "findfont: Font family ... not found" warnings before falling back to
# its default font.
font = {'family' : 'sans-serif',
        'weight' : 'regular',
        'size'   : 18}

plt.rc('font', **font)

In [None]:
import seaborn as sns

In [None]:
# Two-level index: the cartesian product of metric names (outer level)
# and label types (inner level).
metric_names = [
    "trips without prediction",
    "accuracy overall",
    "accuracy of trips with predictions",
    "f1 weighted",
]
label_types = ["mode", "purpose", "replaced"]
multi_index = pd.MultiIndex.from_product([metric_names, label_types])

### For our multi-index handling to work properly, please ensure that the first two (header) rows of the CSV are modified as follows:

```
ignore,model_name,trips without prediction,...
index,model_name,mode,purpose,replaced,...
```

In [None]:
# Location of the exported evaluation results.
EVAL_RESULTS_CSV = "/tmp/all_model_results_jul_30_2022.csv"

# The first two rows of the file form a two-level column header; whatever is
# labelled "ignore" in that header is discarded right after loading.
raw_eval_results = pd.read_csv(EVAL_RESULTS_CSV, header=[0,1])
all_eval_results = raw_eval_results.drop(columns=["ignore"])
all_eval_results.head()

In [None]:
# Key the rows by model name (rebinding the name instead of mutating in place).
all_eval_results = all_eval_results.set_index("model_name")
all_eval_results.head()

In [None]:
# Swap rows and columns, then move the resulting index levels back into
# ordinary columns.
results_transposed = all_eval_results.T
all_eval_results = results_transposed.reset_index()
all_eval_results.head()

In [None]:
# Give the auto-named columns produced by reset_index descriptive names.
level_column_names = {"level_0": "metric", "level_1": "label_type"}
all_eval_results = all_eval_results.rename(columns=level_column_names)
all_eval_results.head()

In [None]:
def remove_brackets(cn):
    """Return the first element of a tuple label; pass any other label through.

    Despite the name, no bracket characters are stripped from strings: a
    string such as ``"[abc]"`` is returned unchanged.

    Parameters
    ----------
    cn : object
        A column label, e.g. a plain string or a tuple such as ``("abc",)``.

    Returns
    -------
    object
        ``cn[0]`` when ``cn`` is a tuple (or a tuple subclass), otherwise
        ``cn`` itself.

    Raises
    ------
    IndexError
        If ``cn`` is an empty tuple.
    """
    # isinstance (rather than an exact type comparison) also unwraps tuple
    # subclasses such as namedtuples.
    if isinstance(cn, tuple):
        return cn[0]
    return cn
    
# Quick sanity check of remove_brackets on a few label shapes.
# Note that ("abc") is just a parenthesised string; only ("abc",) is a tuple.
sample_labels = ["abc", "[abc]", ("abc"), ("abc",)]
for sample_label in sample_labels:
    print(remove_brackets(sample_label))

In [None]:
# Run every column label through remove_brackets.
all_eval_results = all_eval_results.rename(columns=remove_brackets)
all_eval_results.head()

In [None]:
# Index the rows by label type (rebinding the name instead of mutating in place).
all_eval_results = all_eval_results.set_index("label_type")
all_eval_results.head()

In [None]:
# Model columns left out of the comparison from here on.
models_to_exclude = [
    "adaboost basic",
    "random forest with start end r150m",
    "random forest with start end trip r150m",
    "final random forest with clustering",
]
all_eval_results = all_eval_results.drop(columns=models_to_exclude)
all_eval_results.head()

In [None]:
# One bar chart per metric; the groupby is built once and reused for both the
# list of metric names and the plots.
results_by_metric = all_eval_results.groupby("metric")
metrics_list = list(results_by_metric.groups.keys())
ax_list = results_by_metric.plot(kind="bar", legend=False)
for ax, key in zip(ax_list, metrics_list):
    # Place the legend outside the axes, to the right of the bars.
    ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
    ax.set_title(key)

In [None]:
# Drop the metrics that are not shown in the comparison figures below.
# The metric names listed earlier in this notebook spell the third metric
# "accuracy of trips with predictions", while this filter previously matched
# only "accuracy of trips w predictions". Both spellings are excluded so the
# rows are removed whichever one the data uses.
# NOTE(review): confirm which spelling the CSV header actually contains.
excluded_metrics = [
    "trips without prediction",
    "accuracy of trips w predictions",
    "accuracy of trips with predictions",
]
all_eval_results = all_eval_results[~all_eval_results.metric.isin(excluded_metrics)]
all_eval_results.head()

In [None]:
# Larger text for the final comparison figures.
# 'sans-serif' is one of matplotlib's generic font families. The previous value,
# 'normal', is not a font family name: matplotlib could not resolve it and only
# emitted "findfont: Font family ... not found" warnings before falling back to
# its default font.
font = {'family' : 'sans-serif',
        'weight' : 'regular',
        'size'   : 24}

plt.rc('font', **font)

In [None]:

# Side-by-side bar charts, one panel per metric group, sharing both axes.
fig, ax_arr = plt.subplots(
    nrows=1, ncols=2,
    sharex=True, sharey=True,
    figsize=(20,8), dpi=400,
)
# With a single row the axes come back as a flat array, so they can be
# indexed by position directly.
ax_list = ax_arr
plotted_results = all_eval_results[all_eval_results.metric != "trips without prediction"]
bar_plot_options = dict(kind="bar", legend=False, ylim=(0,1))
for i, (metric, result_df) in enumerate(plotted_results.groupby("metric")):
    print(f"plotting {metric} at location {i}")
    result_df.plot(ax=ax_list[i], title=metric, **bar_plot_options)
fig.suptitle("Comparing the prediction accuracy of clustering and classification algorithms")
# A single legend for the whole figure, anchored relative to the second panel.
ax_list[1].legend(loc="lower center", bbox_to_anchor=(-0.2, 1.2))

In [None]:
# Same comparison drawn as line charts, one panel per metric group.
fig, ax_arr = plt.subplots(
    nrows=1, ncols=2,
    sharex=True, sharey=True,
    figsize=(20,5),
)
# With a single row the axes come back as a flat array, so they can be
# indexed by position directly.
ax_list = ax_arr
plotted_results = all_eval_results[all_eval_results.metric != "trips without prediction"]
line_plot_options = dict(kind="line", legend=False, ylim=(0,1))
for i, (metric, result_df) in enumerate(plotted_results.groupby("metric")):
    print(f"plotting {metric} at location {i}")
    result_df.plot(ax=ax_list[i], title=metric, **line_plot_options)
# Legend placed outside the second panel, to its right.
ax_list[1].legend(loc="upper left", bbox_to_anchor=(1, 1))