In [2]:
import os.path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from docopt import docopt
from joblib import load as joblib_load

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import precision_recall_curve, precision_score, recall_score
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

import altair as alt

In [11]:
def plot_f1_scores_altair(train_test_scores_path, plot_path):
    """Draw train/test F-1 scores as a faceted Altair bar chart and save it.

    Parameters
    ----------
    train_test_scores_path : str
        CSV file read with its first column as the index (used as the model
        name). The ``train_f1_score`` / ``test_f1_score`` columns are renamed
        to ``train`` / ``test`` before plotting.
    plot_path : str
        Destination handed to ``Chart.save``.
    """
    # Wide -> long: one row per (model, score_type) pair.
    long_scores = (
        pd.read_csv(train_test_scores_path, index_col=0)
        .rename_axis("model")
        .reset_index()
        .rename(columns={"train_f1_score": "train", "test_f1_score": "test"})
        .sort_values("test")
        .melt(id_vars="model", var_name="score_type", value_name="score")
        .sort_values("model")
    )

    # Fixed left-to-right order of the per-model facets.
    facet_order = [
        "random_forest",
        "knn",
        "logistic_regression",
        "svc",
        "dummy_classifier",
    ]
    model_facet = alt.Column("model:N", title="Model", spacing=40, sort=facet_order)
    score_kind_axis = alt.X("score_type", title=None)
    score_kind_color = alt.Color("score_type", title=None)

    bars = alt.Chart(long_scores, title="F-1 Scores for Different Models").mark_bar()
    encoded = bars.encode(
        x=score_kind_axis,
        y="score",
        color=score_kind_color,
        column=model_facet,
    )

    # Font sizes for axes, legend, facet headers and the chart title.
    styled = encoded.configure_axis(labelFontSize=15, titleFontSize=15)
    styled = styled.configure_legend(labelFontSize=15)
    styled = styled.configure_header(labelFontSize=13)
    styled = styled.configure_title(fontSize=20)

    styled.save(plot_path)

In [12]:
def plot_f1_scores_matplotlib(train_test_scores_path, plot_path):
    """Draw train/test F-1 scores as a grouped matplotlib bar chart and save it.

    The previous body was a copy of the Altair implementation; this version
    actually renders with matplotlib, one pair of bars per row of the CSV.

    Parameters
    ----------
    train_test_scores_path : str
        CSV file read with its first column as the index (the model names).
        Must contain ``train_f1_score`` and ``test_f1_score`` columns.
    plot_path : str
        Destination passed to ``Figure.savefig``.
    """
    train_test_scores = pd.read_csv(train_test_scores_path, index_col=0)

    # "logistic_regression" -> "Logistic Regression" for the tick labels.
    model_labels = [
        str(model_name).title().replace("_", " ")
        for model_name in train_test_scores.index
    ]
    train_scores = train_test_scores.loc[:, "train_f1_score"].tolist()
    test_scores = train_test_scores.loc[:, "test_f1_score"].tolist()

    # One bar group per model in the file, not a hard-coded number of models.
    positions = np.arange(len(model_labels))
    bar_width = 0.4

    fig, ax = plt.subplots()
    try:
        # Offset each series by half a bar width so the pair sits side by side.
        ax.bar(positions - bar_width / 2, train_scores, bar_width, label="train")
        ax.bar(positions + bar_width / 2, test_scores, bar_width, label="test")
        ax.set_xticks(positions)
        ax.set_xticklabels(model_labels, rotation=45)
        ax.set_ylabel("score")
        ax.set_title("F-1 Scores for Different Models")
        ax.legend()
        fig.tight_layout()
        fig.savefig(plot_path)
    finally:
        # Release the figure even if saving fails, so repeated calls do not
        # accumulate open figures in the kernel.
        plt.close(fig)

In [13]:
# Directory that holds the score CSV and receives the rendered plot.
# Set the PLOT_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
output_dir_path = os.environ.get('PLOT_DATA_DIR', '/home/ken/Desktop/plot_data')
train_test_scores_path = os.path.join(output_dir_path, 'train_test_f1_scores.csv')
f1_score_plot_path = os.path.join(output_dir_path, 'train_test_f1_scores.png')

In [22]:
# Peek at the model names in the score file. Read the CSV here instead of
# relying on a `train_test_scores` variable that is only created by a later
# cell, so this cell also works on a fresh kernel.
pd.read_csv(train_test_scores_path, index_col=0).index.tolist()

['dummy_classifier', 'svc', 'knn', 'logistic_regression', 'random_forest']

In [43]:
# Grouped bar chart of train vs. test F-1 scores, one group per model.
# NOTE: this cell and the plot_f1_scores_altair(...) call both write to
# f1_score_plot_path, so whichever runs last determines the saved image.
train_test_scores = pd.read_csv(train_test_scores_path, index_col=0)
index_list = train_test_scores.index.tolist()
# "logistic_regression" -> "Logistic Regression" for the tick labels.
index_title_list = [model_name.title().replace('_', ' ') for model_name in index_list]
# Size the x positions from the data rather than assuming exactly 5 models.
x = np.arange(len(index_list))
y1 = train_test_scores.loc[:, 'train_f1_score'].tolist()
y2 = train_test_scores.loc[:, 'test_f1_score'].tolist()
width = 0.4
fig, ax = plt.subplots()
# Shift each series by half a bar width so train/test sit side by side.
ax.bar(x - width / 2, y1, width, label='train')
ax.bar(x + width / 2, y2, width, label='test')
ax.set_xticks(x)
ax.set_xticklabels(index_title_list, rotation=45)
ax.legend()
ax.set_title('F-1 Scores for Different Models')
fig.tight_layout()
fig.savefig(f1_score_plot_path)
# Close the figure so no empty canvas is left behind as cell output.
plt.close(fig)

<Figure size 640x480 with 0 Axes>

In [14]:
# Render the Altair version of the F-1 score chart to the configured plot path.
plot_f1_scores_altair(
    train_test_scores_path=train_test_scores_path,
    plot_path=f1_score_plot_path,
)

  for col_name, dtype in df.dtypes.iteritems():


In [39]:
# Sanity check of the label formatting used for the tick labels:
# str.title() alone keeps the underscore ('Ken_Wang'), so it is replaced too.
'ken_wang'.title().replace('_', ' ')

'Ken Wang'