In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from datareader import read_data, algs, display_ranks, combine_all_metrics

# Reading data

In [None]:
# Load the stored experiment results through the project reader. The returned
# triple is unpacked as-is; `data` and `scores` feed the cells below.
data, scores, test_users = read_data(
    'Results for AmazonGames',
    'AMZG',
)

In [None]:
# Build the combined metrics table with the project helper. The plots below
# read its 'rank', 'model', 'Stability', 'HR', 'MRR' and 'COV' columns.
all_metrics = combine_all_metrics(
    scores,
    data,
)

In [None]:
# Preview the first five rows of the combined metrics table.
all_metrics.head(n=5)

# Stability

In [None]:
# Stability distribution per rank, one box per model, restricted to the ranks
# listed in `display_ranks`.
stability_view = all_metrics.query('rank in @ display_ranks')
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=stability_view, x='rank', y='Stability', hue='model', ax=ax)

# HR

In [None]:
# HR distribution per rank, one box per model, restricted to the ranks
# listed in `display_ranks`.
hr_view = all_metrics.query('rank in @ display_ranks')
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=hr_view, x='rank', y='HR', hue='model', ax=ax)

In [None]:
# Stability against HR over all rows, with a separate linear fit per model.
sns.lmplot(
    data=all_metrics,
    x='HR', y='Stability',
    hue='model',
    height=10,
)

## MRR

In [None]:
# MRR distribution per rank, one box per model, restricted to the ranks
# listed in `display_ranks`.
mrr_view = all_metrics.query('rank in @ display_ranks')
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=mrr_view, x='rank', y='MRR', hue='model', ax=ax)

In [None]:
# Stability against MRR over all rows, with a separate linear fit per model.
sns.lmplot(
    data=all_metrics,
    x='MRR', y='Stability',
    hue='model',
    height=10,
)

## Coverage

In [None]:
# Coverage (COV) distribution per rank, one box per model, restricted to the
# ranks listed in `display_ranks`.
cov_view = all_metrics.query('rank in @ display_ranks')
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=cov_view, x='rank', y='COV', hue='model', ax=ax)

In [None]:
# Stability against coverage (COV) over all rows, with a separate linear fit per model.
sns.lmplot(
    data=all_metrics,
    x='COV', y='Stability',
    hue='model',
    height=10,
)

# Other views

## HR

In [None]:
metric = 'HR'

# HR as a function of rank, one line per model, drawn from the long-format scores.
hr_long = scores[metric]['long']
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(data=hr_long, x='rank', y=metric, hue='model', ax=ax)

In [None]:
metric = 'HR'

# PSI scores against PureSVD scores (wide-format table) with a regression fit.
fig, ax = plt.subplots(figsize=(12, 8))
sns.regplot(data=scores[metric]['wide'], x='PureSVD', y='PSI', ax=ax)
# y = x reference line: points above it have a higher PSI value than PureSVD.
diag_span = [0.001, 0.06]
ax.plot(diag_span, diag_span)
ax.set_title(metric);

In [None]:
metric = 'HR'

# PSI vs. PureSVD again, but only for index labels 10/30/50/70 of the wide table
# (exposed as the 'rank' column after reset_index), with one fit per rank.
selected_ranks = [10, 30, 50, 70]
wide_subset = scores[metric]['wide'].loc[selected_ranks].reset_index()
g = sns.lmplot(data=wide_subset, x="PureSVD", y="PSI", hue="rank", height=10)
# Dotted y = x reference line.
diag_span = [0.0, 0.04]
g.ax.plot(diag_span, diag_span, ls=':', lw=5)
g.ax.set_title(metric);

## MRR

In [None]:
metric = 'MRR'

# MRR as a function of rank, one line per model, drawn from the long-format scores.
mrr_long = scores[metric]['long']
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(data=mrr_long, x='rank', y=metric, hue='model', ax=ax)

In [None]:
# Set the metric explicitly (as the HR cells do) so this cell always plots MRR,
# even when run on its own or after another section has reassigned `metric`.
metric = 'MRR'

# PSI scores against PureSVD scores (wide-format table) with a regression fit.
plt.figure(figsize=(12, 8))
ax = sns.regplot(data=scores[metric]['wide'], x='PureSVD', y='PSI')
# y = x reference line: points above it have a higher PSI value than PureSVD.
ax.plot([0.00, 0.04], [0.00, 0.04])
ax.set_title(metric);

In [None]:
# Set the metric explicitly (as the HR cells do) so this cell always plots MRR,
# even when run on its own or after another section has reassigned `metric`.
metric = 'MRR'

# PSI vs. PureSVD for index labels 10/30/50/70 of the wide table (exposed as
# the 'rank' column after reset_index), with one linear fit per rank.
g = sns.lmplot(
    data=scores[metric]['wide'].loc[[10, 30, 50, 70]].reset_index(),
    x="PureSVD", y="PSI", hue="rank",
    height=10
)
# Dotted y = x reference line.
g.ax.plot([0.0, 0.04], [0.0, 0.04], ls=':', lw=5)
g.ax.set_title(metric);

## Coverage

In [None]:
metric = 'COV'

# Coverage (COV) as a function of rank, one line per model, from the long-format scores.
cov_long = scores[metric]['long']
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(data=cov_long, x='rank', y=metric, hue='model', ax=ax)

In [None]:
# Set the metric explicitly (as the HR cells do) so this cell always plots COV,
# even when run on its own or after another section has reassigned `metric`.
metric = 'COV'

# PSI scores against PureSVD scores (wide-format table) with a regression fit.
plt.figure(figsize=(12, 8))
ax = sns.regplot(data=scores[metric]['wide'], x='PureSVD', y='PSI')
# y = x reference line: points above it have a higher PSI value than PureSVD.
ax.plot([0.005, 0.03], [0.005, 0.03])
ax.set_title(metric);

In [None]:
# Set the metric explicitly (as the HR cells do) so this cell always plots COV,
# even when run on its own or after another section has reassigned `metric`.
metric = 'COV'

# PSI vs. PureSVD for index labels 10/30/50/70 of the wide table (exposed as
# the 'rank' column after reset_index), with one linear fit per rank.
g = sns.lmplot(
    data=scores[metric]['wide'].loc[[10, 30, 50, 70]].reset_index(),
    x="PureSVD", y="PSI", hue="rank",
    height=10
)
# Dotted y = x reference line.
g.ax.plot([0.005, 0.03], [0.005, 0.03], ls=':', lw=5)
g.ax.set_title(metric);

## Stability

In [None]:
# Mean Stability per (rank, step) for every algorithm in `algs`, stacked into
# one long frame whose 'model' column carries the algorithm key.
per_model_means = []
for alg in algs:
    stability_df = data[alg]['Stability_df']
    per_model_means.append(
        stability_df.groupby(['rank', 'step'])[['Stability']].mean()
    )

stacked_means = pd.concat(per_model_means, keys=algs, axis=0)
stab_avg = (
    stacked_means
    .rename_axis(index=['model', 'rank', 'step'])
    .reset_index()
)
stab_avg.head()

In [None]:
# One panel per algorithm: Stability by rank, split by step, with outliers
# hidden and ranks restricted to `display_ranks`.
# squeeze=False makes plt.subplots return a 2-D array even for a single panel
# (by default it returns a bare Axes, which cannot be zipped); ravel() then
# gives a flat array of Axes for any number of algorithms.
fig, axes = plt.subplots(len(algs), 1, figsize=(12, len(algs) * 8), squeeze=False)
axes = axes.ravel()
for ax, alg in zip(axes, algs):
    sns.boxplot(
        data=data[alg]['Stability_df'].query('rank in @display_ranks'),
        x="rank", y="Stability", hue="step",
        showfliers=False,
        ax=ax,
    )
    ax.set_title(alg)

In [None]:
# Scatter of the Stability values of the first two algorithms in `algs`.
# Both series are sorted by index, so values are paired position by position
# — assumes both frames share the same index; TODO confirm.
x_alg, y_alg = algs[0], algs[1]

plt.figure(figsize=(16, 12))
plt.scatter(
    x=data[x_alg]['Stability_df']['Stability'].sort_index(),
    y=data[y_alg]['Stability_df']['Stability'].sort_index(),
    alpha=0.2
)
# Label the axes from `algs` so the figure shows which model is on which axis
# instead of relying on comments that can drift from the list order.
plt.xlabel(f'{x_alg} Stability')
plt.ylabel(f'{y_alg} Stability')
# Red y = x reference line over the [0, 1] range.
plt.plot([0, 1], [0, 1], c='r');

## UPD