In [None]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import wandb
import os

# Plot Wikipedia Dataset

In [None]:
# Open a W&B evaluation run, then fetch a local copy of each input artifact
# (page views first, questions second).
run = wandb.init(project="wiki-workload", job_type="evaluation")
pageview_dir, questions_dir = (
    run.use_artifact(artifact_name).download()
    for artifact_name in ('pageviews:latest', 'questions:latest')
)

In [None]:
# Read the page-view table from the downloaded artifact directory and show it.
pageview_csv_path = f"{pageview_dir}/pageviews.csv"
pageview_df = pd.read_csv(pageview_csv_path)
pageview_df

In [None]:
# Output column -> source column of `pageview_df`. Each source column is
# divided by its own total, so every output column sums to 1.
frequency_sources = {
    "edit_frequency": "edit_count",
    "query_frequency": "2021080600",
}
df = pd.DataFrame({
    out_column: pageview_df[src_column] / pageview_df[src_column].sum()
    for out_column, src_column in frequency_sources.items()
})

df.plot()

# Plot DPR Model Accuracy Results 

In [None]:
# Rebind `run` to a W&B evaluation run and download the prediction results
# artifact; `artifact_dir` is the local directory the files land in.
run = wandb.init(project="wiki-workload", job_type="evaluation")
artifact = run.use_artifact("prediction_results:latest")
artifact_dir = artifact.download()

In [None]:
# Show the local directory returned by the prediction results artifact download.
artifact_dir

In [None]:
# Values interpolated into each results file name. They are formatted as
# written, so the int/float distinction matters (1 -> "1", not "1.0").
constants = [
    0.01, 0.05, 0.1,
    1, 5, 10,
]

# Components of each run name: plan-{key_policy}_{policy}-always_process
policies = ["lifo"]
key_policies = [
    "weighted_round_robin",
    "weighted_random",
    "random",
    "round_robin",
]

# Directory holding the downloaded result JSON files.
d = artifact_dir

# Key read out of every results JSON.
metric = 'top5'

In [None]:
# Build {run name: [metric value per constant]} for every policy/key-policy
# pair. Run names are generated with policies as the outer loop, so the dict's
# insertion order follows `policies` first and `key_policies` second.
run_names = [
    f"plan-{key_policy}_{policy}-always_process"
    for policy in policies
    for key_policy in key_policies
]

all_results = {}
for run_name in run_names:
    per_constant = []
    for c in constants:
        # One results file per (run, constant) pair.
        with open(f'{d}/{run_name}-{c}-100.json') as fh:
            per_constant.append(json.load(fh)[metric])
    all_results[run_name] = per_constant
all_results

In [None]:
# Show the run names that were collected as keys of `all_results`.
all_results.keys()

In [None]:
def load_policy_scores(key_policy, policy="lifo"):
    """Collect the `metric` value for one key policy across all `constants`.

    For each value in the notebook-level `constants` list, opens
    `{d}/plan-{key_policy}_{policy}-always_process-{constant}-100.json`
    and reads the entry stored under the notebook-level `metric` key.

    Args:
        key_policy: Key-policy part of the file name (e.g. "weighted_random").
        policy: Policy part of the file name; defaults to "lifo".

    Returns:
        A list of metric values, in the same order as `constants`.

    Raises:
        FileNotFoundError: If a results file for some constant does not exist.
        KeyError: If a results file has no entry for `metric`.
    """
    scores = []
    for constant in constants:
        path = f'{d}/plan-{key_policy}_{policy}-always_process-{constant}-100.json'
        with open(path) as results_file:
            scores.append(json.load(results_file)[metric])
    return scores


# One list per key policy; each is printed right after loading so the values
# can be checked by eye.
plan_weighted_random_lifo = load_policy_scores("weighted_random")
print(plan_weighted_random_lifo)

plan_weighted_longest_queue_lifo = load_policy_scores("weighted_longest_queue")
print(plan_weighted_longest_queue_lifo)

plan_longest_queue_lifo = load_policy_scores("longest_queue")
print(plan_longest_queue_lifo)

plan_random_lifo = load_policy_scores("random")
print(plan_random_lifo)

plan_round_robin_lifo = load_policy_scores("round_robin")
print(plan_round_robin_lifo)


In [None]:
# Default size for figures created after this cell.
# `plt.rcParams` is the same settings object the `pylab` shim re-exports, so
# this avoids a mid-notebook import of the discouraged pylab interface. The
# `rcParams` name stays bound in case other cells refer to it.
rcParams = plt.rcParams
rcParams['figure.figsize'] = 12, 10

In [None]:
# seaborn is imported here because this cell is its only visible user.
import seaborn

# Grouped bar chart: one group per constant, one bar per run in `all_results`.
# 'Factor' is built from `constants` directly. The `resources` alias used here
# previously is only assigned in a later cell, so a fresh-kernel top-to-bottom
# run raised NameError at this point.
df = pd.DataFrame({
    'Factor': constants,
    **all_results
})
fig, ax1 = plt.subplots(figsize=(10, 5))
# Wide -> long, then title-case the column names so they match the
# 'Factor' / 'Variable' / 'Value' names passed to barplot below.
tidy = df.melt(id_vars='Factor').rename(columns=str.title)
seaborn.barplot(x='Factor', y='Value', hue='Variable', data=tidy, ax=ax1)
seaborn.despine(fig)

In [None]:
fig, ax = plt.subplots()
ax.set_xscale('log')
#resources = [10/c for c in constants]
resources = constants
print(resources)

# (y values, legend label, colour, extra line kwargs), listed in draw order.
# Within a colour pair the second variant is drawn dashed; the first keeps the
# default line style.
series_specs = [
    (plan_weighted_longest_queue_lifo, "LIFO Weighted Queue", 'coral', {}),
    (plan_longest_queue_lifo, "LIFO Queue", 'coral', {'linestyle': 'dashed'}),
    (plan_weighted_random_lifo, "LIFO Weighted Random", 'red', {}),
    (plan_random_lifo, "LIFO Random", 'red', {'linestyle': 'dashed'}),
    # Disabled series, kept for reference:
    #(plan_lifo_sample_half, "LIFO Sample Half", 'dodgerblue', {'linestyle': 'dashed'}),
    #(plan_lifo_always_process, "LIFO Always", 'dodgerblue', {}),
    #(plan_round_robin_lifo, "LIFO Round Robin", 'blue', {'linestyle': 'dashed'}),
]
for y_values, legend_label, line_colour, line_kwargs in series_specs:
    ax.plot(resources, y_values, label=legend_label, c=line_colour, marker='.', **line_kwargs)

ax.grid()
ax.set_xlabel('resource constraint')
ax.set_ylabel(f'{metric} accuracy')
ax.set_title('Passage Retriever')
ax.legend()