In [1]:
# import everything as needed
%matplotlib inline
# %matplotlib notebook

import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D, axes3d
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import math

from operator import itemgetter

from irkit.trec import qrels
from irkit.trec import results
from irkit.trec import run

# Request PDF output for figures displayed inline by IPython.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

# Set general plot properties.
# Start from seaborn's defaults, then select the "paper" context and the
# "pastel" variant of the color codes.
sns.set()
sns.set_context("paper")
sns.set_color_codes("pastel")

# A dict is passed here instead of a context name; presumably this sets the
# default figure size to 16x10 -- TODO confirm against the installed seaborn.
sns.set_context({"figure.figsize": (16, 10)})
# Two matplotlib style sheets are applied one after the other. The call order
# is significant: a later call may override settings made by an earlier one.
plt.style.use('grayscale')
plt.style.use('seaborn-white')

def fill_empty_row(df, num_topics=100):
    """Add an all-zero row for every topic id in 1..num_topics missing from ``df``.

    The frame is modified in place (rows are appended through ``df.loc``) and
    nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index labels are topic ids. Labels outside
        ``1..num_topics`` are ignored when working out which topics are
        missing; existing rows are never modified.
    num_topics : int, optional
        Highest topic id that must be present. Defaults to 100, the value
        that used to be hard-coded.
    """
    # A set difference replaces the former remove()-per-row bookkeeping, which
    # raised KeyError as soon as the index held a repeated label or a label
    # outside the expected range.
    missing_topics = set(range(1, num_topics + 1)).difference(df.index)

    # Assigning the scalar 0 fills every column of the new row. (The former
    # ``(0) * num_colls`` evaluated to the plain integer 0 as well; it was not
    # a per-column sequence.) Sorting makes the order of appended rows explicit.
    for topic in sorted(missing_topics):
        df.loc[topic] = 0


In [2]:
runs = ['plmp', 'plms']
# Settings encoded in the result file names
# ('res-files/<run><p>_<s>_standard.csv'): proportion steps p = 1..11 and
# smoothing steps s = 1..10.
#change this back to 11 to get the 3d plot working
proportion = list(range(1, 12))
smoothing = list(range(1, 11))

# NOTE: this rebinds the name `results`, hiding the `irkit.trec.results`
# module imported at the top of the notebook. The name is kept because the
# following cell reads results['plmp'] and results['plms'].
results = {}
# The loop variable is called `run_name` (not `run`) so that the imported
# `irkit.trec.run` module is no longer overwritten by this loop.
for run_name in runs:
    z = []
    for p in proportion:
        z_i = []
        for s in smoothing:
            path = 'res-files/{}{}_{}_standard.csv'.format(run_name, p, s)
            # read_csv(index_col=0) replaces DataFrame.from_csv, which has been
            # removed from pandas; the first column becomes the index.
            df = pd.read_csv(path, index_col=0)
            fill_empty_row(df)
            # Values are rounded per row *before* averaging here; extract_runs
            # in the next cell rounds the mean instead.
            z_i.append(np.mean(df[' map'].round(4)))
        z.append(z_i)
    # results[run_name][i][j] is the mean for proportion[i] and smoothing[j].
    results[run_name] = z

# Coordinate grids, presumably for the 3D surface plot mentioned above.
# NOTE(review): assuming np.arange(0.0, 1.1, 0.1) yields 11 values, x has
# shape (11, 10) like zs/zp, but y has shape (10, 11) -- the shapes do not
# match. Left unchanged because the intended smoothing values are not visible
# here; confirm before using x/y/z together.
x = np.array([[x]*10 for x in np.arange(0.0,1.1,0.1)])
y = np.array([list(np.arange(0.0,1.1,0.1))] * 10)
zs = np.array(results['plms'])
zp = np.array(results['plmp'])

In [3]:
def extract_runs(runs, proportion=range(1, 12), res_dir='res-files'):
    """Load the mean ' map' value of each run at every proportion step.

    For each run name and each step ``p`` the file
    ``<res_dir>/<run><p>_standard.csv`` is read, topics missing from it are
    added as zero rows via ``fill_empty_row``, and the mean of the ' map'
    column, rounded to 4 decimals, is recorded.

    Parameters
    ----------
    runs : iterable of str
        Run name prefixes used in the result file names.
    proportion : iterable of int, optional
        Proportion steps to load; defaults to 1..11, the range that used to
        be hard-coded.
    res_dir : str, optional
        Directory holding the result files; defaults to 'res-files'.

    Returns
    -------
    dict
        Maps each run name to a list of means, one per proportion step, in
        the order of ``proportion``.
    """
    # Local names avoid `run` / `results`, which are also the names of the
    # irkit modules imported at the top of the notebook.
    mean_maps = {}
    for run_name in runs:
        per_step = []
        for p in proportion:
            path = '{}/{}{}_standard.csv'.format(res_dir, run_name, p)
            # read_csv(index_col=0) replaces DataFrame.from_csv, which has been
            # removed from pandas; the first column becomes the index.
            df = pd.read_csv(path, index_col=0)
            fill_empty_row(df)
            per_step.append(np.mean(df[' map']).round(4))
        mean_maps[run_name] = per_step
    return mean_maps
    
# Run name prefixes: a trailing 'p' is used for the runs that go into para_df,
# a trailing 's' for the runs that go into sent_df.
para_runs = ['klip', 'idfp']
sent_runs = ['klis', 'idfs']

para_results = extract_runs(para_runs)
sent_results = extract_runs(sent_runs)

# One row per proportion step, indexed 0.0, 0.1, ... via np.arange.
# The 'plm' column takes element 4 of every per-proportion list in `results`,
# i.e. the fifth smoothing step (s = 5 in the result file names).
para_df = pd.DataFrame(
    dict(
        idf=para_results['idfp'],
        kli=para_results['klip'],
        plm=[per_smoothing[4] for per_smoothing in results['plmp']],
    ),
    index=np.arange(0.0, 1.1, 0.1),
)

sent_df = pd.DataFrame(
    dict(
        idf=sent_results['idfs'],
        kli=sent_results['klis'],
        plm=[per_smoothing[4] for per_smoothing in results['plms']],
    ),
    index=np.arange(0.0, 1.1, 0.1),
)


In [4]:
# Plots for 4.3 of paper. AP@5 compared to r where lambda has been set to 0.5


def plot_ap_vs_r(scores_df):
    """Plot every column of ``scores_df`` against its index and show the figure.

    Both figures of this cell share identical styling, so it lives in one
    place instead of two copy-pasted stanzas.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        One column per method; the index supplies the x values (labelled r).

    Returns
    -------
    matplotlib.axes.Axes
        The axes the lines were drawn on.
    """
    ax = scores_df.plot(use_index=True, linewidth=8, marker='o', markersize=12)
    ax.set_xlabel("$r$", fontsize=30)
    ax.set_ylabel("AP@5", fontsize=30)
    # Fixed y-range so the two figures can be compared directly.
    ax.set_ylim((0, 0.35))
    ax.tick_params(labelsize=20)
    # Legend is set on the axes explicitly rather than through pyplot's
    # implicit "current axes".
    ax.legend(fontsize=18)
    plt.show()
    return ax


# Paragraph
ax = plot_ap_vs_r(para_df)

# Sentence
ax = plot_ap_vs_r(sent_df)

<matplotlib.figure.Figure at 0x10a043630>

<matplotlib.figure.Figure at 0x10495b470>