In [4]:
# import everything as needed
%matplotlib inline

import os 
import math

from plotlib.loaders import *
from plotlib.plotters import *

from phdconf import config 
from phdconf.config import *

In [5]:
display_names = ['flattened']
index_names = ['flattened'] #, 'sigir']
qrel_paths = [config.AUS_QREL_PATH, ]

In [9]:
class ValLookup: 
    
    def __init__(self):
        self.lookup = {}
        self.data = []
        self._min = math.inf
        self._max = 0.0 
        self.normalized = None
        self.probs = None 
        
    def add(self, k, v):
        self.data.append(v)
        if v > self._max: 
            self._max = v         
        if v < self._min:
            self._min = v
        
        self.lookup[k] = len(self.data)-1
        
    def get(self, k, which):
        if k in self.lookup:
            if which == 1: 
                return self.normalized[self.lookup[k]]
            elif which == 2:     
                return self.probs[self.lookup[k]]
            else:
                return self.data[self.lookup[k]]
        
        return 0.0 
    
    def present(self, k):
        return k in self.lookup
    
    def provide_probs(self, probs):
        self.probs = probs 
    
    def normalize(self):
        diff = self._max - self._min 
        
        self.normalized = [0.0] * len(self.data)
        for i, v in enumerate(self.data): 
            self.normalized[i] = (v - self._min) / diff
            
    
def load_degree_file(path: str, summed=False):
    lookup = ValLookup()
    with open(path, 'r', encoding='utf-8') as f:
        for line in f: 
            parts = line.strip().split()
            # 0 is id, 1 is cnt and 2 is total ref cnt 
            if summed:
                lookup.add(parts[0], int(float(parts[2])))
            else:
                lookup.add(parts[0], float(parts[1]))
            
    return lookup


lens = load_degree_file(os.path.join('../features', 'flattened-id-doclens.txt'))

In [10]:
def get_doc_lens_for_res(path: str, lookup):
    query_lens = {}
    
    with open(path) as f:
        for line in f: 
            parts = line.strip().split()
            #  29 Q0 2013QSC331 0 10.78345959 flattened-unigram_dir_mu_2400.00
            if parts[0] not in query_lens:
                query_lens[parts[0]] = []
            
            query_lens[parts[0]].append(lookup.get(parts[2], 0))
                                        
    return query_lens
    

def load_1d(index_names, results_path, run_format, iterator, lookup):
    dfs = []
    for i, ind in enumerate(index_names):
        temp = []
        for l in iterator:
            lens = get_doc_lens_for_res(os.path.join(results_path, run_format.format(ind, l)), lookup)
            temp.append(lens)
        dfs.append(temp)
    return dfs 


In [11]:
result_lens = load_1d(['flattened'], os.path.join(BASE_DIR,'jelinek_mercer'), 'case-topics-{0}-unigram_jm_lambda_{1:0.2f}.run', np.arange(0.0, 1.1, 0.1), lens)

In [12]:
import statistics

def get_avg_lens(results, depth=100, per_query=False):
    
    all_av_lens = []
    for i, res in enumerate(results):
        av_lens = []
        for k in range(len(res)):
            if per_query:
                av_lens.append({x: statistics.mean(vals[:depth]) for x, vals in res[k].items()})
            else: 
                av_lens.append(statistics.mean([statistics.mean(x[:depth]) for x in res[k].values()]))
                
        all_av_lens.append(av_lens)
    
    return all_av_lens 

In [13]:
fig, axs = plt.subplots()
fig.set_size_inches(16, 10)
# axs.plot(np.arange(0.0, 1.1, 0.1), get_avg_lens(result_lens)[0], color='b', linewidth=2)
# axs.plot(np.arange(0.0, 1.1, 0.1), [get_avg_lens(load_1d(index_names, os.path.join(BASE_DIR, EXPERIMENT_DIR, 'tfidf'), '{0}-unigram_tfidf.run', range(1, 2, 1), lens))[0]] * 11, color='g', linewidth=2)
axs.plot(np.arange(1.2, 3.05, 0.05), [get_avg_lens(load_1d(index_names, os.path.join(BASE_DIR, 'tfidf'), 'case-topics-{0}-unigram_tfidf.run', range(1, 2, 1), lens))[0]] * 37, color='g', linewidth=2)
axs.plot(np.arange(1.2, 3.05, 0.05), get_avg_lens(load_1d(index_names, os.path.join(BASE_DIR, 'bm25'), 'case-topics-{0}-unigram_bm25_k1_{1:.2f}_b_0.70.run', np.arange(1.2, 3.05, 0.05), lens))[0], color='b', linewidth=2)
axs2 = axs.twiny()
axs2.plot(range(1000, 3050, 50), get_avg_lens(load_1d(index_names, os.path.join(BASE_DIR, 'dirichlet_prior'), 'case-topics-{0}-unigram_dir_mu_{1:0.2f}.run', range(1000, 3050, 50), lens))[0], color='r', linewidth=2)
axs3 = axs.twiny()
axs3.plot(get_avg_lens(load_1d(['flattened'], os.path.join(BASE_DIR,'jelinek_mercer'), 'case-topics-{0}-unigram_jm_lambda_{1:0.2f}.run', np.arange(0.0, 1.1, 0.1), lens))[0], linewidth=2)



[<matplotlib.lines.Line2D at 0x12af0c490>]

<Figure size 1152x720 with 3 Axes>

In [14]:
num_x = len(all_av_lens[0][0])
num_y = len(all_av_lens[0])

x = np.array([[i]*num_x for i in range(0, 11)])
y = np.array([list(np.arange(0.0, 1.1, 0.1))] * num_y)


fig = plt.figure()
fig.set_size_inches(16, 10)
ax = fig.gca(projection='3d')

df = pd.DataFrame(all_av_lens[0])
z = np.array(df)
print(z.shape, x.shape, y.shape)

ax.plot_surface(x, y, z, cmap=cm.gray)               

NameError: name 'all_av_lens' is not defined