In [9]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import average_precision_score, precision_recall_curve
import matplotlib.pyplot as plt

dataset_type = 'dev'

_basepath = '/home/jackalhan/Development/github/more_meaningful_representations/squad/dev/comparisions/dev_voc_and_dev_q_p_ELMO_with_IDF_NOT_SMOOTH_NOT_SUBLINEARED'
datadir = os.path.join(_basepath)

_neighbors_file_name = '{}_neighbors.csv'.format(dataset_type)
neighbors_file = os.path.join(datadir, _neighbors_file_name)


def recall_at_k(r, k):
    """Score is recall @ k
    Relevance is binary (nonzero is relevant).
    >>> r = [0, 0, 1]
    >>> recall_at(r, 1)
    0.0
    >>> recall_at(r, 2)
    0.0
    >>> recall_at(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Precision @ k
    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    r_ = np.asarray(r)
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.sum(r[r[:]>0])/np.sum(r_[r_[:]>0])

def precision_at_k(r, k):
    """Score is precision @ k
    Relevance is binary (nonzero is relevant).
    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Precision @ k
    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    r = np.asarray(r)[:k] != 0
    if r.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(r)


def average_precision(r):
    """Score is average precision (area under PR curve)
    Relevance is binary (nonzero is relevant).
    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Average precision
    """
    r = np.asarray(r) != 0
    out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]
    if not out:
        return 0.
    return np.mean(out)


def mean_average_precision(rs):
    """Score is mean average precision
    Relevance is binary (nonzero is relevant).
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666
    Args:
        rs: Iterator of relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        Mean average precision
    """
    return np.mean([average_precision(r) for r in rs])


def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(np.subtract(np.power(2, r), 1) / np.log2(np.arange(2, r.size + 2)))
    return 0.


def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    idcg = dcg_at_k(sorted(r, reverse=True), k)
    if not idcg:
        return 0.
    return dcg_at_k(r, k) / idcg

def dcg2_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(np.subtract(np.power(2, r), 1) / np.log2(np.arange(2, r.size + 2)))
    return 0.


def ndcg2_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    idcg = dcg2_at_k(sorted(r, reverse=True), k)
    if not idcg:
        return 0.
    return dcg2_at_k(r, k) / idcg

In [None]:
neighbors = pd.read_csv(neighbors_file)

In [25]:
mypath = '/home/jackalhan/Development/github/more_meaningful_representations/squad/dev/comparisions/Weights'
for (dirpath, dirnames, filenames) in os.walk(mypath):    
    performance = []
    for each_file in filenames:
        print(each_file)
        neighbors = pd.read_csv(os.path.join(dirpath, each_file))
        neighbors['Is_Actual_Paragraph'] = (neighbors['neighbor_paragraph'] == neighbors['actual_paragraph']).astype('int')
        neighbors.sort_values(by=['neighbor_cos_similarity'], ascending=[False], inplace=True)
        #total_relevant_docs = neighbors[neighbors['Is_Actual_Paragraph'] == 1]['Is_Actual_Paragraph'].count()
        
        for k in [1,2,5,10,20,50]:
            top_k = neighbors[neighbors['neighbor_order'] <= k]
            len_of_received_relevant_doc = top_k[top_k['Is_Actual_Paragraph'] == 1]['Is_Actual_Paragraph'].count()
    #         recall_k = len_of_received_relevant_doc/total_relevant_docs 

    #         top_k_grouped = top_k.groupby('question').head(k)
    #         sub_recall_k, sub_precision_k, sub_dcg_k , sub_ndcg_k, sub_ndcg2_k,sub_avg_precision_i = 0,0,0,0,0,0
    #         print(10*'-')
    #         print('K: {}'.format(k))
    #         for _ in range(total_relevant_docs):

    #             each_group_set = top_k_grouped[_*k:_*k+k]
    #             #sub_recall_k += recall_at_k(each_group_set['Is_Actual_Paragraph'].values, k)
    #             sub_precision_k += precision_at_k(each_group_set['Is_Actual_Paragraph'].values, k)    
    #             sub_dcg_k += dcg_at_k(each_group_set['Is_Actual_Paragraph'].values, k)
    #             sub_ndcg_k += ndcg_at_k(each_group_set['Is_Actual_Paragraph'].values, k)
    #             #sub_ndcg2_k += ndcg2_at_k(each_group_set['Is_Actual_Paragraph'].values, k)
    #             sub_avg_precision_i = average_precision(each_group_set['Is_Actual_Paragraph'].values)

            performance.append((k, each_file.replace('_output_neighbors.csv', ''), len_of_received_relevant_doc
                                ))


        #df_documents_recall_precision = pd.DataFrame(data=documents_recall_precision, columns=['top_n', 'recall', 'precision'])    
    df_performance_model = pd.DataFrame(data=performance, columns=['top_n','conf', 'recall'])
    df_performance_model.sort_values(by=['top_n', 'recall'], ascending=[True, False])
    df_performance_model.to_csv(os.path.join(dirpath, 'performances.csv'))
#         ax = df_performance_model.plot(kind='bar', title=each_file.replace('_output_neighbors.csv', ''));
#         for p in ax.patches: 
#             ax.annotate(np.round(p.get_height(),decimals=2), (p.get_x()+p.get_width()/2., p.get_height()), ha='center', va='center', xytext=(0, 10), textcoords='offset points')
#         fig = ax.get_figure()
#         fig.savefig(os.path.join(dirpath, 'performance_'+each_file + '.png'))

elmo_with_idf_weights_a_0.42_b_0.43_c0.15_output_neighbors.csv
elmo_with_idf_weights_a_1_b_0_c0_output_neighbors.csv
elmo_only_weights_a_0.48_b_0.35_c0.17_output_neighbors.csv
elmo_with_idf_weights_a_0.09_b_0.07_c0.84_output_neighbors.csv
elmo_only_weights_a_0.09_b_0.02_c0.89_output_neighbors.csv
elmo_only_weights_a_0.31_b_0.22_c0.47_output_neighbors.csv
elmo_with_idf_weights_a_0_b_1_c0_output_neighbors.csv
elmo_only_weights_a_0.43_b_0.24_c0.33_output_neighbors.csv
elmo_with_idf_weights_a_0.65_b_0.08_c0.27_output_neighbors.csv
elmo_with_idf_weights_a_0.53_b_0.41_c0.06_output_neighbors.csv
elmo_with_idf_weights_a_0.07_b_0.51_c0.42_output_neighbors.csv
elmo_with_idf_weights_a_0.3_b_0.08_c0.62_output_neighbors.csv
elmo_only_weights_a_0.32_b_0.08_c0.6_output_neighbors.csv
elmo_only_weights_a_0.32_b_0.58_c0.1_output_neighbors.csv
elmo_with_idf_weights_a_0.68_b_0.15_c0.17_output_neighbors.csv
elmo_with_idf_weights_a_0.60_b_0.25_c0.15_output_neighbors.csv
elmo_with_idf_weights_a_0.49_b_0.32_c

In [3]:
mypath = '/home/jackalhan/Development/github/more_meaningful_representations/squad/dev/comparisions/Weighs'
for (dirpath, dirnames, filenames) in os.walk(mypath):
    for each_file in filenames:
        print(each_file)
        neighbors = pd.read_csv(os.path.join(dirpath,each_file))
        neighbors['Is_Actual_Paragraph'] = (neighbors['neighbor_paragraph'] == neighbors['actual_paragraph']).astype('int')
        neighbors.sort_values(by=['neighbor_cos_similarity'], ascending=[False])
        top_k = neighbors[neighbors['Is_Actual_Paragraph'] == 1]
        for i
        top_k_grouped = top_k.groupby(['Is_Actual_Paragraph']).count()
        total_zeros =top_k_grouped[top_k_grouped.index == 0]['slice_type'].values[0]
        total_1s =top_k_grouped[top_k_grouped.index == 1]['slice_type'].values[0]
#         for k in [1,2,5,10,20,50]:
#             top_k = neighbors[neighbors['Is_Actual_Paragraph'] <= 1]
#             top_k_grouped = top_k.groupby(['Is_Actual_Paragraph']).count()
#             total_zeros =top_k_grouped[top_k_grouped.index == 0]['slice_type'].values[0]
#             total_1s =top_k_grouped[top_k_grouped.index == 1]['slice_type'].values[0]                        
        performances = []
        min_cutoff = min(top_k['neighbor_cos_similarity'])
        for _cut_off in [float(x/10) for x in range(int(min_cutoff)*10,11)]:
            grouped_greater = top_k[top_k['neighbor_cos_similarity'] >= _cut_off].groupby(['Is_Actual_Paragraph']).count()
            try:
                true_positive = grouped_greater[grouped_greater.index == 1]['slice_type'].values[0]
            except:
                true_positive = 0

            grouped_smaller = top_k[top_k['neighbor_cos_similarity'] < _cut_off].groupby(['Is_Actual_Paragraph']).count()
            try:
                true_negative = grouped_smaller[grouped_smaller.index == 0]['slice_type'].values[0]
            except:
                true_negative = 0 

            try:
                false_positive = grouped_greater[grouped_greater.index == 0]['slice_type'].values[0]
            except:
                false_positive = 0

            try:
                false_negative = grouped_smaller[grouped_smaller.index == 1]['slice_type'].values[0]
            except:
                false_negative = 0

            true_negative_rate = (true_negative/total_zeros)
            false_positive_rate = 1 - true_negative_rate
            true_positive_rate = (true_positive/total_1s)
            try:
                precision = true_positive / (true_positive + false_positive)
            except:
                precision = 0

            try:
                recall = true_positive / (true_positive + false_negative)
            except:
                recall = 0
            k_performance = (_cut_off, true_positive, true_negative, false_positive, false_negative, precision, recall, true_negative_rate, false_positive_rate, true_positive_rate)
            performances.append(k_performance)
            
        df_prediction_model = pd.DataFrame(data=performances, columns=['cut_off', 'True Positive', 'True Negative', 'False_Positive', 'False_Negative','Precision', 'Recall', 'True Negative Rate', 'False Positive Rate', 'True Positive Rate'])
        ap = 0
        previous_recall = 0
        for i, each_row in df_prediction_model.iterrows():
            print('Current Recall: {}'.format(each_row['Recall']))
            print('Previous Recall: {}'.format(previous_recall))
            current_ap = (each_row['Recall'] - previous_recall) * each_row['Precision']
            print('Current AP: {}'.format(current_ap))

            ap += current_ap
            print('Total AP: {}'.format(ap))    
            print(10*'-')
            previous_recall = each_row['Recall']
        print(ap)
            #df_prediction_model['ap'] = ap
        df_prediction_model.to_csv(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file))
        #average_precision = average_precision_score(df_prediction_model['Recall'], df_prediction_model['Precision'])
# 
        plt.step(df_prediction_model['Recall'], df_prediction_model['Precision'], color='b', alpha=0.2,
                 where='post')
        plt.fill_between(df_prediction_model['Recall'], df_prediction_model['Precision'], step='post', alpha=0.2,
                         color='b')
#             plt.plot(df_prediction_model['Recall'], df_prediction_model['Recall'])
#             plt.plot(df_prediction_model['Precision'], df_prediction_model['Precision'])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
#             plt.legend(['Recall', 'precision'],loc='upper left')
        plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
                  ap))
        plt.savefig(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file+'.png'))
# ---------------------------------------------------
# ---------------------------------------------------
# print('Top {} Items: {}'.format(i, top_k.shape[0]))
# average_precision = average_precision_score(top_k['Is_Actual_Paragraph'], top_k['neighbor_cos_similarity'])
# print('Average precision-recall score: {0:0.2f}'.format(
#       average_precision))

# precision, recall, _ = precision_recall_curve(top_k['Is_Actual_Paragraph'], top_k['neighbor_cos_similarity'])

# plt.step(recall, precision, color='b', alpha=0.2,
#          where='post')
# plt.fill_between(recall, precision, step='post', alpha=0.2,
#                  color='b')

# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.ylim([0.0, 1.05])
# plt.xlim([0.0, 1.0])
# plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
#           average_precision))

SyntaxError: invalid syntax (<ipython-input-3-b48451ddb3ef>, line 9)

In [8]:
mypath = '/home/jackalhan/Development/github/more_meaningful_representations/squad/dev/comparisions/Weighs'
for (dirpath, dirnames, filenames) in os.walk(mypath):
    for each_file in filenames:
        print(each_file)
        neighbors = pd.read_csv(os.path.join(dirpath,each_file))
        neighbors['Is_Actual_Paragraph'] = (neighbors['neighbor_paragraph'] == neighbors['actual_paragraph']).astype('int')
        neighbors.sort_values(by=['neighbor_cos_similarity'], ascending=[False], inplace=True)
        top_50 = neighbors.copy() #neighbors[neighbors['neighbor_order'] <= 50]
        
        #print('Top {} Items: {}'.format(i, top_k.shape[0]))
        average_precision = average_precision_score(top_50['Is_Actual_Paragraph'], top_50['neighbor_cos_similarity'])
        print('Average precision-recall score: {0:0.2f}'.format(
              average_precision))

        precision, recall, _ = precision_recall_curve(top_50['Is_Actual_Paragraph'], top_50['neighbor_cos_similarity'])

        plt.step(recall, precision, color='b', alpha=0.2,
                 where='post')
        plt.fill_between(recall, precision, step='post', alpha=0.2,
                         color='b')   
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.legend([each_file.replace('_output_neighbors.csv', '')],loc="lower right")
        plt.title('2-class Precision-Recall for curve: AP={0:0.2f}'.format(average_precision))
#         number_of_relavent_docs = len(top_50[top_50['Is_Actual_Paragraph'] == 1])
#         precision_recalls =[]
#         retrieved_counter =0
#         print('number_of_relavent_docs:{}'.format(number_of_relavent_docs))
#         print('top_50:{}'.format(len(top_50)))
#         top_50.sort_values(by=['neighbor_cos_similarity'], ascending=[False], inplace=True)
#         for i, rows in enumerate(top_50.iterrows()):
#             index = rows[0]
#             row = rows[1]
#             if row['Is_Actual_Paragraph'] == 1:
#                 retrieved_counter +=1
#             precision_recalls.append((i+1, 
#                                       retrieved_counter/(i+1), 
#                                       retrieved_counter/number_of_relavent_docs, 
#                                       row['Is_Actual_Paragraph']))
#         df_precision_recalls = pd.DataFrame(data=precision_recalls, columns=['k', 
#                                                                                  'Precision',
#                                                                                  'Recall', 
#                                                                                  'Is_True_Pair'])
#         prec_records = df_precision_recalls[df_precision_recalls['Is_True_Pair'] == 1]
#         ap = prec_records['Precision'].sum() / len(prec_records)
#         df_precision_recalls.to_csv(os.path.join(dirpath, 'performance_' +each_file))
# #         plt.step(df_precision_recalls['Recall'], df_precision_recalls['Precision'], color='b', alpha=0.2,
# #                  where='post')
# #         plt.fill_between(df_precision_recalls['Recall'], df_precision_recalls['Precision'], step='post', alpha=0.2,
# #                          color='b')
#         #plt.plot(df_precision_recalls['Recall'], df_precision_recalls['Recall'], 'bo',
#                  #df_precision_recalls['Precision'],df_precision_recalls['Precision'], 'k')
#         #plt.plot(, df_precision_recalls['Precision'])
  
#         plt.plot(df_precision_recalls['Recall'], df_precision_recalls['Precision'], label='area = %0.2f' % ap, color="green")
#         plt.xlim([0.0, 1.0])
#         plt.ylim([0.0, 1.05])
#         plt.xlabel('Recall')
#         plt.ylabel('Precision')
#         plt.title('Precision Recall Curve for {}'.format(each_file.replace('_output_neighbors.csv', '')))
#         plt.legend(loc="lower right")
#         #plt.show()
# #         plt.xlabel('Recall')
# #         plt.ylabel('Precision')
# #         plt.ylim([0.0, 1.05])
# #         plt.xlim([0.0, 1.0])
# #         plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
# #                   ap))
# #         plt.legend(['Recall', 'Precision'],loc='upper left')
        plt.savefig(os.path.join(dirpath, 'performance_'+each_file+'.png'))
        plt.clf()
#         min_ = neighbors[neighbors['Is_Actual_Paragraph'] == 1]
#         precision_recalls =[]
#         min_cutoff = min(min_['neighbor_cos_similarity'])
#         top_k = neighbors[neighbors['neighbor_cos_similarity'] >= min_cutoff]
#         number_of_relavent_docs = len(top_k[top_k['Is_Actual_Paragraph'] == 1])
#         retrieved_counter = 0
#         for i, row in top_k.iterrows():
#             if row['Is_Actual_Paragraph'] == 1:
#                     retrieved_counter +=1
#             precision_recalls.append((i+1, 
#                                       retrieved_counter/len(top_k), 
#                                       retrieved_counter/number_of_relavent_docs, 
#                                       row['Is_Actual_Paragraph']))

#         df_precision_recalls = pd.DataFrame(data=precision_recalls, columns=['k', 'Precision', 'Recall', 'Is_True_Pair'])
#         ap = df_precision_recalls['Precision'].sum() / sum(df_precision_recalls[df_precision_recalls['Is_True_Pair'] == 1])
        
# #         for _ in range(1,max_k+1):
# #             top_k_ = top_k[top_k['neighbor_order'] <= _]
# #             retrieved_counter = top_k_[top_k_['Is_Actual_Paragraph'] == 1]['Is_Actual_Paragraph'].count()
# #             precision_recalls.append((k, 
# #                                       retrieved_counter/(max_k+1), 
# #                                       retrieved_counter/q, 
# #                                       retrieved_counter))

# #         for i, row in top_k.iterrows():
# #             if row['Is_Actual_Paragraph'] == 1:
# #                 retrieved_counter +=1
# #             precision_recalls.append((i, retrieved_counter/(i+1), retrieved_counter/len(top_k), row['Is_Actual_Paragraph']))
                    
        
#         df_prediction_model.to_csv(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file))
#         plt.step(df_precision_recalls['Recall'], df_precision_recalls['Precision'], color='b', alpha=0.2,
#                  where='post')
#         plt.fill_between(df_precision_recalls['Recall'], df_precision_recalls['Precision'], step='post', alpha=0.2,
#                          color='b')
# #             plt.plot(df_prediction_model['Recall'], df_prediction_model['Recall'])
# #             plt.plot(df_prediction_model['Precision'], df_prediction_model['Precision'])
#         plt.xlabel('Recall')
#         plt.ylabel('Precision')
#         plt.ylim([0.0, 1.05])
#         plt.xlim([0.0, 1.0])
# #             plt.legend(['Recall', 'precision'],loc='upper left')
#         plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
#                   ap))
#         plt.savefig(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file+'.png'))
#         plt.clf()

elmo_only_weights_a_1_b_1_output_neighbors.csv
Average precision-recall score: 0.02
elmo_only_weights_a_0.8_b_0.19999999999999996_output_neighbors.csv
Average precision-recall score: 0.02
elmo_with_idf_weights_a_0.7_b_0.30000000000000004_output_neighbors.csv
Average precision-recall score: 0.03
elmo_with_idf_weights_a_0.4_b_0.6_output_neighbors.csv
Average precision-recall score: 0.03
elmo_only_weights_a_0.9_b_0.09999999999999998_output_neighbors.csv
Average precision-recall score: 0.02
elmo_with_idf_weights_a_1_b_1_output_neighbors.csv
Average precision-recall score: 0.02
elmo_with_idf_weights_a_0.2_b_0.8_output_neighbors.csv
Average precision-recall score: 0.03
elmo_with_idf_weights_a_0.6_b_0.4_output_neighbors.csv
Average precision-recall score: 0.03
elmo_with_idf_weights_a_0.3_b_0.7_output_neighbors.csv
Average precision-recall score: 0.03
elmo_only_weights_a_0.1_b_0.9_output_neighbors.csv
Average precision-recall score: 0.02
elmo_with_idf_weights_a_0.1_b_0.9_output_neighbors.csv
A

<matplotlib.figure.Figure at 0x7eff5550ca58>

In [None]:
top_50.head()

In [None]:
row

In [None]:
mypath = '/home/jackalhan/Development/github/more_meaningful_representations/squad/dev/comparisions/Weighs'
for (dirpath, dirnames, filenames) in os.walk(mypath):
    for each_file in filenames:
        print(each_file)
        neighbors = pd.read_csv(os.path.join(dirpath,each_file))
        neighbors['Is_Actual_Paragraph'] = (neighbors['neighbor_paragraph'] == neighbors['actual_paragraph']).astype('int')
        neighbors.sort_values(by=['neighbor_cos_similarity'], ascending=[False], inplace=True)
        top_50 = neighbors.copy() #neighbors[neighbors['neighbor_order'] <= 50]
        q = 10570
        p = 2067
        
        top50_grouped = top_50.groupby('question')["neighbor_cos_similarity"].nlargest(p)
        for name, grouped in top50_grouped:
            i, = np.where( grouped['Is_Actual_Paragraph']==1 )
            precision = 1/(i+1)
            recall = 1/
            (i+1, 
#                                       retrieved_counter/(i+1), 
#                                       retrieved_counter/number_of_relavent_docs, 
#                                       row['Is_Actual_Paragraph']))
        
        #print('Top {} Items: {}'.format(i, top_k.shape[0]))
        average_precision = average_precision_score(top_50['Is_Actual_Paragraph'], top_50['neighbor_cos_similarity'])
        print('Average precision-recall score: {0:0.2f}'.format(
              average_precision))

        precision, recall, _ = precision_recall_curve(top_50['Is_Actual_Paragraph'], top_50['neighbor_cos_similarity'])

        plt.step(recall, precision, color='b', alpha=0.2,
                 where='post')
        plt.fill_between(recall, precision, step='post', alpha=0.2,
                         color='b')   
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.legend([each_file.replace('_output_neighbors.csv', '')],loc="lower right")
        plt.title('2-class Precision-Recall for curve: AP={0:0.2f}'.format(average_precision))
#         number_of_relavent_docs = len(top_50[top_50['Is_Actual_Paragraph'] == 1])
#         precision_recalls =[]
#         retrieved_counter =0
#         print('number_of_relavent_docs:{}'.format(number_of_relavent_docs))
#         print('top_50:{}'.format(len(top_50)))
#         top_50.sort_values(by=['neighbor_cos_similarity'], ascending=[False], inplace=True)
#         for i, rows in enumerate(top_50.iterrows()):
#             index = rows[0]
#             row = rows[1]
#             if row['Is_Actual_Paragraph'] == 1:
#                 retrieved_counter +=1
#             precision_recalls.append((i+1, 
#                                       retrieved_counter/(i+1), 
#                                       retrieved_counter/number_of_relavent_docs, 
#                                       row['Is_Actual_Paragraph']))
#         df_precision_recalls = pd.DataFrame(data=precision_recalls, columns=['k', 
#                                                                                  'Precision',
#                                                                                  'Recall', 
#                                                                                  'Is_True_Pair'])
#         prec_records = df_precision_recalls[df_precision_recalls['Is_True_Pair'] == 1]
#         ap = prec_records['Precision'].sum() / len(prec_records)
#         df_precision_recalls.to_csv(os.path.join(dirpath, 'performance_' +each_file))
# #         plt.step(df_precision_recalls['Recall'], df_precision_recalls['Precision'], color='b', alpha=0.2,
# #                  where='post')
# #         plt.fill_between(df_precision_recalls['Recall'], df_precision_recalls['Precision'], step='post', alpha=0.2,
# #                          color='b')
#         #plt.plot(df_precision_recalls['Recall'], df_precision_recalls['Recall'], 'bo',
#                  #df_precision_recalls['Precision'],df_precision_recalls['Precision'], 'k')
#         #plt.plot(, df_precision_recalls['Precision'])
  
#         plt.plot(df_precision_recalls['Recall'], df_precision_recalls['Precision'], label='area = %0.2f' % ap, color="green")
#         plt.xlim([0.0, 1.0])
#         plt.ylim([0.0, 1.05])
#         plt.xlabel('Recall')
#         plt.ylabel('Precision')
#         plt.title('Precision Recall Curve for {}'.format(each_file.replace('_output_neighbors.csv', '')))
#         plt.legend(loc="lower right")
#         #plt.show()
# #         plt.xlabel('Recall')
# #         plt.ylabel('Precision')
# #         plt.ylim([0.0, 1.05])
# #         plt.xlim([0.0, 1.0])
# #         plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
# #                   ap))
# #         plt.legend(['Recall', 'Precision'],loc='upper left')
        plt.savefig(os.path.join(dirpath, 'performance_'+each_file+'.png'))
        plt.clf()
#         min_ = neighbors[neighbors['Is_Actual_Paragraph'] == 1]
#         precision_recalls =[]
#         min_cutoff = min(min_['neighbor_cos_similarity'])
#         top_k = neighbors[neighbors['neighbor_cos_similarity'] >= min_cutoff]
#         number_of_relavent_docs = len(top_k[top_k['Is_Actual_Paragraph'] == 1])
#         retrieved_counter = 0
#         for i, row in top_k.iterrows():
#             if row['Is_Actual_Paragraph'] == 1:
#                     retrieved_counter +=1
#             precision_recalls.append((i+1, 
#                                       retrieved_counter/len(top_k), 
#                                       retrieved_counter/number_of_relavent_docs, 
#                                       row['Is_Actual_Paragraph']))

#         df_precision_recalls = pd.DataFrame(data=precision_recalls, columns=['k', 'Precision', 'Recall', 'Is_True_Pair'])
#         ap = df_precision_recalls['Precision'].sum() / sum(df_precision_recalls[df_precision_recalls['Is_True_Pair'] == 1])
        
# #         for _ in range(1,max_k+1):
# #             top_k_ = top_k[top_k['neighbor_order'] <= _]
# #             retrieved_counter = top_k_[top_k_['Is_Actual_Paragraph'] == 1]['Is_Actual_Paragraph'].count()
# #             precision_recalls.append((k, 
# #                                       retrieved_counter/(max_k+1), 
# #                                       retrieved_counter/q, 
# #                                       retrieved_counter))

# #         for i, row in top_k.iterrows():
# #             if row['Is_Actual_Paragraph'] == 1:
# #                 retrieved_counter +=1
# #             precision_recalls.append((i, retrieved_counter/(i+1), retrieved_counter/len(top_k), row['Is_Actual_Paragraph']))
                    
        
#         df_prediction_model.to_csv(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file))
#         plt.step(df_precision_recalls['Recall'], df_precision_recalls['Precision'], color='b', alpha=0.2,
#                  where='post')
#         plt.fill_between(df_precision_recalls['Recall'], df_precision_recalls['Precision'], step='post', alpha=0.2,
#                          color='b')
# #             plt.plot(df_prediction_model['Recall'], df_prediction_model['Recall'])
# #             plt.plot(df_prediction_model['Precision'], df_prediction_model['Precision'])
#         plt.xlabel('Recall')
#         plt.ylabel('Precision')
#         plt.ylim([0.0, 1.05])
#         plt.xlim([0.0, 1.0])
# #             plt.legend(['Recall', 'precision'],loc='upper left')
#         plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
#                   ap))
#         plt.savefig(os.path.join(dirpath, 'performance_k_'+ str(k) + '_'+each_file+'.png'))
#         plt.clf()

In [None]:
ap

In [None]:

# roc_auc = auc(df_prediction_model['False Positive Rate'], df_prediction_model['True Positive Rate'])
# plt.figure()
# lw = 2
# plt.plot(df_prediction_model['False Positive Rate'].values, df_prediction_model['True Positive Rate'].values, color='darkorange',
#          lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver operating characteristic')
# plt.legend(loc="lower right")
# plt.rcParams["figure.figsize"] = [15,15]
# plt.show()