In [1]:
import networkx as nx
import pandas as pd

from frequency.Frequency import Frequency
from most_premises.MostPremises import calculcate_most_premises
from pagerank.NetworkXPageRank import NetworkXPageRank
from pagerank.OriginalPageRank import OriginalPageRank
from pagerank.PaperPageRank import PaperPageRank
from randomized.RandomScore import calculate_random_score
from sentiment.NeuralNetworkSentimentClassification import NeuralNetworkSentiment
from sentiment.Sentiment import SentiWordNet
from similarity.SemanticSimilarity import SemanticSimilarity, GloveSemanticSimilarity
from similarity.Similarity import caclulate_jaccard_similarity
from similarity.WordNetSentenceSimilarity import wordnet_knowledge_similarity, wordnet_knowledge_similarity_averaged
from utils.Aggregation import min_aggregation, max_aggregation, sum_aggregation, avg_aggregation, \
    max_sentiment_aggregation, min_sentiment_aggregation
from utils.Correlation import calculate_kendall_correlation, compute_best_and_worst

In [2]:
# Run once if missing data error occours
#import nltk
#nltk.download('wordnet_ic')
#nltk.download('vader_lexicon')

In [3]:
## Calculate tau values for Ranking and Best and Worst Comparison of  PageRank Paper

In [4]:
PATH_GROUND_TRUTH_LIST = "../Webis-ArgRank-17-Dataset/ground-truth-list.csv"
PATH_GROUND_TRUTH_JSON = "../Graph-Construction/groundtruth.json"
PATH_NODE_MAPPING_JSON = "../Graph-Construction/node_mapping.json"
PATH_ARGUMENT_LIST = "../Webis-ArgRank-17-Dataset/argument-list.csv"
PATH_ARGUMENT_UNIT_LIST = "../Webis-ArgRank-17-Dataset/argument-unit-list.csv"
PATH_GENERATED_PAGE_RANK_DATA = "./pagerank/data"
PATH_GENERATED_EMBEDDINGS_DATA = "./similarity/embeddings/"
PATH_SENTIWORDNET = "./sentiment/data/SentiWordNet_3.0.0_20130122.txt"
PATH_MODEL = "./sentiment/data/nn_sentiment_model.pt"

## Calculate tau values for Ranking and Best and Worst Comparison of  PageRank Paper

In [5]:
# Calculate frequency based scores
frequency = Frequency(path_argument_unit_list=PATH_ARGUMENT_UNIT_LIST)
frequency_tau_value_min, frequency_tau_dict_min = calculate_kendall_correlation(frequency.get_frequency_score,
                                                                                    min_aggregation,
                                                                               path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                               path_argument_list = PATH_ARGUMENT_LIST)
frequency_tau_value_avg, frequency_tau_dict_avg = calculate_kendall_correlation(frequency.get_frequency_score,
                                                                                    avg_aggregation,
                                                                               path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                               path_argument_list = PATH_ARGUMENT_LIST)
frequency_tau_value_max, frequency_tau_dict_max = calculate_kendall_correlation(frequency.get_frequency_score,
                                                                                    max_aggregation,
                                                                                    path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                               path_argument_list = PATH_ARGUMENT_LIST)
frequency_tau_value_sum, frequency_tau_dict_sum = calculate_kendall_correlation(frequency.get_frequency_score,
                                                                               sum_aggregation,
                                                                               path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                               path_argument_list = PATH_ARGUMENT_LIST)
print("Done")

# Calculate jacard similarity based scores
jaccard_tau_value_sum, jaccard_tau_dict_sum = calculate_kendall_correlation(caclulate_jaccard_similarity,
                                                                              sum_aggregation,
                                                                         path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                         path_argument_list = PATH_ARGUMENT_LIST)
jaccard_tau_value_avg, jaccard_tau_dict_avg = calculate_kendall_correlation(caclulate_jaccard_similarity,
                                                                              avg_aggregation,
                                                                          path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                         path_argument_list = PATH_ARGUMENT_LIST)
jaccard_tau_value_max, jaccard_tau_dict_max = calculate_kendall_correlation(caclulate_jaccard_similarity,
                                                                              max_aggregation,
                                                                          path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                         path_argument_list = PATH_ARGUMENT_LIST)
jaccard_tau_value_min, jaccard_tau_dict_min = calculate_kendall_correlation(caclulate_jaccard_similarity,
                                                                              min_aggregation,
                                                                          path_ground_truth_list = PATH_GROUND_TRUTH_LIST,
                                                                         path_argument_list = PATH_ARGUMENT_LIST)
print("Done")

# Calculate PaperPageRank based scores
pagerank = PaperPageRank(path_argument_unit_list=PATH_ARGUMENT_UNIT_LIST)
paper_page_rank_tau_value_sum, paper_page_rank_tau_dict_sum = calculate_kendall_correlation(
        pagerank.calculate_paper_page_rank, sum_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
paper_page_rank_tau_value_avg, paper_page_rank_tau_dict_avg = calculate_kendall_correlation(
        pagerank.calculate_paper_page_rank, avg_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
paper_page_rank_tau_value_max, paper_page_rank_tau_dict_max = calculate_kendall_correlation(
        pagerank.calculate_paper_page_rank, max_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
paper_page_rank_tau_value_min, paper_page_rank_tau_dict_min = calculate_kendall_correlation(
        pagerank.calculate_paper_page_rank, min_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
print("Done")

# Calculated most premises based scores
most_premises_tau_value, most_premises_tau_dict = calculate_kendall_correlation(calculcate_most_premises,
                                                                                    sum_aggregation,
                                                                               path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
print("Done")

# Calculate SentiWordNet based scores
senti_word_net = SentiWordNet(path_sentiwordnet = PATH_SENTIWORDNET)
sentiment_tau_value_sum, sentiment_tau_dict_sum = calculate_kendall_correlation(
        senti_word_net.calculate_sentiment,
        sum_aggregation,
    path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
sentiment_tau_value_avg, sentiment_tau_dict_avg = calculate_kendall_correlation(
        senti_word_net.calculate_sentiment,
        avg_aggregation,
    path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
sentiment_tau_value_max, sentiment_tau_dict_max = calculate_kendall_correlation(
        senti_word_net.calculate_sentiment,
        max_sentiment_aggregation,
    path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
sentiment_tau_value_min, sentiment_tau_dict_min = calculate_kendall_correlation(
        senti_word_net.calculate_sentiment,
        min_sentiment_aggregation,
    path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
print("Done")

# Calculate random based scores
random_tau_value, random_tau_dict = calculate_kendall_correlation(calculate_random_score, min_aggregation,
                                                                 path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)

# Collect tau results for every aggregation method with best and worst counter
baseline = [[random_tau_dict, 0, 0], [frequency_tau_dict_min, 0, 0], [frequency_tau_dict_max, 0, 0],
                [frequency_tau_dict_sum, 0, 0], [frequency_tau_dict_avg, 0, 0],
                [jaccard_tau_dict_avg, 0, 0], [jaccard_tau_dict_sum, 0, 0], [jaccard_tau_dict_max, 0, 0],
                [jaccard_tau_dict_min, 0, 0], [most_premises_tau_dict, 0, 0], [sentiment_tau_dict_avg, 0, 0],
                [sentiment_tau_dict_max, 0, 0],
                [sentiment_tau_dict_sum, 0, 0], [sentiment_tau_dict_min, 0, 0]]
page_rank_approach = [[paper_page_rank_tau_dict_avg, 0, 0], [paper_page_rank_tau_dict_sum, 0, 0],
                          [paper_page_rank_tau_dict_max, 0, 0], [paper_page_rank_tau_dict_min, 0, 0]]
print("Done")
#Compute best and worst 
page_rank_approach, baseline = compute_best_and_worst(page_rank_approach, baseline)
print("Done")

Done
Done
Done
Done
Done
Done
Done


In [6]:
import numpy as np
from tabulate import tabulate

paper_results = np.array([
    ["PageRank", (paper_page_rank_tau_value_min, page_rank_approach[3][1], page_rank_approach[3][2]), 
     (paper_page_rank_tau_value_avg, page_rank_approach[0][1],page_rank_approach[0][2]),
     (paper_page_rank_tau_value_max, page_rank_approach[2][1], page_rank_approach[2][2]),
     (paper_page_rank_tau_value_sum, page_rank_approach[1][1], page_rank_approach[1][2])],
    
    ["Frequency", (frequency_tau_value_min, baseline[1][1], baseline[1][2]),
    (frequency_tau_value_avg, baseline[4][1], baseline[4][2]),
    (frequency_tau_value_max, baseline[2][1], baseline[2][2]),
    (frequency_tau_value_sum, baseline[3][1], baseline[3][2])],
    
    ["Similarity", (jaccard_tau_value_min, baseline[8][1], baseline[8][2]),
     (jaccard_tau_value_avg, baseline[5][1], baseline[5][2]),
     (jaccard_tau_value_max, baseline[7][1], baseline[7][2]),
     (jaccard_tau_value_sum, baseline[6][1], baseline[6][2])],
    
    ["Sentiment", (sentiment_tau_value_min,baseline[13][1], baseline[13][2]),
    (sentiment_tau_value_avg, baseline[10][1], baseline[10][2]),
    (sentiment_tau_value_max, baseline[11][1], baseline[11][2]),
    (sentiment_tau_value_sum, baseline[12][1], baseline[12][2])],
    
    ["Most premises", (most_premises_tau_value, baseline[9][1], baseline[9][1]),
    (most_premises_tau_value, baseline[9][1], baseline[9][1]),
    (most_premises_tau_value, baseline[9][1], baseline[9][1]),
    (most_premises_tau_value, baseline[9][1], baseline[9][1])],
    
    ["Random", (random_tau_value, baseline[0][1], baseline[0][2]), 
    (random_tau_value, baseline[0][1], baseline[0][2]),
    (random_tau_value, baseline[0][1], baseline[0][2]),
    (random_tau_value, baseline[0][1], baseline[0][2])]
])
headers = ["Approach", "Minimum\ntau, best, worst", "Average\ntau, best, worst", "Maximum\ntau, best, worst",
          "Sum\ntau, best, worst"]

# tabulate data
table = tabulate(paper_results, headers, tablefmt="simple")

# output
print(table)

Approach       Minimum             Average             Maximum             Sum
               tau, best, worst    tau, best, worst    tau, best, worst    tau, best, worst
-------------  ------------------  ------------------  ------------------  ------------------
PageRank       (0.01, 11, 9)       (0.02, 12, 9)       (0.11, 11, 7)       (0.28, 15, 5)
Frequency      (-0.1, 5, 11)       (-0.03, 8, 12)      (-0.01, 8, 11)      (0.1, 11, 10)
Similarity     (-0.13, 7, 13)      (-0.05, 8, 12)      (0.01, 9, 11)       (0.02, 9, 11)
Sentiment      (0.01, 8, 8)        (0.11, 12, 5)       (0.12, 10, 5)       (0.12, 12, 5)
Most premises  (0.19, 6, 6)        (0.19, 6, 6)        (0.19, 6, 6)        (0.19, 6, 6)
Random         (0.0, 7, 8)         (0.0, 7, 8)         (0.0, 7, 8)         (0.0, 7, 8)


In [7]:
print("PageRank")
print("Min aggreagation: ", paper_page_rank_tau_value_min, "Best: ", page_rank_approach[3][1], "Worst: ", page_rank_approach[3][2]) 
print("Max aggreagation: ", paper_page_rank_tau_value_max,  "Best: ", page_rank_approach[2][1], "Worst: ", page_rank_approach[2][2])
print("Average aggreagation: ", paper_page_rank_tau_value_avg,  "Best: ", page_rank_approach[0][1], "Worst: ", page_rank_approach[0][2])
print("Sum aggreagation: ", paper_page_rank_tau_value_sum,  "Best: ", page_rank_approach[1][1], "Worst: ", page_rank_approach[1][2])
print("\n")
print("Frequency")
print("Min aggreagation: ", frequency_tau_value_min,  "Best: ", baseline[1][1], "Worst: ", baseline[1][2])
print("Max aggreagation: ", frequency_tau_value_max,  "Best: ", baseline[2][1], "Worst: ", baseline[2][2])
print("Average aggreagation: ", frequency_tau_value_avg,  "Best: ", baseline[4][1], "Worst: ", baseline[4][2])
print("Sum aggreagation: ", frequency_tau_value_sum,  "Best: ", baseline[3][1], "Worst: ", baseline[3][2])
print("\n")
print("Similarity with Jacard Similarity")
print("Min aggreagation: ", jaccard_tau_value_min, baseline[8][1], "Worst: ", baseline[8][2])
print("Max aggreagation: ", jaccard_tau_value_max, baseline[7][1], "Worst: ", baseline[7][2])
print("Average aggreagation: ", jaccard_tau_value_avg, baseline[5][1], "Worst: ", baseline[5][2])
print("Sum aggreagation: ", jaccard_tau_value_sum, baseline[6][1], "Worst: ", baseline[6][2])
print("\n")
print("Sentiment with SentiWordNet")
print("Min aggreagation: ", sentiment_tau_value_min, baseline[13][1], "Worst: ", baseline[13][2])
print("Max aggreagation: ", sentiment_tau_value_max, baseline[11][1], "Worst: ", baseline[11][2])
print("Average aggreagation: ", sentiment_tau_value_avg, baseline[10][1], "Worst: ", baseline[10][2])
print("Sum aggreagation: ", sentiment_tau_value_sum, baseline[12][1], "Worst: ", baseline[12][2])

PageRank
Min aggreagation:  0.01 Best:  11 Worst:  9
Max aggreagation:  0.11 Best:  11 Worst:  7
Average aggreagation:  0.02 Best:  12 Worst:  9
Sum aggreagation:  0.28 Best:  15 Worst:  5


Frequency
Min aggreagation:  -0.1 Best:  5 Worst:  11
Max aggreagation:  -0.01 Best:  8 Worst:  11
Average aggreagation:  -0.03 Best:  8 Worst:  12
Sum aggreagation:  0.1 Best:  11 Worst:  10


Similarity with Jacard Similarity
Min aggreagation:  -0.13 7 Worst:  13
Max aggreagation:  0.01 9 Worst:  11
Average aggreagation:  -0.05 8 Worst:  12
Sum aggreagation:  0.02 9 Worst:  11


Sentiment with SentiWordNet
Min aggreagation:  0.01 8 Worst:  8
Max aggreagation:  0.12 10 Worst:  5
Average aggreagation:  0.11 12 Worst:  5
Sum aggreagation:  0.12 12 Worst:  5


## Compute PageRank with different implementations

In [8]:
page_rank_alpha_results = {}
# Calculate pagerank with different alpha values and different implementations
for alpha in np.arange(0, 1.2, 0.2):
    original_page_rank = OriginalPageRank(path='./pagerank/data',alpha=round(alpha, 1), epochs=1, suffix='_fixed_epochs', path_argument_unit_list = PATH_ARGUMENT_UNIT_LIST, path_ground_truth = PATH_GROUND_TRUTH_JSON, path_node_mapping = PATH_NODE_MAPPING_JSON)
    pagerank = PaperPageRank(path_argument_unit_list=PATH_ARGUMENT_UNIT_LIST)
    networkx_page_rank = NetworkXPageRank(nx.pagerank, alpha=alpha, path_argument_unit_list = PATH_ARGUMENT_UNIT_LIST,
                 path_ground_truth = PATH_GROUND_TRUTH_JSON)   
    networkx_page_rank_scipy = NetworkXPageRank(nx.pagerank_scipy, alpha=alpha, path_argument_unit_list = PATH_ARGUMENT_UNIT_LIST,
                 path_ground_truth = PATH_GROUND_TRUTH_JSON)   
    implementations = {
        "networkx_pagerank": networkx_page_rank.calculate_original_page_rank,
        "networkx_pagerank_scipy": networkx_page_rank_scipy.calculate_original_page_rank,
        "original": original_page_rank.calculate_original_page_rank,
        "paperPageRank": pagerank.calculate_paper_page_rank
    }

    page_rank_results = {}

    for implementation_name in implementations.keys():
        implementation = implementations[implementation_name]
        page_rank_min_tau_value, page_rank_min_tau_dict = calculate_kendall_correlation(implementation,
                                                                              min_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
        page_rank_avg_tau_value, page_rank_avg_tau_dict = calculate_kendall_correlation(implementation,
                                                                              avg_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
        page_rank_max_tau_value, page_rank_max_tau_dict = calculate_kendall_correlation(implementation,
                                                                              max_aggregation, path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
        page_rank_sum_tau_value, page_rank_sum_tau_dict = calculate_kendall_correlation(implementation,
                                                                              sum_aggregation ,path_ground_truth_list = PATH_GROUND_TRUTH_LIST, path_argument_list = PATH_ARGUMENT_LIST)
        
        result_dict = {
            "min": {"tau": page_rank_min_tau_value,
                   "dict": page_rank_min_tau_dict},
            "max": {"tau": page_rank_min_tau_value,
                   "dict": page_rank_max_tau_dict},
            "avg": {"tau": page_rank_avg_tau_value,
                   "dict": page_rank_avg_tau_dict},
            "sum": {"tau": page_rank_sum_tau_value,
               "dict": page_rank_sum_tau_dict}
        }
        page_rank_results[implementation_name] = result_dict
    page_rank_alpha_results[str(alpha)] = page_rank_results

In [9]:
page_rank_alpha_results

{'0.0': {'networkx_pagerank': {'min': {'tau': 0.01,
    'dict': {23: -0.8366600265340756,
     162: 0.816496580927726,
     215: 1.0,
     249: -0.816496580927726,
     254: -0.6666666666666669,
     401: 0.816496580927726,
     435: 0.33333333333333337,
     586: 1.0,
     687: 0.0,
     947: 0.31622776601683794,
     188: 0.5976143046671968,
     2522: -1.0,
     2546: -0.49999999999999994,
     2617: -0.816496580927726,
     2656: 1.0,
     3786: 0.0,
     3788: 0.0,
     3809: 0.0,
     4087: -0.2335496832484569,
     4249: 0.0,
     4334: 0.0,
     4617: 0.0,
     4604: 0.0,
     8370: -0.816496580927726,
     8549: 0.0,
     25530: 0.0,
     26224: -0.7071067811865477,
     26860: -0.8164965809277261,
     27319: 0.5,
     27339: 0.0,
     28309: 0.0,
     28527: 1.0}},
   'max': {'tau': 0.01,
    'dict': {23: -0.8366600265340756,
     162: 0.816496580927726,
     215: 1.0,
     249: -0.816496580927726,
     254: -0.6666666666666669,
     401: 0.816496580927726,
     435: 0.33333

## Compute Similarity with different methods without puncuation

In [10]:
# Load embeddings
elmo_with_punctuation = SemanticSimilarity(path_to_embedding = PATH_GENERATED_EMBEDDINGS_DATA,
                                           file_name="elmo_embeddings_with_punctuation.json")
elmo_without_punctuation = SemanticSimilarity(path_to_embedding = PATH_GENERATED_EMBEDDINGS_DATA,
                                           file_name="elmo_embeddings_without_punctuation.json")

bert_with_punctuation = SemanticSimilarity(path_to_embedding = PATH_GENERATED_EMBEDDINGS_DATA,
                                           file_name="bert_embeddings_with_punctuation.json")
bert_without_punctuation = SemanticSimilarity(path_to_embedding = PATH_GENERATED_EMBEDDINGS_DATA,
                                           file_name="bert_embeddings_without_punctuation.json")

glove_with_punctuation = GloveSemanticSimilarity(remove_punctuation=False)
glove_without_punctuation = GloveSemanticSimilarity(remove_punctuation=True)

similarity_embeddings = {
    "glove_with_punctuation": glove_with_punctuation.calculate_similarity,
    "glove_without_punctuation": glove_without_punctuation.calculate_similarity,
    "elmo_with_punctuation": elmo_with_punctuation.calculate_similarity,
    "elmo_without_punctuation": elmo_without_punctuation.calculate_similarity ,
    "bert_with_punctuation": bert_with_punctuation.calculate_similarity,
    "bert_without_punctuation": bert_without_punctuation.calculate_similarity,
    "wordnet_average" : wordnet_knowledge_similarity_averaged,
    "wordnet": wordnet_knowledge_similarity}

similarity_methods_results = {}

for embedding_name in similarity_embeddings.keys():
    embedding_with_method = similarity_embeddings[embedding_name]
    similarity_min_tau_value, similarity_min_tau_dict = calculate_kendall_correlation(embedding_with_method,
                                                                              min_aggregation, 
                                                                            path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                                path_argument_list=PATH_ARGUMENT_LIST)
    similarity_avg_tau_value, similarity_avg_tau_dict = calculate_kendall_correlation(embedding_with_method,
                                                                              avg_aggregation,
                                                                        path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    similarity_max_tau_value, similarity_max_tau_dict = calculate_kendall_correlation(embedding_with_method,
                                                                              max_aggregation,
                                                                         path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    similarity_sum_tau_value, similarity_sum_tau_dict = calculate_kendall_correlation(embedding_with_method,
                                                                              sum_aggregation,
                                                                    path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    result_dict = {
        "min": {"tau": similarity_min_tau_value,
               "dict": similarity_min_tau_dict},
        "max": {"tau": similarity_max_tau_value,
               "dict": similarity_max_tau_dict},
        "avg": {"tau": similarity_avg_tau_value,
               "dict": similarity_avg_tau_dict},
        "sum": {"tau": similarity_sum_tau_value,
               "dict": similarity_sum_tau_dict}
    }
    similarity_methods_results[embedding_name] = result_dict
       


In [11]:
similarity_methods_results

{'glove_with_punctuation': {'min': {'tau': 0.22,
   'dict': {23: 0.9999999999999999,
    162: 1.0,
    215: 1.0,
    249: 0.33333333333333337,
    254: 0.0,
    401: -0.33333333333333337,
    435: 0.33333333333333337,
    586: 1.0,
    687: 1.0,
    947: 0.0,
    188: 0.19999999999999998,
    2522: -1.0,
    2546: 0.816496580927726,
    2617: -0.33333333333333337,
    2656: 1.0,
    3786: 0.0,
    3788: 0.6,
    3809: 0.0,
    4087: -0.2,
    4249: 1.0,
    4334: -0.3333333333333334,
    4617: 1.0,
    4604: 0.6666666666666669,
    8370: -1.0,
    8549: 1.0,
    25530: -1.0,
    26224: -0.3333333333333334,
    26860: 0.9128709291752769,
    27319: -0.5270462766947299,
    27339: -0.9128709291752769,
    28309: -1.0,
    28527: 1.0}},
  'max': {'tau': 0.39,
   'dict': {23: 0.9999999999999999,
    162: 1.0,
    215: 1.0,
    249: 0.33333333333333337,
    254: 0.0,
    401: -0.33333333333333337,
    435: 0.33333333333333337,
    586: 1.0,
    687: 1.0,
    947: 0.0,
    188: 0.19999999999

## Compute Sentiment with different methods 

In [12]:
# Load sentimens methods
neural_network = NeuralNetworkSentiment(model_path=PATH_MODEL)
sentiment_methods = {
    "neural_net": neural_network.predict_sentiment}

sentiment_methods_results = {}

for method_name in sentiment_methods.keys():
    method = sentiment_methods[method_name]
    sentiment_min_tau_value, sentiment_min_tau_dict = calculate_kendall_correlation(method,
                                                                              min_aggregation,
                                                                        path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    sentiment_avg_tau_value, sentiment_avg_tau_dict = calculate_kendall_correlation(method,
                                                                              avg_aggregation,
                                                                        path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    sentiment_max_tau_value, sentiment_max_tau_dict = calculate_kendall_correlation(method,
                                                                              max_aggregation,
                                                                        path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    sentiment_sum_tau_value, sentiment_sum_tau_dict = calculate_kendall_correlation(method,
                                                                              sum_aggregation,
                                                                        path_ground_truth_list=PATH_GROUND_TRUTH_LIST,
                                                                        path_argument_list=PATH_ARGUMENT_LIST)
    result_dict = {
        "min": {"tau": sentiment_min_tau_value,
               "dict": sentiment_min_tau_dict},
        "max": {"tau": sentiment_max_tau_value,
               "dict": sentiment_max_tau_dict},
        "avg": {"tau": sentiment_avg_tau_value,
               "dict": sentiment_avg_tau_dict},
        "sum": {"tau": sentiment_sum_tau_value,
               "dict": sentiment_sum_tau_dict}
    }
    sentiment_methods_results[method_name] = result_dict



downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:25<00:00, 3.26MB/s]
.vector_cache/glove.6B.zip: 862MB [06:44, 2.13MB/s]                               
100%|█████████▉| 399999/400000 [00:38<00:00, 10399.48it/s]


In [13]:
baselines = [[jaccard_tau_dict_avg, 0, 0], 
             [jaccard_tau_dict_sum, 0, 0], 
             [jaccard_tau_dict_max, 0, 0],
             [jaccard_tau_dict_min, 0, 0],
             [most_premises_tau_dict, 0, 0], #4
             [sentiment_tau_dict_avg, 0, 0],
              [sentiment_tau_dict_max, 0, 0],
              [sentiment_tau_dict_sum, 0, 0], 
             [sentiment_tau_dict_min, 0, 0],
              [sentiment_methods_results['neural_net']['min']['dict'], 0,0], #9
              [sentiment_methods_results['neural_net']['max']['dict'], 0,0],
              [sentiment_methods_results['neural_net']['avg']['dict'], 0,0],
              [sentiment_methods_results['neural_net']['sum']['dict'], 0,0],
              [similarity_methods_results['glove_with_punctuation']['min']['dict'], 0 ,0], #13
              [similarity_methods_results['glove_with_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['glove_with_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['glove_with_punctuation']['sum']['dict'], 0 ,0],
              [similarity_methods_results['glove_without_punctuation']['min']['dict'], 0 ,0], #17
              [similarity_methods_results['glove_without_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['glove_without_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['glove_without_punctuation']['sum']['dict'], 0 ,0],
              [similarity_methods_results['elmo_with_punctuation']['min']['dict'], 0 ,0], #21
              [similarity_methods_results['elmo_with_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['elmo_with_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['elmo_with_punctuation']['sum']['dict'], 0 ,0],
              [similarity_methods_results['elmo_without_punctuation']['min']['dict'], 0 ,0], #25
              [similarity_methods_results['elmo_without_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['elmo_without_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['elmo_without_punctuation']['sum']['dict'], 0 ,0],
              [similarity_methods_results['bert_with_punctuation']['min']['dict'], 0 ,0], #29
              [similarity_methods_results['bert_with_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['bert_with_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['bert_with_punctuation']['sum']['dict'], 0 ,0],
              [similarity_methods_results['bert_without_punctuation']['min']['dict'], 0 ,0], #33
              [similarity_methods_results['bert_without_punctuation']['max']['dict'], 0 ,0],
              [similarity_methods_results['bert_without_punctuation']['avg']['dict'], 0 ,0],
              [similarity_methods_results['bert_without_punctuation']['sum']['dict'], 0 ,0],
              [frequency_tau_dict_min,0,0], #37
              [frequency_tau_dict_max,0,0],
              [frequency_tau_dict_avg, 0,0],
              [frequency_tau_dict_sum, 0,0],
              [random_tau_dict, 0,0],
              [similarity_methods_results['wordnet']['min']['dict'], 0 ,0], #41
              [similarity_methods_results['wordnet']['max']['dict'], 0 ,0],
              [similarity_methods_results['wordnet']['avg']['dict'], 0 ,0],
              [similarity_methods_results['wordnet']['sum']['dict'], 0 ,0],
              [similarity_methods_results['wordnet_average']['min']['dict'], 0 ,0], #45
              [similarity_methods_results['wordnet_average']['max']['dict'], 0 ,0],
              [similarity_methods_results['wordnet_average']['avg']['dict'], 0 ,0],
              [similarity_methods_results['wordnet_average']['sum']['dict'], 0 ,0]]

page_rank_approach = [[page_rank_alpha_results['0.0']["paperPageRank"]['min']['dict'], 0,0],
                     [page_rank_alpha_results['0.0']["paperPageRank"]['max']['dict'], 0,0],
                     [page_rank_alpha_results['0.0']["paperPageRank"]['avg']['dict'], 0,0],
                     [page_rank_alpha_results['0.0']["paperPageRank"]['sum']['dict'], 0,0]]

page_rank_approach, base_lines = compute_best_and_worst(page_rank_approach, baselines)

In [14]:
baselines

[[{23: 0.6,
   162: 0.33333333333333337,
   215: -1.0,
   249: 0.33333333333333337,
   254: 0.6666666666666669,
   401: -0.33333333333333337,
   435: 0.33333333333333337,
   586: -1.0,
   687: 0.0,
   947: -0.5270462766947298,
   188: 0.6,
   2522: 1.0,
   2546: 0.0,
   2617: -1.0,
   2656: -1.0,
   3786: -0.3333333333333334,
   3788: -0.31622776601683794,
   3809: 0.19999999999999998,
   4087: 0.3333333333333333,
   4249: 1.0,
   4334: 0.3333333333333334,
   4617: -1.0,
   4604: 0.2357022603955159,
   8370: -1.0,
   8549: -1.0,
   25530: -1.0,
   26224: 0.6666666666666669,
   26860: -0.3333333333333334,
   27319: -0.2222222222222222,
   27339: -0.18257418583505539,
   28309: 1.0,
   28527: 1.0},
  5,
  11],
 [{23: 0.6,
   162: 0.33333333333333337,
   215: -1.0,
   249: 0.33333333333333337,
   254: 0.6666666666666669,
   401: -0.33333333333333337,
   435: 0.33333333333333337,
   586: -1.0,
   687: 0.0,
   947: -0.5270462766947298,
   188: 0.6,
   2522: 1.0,
   2546: 0.0,
   2617: -1.0,

In [15]:
results = pd.DataFrame({
    'Approach':['PageRank','Frequency', 'Simialrity', 'Sentiment', 'Most premises', 'Random', 'Neural Network', 
                'Glove with punct.', 'Glove without punct.', 'ELMo with punct.', 'ElMo without punct.', 
                'BERT with punct.', 'BERT without punct.', 'Wordnet', 'Wordnet avg.'],
    'Minimum (tau, best, worst)': [(paper_page_rank_tau_value_min, page_rank_approach[0][1], page_rank_approach[0][2]),
                                   (frequency_tau_value_min, baselines[37][1], baselines[37][2]),
                                   (jaccard_tau_value_min, baselines[3][1], baselines[3][2]),
                                   (sentiment_tau_value_min,baselines[8][1], baselines[8][2]),
                                   (most_premises_tau_value, baselines[4][1], baselines[4][1]),
                                   (random_tau_value, baselines[41][1], baselines[41][2]),
                                   (sentiment_methods_results['neural_net']['min']['tau'], baselines[9][1], baselines[9][2]),
                                   (similarity_methods_results['glove_with_punctuation']['min']['tau'],
                                    baselines[13][1], baselines[13][2]),
                                   (similarity_methods_results['glove_without_punctuation']['min']['tau'],
                                    baselines[17][1], baselines[17][2]),
                                   (similarity_methods_results['elmo_with_punctuation']['min']['tau'],
                                    baselines[21][1], baselines[21][2]),
                                    (similarity_methods_results['elmo_without_punctuation']['min']['tau'],
                                    baselines[25][1], baselines[25][2]),
                                   (similarity_methods_results['bert_with_punctuation']['min']['tau'],
                                    baselines[29][1], baselines[29][2]),
                                   (similarity_methods_results['bert_without_punctuation']['min']['tau'],
                                    baselines[33][1], baselines[33][2]),
                                   (similarity_methods_results['wordnet']['min']['tau'],
                                    baselines[41][1], baselines[41][2]),
                                   (similarity_methods_results['wordnet_average']['min']['tau'],
                                    baselines[45][1], baselines[45][2])
                                   ],
    'Average (tau, best, worst)': [(paper_page_rank_tau_value_avg, page_rank_approach[2][1], page_rank_approach[2][2]),
                                   (frequency_tau_value_avg, baselines[39][1], baselines[39][2]),
                                   (jaccard_tau_value_avg, baselines[0][1], baselines[0][2]),
                                   (sentiment_tau_value_avg,baselines[5][1], baselines[5][2]),
                                   (most_premises_tau_value, baselines[4][1], baselines[4][1]),
                                   (random_tau_value, baselines[41][1], baselines[41][2]),
                                   (sentiment_methods_results['neural_net']['avg']['tau'],
                                    baselines[11][1], baselines[11][2]),
                                   (similarity_methods_results['glove_with_punctuation']['avg']['tau'],
                                    baselines[15][1], baselines[15][2]),
                                   (similarity_methods_results['glove_without_punctuation']['avg']['tau'],
                                    baselines[19][1], baselines[19][2]),
                                   (similarity_methods_results['elmo_with_punctuation']['avg']['tau'],
                                    baselines[23][1], baselines[23][2]),
                                    (similarity_methods_results['elmo_without_punctuation']['avg']['tau'],
                                    baselines[27][1], baselines[27][2]),
                                   (similarity_methods_results['bert_with_punctuation']['avg']['tau'],
                                    baselines[31][1], baselines[31][2]),
                                   (similarity_methods_results['bert_without_punctuation']['avg']['tau'],
                                    baselines[35][1], baselines[35][2]),
                                   (similarity_methods_results['wordnet']['avg']['tau'],
                                    baselines[43][1], baselines[43][2]),
                                   (similarity_methods_results['wordnet_average']['avg']['tau'],
                                    baselines[47][1], baselines[47][2])
                                   ],
    'Maximum (tau, best, worst)': [(paper_page_rank_tau_value_max, page_rank_approach[1][1], page_rank_approach[1][2]),
                                   (frequency_tau_value_max, baselines[38][1], baselines[38][2]),
                                   (jaccard_tau_value_max, baselines[2][1], baselines[2][2]),
                                   (sentiment_tau_value_max,baselines[6][1], baselines[6][2]),
                                   (most_premises_tau_value, baselines[4][1], baselines[4][1]),
                                   (random_tau_value, baselines[41][1], baselines[41][2]),
                                   (sentiment_methods_results['neural_net']['max']['tau'],
                                    baselines[10][1], baselines[10][2]),
                                   (similarity_methods_results['glove_with_punctuation']['max']['tau'],
                                    baselines[14][1], baselines[14][2]),
                                   (similarity_methods_results['glove_without_punctuation']['max']['tau'],
                                    baselines[18][1], baselines[18][2]),
                                   (similarity_methods_results['elmo_with_punctuation']['max']['tau'],
                                    baselines[22][1], baselines[22][2]),
                                    (similarity_methods_results['elmo_without_punctuation']['max']['tau'],
                                    baselines[26][1], baselines[26][2]),
                                   (similarity_methods_results['bert_with_punctuation']['max']['tau'],
                                    baselines[30][1], baselines[30][2]),
                                   (similarity_methods_results['bert_without_punctuation']['max']['tau'],
                                    baselines[34][1], baselines[34][2]),
                                   (similarity_methods_results['wordnet']['max']['tau'],
                                    baselines[42][1], baselines[42][2]),
                                   (similarity_methods_results['wordnet_average']['max']['tau'],
                                    baselines[46][1], baselines[46][2])
                                   ],
    'Sum (tau, best, worst)': [(paper_page_rank_tau_value_sum, page_rank_approach[3][1], page_rank_approach[3][2]),
                                   (frequency_tau_value_sum, baselines[40][1], baselines[40][2]),
                                   (jaccard_tau_value_sum, baselines[1][1], baselines[1][2]),
                                   (sentiment_tau_value_sum,baselines[7][1], baselines[7][2]),
                                   (most_premises_tau_value, baselines[4][1], baselines[4][1]),
                                   (random_tau_value, baselines[41][1], baselines[41][2]),
                                   (sentiment_methods_results['neural_net']['sum']['tau'], 
                                    baselines[12][1], baselines[12][2]),
                                   (similarity_methods_results['glove_with_punctuation']['sum']['tau'],
                                    baselines[16][1], baselines[16][2]),
                                   (similarity_methods_results['glove_without_punctuation']['sum']['tau'],
                                    baselines[20][1], baselines[20][2]),
                                   (similarity_methods_results['elmo_with_punctuation']['sum']['tau'],
                                    baselines[24][1], baselines[24][2]),
                                    (similarity_methods_results['elmo_without_punctuation']['sum']['tau'],
                                    baselines[28][1], baselines[28][2]),
                                   (similarity_methods_results['bert_with_punctuation']['sum']['tau'],
                                    baselines[32][1], baselines[32][2]),
                                   (similarity_methods_results['bert_without_punctuation']['sum']['tau'],
                                    baselines[36][1], baselines[36][2]),
                                   (similarity_methods_results['wordnet']['sum']['tau'],
                                    baselines[44][1], baselines[44][2]),
                                   (similarity_methods_results['wordnet_average']['sum']['tau'],
                                    baselines[48][1], baselines[48][2])
                                   ],
})


In [16]:
results


Unnamed: 0,Approach,"Minimum (tau, best, worst)","Average (tau, best, worst)","Maximum (tau, best, worst)","Sum (tau, best, worst)"
0,PageRank,"(0.01, 8, 6)","(0.02, 9, 7)","(0.11, 8, 6)","(0.28, 11, 5)"
1,Frequency,"(-0.1, 2, 8)","(-0.03, 3, 9)","(-0.01, 2, 8)","(0.1, 6, 8)"
2,Simialrity,"(-0.13, 4, 11)","(-0.05, 5, 11)","(0.01, 6, 10)","(0.02, 6, 10)"
3,Sentiment,"(0.01, 6, 7)","(0.11, 9, 4)","(0.12, 6, 4)","(0.12, 9, 4)"
4,Most premises,"(0.19, 3, 3)","(0.19, 3, 3)","(0.19, 3, 3)","(0.19, 3, 3)"
5,Random,"(0.0, 5, 7)","(0.0, 5, 7)","(0.0, 5, 7)","(0.0, 5, 7)"
6,Neural Network,"(0.12, 10, 6)","(0.24, 11, 5)","(0.31, 12, 5)","(0.3, 13, 5)"
7,Glove with punct.,"(0.22, 12, 5)","(0.28, 13, 3)","(0.39, 14, 2)","(0.47, 16, 1)"
8,Glove without punct.,"(-0.06, 5, 9)","(0.0, 6, 7)","(0.14, 8, 6)","(0.2, 8, 4)"
9,ELMo with punct.,"(0.03, 6, 9)","(0.08, 7, 8)","(0.11, 8, 8)","(0.28, 9, 5)"


In [17]:
results.to_latex()

'\\begin{tabular}{llllll}\n\\toprule\n{} &              Approach & Minimum (tau, best, worst) & Average (tau, best, worst) & Maximum (tau, best, worst) & Sum (tau, best, worst) \\\\\n\\midrule\n0  &              PageRank &               (0.01, 8, 6) &               (0.02, 9, 7) &               (0.11, 8, 6) &          (0.28, 11, 5) \\\\\n1  &             Frequency &               (-0.1, 2, 8) &              (-0.03, 3, 9) &              (-0.01, 2, 8) &            (0.1, 6, 8) \\\\\n2  &            Simialrity &             (-0.13, 4, 11) &             (-0.05, 5, 11) &              (0.01, 6, 10) &          (0.02, 6, 10) \\\\\n3  &             Sentiment &               (0.01, 6, 7) &               (0.11, 9, 4) &               (0.12, 6, 4) &           (0.12, 9, 4) \\\\\n4  &         Most premises &               (0.19, 3, 3) &               (0.19, 3, 3) &               (0.19, 3, 3) &           (0.19, 3, 3) \\\\\n5  &                Random &                (0.0, 5, 7) &                (0.0, 5,

In [18]:
print("PageRank")
print("Minimum Best: ", page_rank_approach[0][1], "Worst: ", page_rank_approach[0][2])
print("Average Best: ", page_rank_approach[2][1], "Worst: ", page_rank_approach[2][2])
print("Maximum Best: ", page_rank_approach[1][1], "Worst: ", page_rank_approach[1][2])
print("Sum Best: ", page_rank_approach[3][1], "Worst: ", page_rank_approach[3][2])
print("\n")

print("Frequency")
print("Minimum Best: ", baselines[8][1], "Worst: ", baselines[8][2])
print("Average Best: ", baselines[10][1], "Worst: ", baselines[10][2])
print("Maximum Best: ", baselines[9][1], "Worst: ", baselines[9][2])
print("Sum Best: ", baselines[11][1], "Worst: ", baselines[11][2])
print("\n")

print("Similarity")
print("Minimum Best: ", baselines[4][1], "Worst: ", baselines[4][2])
print("Average Best: ", baselines[6][1], "Worst: ", baselines[6][2])
print("Maximum Best: ", baselines[5][1], "Worst: ", baselines[5][2])
print("Sum Best: ", baselines[7][1], "Worst: ", baselines[7][2])
print("\n")

print("Sentiment")
print("Minimum Best: ", baselines[0][1], "Worst: ", baselines[0][2])
print("Average Best: ", baselines[2][1], "Worst: ", baselines[2][2])
print("Maximum Best: ", baselines[1][1], "Worst: ", baselines[1][2])
print("Sum Best: ", baselines[3][1], "Worst: ", baselines[3][2])
print("\n")

print("Most premises")
print("Minimum Best: ", baselines[13][1], "Worst: ", baselines[13][2])
print("\n")

print("Random")
print("Minimum Best: ", baselines[12][1], "Worst: ", baselines[12][2])

PageRank
Minimum Best:  8 Worst:  6
Average Best:  9 Worst:  7
Maximum Best:  8 Worst:  6
Sum Best:  11 Worst:  5


Frequency
Minimum Best:  6 Worst:  7
Average Best:  12 Worst:  5
Maximum Best:  10 Worst:  6
Sum Best:  11 Worst:  5


Similarity
Minimum Best:  3 Worst:  1
Average Best:  6 Worst:  4
Maximum Best:  9 Worst:  4
Sum Best:  9 Worst:  4


Sentiment
Minimum Best:  5 Worst:  11
Average Best:  6 Worst:  10
Maximum Best:  6 Worst:  10
Sum Best:  4 Worst:  11


Most premises
Minimum Best:  12 Worst:  5


Random
Minimum Best:  13 Worst:  5


In [19]:
for row in results.iterrows():
    number = row[0]
    row = row[1]
    result_line = "\small " + str(number + 1) + " " + row['Approach'] + " "
    scores = []
    bests = []
    worsts = []
    for srow in row[1:]:
        score = srow[0]
        scores += [float(score)]
        best = srow[1]
        bests += [int(best)]
        worst = srow[2]
        worsts += [int(worst)]
        result_line += "& {:.2f} & {} & {} & ".format(score, best, worst)
    result_line += "& {:.2f} & {} & {} \\\\".format(max(scores), max(bests), min(worsts))
    print(result_line)


\small 1 PageRank & 0.01 & 8 & 6 & & 0.02 & 9 & 7 & & 0.11 & 8 & 6 & & 0.28 & 11 & 5 & & 0.28 & 11 & 5 \\
\small 2 Frequency & -0.10 & 2 & 8 & & -0.03 & 3 & 9 & & -0.01 & 2 & 8 & & 0.10 & 6 & 8 & & 0.10 & 6 & 8 \\
\small 3 Simialrity & -0.13 & 4 & 11 & & -0.05 & 5 & 11 & & 0.01 & 6 & 10 & & 0.02 & 6 & 10 & & 0.02 & 6 & 10 \\
\small 4 Sentiment & 0.01 & 6 & 7 & & 0.11 & 9 & 4 & & 0.12 & 6 & 4 & & 0.12 & 9 & 4 & & 0.12 & 9 & 4 \\
\small 5 Most premises & 0.19 & 3 & 3 & & 0.19 & 3 & 3 & & 0.19 & 3 & 3 & & 0.19 & 3 & 3 & & 0.19 & 3 & 3 \\
\small 6 Random & 0.00 & 5 & 7 & & 0.00 & 5 & 7 & & 0.00 & 5 & 7 & & 0.00 & 5 & 7 & & 0.00 & 5 & 7 \\
\small 7 Neural Network & 0.12 & 10 & 6 & & 0.24 & 11 & 5 & & 0.31 & 12 & 5 & & 0.30 & 13 & 5 & & 0.31 & 13 & 5 \\
\small 8 Glove with punct. & 0.22 & 12 & 5 & & 0.28 & 13 & 3 & & 0.39 & 14 & 2 & & 0.47 & 16 & 1 & & 0.47 & 16 & 1 \\
\small 9 Glove without punct. & -0.06 & 5 & 9 & & 0.00 & 6 & 7 & & 0.14 & 8 & 6 & & 0.20 & 8 & 4 & & 0.20 & 8 & 4 \\
\small 