### Imports relevantes

In [3]:
import json
import pandas as pd
import numpy as np

### Inputs do usuário

In [4]:
# Names of the models whose results are loaded and compared. Each name is
# interpolated into the result file names under outputs/ (for example
# outputs/summary_<name>_cl.json and outputs/rouge_<name>.json).
models = ["tfidf", "cbow", "doc2vec"]

### Define função para carregar dados

In [5]:
def _read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def load_summary_result(model_name, cluster=True, pagerank=True, rouge=True):
    """Load the stored summaries and ROUGE scores of one model.

    Parameters
    ----------
    model_name : str
        Name interpolated into the file names under ``outputs/``.
    cluster : bool, default True
        Read ``outputs/summary_{model_name}_cl.json``.
    pagerank : bool, default True
        Read ``outputs/summary_{model_name}_pr.json``.
    rouge : bool, default True
        Read ``outputs/rouge_{model_name}.json``.

    Returns
    -------
    tuple
        ``(ids, summary_cl, summary_pr, rouge_results)``. ``ids`` is the set
        of top-level keys of the cluster summaries, or of the PageRank
        summaries when ``cluster`` is False. Any artefact whose flag is
        False is returned as an empty dict.

    Raises
    ------
    FileNotFoundError
        If a requested file does not exist.
    json.JSONDecodeError
        If a requested file is not valid JSON.
    """
    # Every result gets a value even when its flag is off; previously a
    # disabled flag left the local unbound and the return statement raised
    # UnboundLocalError.
    summary_cl = _read_json(f"outputs/summary_{model_name}_cl.json") if cluster else {}
    summary_pr = _read_json(f"outputs/summary_{model_name}_pr.json") if pagerank else {}
    rouge_results = _read_json(f"outputs/rouge_{model_name}.json") if rouge else {}

    # The cluster summaries remain the source of the ids whenever they are
    # loaded, so the default call returns exactly what it always did.
    id_source = summary_cl if cluster else summary_pr

    return set(id_source.keys()), summary_cl, summary_pr, rouge_results

def load_data():
    """Load the sentence data and reference material stored under ``storage/``.

    Returns
    -------
    tuple
        ``(all_sents, sents_reference, orig_sents, highlights)`` where
        ``all_sents`` is a list with one entry per line of
        ``storage/all_sents.txt``, each entry being that line split on single
        spaces, and the other three values are the decoded contents of
        ``storage/sents_reference.json``, ``storage/orig_sents.json`` and
        ``storage/highlights.json``.

    Raises
    ------
    FileNotFoundError
        If any of the four files is missing.
    json.JSONDecodeError
        If one of the JSON files is not valid JSON.
    """
    with open('storage/all_sents.txt', 'r', encoding='utf8') as file:
        # NOTE: a trailing newline in the file produces a final [""] entry.
        all_sents = [sent.split(" ") for sent in file.read().split("\n")]

    # The JSON files are read as UTF-8 as well, matching the text file above,
    # so decoding does not depend on the platform's default encoding.
    with open("storage/sents_reference.json", "r", encoding="utf8") as file:
        sents_reference = json.load(file)

    with open("storage/orig_sents.json", "r", encoding="utf8") as file:
        orig_sents = json.load(file)

    with open("storage/highlights.json", "r", encoding="utf8") as file:
        highlights = json.load(file)

    return all_sents, sents_reference, orig_sents, highlights

### Coleta os dados

In [6]:
# Collect the cluster summaries, PageRank summaries and ROUGE results of every
# model, each stored in a dict keyed by model name.
available_ids = set()  # union of the ids reported by all models
summary_cls = {}
summary_prs = {}
rouge_results = {}
ids_per_model = []
for model in models:

    model_ids, summary_cl, summary_pr, rouge_result = load_summary_result(model)
    available_ids.update(model_ids)
    ids_per_model.append(set(model_ids))
    summary_cls[model] = summary_cl
    summary_prs[model] = summary_pr
    rouge_results[model] = rouge_result

# Later cells index every model's summaries with an id drawn from
# `available_id`, so keep only the ids that every model has. Previously the
# list was built from the loop variable, i.e. from the last model alone.
# The ids are JSON object keys (strings), so sorting gives a stable order
# instead of the arbitrary iteration order of a set.
available_id = sorted(set.intersection(*ids_per_model)) if ids_per_model else []

In [7]:
# Load the shared data files. Note that the third value, which load_data()
# returns under the name `orig_sents`, is bound here as `orig_text`.
all_sents, sents_reference, orig_text, highlights = load_data()

### Olha os textos

In [8]:
# Draw one id at random from `available_id`; np.random.randint excludes the
# upper bound, so the index is always valid for a non-empty list.
# NOTE(review): no random seed is set in the visible cells, so a different
# text may be selected on every run - confirm whether that is intended.
text_id = available_id[np.random.randint(0, len(available_id))]

In [11]:
# Show the reference highlight of the selected text, followed by two blank lines.
print("Highlight:")
print(highlights[text_id])
print("\n")

# For every model, print the ROUGE-1 precision and the summary of each
# summarisation method, with one blank line after each summary and an extra
# blank line after each model.
for model in models:

    model_scores = rouge_results[model][text_id]
    for method, summaries in (("Cluster", summary_cls), ("PageRank", summary_prs)):
        precision = model_scores[method]["rouge-1"]['p']
        print(f"{model}, {method}, precision:", round(precision, 3))
        print(summaries[model][text_id])
        print()
    print()

Highlight:
 . The G20 is being held in St Petersburg , but focus will be on tensions between Vladimir Putin and Barack Obama . Discussions are meant to be around economics , but will be overshadowed by Syria . Emerging markets have been suffering as demand for commodities drop , and currencies have collapsed . John Defterios writes the G20 has become an unwieldy group of countries with different priorities


tfidf, Cluster, precision: 0.171
['According to Brown , who I interviewed earlier this year , America `` should actually be more alert to the possibilities of international cooperation in both trade and agreements for growth', 'Since Federal Reserve Chairman Ben Bernanke uttered word of that change in strategy back in May , money has been flooding out of emerging markets', 'The tendency is for investors to make comparisons to the 1998/1999 Asian financial crisis , when those economies had wide current account deficits and a mountain of foreign currency government debt']

tfidf, Pag