# Visualizador

Esse notebook tem como objetivo mostrar alguns exemplos de textos previamente sumarizados no notebook Sumarizador.

### Imports relevantes

In [2]:
import json
import pandas as pd
import numpy as np

### Inputs do usuário

In [3]:
# Model names to inspect. Each name is interpolated into the result file names
# read by load_summary_result (outputs/summary_{name}_cl.json,
# outputs/summary_{name}_pr.json and outputs/rouge_{name}.json).
models = ["tfidf", "cbow", "doc2vec"]

### Define função para carregar dados

In [4]:
def load_summary_result(model_name, cluster=True, pagerank=True, rouge=True):
    """Load the saved summaries and ROUGE scores of one model.

    Parameters
    ----------
    model_name : str
        Name interpolated into the file names read from ``outputs/``.
    cluster : bool, default True
        Whether to read ``outputs/summary_{model_name}_cl.json``.
    pagerank : bool, default True
        Whether to read ``outputs/summary_{model_name}_pr.json``.
    rouge : bool, default True
        Whether to read ``outputs/rouge_{model_name}.json``.

    Returns
    -------
    tuple
        ``(ids, summary_cl, summary_pr, rouge_results)``.

        ``ids`` is the set of top-level keys of the first artifact that was
        requested, checked in the order cluster, PageRank, ROUGE. With the
        default flags this is the key set of the cluster summaries.

        An artifact whose flag is false is returned as an empty dict, so the
        tuple always has the same shape.

    Raises
    ------
    FileNotFoundError
        If a requested file does not exist.
    json.JSONDecodeError
        If a requested file is not valid JSON.
    """

    def _read_json(path):
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(path, "r", encoding="utf-8") as file:
            return json.load(file)

    # A disabled artifact becomes an empty dict so the return statement never
    # touches an unassigned local.
    summary_cl = _read_json(f"outputs/summary_{model_name}_cl.json") if cluster else {}
    summary_pr = _read_json(f"outputs/summary_{model_name}_pr.json") if pagerank else {}
    rouge_results = _read_json(f"outputs/rouge_{model_name}.json") if rouge else {}

    # Take the ids from the first artifact that was actually loaded.
    if cluster:
        id_source = summary_cl
    elif pagerank:
        id_source = summary_pr
    else:
        id_source = rouge_results

    return set(id_source.keys()), summary_cl, summary_pr, rouge_results

def load_data():
    """Load the stored sentence data from the ``storage/`` directory.

    Returns
    -------
    tuple
        ``(all_sents, sents_reference, orig_sents, highlights)`` where

        * ``all_sents`` is built from ``storage/all_sents.txt``: one entry per
          line, each entry being that line split on single spaces. A trailing
          newline in the file therefore yields a final ``[""]`` entry.
        * ``sents_reference``, ``orig_sents`` and ``highlights`` are the parsed
          contents of the JSON files of the same name.

    Raises
    ------
    FileNotFoundError
        If any of the four files does not exist.
    json.JSONDecodeError
        If one of the JSON files is not valid JSON.
    """

    with open('storage/all_sents.txt', 'r', encoding='utf8') as file:
        all_sents = [sent.split(" ") for sent in file.read().split("\n")]

    # The JSON files are read with the same explicit UTF-8 encoding as the text
    # file above, so decoding does not depend on the platform default.
    with open("storage/sents_reference.json", "r", encoding="utf8") as file:
        sents_reference = json.load(file)

    with open("storage/orig_sents.json", "r", encoding="utf8") as file:
        orig_sents = json.load(file)

    with open("storage/highlights.json", "r", encoding="utf8") as file:
        highlights = json.load(file)

    return all_sents, sents_reference, orig_sents, highlights

### Coleta os dados

In [5]:
# Per-model results, keyed by model name.
available_ids = set()   # union of the text ids seen across all models
summary_cls = {}
summary_prs = {}
rouge_results = {}

# Ids present for every model; None until the first model has been loaded.
common_ids = None
for model in models:
    model_ids, summary_cl, summary_pr, rouge_result = load_summary_result(model)

    available_ids.update(model_ids)
    common_ids = set(model_ids) if common_ids is None else common_ids & model_ids

    summary_cls[model] = summary_cl
    summary_prs[model] = summary_pr
    rouge_results[model] = rouge_result

# The display cell indexes every model with the same text id, so only ids shared
# by all models are offered. Previously this list held just the last model's
# ids. Sorting gives a stable order across kernel restarts (set order of strings
# is hash-randomized).
available_id = sorted(common_ids) if common_ids else []

In [6]:
# Load the stored sentence data. Note that the third value returned by
# load_data (named orig_sents inside that function) is bound here as orig_text.
all_sents, sents_reference, orig_text, highlights = load_data()

### Olha os textos e compara com o Highlight

In [8]:
# Draw one text id at random from the available ones.
random_position = np.random.randint(0, len(available_id))
text_id = available_id[random_position]

# Reference highlight first, followed by two blank lines.
print("Highlight:")
print(highlights[text_id])
print()
print()

# Then, for each model, both summaries preceded by their ROUGE-1 precision.
for model in models:

    text_scores = rouge_results[model][text_id]

    cluster_precision = text_scores["Cluster"]["rouge-1"]['p']
    print(f"{model}, Cluster, precision:", round(cluster_precision, 3))
    print(summary_cls[model][text_id])
    print()

    pagerank_precision = text_scores["PageRank"]["rouge-1"]['p']
    print(f"{model}, PageRank, precision:", round(pagerank_precision, 3))
    print(summary_prs[model][text_id])
    print()
    print()

Highlight:
 . Spread of the virus is entering an `` acceleration period , '' official says . Countries may see a doubling of cases every three to four days , he says . More than 1,490 people have died from the virus since last spring . H1N1 is the first influenza pandemic in more than 40 years


tfidf, Cluster, precision: 0.167
["BEIJING , China -LRB- CNN -RRB- -- The world will soon see an `` explosion '' of swine flu cases as the H1N1 virus spreads rapidly around the world , a top World Health Organization official said Friday", "'' Any widespread resistance to antiviral drugs , expected to be available this fall , could make the situation worse , he said", 'The public needs to comply with these health messages , and everyone needs to be able to recognize symptoms early and get timely medical care , he said']

tfidf, PageRank, precision: 0.422
['`` At a certain point , there will seem to be an explosion in case numbers', 'I believe it is very likely that all countries will see commun