In [1]:
# import pandas as pd
import csv
import numpy as np
import os
from os.path import join as pjoin
from glob import iglob

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
#!/usr/bin/env python
from __future__ import division

import argparse
import glob
import os
import random
import signal
import time

import torch

import distributed
from models import data_loader, model_builder
from models.data_loader import load_dataset
from models.model_builder import ExtSummarizer
from models.trainer_ext import build_trainer
from others.logging import logger, init_logger

model_flags = ['hidden_size', 'ff_size', 'heads', 'inter_layers', 'encoder', 'ff_actv', 'use_interval', 'rnn_size']


In [6]:
root_path = '/data/ksb/'
bert_root_path = pjoin(root_path, 'BertSum/PreSumm')
bert_model_dir = pjoin(bert_root_path, 'models')

data_dir = pjoin(root_path, 'cnn-dailymail/finished_files')

#### Loss function 비교  

*Trained Model parameter 필요*


In [34]:
def get_cos_similarity(inputs, summaries):
    tfidf_vectorizer = TfidfVectorizer()

    cos_similarity_list = []
    for input_, summary_ in zip(inputs, summaries):
        try:
            tfidf_matrix = tfidf_vectorizer.fit_transform([input_, summary_])

            similarity = cosine_similarity(tfidf_matrix[0] , tfidf_matrix[1])[0][0]
        except ValueError:
            similarity = 0.0
            
        cos_similarity_list.append(similarity)

    return cos_similarity_list

In [35]:
import jsonlines
import json

with open(pjoin(root_path, 'three-sample.jsonl'),'r',encoding='utf-8') as f:
    data = json.load(f)
 
    article = data["article"]
    candidate = data["candidates"]        
    abstract = data["abstract"]

In [44]:
article

["club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .",
 'the venezuelan icon arango sank his teeth into the shoulder of jesus zavela as his temper flared in the defeat .',
 'he was not booked by the referee but could face a heavy retrospective ban .',
 'juan arango ( left ) bites the shoulder of opponent jesus zavela in a moment of madness',
 "zavala holds his shoulder after being bitten by arango , in the game zavala 's side won 4-3 in mexico",
 'zavala shows the referee the mark on his shoulder after being bittern by arango',
 'arango ( right ) earlier scored a magnificent free kick to bring his club tijuana team level against monterrey',
 'arango had earlier curled in a magnificent free kick for his team to bring them level after falling 2-0 down early on in the encounter .',
 'but the 34-year-old overshadowed his goal with the bite as television cameras picked up the 

### Origin candidate set

In [36]:
candidate

[[["club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .",
   'he was not booked by the referee but could face a heavy retrospective ban .',
   'juan arango ( left ) bites the shoulder of opponent jesus zavela in a moment of madness'],
  0.40032206119162644],
 [["juan arango bites jesus zavela in a moment of madness in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
   'the venezuelan icon sank his teeth into the shoulder of the opponent as his temper flared in the defeat .',
   'he was not booked by the referee but could face a heavy retrospective ban .',
   'arango had earlier curled in a magnificent free kick for his team to bring them level after falling 2-0 down .'],
  0.40383502768823876],
 [["juan arango bites jesus zavela in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
   'the venezuelan icon sank his teeth into the shoulder of

In [37]:
doc_sim_list = [round(np.mean(get_cos_similarity(article, cand[0])),3) for cand in candidate]
ref_sim_list = [round(np.mean(get_cos_similarity(abstract, cand[0])),3) for cand in candidate]

print("Cosine similarity between document and summaries : {}".format(doc_sim_list))
print("Cosine similarity between reference and summaries : {}".format(ref_sim_list))

Cosine similarity between document and summaries : [0.375, 0.593, 0.711]
Cosine similarity between reference and summaries : [0.171, 0.11, 0.105]


In [38]:
from rouge import Rouge 
rouge = Rouge()

In [39]:
doc_rouge_list = [round(rouge.get_scores('\n'.join(cand[0]), '\n'.join(article))[0]['rouge-l']['f'],3) for cand in candidate]
ref_rouge_list = [round(rouge.get_scores('\n'.join(cand[0]), '\n'.join(abstract))[0]['rouge-l']['f'],3) for cand in candidate]
rouge_list = [round(cand[1],3) for cand in candidate]

print("Rouge score between document and summaries : {}".format(doc_rouge_list))
print("Rouge score between reference and summaries : {}".format(ref_rouge_list))
print("Rouge score between reference and summaries(written) : {}".format(rouge_list))


Rouge score between document and summaries : [0.571, 0.618, 0.453]
Rouge score between reference and summaries : [0.447, 0.512, 0.478]
Rouge score between reference and summaries(written) : [0.4, 0.404, 0.387]


In [40]:
from transformers import BertTokenizer

def bert_encode(x, max_len=-1):
    tok = BertTokenizer.from_pretrained('bert-base-uncased', verbose=False)
    cls_token_id = tok.cls_token_id
    sep_token_id = tok.sep_token_id

    _ids = tok.encode(x, add_special_tokens=False)
    ids = [cls_token_id] # [CLS]
    if max_len > 0:
        ids.extend(_ids[:max_len - 2])
    else:
        ids.extend(_ids[:512 - 2])
    ids.append(sep_token_id) # [SEP], meaning end of sentence
    return ids

In [41]:
def bert_decode(x):
    tok = BertTokenizer.from_pretrained('bert-base-uncased', verbose=False)

    result = tok.decode(x, skip_special_tokens=True)
    return result

In [51]:
n_gram = 3
scores = []
copy_summaries = [cand[0] for cand in candidate]

for i in range(len(copy_summaries)): # candidate summary set 개수
    for j in range(len(copy_summaries[i])): # sentence 개수
        ext_tok = copy_summaries[i][j].split()
        window = len(ext_tok) // n_gram
        
        sent_id = sum([len(prev) for prev in copy_summaries[:i]]) + j
        for w in range(window):
            s = rouge.get_scores(' '.join(ext_tok[w:w+n_gram]), '\n'.join(abstract))[0]['rouge-l']['f']

            score = (sent_id, w, ' '.join(ext_tok[w:w+n_gram]), s)
            scores.append(score)
dscored = sorted(scores,key=lambda x: x[-1], reverse=True)
dscored

[(0, 9, 'in his team', 0.19354838534859523),
 (3, 3, 'jesus zavela in', 0.19354838534859523),
 (3, 4, 'zavela in a', 0.19354838534859523),
 (6, 0, 'arango had earlier', 0.19354838534859523),
 (7, 3, 'jesus zavela in', 0.19354838534859523),
 (0, 2, 'star juan arango', 0.12903225631633716),
 (0, 3, 'juan arango conjured', 0.12903225631633716),
 (0, 8, 'suarez in his', 0.12903225631633716),
 (1, 4, 'by the referee', 0.12903225631633716),
 (2, 0, 'juan arango (', 0.12903225631633716),
 (3, 0, 'juan arango bites', 0.12903225631633716),
 (3, 1, 'arango bites jesus', 0.12903225631633716),
 (3, 2, 'bites jesus zavela', 0.12903225631633716),
 (3, 5, 'in a moment', 0.12903225631633716),
 (5, 4, 'by the referee', 0.12903225631633716),
 (6, 1, 'had earlier curled', 0.12903225631633716),
 (6, 2, 'earlier curled in', 0.12903225631633716),
 (6, 3, 'curled in a', 0.12903225631633716),
 (6, 4, 'in a magnificent', 0.12903225631633716),
 (6, 5, 'a magnificent free', 0.12903225631633716),
 (6, 6, 'magnifi

In [52]:
dscored = [sc for sc in dscored if sc[-1]==0.0]
dscored

[(0, 0, 'club tijuana star', 0.0),
 (0, 5, 'conjured memories luis', 0.0),
 (0, 6, 'memories luis suarez', 0.0),
 (1, 1, 'was not booked', 0.0),
 (1, 2, 'not booked by', 0.0),
 (2, 2, '( left )', 0.0),
 (2, 3, 'left ) bites', 0.0),
 (4, 1, 'venezuelan icon sank', 0.0),
 (5, 1, 'was not booked', 0.0),
 (5, 2, 'not booked by', 0.0),
 (8, 1, 'venezuelan icon sank', 0.0),
 (9, 1, 'was not booked', 0.0),
 (9, 2, 'not booked by', 0.0)]

In [53]:
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig

model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

In [54]:
encoded_cand_set = [[bert_encode(s, 180) for s in cs] for cs in copy_summaries]

In [107]:
def rewrite(cand_set, sent_id, window_id, tok, model, n_gram='trigram', tok_max_length=512, max_length=5):
        
    if n_gram =='trigram':
        n_gram = 3
    elif n_gram =='4-gram':
        n_gram = 4
    elif n_gram == '5-gram':
        n_gram = 5
    else:
        n_gram = None
    
    def _map_sent_id(cand_set):
        mapping_ = []
        for i, cand in enumerate(cand_set) :
            summ_ = []
            for j, s in enumerate(cand):
                idx = sum([len(prev) for prev in cand_set[:i]]) + j
                summ_.append((idx, s))
            
            mapping_.append(summ_)
            
        return mapping_
    
    def _get_sent_fromId(cand_set, idx):
        dscored = [(i, j, sent[1]) for i, cand in enumerate(cand_set) for j, sent in enumerate(cand) if sent[0]==idx]
        return dscored[0]
    
    mapped_cand_set = _map_sent_id(cand_set)
    set_idx, sent_idx, sent = _get_sent_fromId(mapped_cand_set, sent_id)
    
    sent_tok = sent.split()
    n_gram = n_gram if n_gram is not None else len(sent_tok)
    
    origin_phase_txt = ' '.join(sent_tok[window_id : window_id + n_gram])    
    print(origin_phase_txt)
    
    bart_tokenized = tok([origin_phase_txt], max_length=tok_max_length, return_tensors='pt')
    bart_summarized = model.generate(bart_tokenized['input_ids'], num_beams=4, max_length=max_length, early_stopping=True)
    result = [tok.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in bart_summarized]
    
    print(result)
        
    sent_tok[window_id:window_id + n_gram] = result
    cand_set[set_idx][sent_idx] = ' '.join(sent_tok)
    
    return cand_set

In [108]:
dscored

[(0, 0, 'club tijuana star', 0.0),
 (0, 5, 'conjured memories luis', 0.0),
 (0, 6, 'memories luis suarez', 0.0),
 (1, 1, 'was not booked', 0.0),
 (1, 2, 'not booked by', 0.0),
 (2, 2, '( left )', 0.0),
 (2, 3, 'left ) bites', 0.0),
 (4, 1, 'venezuelan icon sank', 0.0),
 (5, 1, 'was not booked', 0.0),
 (5, 2, 'not booked by', 0.0),
 (8, 1, 'venezuelan icon sank', 0.0),
 (9, 1, 'was not booked', 0.0),
 (9, 2, 'not booked by', 0.0)]

In [109]:
import copy

rewrited_summaries = copy.deepcopy(copy_summaries)

for sent_id, w, ph, _ in dscored:
    
    mapped_cand_set = map_sent_id(encoded_cand_set)
    tmp = [(i, j, sent[1]) for i, cand in enumerate(mapped_cand_set) 
                       for j, sent in enumerate(cand) if sent[0]==sent_id]
    _, _, sent = tmp[0]
    print(len(sent))
    rewrited_summaries = rewrite(cand_set=rewrited_summaries, sent_id=sent_id, \
                                 window_id=window_id, tok=tokenizer, model=model, n_gram='trigram',
                                tok_max_length=512, max_length=7)

42
club tijuana star
['club tijuana star']
42
club tijuana star
['club tijuana star']
42
club tijuana star
['club tijuana star']
17
he was not
['he was not']
17
he was not
['he was not']
22
juan arango (
['juan arango']
22
juan arango left
['juan arango']
22
the venezuelan icon
['the venezuelan']
17
he was not
['he was not']
17
he was not
['he was not']
22
the venezuelan icon
['the venezuelan']
17
he was not
['he was not']
17
he was not
['he was not']


In [110]:
rewrited_summaries

[["club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .",
  'he was not booked by the referee but could face a heavy retrospective ban .',
  'juan arango ) bites the shoulder of opponent jesus zavela in a moment of madness'],
 ["juan arango bites jesus zavela in a moment of madness in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan sank his teeth into the shoulder of the opponent as his temper flared in the defeat .',
  'he was not booked by the referee but could face a heavy retrospective ban .',
  'arango had earlier curled in a magnificent free kick for his team to bring them level after falling 2-0 down .'],
 ["juan arango bites jesus zavela in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan sank his teeth into the shoulder of jesus zavala in a moment of madness .',
  'he was not booked by the refer

### cosine similarity 하락

In [111]:
doc_sim_list = [round(np.mean(get_cos_similarity(article, cand)),3) for cand in rewrited_summaries]
ref_sim_list = [round(np.mean(get_cos_similarity(abstract, cand)),3) for cand in rewrited_summaries]

print("Cosine similarity between document and summaries : {}".format(doc_sim_list))
print("Cosine similarity between reference and summaries : {}".format(ref_sim_list))

Cosine similarity between document and summaries : [0.375, 0.586, 0.7]
Cosine similarity between reference and summaries : [0.172, 0.111, 0.106]


### Reference Summary와의 ROUGE score 향상

In [112]:
rouge = Rouge()

doc_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(article))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
ref_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(abstract))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
rouge_list = [round(cand[1],3) for cand in candidate]

print("Rouge score between document and summaries : {}".format(doc_rouge_list))
print("Rouge score between reference and summaries : {}".format(ref_rouge_list))
print("Rouge score between reference and summaries(written) : {}".format(rouge_list))


Rouge score between document and summaries : [0.554, 0.61, 0.443]
Rouge score between reference and summaries : [0.459, 0.518, 0.485]
Rouge score between reference and summaries(written) : [0.4, 0.404, 0.387]


## 문장 단위 Rewrite

In [113]:
def _map_sent_id(cand_set):
    mapping_ = []
    for i, cand in enumerate(cand_set) :
        summ_ = []
        for j, s in enumerate(cand):
            idx = sum([len(prev) for prev in cand_set[:i]]) + j
            summ_.append((idx, s))
            
        mapping_.append(summ_)
            
    return mapping_
    
def _get_sent_fromId(cand_set, idx):
    dscored = [(i, j, sent[1]) for i, cand in enumerate(cand_set) for j, sent in enumerate(cand) if sent[0]==idx]
    return dscored[0]

In [114]:
rewrited_summaries

[["club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .",
  'he was not booked by the referee but could face a heavy retrospective ban .',
  'juan arango ) bites the shoulder of opponent jesus zavela in a moment of madness'],
 ["juan arango bites jesus zavela in a moment of madness in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan sank his teeth into the shoulder of the opponent as his temper flared in the defeat .',
  'he was not booked by the referee but could face a heavy retrospective ban .',
  'arango had earlier curled in a magnificent free kick for his team to bring them level after falling 2-0 down .'],
 ["juan arango bites jesus zavela in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan sank his teeth into the shoulder of jesus zavala in a moment of madness .',
  'he was not booked by the refer

In [115]:
def map_sent_id(cand_set):
    mapping_ = []
    for i, cand in enumerate(cand_set) :
        summ_ = []
        for j, s in enumerate(cand):
            idx = sum([len(prev) for prev in cand_set[:i]]) + j
            summ_.append((idx, s))
            
        mapping_.append(summ_)
            
    return mapping_

In [116]:
import copy

rewrited_summaries = copy.deepcopy(copy_summaries)

for sent_id, w, ph, s in dscored:
    window_id = 0
    
    mapped_cand_set = map_sent_id(encoded_cand_set)
    tmp = [(i, j, sent[1]) for i, cand in enumerate(mapped_cand_set) 
                       for j, sent in enumerate(cand) if sent[0]==sent_id]
    _, _, sent = tmp[0]
    print(len(sent))

    rewrited_summaries = rewrite(cand_set=rewrited_summaries, sent_id=sent_id, \
                                 window_id=window_id, tok=tokenizer, model=model, n_gram=None,
                                tok_max_length=512, max_length=len(sent))
rewrited_summaries

42
club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .
["Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was"]
42
Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was
["Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league. Club tijuana"]
42
Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league. Club tijuana
["Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league. Club tijuana"]
17
he was not booked by the referee but could face a heavy retrospective ban .
['he was not booked by the referee but could face a heavy re

[["Club tijuana star juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league. Club tijuana",
  'he was not booked by the referee but could face a heavy retrospective ban',
  'juan arango bites the shoulder of opponent jesus zavela in a moment'],
 ["juan arango bites jesus zavela in a moment of madness in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan icon sank his teeth into the shoulder of the opponent as his temper flared in',
  'he was not booked by the referee but could face a heavy retrospective ban',
  'arango had earlier curled in a magnificent free kick for his team to bring them level after falling 2-0 down .'],
 ["juan arango bites jesus zavela in club tijuana 's 4-3 defeat by monterrey in the mexican league .",
  'the venezuelan icon sank his teeth into the shoulder of jesus zavala in',
  'he was not booked by the referee but could face a heavy retrospective ban']]

### cosine similarity 일부 하락 & 향상

In [117]:
doc_sim_list = [round(np.mean(get_cos_similarity(article, cand)),3) for cand in rewrited_summaries]
ref_sim_list = [round(np.mean(get_cos_similarity(abstract, cand)),3) for cand in rewrited_summaries]

print("Cosine similarity between document and summaries : {}".format(doc_sim_list))
print("Cosine similarity between reference and summaries : {}".format(ref_sim_list))

Cosine similarity between document and summaries : [0.294, 0.586, 0.739]
Cosine similarity between reference and summaries : [0.179, 0.102, 0.11]


### ROUGE score 일부 하락 & 향상

In [118]:
rouge = Rouge()

doc_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(article))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
ref_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(abstract))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
rouge_list = [round(cand[1],3) for cand in candidate]

print("Rouge score between document and summaries : {}".format(doc_rouge_list))
print("Rouge score between reference and summaries : {}".format(ref_rouge_list))
print("Rouge score between reference and summaries(written) : {}".format(rouge_list))


Rouge score between document and summaries : [0.478, 0.629, 0.446]
Rouge score between reference and summaries : [0.478, 0.512, 0.462]
Rouge score between reference and summaries(written) : [0.4, 0.404, 0.387]


## Phase Masking & Rewriting

In [119]:
def mask_rewrite(cand_set, sent_id, window_id, tok, model, n_gram='trigram', tok_max_length=512, max_length=5):
        
    if n_gram =='trigram':
        n_gram = 3
    elif n_gram =='4-gram':
        n_gram = 4
    elif n_gram == '5-gram':
        n_gram = 5
    else:
        n_gram = None
    
    def _map_sent_id(cand_set):
        mapping_ = []
        for i, cand in enumerate(cand_set) :
            summ_ = []
            for j, s in enumerate(cand):
                idx = sum([len(prev) for prev in cand_set[:i]]) + j
                summ_.append((idx, s))
            
            mapping_.append(summ_)
            
        return mapping_
    
    def _get_sent_fromId(cand_set, idx):
        dscored = [(i, j, sent[1]) for i, cand in enumerate(cand_set) for j, sent in enumerate(cand) if sent[0]==idx]
        return dscored[0]
    
    mapped_cand_set = _map_sent_id(cand_set)
    set_idx, sent_idx, sent = _get_sent_fromId(mapped_cand_set, sent_id)
    
    sent_tok = sent.split()
    sent_tok[window_id:window_id + n_gram] = ['<mask>']
    n_gram = n_gram if n_gram is not None else len(sent_tok)
    
    origin_phase_txt = sent    
    print(' '.join(sent_tok))
    
    bart_tokenized = tok(origin_phase_txt, return_tensors='pt')
    bart_summarized = model.generate(bart_tokenized['input_ids'], max_length=max_length)
    result = tok.batch_decode(bart_summarized, skip_special_tokens=True)
    
    print(result)
        
    sent_tok[window_id:window_id + n_gram] = result
    cand_set[set_idx][sent_idx] = result[0]
    
    return cand_set

In [120]:
import copy

rewrited_summaries = copy.deepcopy(copy_summaries)

for sent_id, w, ph, _ in dscored:
    
    mapped_cand_set = map_sent_id(encoded_cand_set)
    tmp = [(i, j, sent[1]) for i, cand in enumerate(mapped_cand_set) 
                       for j, sent in enumerate(cand) if sent[0]==sent_id]
    _, _, sent = tmp[0]
    print(len(sent))
    
    rewrited_summaries = mask_rewrite(cand_set=rewrited_summaries, sent_id=sent_id, \
                                 window_id=window_id, tok=tokenizer, model=model, n_gram='trigram',
                                tok_max_length=512, max_length=len(sent))

42
<mask> juan arango conjured memories luis suarez in his team 's 4-3 defeat by monterrey in the mexican league - but it was not through prodigious scoring .
["Club tijuana star juan arango conjured memories luis suarez in his team's 4-3 defeat by monterrey in the mexican league - but it was"]
42
<mask> juan arango conjured memories luis suarez in his team's 4-3 defeat by monterrey in the mexican league - but it was
["Club tijuana star juan arango conjured memories luis suarez in his team's 4-3 defeat by monterrey in the mexican league. But it was not"]
42
<mask> juan arango conjured memories luis suarez in his team's 4-3 defeat by monterrey in the mexican league. But it was not
["Club tijuana star juan arango conjured memories luis suarez in his team's 4-3 defeat by monterrey in the mexican league. But it was not"]
17
<mask> booked by the referee but could face a heavy retrospective ban .
['he was not booked by the referee but could face a heavy retrospective ban']
17
<mask> booked b

### cosine similarity 하락

In [121]:
doc_sim_list = [round(np.mean(get_cos_similarity(article, cand)),3) for cand in rewrited_summaries]
ref_sim_list = [round(np.mean(get_cos_similarity(abstract, cand)),3) for cand in rewrited_summaries]

print("Cosine similarity between document and summaries : {}".format(doc_sim_list))
print("Cosine similarity between reference and summaries : {}".format(ref_sim_list))

Cosine similarity between document and summaries : [0.344, 0.586, 0.739]
Cosine similarity between reference and summaries : [0.18, 0.102, 0.11]


### ROUGE score 하락

In [122]:
rouge = Rouge()

doc_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(article))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
ref_rouge_list = [round(rouge.get_scores('\n'.join(cand), '\n'.join(abstract))[0]['rouge-l']['f'],3) for cand in rewrited_summaries]
rouge_list = [round(cand[1],3) for cand in candidate]

print("Rouge score between document and summaries : {}".format(doc_rouge_list))
print("Rouge score between reference and summaries : {}".format(ref_rouge_list))
print("Rouge score between reference and summaries(written) : {}".format(rouge_list))


Rouge score between document and summaries : [0.462, 0.629, 0.446]
Rouge score between reference and summaries : [0.412, 0.512, 0.462]
Rouge score between reference and summaries(written) : [0.4, 0.404, 0.387]
