In [1]:
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
import data
import utils
from main import extract_summary, report_rouge_scores

%load_ext autoreload
%autoreload 2

In [2]:
# Load the Outlook corpus. get_outlook_data() is unpacked into three values that
# the rest of the notebook uses as article titles, reference summaries, and
# article body texts.
# presumably the three sequences are index-aligned (one entry per article) — TODO confirm
outlook_titles, outlook_refs, outlook_text = data.get_outlook_data()
# Number of body texts; reused later as the size of the evaluation set.
total = len(outlook_text)
print(total)

44


### Summarization

- Summarization algorithms include:
    - SMRS (TF-IDF matrix)
    - Frank-Wolfe (TF-IDF matrix)
    - Frank-Wolfe (Sentence embeddings matrix)
- *Matlab* and the *Matlab engine for Python* are required to run the SMRS method. Remove `'SMRS'` from the `methods` list below if Matlab is not installed.

- Main function: `extract_summary()`

```python
# Arguments:
#     - doc: string; article body text
#     - ref: string; reference summary
#     - title: string; title of the article
#     - k: number of extracted exemplars
#     - print_summary: print summary text for each algorithm
#     - report_rouge: report rouge score (need to pass in ref argument)
#     - rouge_embed: use word embedding to calculate rouge score
#     - vectorize_scores: return scores in np.ndarray instead of in a dictionary
#     - methods: summarization algorithms to be used
# Return:
#     - summary: dictionary; extracted summary sentences using each algorithm
#     - word_count: dictionary; number of words in the extracted summary
#     - runtime: computation time of each algorithm
#     - scores: rouge score of each algorithm
        
summary, word_count, runtime, scores = extract_summary(doc, ref=None, title=None, k=5, print_summary=False, 
                                                       report_rouge=False, print_rouge=True, rouge_embed=False, 
                                                       vectorize_scores=False, methods=['random', 'SMRS', 'tfidf', 'embed']);

```

In [4]:
# Pick one article (by position in the corpus) to inspect in the cells below.
# NOTE(review): a bare "9" was left here; presumably another index that was tried — confirm.
doc_idx = 1
# Body text, reference summary, and title of the selected article.
doc = outlook_text[doc_idx]
ref = outlook_refs[doc_idx]
title = outlook_titles[doc_idx]
# Show the reference summary so the extracted summaries can be compared against it.
print(ref)

In today’s late-cycle US monetary tightening phase, we recommend a variety of actions to prepare portfolios for the coming year and beyond. Staying positive, but building up our defenses. Regular portfolio rebalancing is essential for 2019 and beyond. What central bank policy tightening means for portfolios. A second headwind. Buying value is better than chasing momentum. Managing late-cycle risks. A time to hedge. It is the right time to take portfolio actions.


In [7]:
# Number of sentences each method is asked to extract.
k=5
# Disabled alternative setting; `ratio` is not passed to extract_summary below.
# ratio=0.2
# Summarizers to run; 'SMRS' needs Matlab (see the Summarization notes).
methods = ['first-k', 'SMRS', 'TextRank', 'tfidf', 'embed']
# Print the extracted summaries only (no ROUGE report). The trailing ';' keeps
# the returned tuple from being echoed as cell output.
extract_summary(doc, ref, title, k=k, report_rouge=False, methods=methods, print_summary=True);

Soruce Text: 60 sentences, 516 distinct vocab
# of selected sentences: 5

Title: Building stronger portfolios for turbulent times

In today’s late-cycle US monetary tightening phase, we recommend a variety of actions to prepare portfolios for the coming year and beyond. Staying positive, but building up our defenses. Regular portfolio rebalancing is essential for 2019 and beyond. What central bank policy tightening means for portfolios. A second headwind. Buying value is better than chasing momentum. Managing late-cycle risks. A time to hedge. It is the right time to take portfolio actions.
-----
Word count:72

Global investors have routinely doubted the durability of the economic expansion that began in 2009. however, their doubts have been repeatedly contradicted by the breadth and persistence of the recovery.
By the second half 2019, we believe the us expansion will likely have become the longest since records began in 1854.that means we need to understand what allowed the expansion

### ROUGE Score

In [8]:
%%time
# Disabled settings kept from earlier experiments; neither is passed below, so
# extract_summary falls back to its own default for k.
# k=5
# ratio=0.3
# Summarizers to score; 'SMRS' needs Matlab (see the Summarization notes).
methods = ['first-k', 'SMRS', 'TextRank', 'tfidf', 'embed']
# Report ROUGE for the selected article without word embeddings
# (rouge_embed=False); scores are printed, summary text is not.
extract_summary(doc, ref, title, report_rouge=True, rouge_embed=False, 
                methods=methods, print_summary=False, print_rouge=True);



first-k
Overlap 1-gram 			F1: 0.191
Overlap 1-gram 			Precision: 0.138
Overlap 1-gram 			Recall: 0.310
Overlap bi-gram 		F1: 0.000
Overlap bi-gram 		Precision: 0.000
Overlap bi-gram 		Recall: 0.000
Longest Common Subsequence 	F1: 0.141
Longest Common Subsequence 	Precision: 0.128
Longest Common Subsequence 	Recall: 0.286

SMRS
Overlap 1-gram 			F1: 0.132
Overlap 1-gram 			Precision: 0.091
Overlap 1-gram 			Recall: 0.238
Overlap bi-gram 		F1: 0.000
Overlap bi-gram 		Precision: 0.000
Overlap bi-gram 		Recall: 0.000
Longest Common Subsequence 	F1: 0.089
Longest Common Subsequence 	Precision: 0.082
Longest Common Subsequence 	Recall: 0.214

TextRank
Overlap 1-gram 			F1: 0.181
Overlap 1-gram 			Precision: 0.124
Overlap 1-gram 			Recall: 0.333
Overlap bi-gram 		F1: 0.000
Overlap bi-gram 		Precision: 0.000
Overlap bi-gram 		Recall: 0.000
Longest Common Subsequence 	F1: 0.115
Longest Common Subsequence 	Precision: 0.106
Longest Common Subsequence 	Recall: 0.286

tfidf
Overlap 1-gram 			F1: 

### Word Embedding ROUGE Score

In [9]:
%%time
# Same evaluation as the previous ROUGE cell, but with rouge_embed=True so the
# scores are computed with word embeddings. Relies on `methods` defined in the
# previous cell. The result is bound to `_` because only the printed scores are
# of interest here.
_ = extract_summary(doc, ref, title, report_rouge=True, rouge_embed=True, 
                    methods=methods, print_summary=False, print_rouge=True);



first-k
Overlap 1-gram 			F1: 0.453
Overlap 1-gram 			Precision: 0.426
Overlap 1-gram 			Recall: 0.484
Overlap bi-gram 		F1: 0.606
Overlap bi-gram 		Precision: 0.569
Overlap bi-gram 		Recall: 0.649
Longest Common Subsequence 	F1: 0.238
Longest Common Subsequence 	Precision: 0.216
Longest Common Subsequence 	Recall: 0.484

SMRS
Overlap 1-gram 			F1: 0.407
Overlap 1-gram 			Precision: 0.373
Overlap 1-gram 			Recall: 0.448
Overlap bi-gram 		F1: 0.533
Overlap bi-gram 		Precision: 0.485
Overlap bi-gram 		Recall: 0.593
Longest Common Subsequence 	F1: 0.186
Longest Common Subsequence 	Precision: 0.171
Longest Common Subsequence 	Recall: 0.448

TextRank
Overlap 1-gram 			F1: 0.451
Overlap 1-gram 			Precision: 0.419
Overlap 1-gram 			Recall: 0.488
Overlap bi-gram 		F1: 0.585
Overlap bi-gram 		Precision: 0.539
Overlap bi-gram 		Recall: 0.640
Longest Common Subsequence 	F1: 0.197
Longest Common Subsequence 	Precision: 0.182
Longest Common Subsequence 	Recall: 0.488

tfidf
Overlap 1-gram 			F1: 

### ROUGE Score Across Documents

In [3]:
# Evaluation window over the corpus: first article and how many to include.
start, num_articles = 0, total
stop = start + num_articles

# Cut the same [start, stop) range out of the three parallel sequences so that
# article i, reference i and title i still describe the same document.
articles, references, titles = (
    corpus[start:stop] for corpus in (outlook_text, outlook_refs, outlook_titles)
)

In [4]:
%%time
# Summarizers to compare; 'SMRS' needs Matlab (see the Summarization notes).
methods = ['first-k', 'SMRS', 'TextRank', 'tfidf', 'embed']
rouge_mean, rouge_median, rouge_std = report_rouge_scores(
    articles, references, titles, k=5, methods=methods
)

# Row labels for the three result tables. Assigning them requires every table
# to have exactly len(index) rows; pandas raises ValueError otherwise.
index = [
    '1-gram F1', '1-gram Precision', '1-gram Recall',
    'bi-gram F1', 'bi-gram Precision', 'bi-gram Recall',
    'longest common F1', 'longest common Precision', 'longest common Recall',
    'runtime', 'word count',
]

# Banner, relabel, display: the same three steps for each statistic, in the
# order mean, median, standard deviation.
for banner, table in (
    ('=' * 22 + ' Mean ' + '=' * 22, rouge_mean),
    ('=' * 21 + ' Median ' + '=' * 21, rouge_median),
    ('=' * 15 + ' Standard Deviation ' + '=' * 15, rouge_std),
):
    print(banner)
    table.index = index
    display(table)



Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.180089,0.150821,0.206136,0.189309,0.16738
1-gram Precision,0.157147,0.133729,0.16563,0.133763,0.166246
1-gram Recall,0.256285,0.210668,0.321829,0.388483,0.20259
bi-gram F1,0.030311,0.022535,0.045729,0.054971,0.02807
bi-gram Precision,0.025658,0.018969,0.035888,0.038467,0.030669
bi-gram Recall,0.051811,0.034134,0.074517,0.126004,0.028646
longest common F1,0.145559,0.12366,0.159359,0.136443,0.141019
longest common Precision,0.144221,0.125495,0.150229,0.126385,0.156799
longest common Recall,0.235956,0.19694,0.294907,0.369419,0.188172
runtime,3e-06,0.595515,0.014482,1.444031,1.589632




Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.175493,0.144848,0.192325,0.171531,0.149981
1-gram Precision,0.137243,0.122391,0.1467,0.112135,0.153883
1-gram Recall,0.213526,0.19434,0.297436,0.379582,0.179798
bi-gram F1,0.018465,0.0,0.020912,0.034785,0.0
bi-gram Precision,0.014395,0.0,0.012421,0.020114,0.0
bi-gram Recall,0.026491,0.0,0.033908,0.076511,0.0
longest common F1,0.136303,0.113586,0.142161,0.113098,0.129606
longest common Precision,0.128116,0.119322,0.132767,0.10308,0.128854
longest common Recall,0.199103,0.170833,0.267054,0.357322,0.167671
runtime,2e-06,0.484999,0.01198,1.06795,1.156734




Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.074047,0.064032,0.08528,0.078732,0.078373
1-gram Precision,0.081958,0.061931,0.086413,0.071639,0.097023
1-gram Recall,0.117988,0.112734,0.106969,0.113993,0.097328
bi-gram F1,0.048948,0.05079,0.060834,0.066181,0.064003
bi-gram Precision,0.043135,0.042527,0.050513,0.053036,0.075039
bi-gram Recall,0.084002,0.082519,0.091388,0.122446,0.05782
longest common F1,0.070119,0.057047,0.085054,0.076437,0.077556
longest common Precision,0.0768,0.059525,0.081734,0.068773,0.096009
longest common Recall,0.113028,0.108217,0.107621,0.11625,0.08604
runtime,2e-06,0.328381,0.009316,0.893141,1.037771


CPU times: user 8min 12s, sys: 23.7 s, total: 8min 36s
Wall time: 11min 4s


### Word Embedding ROUGE Score Across Documents

In [5]:
%%time
# Summarizers to compare; 'SMRS' needs Matlab (see the Summarization notes).
methods = ['first-k', 'SMRS', 'TextRank', 'tfidf', 'embed']
# Word-embedding ROUGE over the whole evaluation set.
# NOTE(review): k is not passed here (the non-embedding run passes k=5), so
# report_rouge_scores uses its own default — confirm the two runs are comparable.
rouge_mean_embed, rouge_median_embed, rouge_std_embed = report_rouge_scores(
    articles, references, titles, rouge_embed=True, methods=methods
)

# Row labels for the three result tables. Assigning them requires every table
# to have exactly len(index) rows; pandas raises ValueError otherwise.
index = [
    '1-gram F1', '1-gram Precision', '1-gram Recall',
    'bi-gram F1', 'bi-gram Precision', 'bi-gram Recall',
    'longest common F1', 'longest common Precision', 'longest common Recall',
    'runtime', 'word count',
]

# Banner, relabel, display for each statistic. A stray `over` token after the
# Median banner's print call used to make this cell a SyntaxError; it is removed.
for banner, table in (
    ('=' * 22 + ' Mean ' + '=' * 22, rouge_mean_embed),
    ('=' * 21 + ' Median ' + '=' * 21, rouge_median_embed),
    ('=' * 15 + ' Standard Deviation ' + '=' * 15, rouge_std_embed),
):
    print(banner)
    table.index = index
    display(table)



Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.467491,0.434429,0.482807,0.502398,0.434919
1-gram Precision,0.445079,0.418774,0.44912,0.456457,0.414689
1-gram Recall,0.495726,0.458959,0.526042,0.563471,0.46302
bi-gram F1,0.598991,0.556885,0.61428,0.61801,0.555869
bi-gram Precision,0.568489,0.530179,0.572396,0.5557,0.52733
bi-gram Recall,0.636544,0.59217,0.665999,0.699024,0.592559
longest common F1,0.29724,0.288009,0.261931,0.190561,0.353035
longest common Precision,0.278988,0.345735,0.239341,0.177486,0.398802
longest common Recall,0.495726,0.458959,0.526042,0.563471,0.46302
runtime,3e-06,0.590307,0.014785,1.463431,1.627546




Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.47075,0.439098,0.481203,0.511206,0.430007
1-gram Precision,0.452084,0.424603,0.442769,0.460902,0.420579
1-gram Recall,0.493076,0.464912,0.537365,0.580349,0.462472
bi-gram F1,0.60554,0.562863,0.602328,0.616251,0.541089
bi-gram Precision,0.568701,0.528042,0.562435,0.561471,0.516982
bi-gram Recall,0.636069,0.605606,0.67037,0.702977,0.595509
longest common F1,0.296643,0.277689,0.24678,0.192703,0.360533
longest common Precision,0.268101,0.256347,0.225695,0.17886,0.333133
longest common Recall,0.493077,0.464912,0.537365,0.580349,0.462472
runtime,3e-06,0.464839,0.011561,1.086547,1.195969




Unnamed: 0,first-k,SMRS,TextRank,tfidf,embed
1-gram F1,0.058773,0.058208,0.062631,0.062276,0.066275
1-gram Precision,0.05542,0.055796,0.058498,0.055692,0.062479
1-gram Recall,0.073951,0.075459,0.080903,0.086221,0.079382
bi-gram F1,0.051984,0.049551,0.051223,0.050448,0.05919
bi-gram Precision,0.051734,0.044493,0.048372,0.048613,0.057448
bi-gram Recall,0.069663,0.074502,0.070132,0.067462,0.075729
longest common F1,0.086806,0.079413,0.088085,0.066429,0.097781
longest common Precision,0.094091,0.359402,0.078056,0.057948,0.334798
longest common Recall,0.073951,0.075459,0.080903,0.086221,0.079382
runtime,1e-06,0.336695,0.010265,0.917372,1.089394


CPU times: user 1h 44min 17s, sys: 5min 31s, total: 1h 49min 48s
Wall time: 1h 52min 49s


In [22]:
def conf_int(mu, sigma, num_std=1):
    """Return the symmetric band ``(mu - num_std * sigma, mu + num_std * sigma)``.

    With the default ``num_std=1`` this is the plain mean +/- one standard
    deviation band. It is a spread of the per-document scores, not a
    confidence interval for the mean (no division by sqrt(n) is performed).

    Args:
        mu: scalar or array of centre values.
        sigma: scalar or array of spreads, broadcastable against ``mu``.
        num_std: multiplier applied to ``sigma``; defaults to 1 so existing
            two-argument calls return exactly what they did before.

    Returns:
        Tuple ``(lower, upper)`` with the same shape as ``mu +/- sigma``.
    """
    half_width = num_std * sigma
    return mu - half_width, mu + half_width


# Hand-copied means and standard deviations for three methods.
# NOTE(review): the names say "r1f1", but the first two entries of each array
# equal the TextRank / tfidf "1-gram Precision" rows of the word-embedding mean
# and standard-deviation tables, not the "1-gram F1" rows; the third entry does
# not appear in the tables shown (presumably from another run) — confirm.
r1f1_mu = np.array([0.449120, 0.456457, 0.442609])
r1f1_sig = np.array([0.058498, 0.055692, 0.060811])
conf_int(r1f1_mu, r1f1_sig)

(array([0.390622, 0.400765, 0.381798]), array([0.507618, 0.512149, 0.50342 ]))

In [4]:
def F1(p, r):
    """Harmonic mean of precision and recall, ``2 * p * r / (p + r)``.

    Args:
        p: precision, scalar or array-like.
        r: recall, scalar or array-like broadcastable against ``p``.

    Returns:
        The F1 score, element-wise for array input (NumPy scalar for scalar
        input). Where ``p + r == 0`` the score is defined as 0.0 instead of
        dividing by zero.
    """
    p = np.asarray(p, dtype=float)
    r = np.asarray(r, dtype=float)
    total = p + r
    undefined = total == 0
    # Divide by a harmless 1.0 where the score is undefined; np.where then
    # replaces those entries with 0.0.
    f1 = np.where(undefined, 0.0, 2 * (p * r) / np.where(undefined, 1.0, total))
    # [()] unwraps a 0-d result to a scalar and leaves real arrays untouched.
    return f1[()]


# Hand-copied longest-common-subsequence precision / recall, one entry per
# method. This cell used to rely on `p` and `r` left in the kernel by another
# cell and failed with NameError on a fresh run; the pair below is the one that
# reproduces the recorded output of this cell.
# Alternative pair (disabled):
#p = np.array([0.278988, 0.345735, 0.239341, 0.177486, 0.453966])
#r = np.array([0.495726, 0.458959, 0.526042, 0.563471, 0.483673])
p = np.array([0.144221, 0.125495, 0.150229, 0.126385, 0.174409])
r = np.array([0.235956, 0.196940, 0.294907, 0.369419, 0.223644])
print(F1(p, r))

[0.17902088 0.15330212 0.19905639 0.1883366  0.19598157]


In [7]:
def F1_LCS(p_lcs, r_lcs):
    """Weighted F-measure ``(1 + b^2) * R * P / (R + b^2 * P)`` with ``b = P / R``.

    Args:
        p_lcs: longest-common-subsequence precision, scalar or NumPy array.
        r_lcs: longest-common-subsequence recall, same shape as ``p_lcs``.

    Returns:
        The F-measure, element-wise for array input. A 1e-12 term is added to
        both denominators so zero recall or zero scores do not divide by zero.
    """
    eps = 1e-12
    beta_sq = (p_lcs / (r_lcs + eps)) ** 2
    numerator = (1 + beta_sq) * r_lcs * p_lcs
    denominator = r_lcs + beta_sq * p_lcs
    return numerator / (denominator + eps)

# Hand-copied LCS precision / recall, one entry per method. The first four
# entries of the active pair equal the mean "longest common Precision/Recall"
# rows of the non-embedding table; the fifth presumably comes from another run.
# Alternative pair (disabled):
# p = np.array([0.278988, 0.345735, 0.239341, 0.177486, 0.453966])
# r = np.array([0.495726, 0.458959, 0.526042, 0.563471, 0.483673])
p = np.array([0.144221, 0.125495, 0.150229, 0.126385, 0.174409])
r = np.array([0.235956, 0.196940, 0.294907, 0.369419, 0.223644])
F1_LCS(p, r)

array([0.16127424, 0.14018123, 0.16712128, 0.13574217, 0.19024804])