# Importing

In [1]:
import pandas as pd
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.sum_basic import SumBasicSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer

In [2]:
from main import replace_dots, preprocess_text, summarizer, calc_rouge, avg_rouge, generate_ensemble_summary

# Reading Data from CSV

In [3]:
# BBC news articles with human-written reference summaries.
# Source: https://www.kaggle.com/datasets/pariza/bbc-news-summary/data
data = pd.read_csv("./data/bbc_data.csv")

In [4]:
# Preview the raw frame: one row per article with its label, full text and reference summary.
data.head()

Unnamed: 0,label,text,summary
0,business,UK economy facing 'major risks'..The UK manufa...,"""Despite some positive news for the export sec..."
1,business,Aids and climate top Davos agenda..Climate cha...,"At the same time, about 100,000 people are exp..."
2,business,Asian quake hits European shares..Shares in Eu...,The unfolding scale of the disaster in south A...
3,business,India power shares jump on debut..Shares in In...,"Shares in India's largest power producer, Nati..."
4,business,Lacroix label bought by US firm..Luxury goods ...,LVMH said the French designer's haute couture ...


In [5]:
# (rows, columns) of the loaded frame.
data.shape

(2225, 3)

# Data Preprocessing

In [6]:
# Raw article at row label 1: sentences and paragraphs are separated by runs of
# dots ("..", "...") rather than by ". ".
data.loc[1, 'text']

'Aids and climate top Davos agenda..Climate change and the fight against Aids are leading the list of concerns for the first day of the World Economic Forum in the Swiss resort of Davos...Some 2,000 business and political leaders from around the globe will listen to UK Prime Minister Tony Blair\'s opening speech on Wednesday. Mr Blair will focus on Africa\'s development plans and global warming. Earlier in the day came an update on efforts to have 3 million people on anti-Aids drugs by the end of 2005. The World Health Organisation (WHO) said 700,000 people in poor countries were on life-extending drugs - up from 440,000 six months earlier but amounting to only 12% of the 5.8 million who needed them. A $2bn "funding gap" still stood in the way of hitting the 2005 target, the WHO said...The themes to be stressed by Mr Blair - whose attendance was announced at the last minute - are those he wants to dominate the UK\'s chairmanship of the G8 group of industrialised states. Other issues to

In [7]:
# Normalise sentence boundaries in the article text. Per the original author's
# note, replace_dots collapses each run of dots into a single dot followed by a
# space, and also makes sure a dot that follows a digit is followed by a space.
data['text'] = data['text'].apply(replace_dots)

In [8]:
# Same article as before, shown again to verify the effect of replace_dots.
data.loc[1, 'text']

'Aids and climate top Davos agenda. Climate change and the fight against Aids are leading the list of concerns for the first day of the World Economic Forum in the Swiss resort of Davos. Some 2,000 business and political leaders from around the globe will listen to UK Prime Minister Tony Blair\'s opening speech on Wednesday.  Mr Blair will focus on Africa\'s development plans and global warming.  Earlier in the day came an update on efforts to have 3 million people on anti-Aids drugs by the end of 2005.  The World Health Organisation (WHO) said 700,000 people in poor countries were on life-extending drugs - up from 440,000 six months earlier but amounting to only 12% of the 5.8 million who needed them.  A $2bn "funding gap" still stood in the way of hitting the 2005 target, the WHO said. The themes to be stressed by Mr Blair - whose attendance was announced at the last minute - are those he wants to dominate the UK\'s chairmanship of the G8 group of industrialised states.  Other issues

In [9]:
# Apply the same dot normalisation to the reference summaries so that the
# articles and references share one sentence-boundary convention.
data['summary'] = data['summary'].apply(replace_dots)

In [10]:
# Run the project's preprocess_text helper (imported from main) over the whole
# frame and rebind `data` to its result.
# NOTE(review): the helper's exact transformations are not visible in this
# notebook — confirm in main.py which columns it changes.
data = preprocess_text(data)

In [11]:
# Re-inspect the frame after dot normalisation and preprocess_text.
data.head()

Unnamed: 0,label,text,summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec..."
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp..."
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati..."
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...


# Base Extractive Summarization Techniques

## 1) LSA Summarizer

In [12]:
# Build one summary per article with the project's `summarizer` helper.
# No summarizer class is passed here, so the helper's default algorithm is used;
# per this section's heading that default is LSA — TODO confirm in main.py.
# The literal 3 is presumably the number of sentences to keep — TODO confirm.
data['lsa_summary'] = data['text'].apply(
    lambda article: summarizer(article, 3)
)

In [13]:
# Check that lsa_summary was added next to the reference summary.
data.head()

Unnamed: 0,label,text,summary,lsa_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from..."
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo..."
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati..."
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...


### ROUGE Scores

In [14]:
# Score every LSA summary against the reference in the `summary` column.
# calc_rouge returns a three-column frame; the columns are renamed with an
# `_lsa` suffix so they stay unique when all methods are concatenated later.
results_lsa = calc_rouge(data, 'summary', 'lsa_summary')
results_lsa.columns = ['rouge-1_lsa', 'rouge-2_lsa', 'rouge-l_lsa']

#### Average

In [15]:
# Corpus-level averages of the three ROUGE columns for LSA.
# NOTE(review): which component (recall / precision / F) avg_rouge averages is
# not visible here — confirm in main.py.
# avg_r1 / avg_r2 / avg_rL are reused by every section, so after a full run
# they hold only the most recently evaluated method's values.
avg_r1, avg_r2, avg_rL = avg_rouge(results_lsa, 'rouge-1_lsa', 'rouge-2_lsa', 'rouge-l_lsa')
print('LSA')
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

LSA
Average rouge-1 score:  0.63
Average rouge-2 score:  0.49
Average rouge-l score:  0.62


## 2) LexRank Summarizer

In [16]:
# Same helper call as the other baselines, with sumy's LexRankSummarizer class
# selecting the algorithm. The literal 3 matches the other methods
# (presumably the sentence count — TODO confirm in main.py).
data['lexrank_summary'] = data['text'].apply(
    lambda article: summarizer(article, 3, LexRankSummarizer)
)

In [17]:
# Check that lexrank_summary was added.
data.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati..."
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...


### ROUGE Scores

In [18]:
# Score every LexRank summary against the reference `summary` column and tag
# the three resulting columns with a `_lexrank` suffix.
results_lexrank = calc_rouge(data, 'summary', 'lexrank_summary')
results_lexrank.columns = ['rouge-1_lexrank', 'rouge-2_lexrank', 'rouge-l_lexrank']

#### Average

In [19]:
# Corpus-level averages of the three ROUGE columns for LexRank.
# The avg_* names are overwritten here; earlier methods' averages survive only
# in the printed output.
avg_r1, avg_r2, avg_rL = avg_rouge(results_lexrank, 'rouge-1_lexrank', 'rouge-2_lexrank', 'rouge-l_lexrank')
print("LexRank")
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

LexRank
Average rouge-1 score:  0.79
Average rouge-2 score:  0.69
Average rouge-l score:  0.78


## 3) Luhn Summarizer

In [20]:
# Same helper call as the other baselines, with sumy's LuhnSummarizer class
# selecting the algorithm. The literal 3 matches the other methods
# (presumably the sentence count — TODO confirm in main.py).
data['luhn_summary'] = data['text'].apply(
    lambda article: summarizer(article, 3, LuhnSummarizer)
)

In [21]:
# Check that luhn_summary was added.
data.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary,luhn_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...,The group's quarterly survey of companies foun...
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...,Shares in Europe's leading reinsurers and trav...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","The 865 million share offer, a mix of new shar..."
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...,LVMH said the French designer's haute couture ...


### ROUGE Scores

In [22]:
# Score every Luhn summary against the reference `summary` column and tag the
# three resulting columns with a `_luhn` suffix.
results_luhn = calc_rouge(data, 'summary', 'luhn_summary')
results_luhn.columns =['rouge-1_luhn', 'rouge-2_luhn', 'rouge-l_luhn']

#### Average

In [23]:
# Corpus-level averages of the three ROUGE columns for Luhn.
# The metric is ROUGE (not "rogue"); labels use the same 'Average rouge-N score: '
# format as the other sections so the printed results line up across methods.
# NOTE(review): which component (recall / precision / F) avg_rouge averages is
# not visible here — confirm in main.py.
avg_r1, avg_r2, avg_rL = avg_rouge(results_luhn, 'rouge-1_luhn', 'rouge-2_luhn', 'rouge-l_luhn')
print("LUHN")
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

LUHN
Average rouge-1 score:  0.8
Average rouge-2 score:  0.7
Average rogue-l score: 0.78


## 4) SumBasic Summarizer

In [24]:
# Same helper call as the other baselines, with sumy's SumBasicSummarizer class
# selecting the algorithm. The literal 3 matches the other methods
# (presumably the sentence count — TODO confirm in main.py).
data['sum_basic_summary'] = data['text'].apply(
    lambda article: summarizer(article, 3, SumBasicSummarizer)
)

In [25]:
# Check that sum_basic_summary was added.
data.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary,luhn_summary,sum_basic_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...,The group's quarterly survey of companies foun...,UK economy facing 'major risks'. The rise came...
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...,Shares in Europe's leading reinsurers and trav...,Insurers and reinsurance firms were also under...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","The 865 million share offer, a mix of new shar...",India power shares jump on debut. The governme...
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...,LVMH said the French designer's haute couture ...,Lacroix label bought by US firm. The Falic Gro...


### ROUGE Scores

In [26]:
# Score every SumBasic summary against the reference `summary` column and tag
# the three resulting columns with a `_sum_basic` suffix.
results_sum_basic = calc_rouge(data, 'summary', 'sum_basic_summary')
results_sum_basic.columns = ['rouge-1_sum_basic', 'rouge-2_sum_basic', 'rouge-l_sum_basic']

#### Average

In [27]:
# Corpus-level averages of the three ROUGE columns for SumBasic.
# The metric is ROUGE (not "rogue"); labels use the same 'Average rouge-N score: '
# format as the other sections so the printed results line up across methods.
# NOTE(review): which component (recall / precision / F) avg_rouge averages is
# not visible here — confirm in main.py.
avg_r1, avg_r2, avg_rL = avg_rouge(results_sum_basic, 'rouge-1_sum_basic', 'rouge-2_sum_basic', 'rouge-l_sum_basic')
print('SumBasic')
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

SumBasic
Average rouge-1 score:  0.68
Average rogue-2 score: 0.49
Average rogue-l score: 0.66


## 5) TextRank Summarizer

In [28]:
# Same helper call as the other baselines, with sumy's TextRankSummarizer class
# selecting the algorithm. The literal 3 matches the other methods
# (presumably the sentence count — TODO confirm in main.py).
data['text_rank_summary'] = data['text'].apply(
    lambda article: summarizer(article, 3, TextRankSummarizer)
)

In [29]:
# Check that text_rank_summary was added; the frame now holds all five baseline summaries.
data.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary,luhn_summary,sum_basic_summary,text_rank_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...,The group's quarterly survey of companies foun...,UK economy facing 'major risks'. The rise came...,"The outlook for the service sector was ""uncert..."
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...,Shares in Europe's leading reinsurers and trav...,Insurers and reinsurance firms were also under...,Shares in Europe's leading reinsurers and trav...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","The 865 million share offer, a mix of new shar...",India power shares jump on debut. The governme...,Analysts say the success of the NTPC flotation...
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...,LVMH said the French designer's haute couture ...,Lacroix label bought by US firm. The Falic Gro...,LVMH said the French designer's haute couture ...


### ROUGE Scores

In [30]:
# Score every TextRank summary against the reference `summary` column and tag
# the three resulting columns with a `_text_rank` suffix.
results_text_rank = calc_rouge(data, 'summary', 'text_rank_summary')
results_text_rank.columns = ['rouge-1_text_rank', 'rouge-2_text_rank', 'rouge-l_text_rank']

#### Average

In [31]:
# Corpus-level averages of the three ROUGE columns for TextRank.
# The metric is ROUGE (not "rogue"); labels use the same 'Average rouge-N score: '
# format as the other sections so the printed results line up across methods.
# NOTE(review): which component (recall / precision / F) avg_rouge averages is
# not visible here — confirm in main.py.
avg_r1, avg_r2, avg_rL = avg_rouge(results_text_rank, 'rouge-1_text_rank', 'rouge-2_text_rank', 'rouge-l_text_rank')
print('TextRank')
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

TextRank
Average rouge-1 score:  0.69
Average rogue-2 score: 0.58
Average rogue-l score: 0.68


# Joining Tables

In [32]:
# Place the five per-method ROUGE frames side by side, then attach them to the
# articles and generated summaries. A column-wise concat aligns rows by index.
# NOTE(review): this assumes calc_rouge returns frames indexed like `data` —
# confirm, otherwise rows would be padded with NaN instead of lining up.
results = pd.concat(
    [results_lsa, results_lexrank, results_luhn, results_sum_basic, results_text_rank],
    axis="columns",
)
data_con = pd.concat([data, results], axis="columns")

In [33]:
# Preview the joined frame; each ROUGE cell holds a dict with 'r', 'p' and 'f' entries.
data_con.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary,luhn_summary,sum_basic_summary,text_rank_summary,rouge-1_lsa,rouge-2_lsa,...,rouge-l_lexrank,rouge-1_luhn,rouge-2_luhn,rouge-l_luhn,rouge-1_sum_basic,rouge-2_sum_basic,rouge-l_sum_basic,rouge-1_text_rank,rouge-2_text_rank,rouge-l_text_rank
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...,The group's quarterly survey of companies foun...,UK economy facing 'major risks'. The rise came...,"The outlook for the service sector was ""uncert...","{'r': 0.26666666666666666, 'p': 0.125, 'f': 0....","{'r': 0.0, 'p': 0.0, 'f': 0.0}",...,"{'r': 0.7115384615384616, 'p': 0.3854166666666...","{'r': 0.5573770491803278, 'p': 0.3541666666666...","{'r': 0.3875, 'p': 0.2421875, 'f': 0.298076918...","{'r': 0.4918032786885246, 'p': 0.3125, 'f': 0....","{'r': 0.926829268292683, 'p': 0.39583333333333...","{'r': 0.8863636363636364, 'p': 0.3046875, 'f':...","{'r': 0.926829268292683, 'p': 0.39583333333333...","{'r': 0.7142857142857143, 'p': 0.4166666666666...","{'r': 0.6712328767123288, 'p': 0.3828125, 'f':...","{'r': 0.7142857142857143, 'p': 0.4166666666666..."
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,"{'r': 0.8611111111111112, 'p': 0.3463687150837...","{'r': 0.8023255813953488, 'p': 0.2727272727272...",...,"{'r': 1.0, 'p': 0.4245810055865922, 'f': 0.596...","{'r': 1.0, 'p': 0.37988826815642457, 'f': 0.55...","{'r': 0.9787234042553191, 'p': 0.3636363636363...","{'r': 1.0, 'p': 0.37988826815642457, 'f': 0.55...","{'r': 0.6976744186046512, 'p': 0.1675977653631...","{'r': 0.54, 'p': 0.1067193675889328, 'f': 0.17...","{'r': 0.6744186046511628, 'p': 0.1620111731843...","{'r': 0.7142857142857143, 'p': 0.2793296089385...","{'r': 0.6421052631578947, 'p': 0.2411067193675...","{'r': 0.7, 'p': 0.2737430167597765, 'f': 0.393..."
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...,Shares in Europe's leading reinsurers and trav...,Insurers and reinsurance firms were also under...,Shares in Europe's leading reinsurers and trav...,"{'r': 0.5660377358490566, 'p': 0.1764705882352...","{'r': 0.3620689655172414, 'p': 0.0813953488372...",...,"{'r': 1.0, 'p': 0.2647058823529412, 'f': 0.418...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 0.9897959183673469, 'p': 0.3759689922480...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 1.0, 'p': 0.16470588235294117, 'f': 0.28...","{'r': 0.9285714285714286, 'p': 0.1007751937984...","{'r': 1.0, 'p': 0.16470588235294117, 'f': 0.28...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 0.9897959183673469, 'p': 0.3759689922480...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62..."
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","The 865 million share offer, a mix of new shar...",India power shares jump on debut. The governme...,Analysts say the success of the NTPC flotation...,"{'r': 0.723404255319149, 'p': 0.58620689655172...","{'r': 0.6078431372549019, 'p': 0.4305555555555...",...,"{'r': 0.5, 'p': 0.43103448275862066, 'f': 0.46...","{'r': 0.5, 'p': 0.39655172413793105, 'f': 0.44...","{'r': 0.3620689655172414, 'p': 0.2916666666666...","{'r': 0.5, 'p': 0.39655172413793105, 'f': 0.44...","{'r': 0.34375, 'p': 0.1896551724137931, 'f': 0...","{'r': 0.02564102564102564, 'p': 0.013888888888...","{'r': 0.34375, 'p': 0.1896551724137931, 'f': 0...","{'r': 0.7906976744186046, 'p': 0.5862068965517...","{'r': 0.6666666666666666, 'p': 0.5277777777777...","{'r': 0.7906976744186046, 'p': 0.5862068965517..."
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...,LVMH said the French designer's haute couture ...,Lacroix label bought by US firm. The Falic Gro...,LVMH said the French designer's haute couture ...,"{'r': 0.40384615384615385, 'p': 0.5, 'f': 0.44...","{'r': 0.2, 'p': 0.23404255319148937, 'f': 0.21...",...,"{'r': 0.5, 'p': 0.42857142857142855, 'f': 0.46...","{'r': 0.45614035087719296, 'p': 0.619047619047...","{'r': 0.2898550724637681, 'p': 0.4255319148936...","{'r': 0.43859649122807015, 'p': 0.595238095238...","{'r': 0.5454545454545454, 'p': 0.4285714285714...","{'r': 0.2631578947368421, 'p': 0.2127659574468...","{'r': 0.5454545454545454, 'p': 0.4285714285714...","{'r': 0.46551724137931033, 'p': 0.642857142857...","{'r': 0.2898550724637681, 'p': 0.4255319148936...","{'r': 0.4482758620689655, 'p': 0.6190476190476..."


In [34]:
# List every column, since head() elides the middle ones with '...'.
data_con.columns

Index(['label', 'text', 'summary', 'lsa_summary', 'lexrank_summary',
       'luhn_summary', 'sum_basic_summary', 'text_rank_summary', 'rouge-1_lsa',
       'rouge-2_lsa', 'rouge-l_lsa', 'rouge-1_lexrank', 'rouge-2_lexrank',
       'rouge-l_lexrank', 'rouge-1_luhn', 'rouge-2_luhn', 'rouge-l_luhn',
       'rouge-1_sum_basic', 'rouge-2_sum_basic', 'rouge-l_sum_basic',
       'rouge-1_text_rank', 'rouge-2_text_rank', 'rouge-l_text_rank'],
      dtype='object')

# Proposed Ensemble Extractive Approach using Weighted Voting

In [35]:
# Work on a copy so data_con keeps only the baseline summaries and scores.
df_for_ensemble = data_con.copy()
# generate_ensemble_summary (from main) is invoked once per row and its return
# value becomes that row's ensemble summary.
# NOTE(review): each row handed to the helper also carries the reference
# `summary` and the per-row ROUGE dicts computed against it. If the voting
# weights are derived from those columns, the ensemble is tuned on the same
# references it is later scored on — confirm in main.py.
# NOTE(review): in the preview several ensemble summaries begin with ". ",
# which looks like a sentence-joining artifact inside the helper — TODO confirm.
df_for_ensemble['ensemble_summary'] = df_for_ensemble.apply(
    generate_ensemble_summary,
    axis="columns",
)

In [36]:
# Preview the frame with the new ensemble_summary column (last column).
df_for_ensemble.head()

Unnamed: 0,label,text,summary,lsa_summary,lexrank_summary,luhn_summary,sum_basic_summary,text_rank_summary,rouge-1_lsa,rouge-2_lsa,...,rouge-1_luhn,rouge-2_luhn,rouge-l_luhn,rouge-1_sum_basic,rouge-2_sum_basic,rouge-l_sum_basic,rouge-1_text_rank,rouge-2_text_rank,rouge-l_text_rank,ensemble_summary
0,business,UK economy facing 'major risks'. The UK manufa...,"""Despite some positive news for the export sec...",Employment in manufacturing also fell and job ...,The group's quarterly survey of companies foun...,The group's quarterly survey of companies foun...,UK economy facing 'major risks'. The rise came...,"The outlook for the service sector was ""uncert...","{'r': 0.26666666666666666, 'p': 0.125, 'f': 0....","{'r': 0.0, 'p': 0.0, 'f': 0.0}",...,"{'r': 0.5573770491803278, 'p': 0.3541666666666...","{'r': 0.3875, 'p': 0.2421875, 'f': 0.298076918...","{'r': 0.4918032786885246, 'p': 0.3125, 'f': 0....","{'r': 0.926829268292683, 'p': 0.39583333333333...","{'r': 0.8863636363636364, 'p': 0.3046875, 'f':...","{'r': 0.926829268292683, 'p': 0.39583333333333...","{'r': 0.7142857142857143, 'p': 0.4166666666666...","{'r': 0.6712328767123288, 'p': 0.3828125, 'f':...","{'r': 0.7142857142857143, 'p': 0.4166666666666...",The BCC found confidence increased in the qua...
1,business,Aids and climate top Davos agenda. Climate cha...,"At the same time, about 100,000 people are exp...","Some 2,000 business and political leaders from...",Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,Climate change and the fight against Aids are ...,"{'r': 0.8611111111111112, 'p': 0.3463687150837...","{'r': 0.8023255813953488, 'p': 0.2727272727272...",...,"{'r': 1.0, 'p': 0.37988826815642457, 'f': 0.55...","{'r': 0.9787234042553191, 'p': 0.3636363636363...","{'r': 1.0, 'p': 0.37988826815642457, 'f': 0.55...","{'r': 0.6976744186046512, 'p': 0.1675977653631...","{'r': 0.54, 'p': 0.1067193675889328, 'f': 0.17...","{'r': 0.6744186046511628, 'p': 0.1620111731843...","{'r': 0.7142857142857143, 'p': 0.2793296089385...","{'r': 0.6421052631578947, 'p': 0.2411067193675...","{'r': 0.7, 'p': 0.2737430167597765, 'f': 0.393...",". At the same time, about 100,000 people are e..."
2,business,Asian quake hits European shares. Shares in Eu...,The unfolding scale of the disaster in south A...,"More than 23,000 people have been killed follo...",But the disaster has little impact on stock ma...,Shares in Europe's leading reinsurers and trav...,Insurers and reinsurance firms were also under...,Shares in Europe's leading reinsurers and trav...,"{'r': 0.5660377358490566, 'p': 0.1764705882352...","{'r': 0.3620689655172414, 'p': 0.0813953488372...",...,"{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 0.9897959183673469, 'p': 0.3759689922480...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 1.0, 'p': 0.16470588235294117, 'f': 0.28...","{'r': 0.9285714285714286, 'p': 0.1007751937984...","{'r': 1.0, 'p': 0.16470588235294117, 'f': 0.28...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...","{'r': 0.9897959183673469, 'p': 0.3759689922480...","{'r': 1.0, 'p': 0.45294117647058824, 'f': 0.62...",. Shares in Munich Re and Swiss Re - the world...
3,business,India power shares jump on debut. Shares in In...,"Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","Shares in India's largest power producer, Nati...","The 865 million share offer, a mix of new shar...",India power shares jump on debut. The governme...,Analysts say the success of the NTPC flotation...,"{'r': 0.723404255319149, 'p': 0.58620689655172...","{'r': 0.6078431372549019, 'p': 0.4305555555555...",...,"{'r': 0.5, 'p': 0.39655172413793105, 'f': 0.44...","{'r': 0.3620689655172414, 'p': 0.2916666666666...","{'r': 0.5, 'p': 0.39655172413793105, 'f': 0.44...","{'r': 0.34375, 'p': 0.1896551724137931, 'f': 0...","{'r': 0.02564102564102564, 'p': 0.013888888888...","{'r': 0.34375, 'p': 0.1896551724137931, 'f': 0...","{'r': 0.7906976744186046, 'p': 0.5862068965517...","{'r': 0.6666666666666666, 'p': 0.5277777777777...","{'r': 0.7906976744186046, 'p': 0.5862068965517...",. The firm is the largest utility company in I...
4,business,Lacroix label bought by US firm. Luxury goods ...,LVMH said the French designer's haute couture ...,The Paris-based firm has been shedding non-cor...,Lacroix label bought by US firm. The Paris-bas...,LVMH said the French designer's haute couture ...,Lacroix label bought by US firm. The Falic Gro...,LVMH said the French designer's haute couture ...,"{'r': 0.40384615384615385, 'p': 0.5, 'f': 0.44...","{'r': 0.2, 'p': 0.23404255319148937, 'f': 0.21...",...,"{'r': 0.45614035087719296, 'p': 0.619047619047...","{'r': 0.2898550724637681, 'p': 0.4255319148936...","{'r': 0.43859649122807015, 'p': 0.595238095238...","{'r': 0.5454545454545454, 'p': 0.4285714285714...","{'r': 0.2631578947368421, 'p': 0.2127659574468...","{'r': 0.5454545454545454, 'p': 0.4285714285714...","{'r': 0.46551724137931033, 'p': 0.642857142857...","{'r': 0.2898550724637681, 'p': 0.4255319148936...","{'r': 0.4482758620689655, 'p': 0.6190476190476...",. The Falic Group bought two cosmetics labels ...


### ROUGE Scores

In [37]:
# Score every ensemble summary against the reference `summary` column and tag
# the three resulting columns with an `_ensemble` suffix.
results_ensemble = calc_rouge(df_for_ensemble, 'summary', 'ensemble_summary')
results_ensemble.columns = ['rouge-1_ensemble', 'rouge-2_ensemble', 'rouge-l_ensemble']

#### Average

In [38]:
# Corpus-level averages of the three ROUGE columns for the ensemble, printed in
# the same format as the baselines for direct comparison.
# NOTE(review): which component (recall / precision / F) avg_rouge averages is
# not visible here — confirm in main.py.
avg_r1, avg_r2, avg_rL = avg_rouge(results_ensemble, 'rouge-1_ensemble', 'rouge-2_ensemble', 'rouge-l_ensemble')
print("Ensemble:")
print('Average rouge-1 score: ', avg_r1)
print('Average rouge-2 score: ', avg_r2)
print('Average rouge-l score: ', avg_rL)

Ensemble:
Average rouge-1 score:  0.83
Average rouge-2 score:  0.73
Average rouge-l score:  0.82
