In [1]:
# Data
import json
# Data Analysis
import numpy as np
import pandas as pd
# Extractive Summarizer
from models.extractive_summarizer import ExtractiveSummarizer
# Encoders for Extractive Summarizer
import models.Encoders.Word2Vec as Word2Vecencoder
import models.Encoders.TFIDF as TFIDFencoder
# Rankers for Extractive Summarizer
import models.Rankers.LogisticRegression as LogisticRegressionranker
import models.Rankers.LinearRegression as LinearRegressionranker
# Decoders for Extractive Summarizer
import models.Decoders.PureScore as PureScore
# NOTE(review): the two aliases below bind the very same module as `PureScore`
# (models.Decoders.PureScore). Every cell that passes decoder=PureRougeSearch or
# decoder=GuidedRougeSearch therefore hands the PureScore module to the
# summarizer. Presumably each alias was meant to import its own decoder
# module -- TODO confirm the intended module paths and fix.
import models.Decoders.PureScore as PureRougeSearch
import models.Decoders.PureScore as GuidedRougeSearch
# Baselines
import models.ModelLeadN as LeadN
import models.ModelRandom as Random
# Evaluator
from evaluation.rouge_evaluator import RougeEvaluator

In [2]:
# Data: locations of the JSON files loaded below.
test_PATH = 'data/test.json'
train_greedy_sent_PATH = 'data/train.greedy_sent.json'
train_PATH = 'data/train.json'
validation_PATH = 'data/validation.json'
very_small_validation_PATH = 'data/very_small_validation.json'


def _load_json(path):
    """Read the file at `path` and return the decoded JSON object.

    The file is opened with an explicit UTF-8 encoding: without it, `open()`
    falls back to the platform's locale encoding, and the articles contain
    non-ASCII characters (e.g. typographic quotes) that would then fail to
    decode or be garbled on non-UTF-8 locales.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# One variable per file, same names as before so later cells keep working.
test_data = _load_json(test_PATH)
train_greedy_sent_data = _load_json(train_greedy_sent_PATH)
train_data = _load_json(train_PATH)
validation_data = _load_json(validation_PATH)
very_small_validation_data = _load_json(very_small_validation_PATH)

In [3]:
# Table A.1: Random
# Run the Random baseline on the very small validation set and display the
# raw predictions as the cell output.
summarizer = Random.ExtractiveSummarizer()
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    very_small_validation_data,
)
pred_data

Validating data shape: 100%|██████████| 10000/10000 [00:00<00:00, 2123698.23it/s]
Running extractive summarizer:  90%|█████████ | 9/10 [00:00<00:00, 348.94it/s]


[{'article': "It was a call that changed his life. After a decade without speaking to her, Dan Watson was contacted by his mother Lynn from Ireland last year after she was diagnosed with an aggressive form of lung cancer. The 34 year old Sydney man resolved to turn over a new leaf after hearing the shocking news and decided to hike a staggering 5,000 kilometres across Australia for charity. ‘I was in a bad place at the time. I realised the only way I was going to make it through the other end of the tunnel was to do something selfless. I had to put others first,’ Dan told Daily Mail Australia. Dan Watson, 34, has embarked upon a 5000km solo mega-walk from Perth to Sydney with nothing but a custom made trolley to carry the bare necessities . Watson resolved to make the journey after he spoke with his mother Lynn (pictured) after a decade without contact . In a bid to repair his relationship with his mother, Dan embarked on the inspiring mega-walk from Fremantle to Sydney to raise money 

In [4]:
# Table A.2: LeadN
# Run the LeadN baseline on the very small validation set and display the
# raw predictions as the cell output.
summarizer = LeadN.ExtractiveSummarizer()
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    very_small_validation_data,
)
pred_data

Validating data shape: 100%|██████████| 10000/10000 [00:00<00:00, 3566585.03it/s]
Running extractive summarizer:  90%|█████████ | 9/10 [00:00<00:00, 563.84it/s]


[{'article': "It was a call that changed his life. After a decade without speaking to her, Dan Watson was contacted by his mother Lynn from Ireland last year after she was diagnosed with an aggressive form of lung cancer. The 34 year old Sydney man resolved to turn over a new leaf after hearing the shocking news and decided to hike a staggering 5,000 kilometres across Australia for charity. ‘I was in a bad place at the time. I realised the only way I was going to make it through the other end of the tunnel was to do something selfless. I had to put others first,’ Dan told Daily Mail Australia. Dan Watson, 34, has embarked upon a 5000km solo mega-walk from Perth to Sydney with nothing but a custom made trolley to carry the bare necessities . Watson resolved to make the journey after he spoke with his mother Lynn (pictured) after a decade without contact . In a bid to repair his relationship with his mother, Dan embarked on the inspiring mega-walk from Fremantle to Sydney to raise money 

In [None]:
# Table A.3: TFIDFencoder + LogisticRegressionranker + GuidedRougeSearch
# Runs the pipeline `num_experiments` times against the test set and displays
# the mean of the per-run evaluation results.
# NOTE(review): the original literal read `1#0`, i.e. presumably 10 runs were
# used for the reported numbers and 1 is a quick-run setting -- confirm.
num_experiments = 1
results = None
for i in range(num_experiments):
    summarizer = ExtractiveSummarizer(
        encoder=TFIDFencoder,
        ranker=LogisticRegressionranker,
        decoder=GuidedRougeSearch,
    )
    pred_data = summarizer.run_extractive_summarization(train_greedy_sent_data, test_data)
    # Each run contributes 1/num_experiments of its scores, so the running
    # sum ends up being the mean over all runs.
    run_scores = summarizer.eval(RougeEvaluator(), test_data, pred_data) / num_experiments
    # Explicit first-iteration check instead of a bare `except:`: the old form
    # caught every exception (including KeyboardInterrupt) and then re-ran the
    # whole evaluation a second time.
    results = run_scores if results is None else results + run_scores
results

In [None]:
# Table C.1: TFIDFencoder + LogisticRegressionranker + PureScore
# Build the pipeline, predict on the test set, then evaluate the predictions.
summarizer = ExtractiveSummarizer(
    encoder=TFIDFencoder,
    ranker=LogisticRegressionranker,
    decoder=PureScore,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    test_data,
)
results = summarizer.eval(RougeEvaluator(), test_data, pred_data)
results

In [None]:
# Table C.2: TFIDFencoder + LinearRegression + GuidedRougeSearch
# Build the pipeline, predict on the test set, then evaluate the predictions.
summarizer = ExtractiveSummarizer(
    encoder=TFIDFencoder,
    ranker=LinearRegressionranker,
    decoder=GuidedRougeSearch,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    test_data,
)
results = summarizer.eval(RougeEvaluator(), test_data, pred_data)
results

In [None]:
# Table C.3: Word2Vecencoder + LogisticRegressionranker + GuidedRougeSearch
# Build the pipeline, predict on the test set, then evaluate the predictions.
summarizer = ExtractiveSummarizer(
    encoder=Word2Vecencoder,
    ranker=LogisticRegressionranker,
    decoder=GuidedRougeSearch,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    test_data,
)
results = summarizer.eval(RougeEvaluator(), test_data, pred_data)
results

In [None]:
# Table C.4: Word2Vecencoder + LinearRegression + GuidedRougeSearch
# Build the pipeline, predict on the test set, then evaluate the predictions.
summarizer = ExtractiveSummarizer(
    encoder=Word2Vecencoder,
    ranker=LinearRegressionranker,
    decoder=GuidedRougeSearch,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    test_data,
)
results = summarizer.eval(RougeEvaluator(), test_data, pred_data)
results

In [5]:
# Table C.5: Word2Vecencoder + LinearRegression + PureScore
# Run the pipeline on the very small validation set and display the raw
# predictions as the cell output (no evaluation is done in this cell).
summarizer = ExtractiveSummarizer(
    encoder=Word2Vecencoder,
    ranker=LinearRegressionranker,
    decoder=PureScore,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    very_small_validation_data,
)
pred_data

Validating data shape: 100%|██████████| 10000/10000 [00:00<00:00, 3835668.95it/s]


Constructing Word Vectors


Running X_2_VectorList: 100%|██████████| 10000/10000 [00:16<00:00, 623.00it/s]


Training Completed


Running extractive summarizer:  90%|█████████ | 9/10 [00:00<00:00, 92.89it/s]


[{'article': "It was a call that changed his life. After a decade without speaking to her, Dan Watson was contacted by his mother Lynn from Ireland last year after she was diagnosed with an aggressive form of lung cancer. The 34 year old Sydney man resolved to turn over a new leaf after hearing the shocking news and decided to hike a staggering 5,000 kilometres across Australia for charity. ‘I was in a bad place at the time. I realised the only way I was going to make it through the other end of the tunnel was to do something selfless. I had to put others first,’ Dan told Daily Mail Australia. Dan Watson, 34, has embarked upon a 5000km solo mega-walk from Perth to Sydney with nothing but a custom made trolley to carry the bare necessities . Watson resolved to make the journey after he spoke with his mother Lynn (pictured) after a decade without contact . In a bid to repair his relationship with his mother, Dan embarked on the inspiring mega-walk from Fremantle to Sydney to raise money 

In [11]:
# Stack the three result tables under one outer index level per model and
# round every value to three decimals for display.
# NOTE(review): `a`, `b` and `c` are not assigned in any visible cell, so this
# fails on a fresh kernel (Restart & Run All) -- presumably they hold the
# evaluation results of the Random, LeadN and Word2Vec + LinearRegression
# runs; TODO define them explicitly before this cell.
pd.concat(
    {
        'Random': a,
        'LeadN': b,
        'Word2Vecencoder + LinearRegression': c,
    }
).round(3)

Unnamed: 0,Unnamed: 1,Recall,Precision,F1-score
Random,rouge-1,0.294,0.264,0.278
Random,rouge-2,0.077,0.068,0.072
Random,rouge-4,0.023,0.021,0.022
Random,rouge-l,0.243,0.221,0.231
LeadN,rouge-1,0.454,0.321,0.376
LeadN,rouge-2,0.185,0.13,0.153
LeadN,rouge-4,0.067,0.048,0.056
LeadN,rouge-l,0.38,0.269,0.315
Word2Vecencoder + LinearRegression,rouge-1,0.426,0.274,0.334
Word2Vecencoder + LinearRegression,rouge-2,0.152,0.099,0.12


In [None]:
# Table C.6: TFIDFencoder + LinearRegression + PureRougeSearch
# Build the pipeline, predict on the test set, then evaluate the predictions.
summarizer = ExtractiveSummarizer(
    encoder=TFIDFencoder,
    ranker=LinearRegressionranker,
    decoder=PureRougeSearch,
)
pred_data = summarizer.run_extractive_summarization(
    train_greedy_sent_data,
    test_data,
)
results = summarizer.eval(RougeEvaluator(), test_data, pred_data)
results