In [1]:
MAIN_PATH = "/home/carlos/MasterDS/tfm"
JSON_DATA_PATH = '{}/data/json/'.format(MAIN_PATH)
CSV_DATA_PATH = '{}/data/csv/'.format(MAIN_PATH)

In [2]:
import sys
sys.path.insert(0, MAIN_PATH)

In [6]:
%load_ext autoreload
%autoreload 2
from scripts.text.semantic_graph import SemanticGraph
from scripts.text.article_text_processor import ArticleTextProcessor
from scripts.text.extractive_summary.key_events_graph import KeyEventsSummaryGraph

from scripts.conf import TEAMS

import pandas as pd

%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Extracción de eventos importantes usando grafos conceptuales

El objetivo de este experimento es utilizar los eventos de los partidos para crear un grafo conceptual, y extraer a partir de este los eventos 
más importantes (atendiendo al grado de los nodos, que serán palabras/entidades nombradas/sujetos nominales).

## Probando con eventos de un partido

In [45]:
processor = ArticleTextProcessor()
key_events = KeyEventsSummaryGraph()

In [5]:
all_files = processor.load_json()

In [6]:
season_file = 'premier_league_2019_2020.json'
league_season_teams = TEAMS[season_file.split('.')[0]]
# Solo para pruebas
key_events.league_season_teams = league_season_teams

In [7]:
match = all_files[season_file]['https://www.bbc.com/sport/football/49791610']

In [8]:
event_list = match['events']

In [9]:
semantic_graph = SemanticGraph(event_list)
g = semantic_graph.create_graph()

In [11]:
hubs_sentences = semantic_graph.get_n_hubs_sentences(n=10)

Hubs: [('Manchester United', 45), ('Arsenal', 40), ('attempt', 39), ('the box', 37), ('Bukayo Saka', 24), ('ball', 23), ('footed shot', 23), ('the centre', 23), ('Pierre-Emerick Aubameyang', 22), ('manchester united', 20)]


In [146]:
hubs_sentences

OrderedDict([('Manchester United',
              [(3,
                'Scott McTominay (Manchester United) wins a free kick in the defensive half.'),
               (5, 'Foul by Jesse Lingard (Manchester United).'),
               (8, 'Corner,  Manchester United. Conceded by Calum Chambers.'),
               (10,
                'Daniel James (Manchester United) wins a free kick on the left wing.'),
               (13, 'Hand ball by Andreas Pereira (Manchester United).'),
               (16, 'Foul by Paul Pogba (Manchester United).'),
               (18, 'Foul by Jesse Lingard (Manchester United).'),
               (20, 'Foul by Andreas Pereira (Manchester United).'),
               (22, 'Foul by Ashley Young (Manchester United).'),
               (24, 'Foul by Marcus Rashford (Manchester United).'),
               (25,
                'Marcus Rashford (Manchester United) is shown the yellow card for a bad foul.'),
               (27,
                'Axel Tuanzebe (Manchester United) 

In [12]:
count_vec_kwargs = {'ngram_range': (1, 2), 'strip_accents': 'unicode'}
n_hubs = 10
fc = 0.5
mode = 'homogeneous'

In [13]:
processed_events = key_events.process_match_events(event_list, n_hubs, fc, mode)

Hubs: [('Manchester United', 45), ('Arsenal', 40), ('attempt', 39), ('the box', 37), ('Bukayo Saka', 24), ('ball', 23), ('footed shot', 23), ('the centre', 23), ('Pierre-Emerick Aubameyang', 22), ('manchester united', 20)]
Hubs with sentences: {'Manchester United': 54, 'Arsenal': 46, 'attempt': 24, 'the box': 20, 'Bukayo Saka': 12, 'ball': 8, 'footed shot': 9, 'the centre': 8, 'Pierre-Emerick Aubameyang': 7, 'manchester united': 5}
The text has 125 sentences
The semantic graph has 76 nodes
The summary should have 62 sentences with a compression factor of 0.5
There are 113 sentences in the 10 nodes with more degree
Showing 62 sentences


In [17]:
key_events.match_summary(match, count_vec_kwargs, save_relations=True, verbose=True, n_hubs=10, 
                         fc=fc, mode=mode)

Hubs: [('Manchester United', 45), ('Arsenal', 40), ('attempt', 39), ('the box', 37), ('Bukayo Saka', 24), ('ball', 23), ('footed shot', 23), ('the centre', 23), ('Pierre-Emerick Aubameyang', 22), ('manchester united', 20)]
Hubs with sentences: {'Manchester United': 54, 'Arsenal': 46, 'attempt': 24, 'the box': 20, 'Bukayo Saka': 12, 'ball': 8, 'footed shot': 9, 'the centre': 8, 'Pierre-Emerick Aubameyang': 7, 'manchester united': 5}
The text has 125 sentences
The semantic graph has 76 nodes
The summary should have 62 sentences with a compression factor of 0.5
There are 113 sentences in the 10 nodes with more degree
Showing 62 sentences
Number of sentences in original article: 37
Number of sentences in summary: 12
Event:
Foul by Matteo Guendouzi (Arsenal).
Nearest sentence in article:
United had the edge on chances, although they were grateful to David de Gea for a fine double save from Saka and Matteo Guendouzi in the first half
Event:
Scott McTominay (Manchester United) wins a free kic

('In a disappointing encounter that was a pale shadow of their mighty clashes of years gone by, Scott McTominay gave Manchester United the lead at the end of an attritional first-half with a rising drive from the edge of the area.\nArsenal equalised just before the hour courtesy of Pierre-Emerick Aubameyang\'s cool finish.It was originally ruled out for offside but the video assistant referee confirmed the striker had been played onside by Harry Maguire.\nGoalkeeper Bernd Leno excelled for Arsenal with fine saves from Maguire and Marcus Rashford\'s late free-kick, while Bukayo Saka\'s goalbound shot crucially struck Victor Lindelof and flew over the top.\nMcTominay also headed a great chance well over from Ashley Young\'s corner but neither side could force a winner.\nThe result takes Arsenal into the top fourManchester United\'s captain was 34-year-old Ashley Young, pressed into defensive service by Aaron Wan-Bissaka\'s absence through injury, and Arsenal were led by Granit Xhaka, new

### Con todos

In [4]:
summaries_path = '{}summaries/key_events_summaries_3.csv'.format(CSV_DATA_PATH)
map_path = '{}summaries/key_events_summaries_map_3.csv'.format(CSV_DATA_PATH)
count_vec_kwargs = {'ngram_range': (1, 2), 'strip_accents': 'unicode'}
n_hubs = 5
fc = 0.5
mode = 'homogeneous'

In [5]:
map_path

'/home/carlos/MasterDS/tfm/data/csv/summaries/key_events_summaries_map_3.csv'

In [6]:
key_events = KeyEventsSummaryGraph()

In [7]:
%%time
pd_df = key_events.run(save_events_sentences=True, path_csv=summaries_path, 
                       path_mapping=map_path, count_vec_kwargs=count_vec_kwargs,
                       n_hubs=n_hubs, fc=fc, mode=mode
                       )

  0%|          | 0/20 [00:00<?, ?it/s]

Hubs: [('attempt', 44), ('the box', 41), ('Napoli', 38), ('the centre', 36), ('Brescia', 32)]
Hubs with sentences: {'attempt': 33, 'the box': 24, 'Napoli': 35, 'the centre': 15, 'Brescia': 32}
The text has 108 events
The semantic graph has 85 nodes
The summary should have 54 sentences with a compression factor of 0.5
There are 89 sentences in the 5 nodes with more degree
Number of original events: 108
Number of processed events: 54
Number of sentences in original article: 12
Number of sentences in summary: 7
Hubs: [('Milan', 46), ('Fiorentina', 38), ('attempt', 32), ('the centre', 31), ('the box', 28)]
Hubs with sentences: {'Milan': 41, 'Fiorentina': 47, 'attempt': 22, 'the centre': 12, 'the box': 16}
The text has 106 events
The semantic graph has 79 nodes
The summary should have 53 sentences with a compression factor of 0.5
There are 89 sentences in the 5 nodes with more degree
Number of original events: 106
Number of processed events: 53
Number of sentences in original article: 6
Num

  5%|▌         | 1/20 [02:05<39:43, 125.47s/it]

Number of sentences in original article: 6
Number of sentences in summary: 4
Could not perform summary for https://www.bbc.com/sport/football/49745022
Hubs: [('Paris Saint Germain', 39), ('the box', 33), ('the centre', 29), ('attempt', 27), ('Real Madrid', 26)]
Hubs with sentences: {'Paris Saint Germain': 47, 'the box': 17, 'the centre': 11, 'attempt': 16, 'Real Madrid': 39}
The text has 102 events
The semantic graph has 69 nodes
The summary should have 51 sentences with a compression factor of 0.5
There are 86 sentences in the 5 nodes with more degree
Number of original events: 102
Number of processed events: 51
Number of sentences in original article: 47
Number of sentences in summary: 12
Hubs: [('Tottenham Hotspur', 54), ('Olympiakos', 45), ('attempt', 34), ('the box', 34), ('the centre', 25)]
Hubs with sentences: {'Tottenham Hotspur': 41, 'Olympiakos': 45, 'attempt': 22, 'the box': 22, 'the centre': 11}
The text has 97 events
The semantic graph has 75 nodes
The summary should have 

 10%|█         | 2/20 [04:34<39:46, 132.59s/it]

Number of sentences in original article: 1
Number of sentences in summary: 1
Hubs: [('Los Angeles Football Club', 39), ('attempt', 35), ('Atlanta United FC', 34), ('the box', 29), ('the left', 26)]
Hubs with sentences: {'Los Angeles Football Club': 42, 'attempt': 23, 'Atlanta United FC': 27, 'the box': 15, 'the left': 10}
The text has 89 events
The semantic graph has 72 nodes
The summary should have 44 sentences with a compression factor of 0.5
There are 74 sentences in the 5 nodes with more degree
Number of original events: 89
Number of processed events: 44
Number of sentences in original article: 11
Number of sentences in summary: 6
Hubs: [('Miami United', 37), ('Orlando City SC', 35), ('attempt', 32), ('a free kick', 25), ('the box', 22)]
Hubs with sentences: {'Miami United': 50, 'Orlando City SC': 52, 'attempt': 17, 'a free kick': 37, 'the box': 12}
The text has 116 events
The semantic graph has 64 nodes
The summary should have 58 sentences with a compression factor of 0.5
There ar

 15%|█▌        | 3/20 [18:07<1:35:23, 336.67s/it]

Number of sentences in original article: 13
Number of sentences in summary: 4
Hubs: [('Alavés', 42), ('Real Madrid', 38), ('attempt', 37), ('the centre', 29), ('the box', 25)]
Hubs with sentences: {'Alavés': 46, 'Real Madrid': 34, 'attempt': 17, 'the centre': 8, 'the box': 8}
The text has 113 events
The semantic graph has 86 nodes
The summary should have 56 sentences with a compression factor of 0.5
There are 85 sentences in the 5 nodes with more degree
Number of original events: 113
Number of processed events: 56
Number of sentences in original article: 13
Number of sentences in summary: 7
Could not perform summary for https://www.bbc.com/sport/football/50537887
Could not perform summary for https://www.bbc.com/sport/football/50537904
Could not perform summary for https://www.bbc.com/sport/football/50537911
Could not perform summary for https://www.bbc.com/sport/football/50537918
Could not perform summary for https://www.bbc.com/sport/football/50537935
Hubs: [('Barcelona', 42), ('the 

 20%|██        | 4/20 [19:43<1:10:32, 264.54s/it]

Number of sentences in original article: 12
Number of sentences in summary: 7
Hubs: [('Inter Milan', 59), ('Sampdoria', 40), ('attempt', 33), ('the box', 32), ('footed shot', 29)]
Hubs with sentences: {'Inter Milan': 61, 'Sampdoria': 38, 'attempt': 28, 'the box': 22, 'footed shot': 9}
The text has 116 events
The semantic graph has 77 nodes
The summary should have 58 sentences with a compression factor of 0.5
There are 99 sentences in the 5 nodes with more degree
Number of original events: 116
Number of processed events: 58
Number of sentences in original article: 9
Number of sentences in summary: 4
Hubs: [('attempt', 41), ('the box', 38), ('Napoli', 37), ('Crotone', 33), ('footed shot', 24)]
Hubs with sentences: {'attempt': 26, 'the box': 22, 'Napoli': 41, 'Crotone': 26, 'footed shot': 10}
The text has 101 events
The semantic graph has 76 nodes
The summary should have 50 sentences with a compression factor of 0.5
There are 82 sentences in the 5 nodes with more degree
Number of original

 25%|██▌       | 5/20 [21:31<54:22, 217.50s/it]  

Number of sentences in original article: 12
Number of sentences in summary: 9
Hubs: [('the box', 52), ('attempt', 50), ('fsv mainz', 47), ('FC Bayern München', 46), ('the centre', 41)]
Hubs with sentences: {'the box': 32, 'attempt': 33, 'fsv mainz': 32, 'FC Bayern München': 26, 'the centre': 21}
The text has 102 events
The semantic graph has 81 nodes
The summary should have 51 sentences with a compression factor of 0.5
There are 67 sentences in the 5 nodes with more degree
Number of original events: 102
Number of processed events: 51
Number of sentences in original article: 16
Number of sentences in summary: 9
Hubs: [('FC Augsburg', 47), ('Borussia Dortmund', 38), ('the centre', 36), ('attempt', 34), ('the box', 33)]
Hubs with sentences: {'FC Augsburg': 56, 'Borussia Dortmund': 47, 'the centre': 15, 'attempt': 20, 'the box': 19}
The text has 131 events
The semantic graph has 76 nodes
The summary should have 66 sentences with a compression factor of 0.5
There are 106 sentences in the 5 

 30%|███       | 6/20 [22:41<40:27, 173.38s/it]

Number of sentences in original article: 12
Number of sentences in summary: 6
Hubs: [('Toronto FC', 40), ('attempt', 35), ('the box', 31), ('D.C. United', 30), ('Sebastian Giovinco', 27)]
Hubs with sentences: {'Toronto FC': 54, 'attempt': 22, 'the box': 20, 'D.C. United': 35, 'Sebastian Giovinco': 16}
The text has 103 events
The semantic graph has 66 nodes
The summary should have 52 sentences with a compression factor of 0.5
There are 94 sentences in the 5 nodes with more degree
Number of original events: 103
Number of processed events: 52
Number of sentences in original article: 21
Number of sentences in summary: 7
Hubs: [('New York Red Bulls', 40), ('D.C. United', 39), ('the box', 39), ('attempt', 39), ('the centre', 36)]
Hubs with sentences: {'New York Red Bulls': 44, 'D.C. United': 40, 'the box': 21, 'attempt': 26, 'the centre': 14}
The text has 108 events
The semantic graph has 70 nodes
The summary should have 54 sentences with a compression factor of 0.5
There are 89 sentences in



Hubs: [('lineups', 1), ('players', 1), ('Second Half', 1), ('real salt lake', 1)]
Hubs with sentences: {'lineups': 1, 'players': 1, 'Second Half': 2, 'real salt lake': 1}
The text has 6 events
The semantic graph has 4 nodes
The summary should have 3 sentences with a compression factor of 0.5
There are 3 sentences in the 4 nodes with more degree
Number of original events: 6
Number of processed events: 3
Number of sentences in original article: 76
Number of sentences in summary: 2
Hubs: [('attempt', 40), ('Columbus Crew SC', 37), ('the box', 35), ('Philadelphia Union', 34), ('the centre', 22)]
Hubs with sentences: {'attempt': 19, 'Columbus Crew SC': 28, 'the box': 16, 'Philadelphia Union': 28, 'the centre': 6}
The text has 75 events
The semantic graph has 64 nodes
The summary should have 38 sentences with a compression factor of 0.5
There are 60 sentences in the 5 nodes with more degree
Number of original events: 75
Number of processed events: 38
Number of sentences in original article: 



Hubs: [('lineups', 1), ('players', 1), ('Second Half', 1), ('Puerto Rico', 1)]
Hubs with sentences: {'lineups': 1, 'players': 1, 'Second Half': 2, 'Puerto Rico': 1}
The text has 6 events
The semantic graph has 4 nodes
The summary should have 3 sentences with a compression factor of 0.5
There are 3 sentences in the 4 nodes with more degree
Number of original events: 6
Number of processed events: 3
Number of sentences in original article: 15
Number of sentences in summary: 2
Hubs: [('attempt', 35), ('Belize', 31), ('the box', 31), ('Canada', 29), ('the centre', 20)]
Hubs with sentences: {'attempt': 26, 'Belize': 28, 'the box': 20, 'Canada': 37, 'the centre': 8}
The text has 82 events
The semantic graph has 62 nodes
The summary should have 41 sentences with a compression factor of 0.5
There are 71 sentences in the 5 nodes with more degree
Number of original events: 82
Number of processed events: 41
Number of sentences in original article: 25
Number of sentences in summary: 8
Hubs: [('goal



Hubs: [('lineups', 1), ('players', 1), ('Second Half', 1), ('sporting kansas city', 1)]
Hubs with sentences: {'lineups': 1, 'players': 1, 'Second Half': 2, 'sporting kansas city': 1}
The text has 6 events
The semantic graph has 4 nodes
The summary should have 3 sentences with a compression factor of 0.5
There are 3 sentences in the 4 nodes with more degree
Number of original events: 6
Number of processed events: 3
Number of sentences in original article: 76
Number of sentences in summary: 2
Hubs: [('attempt', 35), ('FC Edmonton', 33), ('Vancouver Whitecaps FC', 30), ('the box', 29), ('a cross', 24)]
Hubs with sentences: {'attempt': 13, 'FC Edmonton': 31, 'Vancouver Whitecaps FC': 43, 'the box': 10, 'a cross': 6}
The text has 97 events
The semantic graph has 67 nodes
The summary should have 48 sentences with a compression factor of 0.5
There are 80 sentences in the 5 nodes with more degree
Number of original events: 97
Number of processed events: 48
Number of sentences in original artic

 35%|███▌      | 7/20 [35:27<1:16:04, 351.12s/it]

Number of sentences in original article: 18
Number of sentences in summary: 7
Hubs: [('the box', 42), ('attempt', 37), ('FC Bayern München', 35), ('SC Paderborn', 32), ('the centre', 32)]
Hubs with sentences: {'the box': 24, 'attempt': 23, 'FC Bayern München': 25, 'SC Paderborn': 18, 'the centre': 13}
The text has 88 events
The semantic graph has 65 nodes
The summary should have 44 sentences with a compression factor of 0.5
There are 48 sentences in the 5 nodes with more degree
Number of original events: 88
Number of processed events: 44
Number of sentences in original article: 4
Number of sentences in summary: 3
Hubs: [('attempt', 40), ('the box', 38), ('SV Werder Bremen', 35), ('borussia dortmund', 31), ('Borussia Dortmund', 30)]
Hubs with sentences: {'attempt': 25, 'the box': 23, 'SV Werder Bremen': 33, 'borussia dortmund': 20, 'Borussia Dortmund': 27}
The text has 92 events
The semantic graph has 72 nodes
The summary should have 46 sentences with a compression factor of 0.5
There a

 40%|████      | 8/20 [36:46<53:51, 269.33s/it]  

Number of sentences in original article: 14
Number of sentences in summary: 2
Hubs: [('Seattle Sounders FC', 40), ('attempt', 37), ('the centre', 32), ('the box', 31), ('Nicolás Lodeiro', 25)]
Hubs with sentences: {'Seattle Sounders FC': 33, 'attempt': 23, 'the centre': 12, 'the box': 16, 'Nicolás Lodeiro': 11}
The text has 84 events
The semantic graph has 69 nodes
The summary should have 42 sentences with a compression factor of 0.5
There are 51 sentences in the 5 nodes with more degree
Number of original events: 84
Number of processed events: 42
Number of sentences in original article: 23
Number of sentences in summary: 6
Hubs: [('Houston Dynamo', 41), ('attempt', 41), ('the box', 40), ('Vancouver Whitecaps FC', 28), ('the centre', 23)]
Hubs with sentences: {'Houston Dynamo': 44, 'attempt': 25, 'the box': 23, 'Vancouver Whitecaps FC': 34, 'the centre': 7}
The text has 107 events
The semantic graph has 67 nodes
The summary should have 54 sentences with a compression factor of 0.5
Ther

 45%|████▌     | 9/20 [52:23<1:26:05, 469.56s/it]

Number of sentences in original article: 23
Number of sentences in summary: 9
Hubs: [('Sporting Kansas City', 39), ('San Jose Earthquakes', 33), ('attempt', 32), ('the box', 29), ('the centre', 20)]
Hubs with sentences: {'Sporting Kansas City': 37, 'San Jose Earthquakes': 31, 'attempt': 19, 'the box': 17, 'the centre': 7}
The text has 88 events
The semantic graph has 66 nodes
The summary should have 44 sentences with a compression factor of 0.5
There are 72 sentences in the 5 nodes with more degree
Number of original events: 88
Number of processed events: 44
Number of sentences in original article: 4
Number of sentences in summary: 2
Hubs: [('Atlanta United FC', 47), ('Colorado Rapids', 41), ('attempt', 39), ('the box', 31), ('the centre', 30)]
Hubs with sentences: {'Atlanta United FC': 53, 'Colorado Rapids': 42, 'attempt': 24, 'the box': 19, 'the centre': 14}
The text has 112 events
The semantic graph has 70 nodes
The summary should have 56 sentences with a compression factor of 0.5
T



Could not perform summary for https://matchcenter.mlssoccer.com/matchcenter/2017-11-05-toronto-fc-vs-new-york-red-bulls/feed
Hubs: [('FC Dallas', 35), ('Portland Timbers', 34), ('attempt', 33), ('the right', 22), ('the box', 20)]
Hubs with sentences: {'FC Dallas': 45, 'Portland Timbers': 42, 'attempt': 19, 'the right': 6, 'the box': 11}
The text has 98 events
The semantic graph has 63 nodes
The summary should have 49 sentences with a compression factor of 0.5
There are 88 sentences in the 5 nodes with more degree
Number of original events: 98
Number of processed events: 49
Number of sentences in original article: 4
Number of sentences in summary: 4
Hubs: [('attempt', 37), ('Philadelphia Union', 34), ('the box', 33), ('the centre', 32), ('Chicago Fire', 32)]
Hubs with sentences: {'attempt': 23, 'Philadelphia Union': 34, 'the box': 18, 'the centre': 13, 'Chicago Fire': 34}
The text has 81 events
The semantic graph has 58 nodes
The summary should have 40 sentences with a compression facto

 50%|█████     | 10/20 [1:06:31<1:37:10, 583.09s/it]

Number of sentences in original article: 4
Number of sentences in summary: 4
Hubs: [('espanyol', 37), ('Celta de Vigo', 35), ('the box', 35), ('attempt', 32), ('ball', 25)]
Hubs with sentences: {'espanyol': 33, 'Celta de Vigo': 40, 'the box': 16, 'attempt': 17, 'ball': 9}
The text has 101 events
The semantic graph has 70 nodes
The summary should have 50 sentences with a compression factor of 0.5
There are 83 sentences in the 5 nodes with more degree
Number of original events: 101
Number of processed events: 50
Number of sentences in original article: 8
Number of sentences in summary: 6
Hubs: [('attempt', 43), ('Barcelona', 42), ('the box', 38), ('Leganés', 37), ('the centre', 32)]
Hubs with sentences: {'attempt': 26, 'Barcelona': 51, 'the box': 22, 'Leganés': 47, 'the centre': 14}
The text has 127 events
The semantic graph has 77 nodes
The summary should have 64 sentences with a compression factor of 0.5
There are 111 sentences in the 5 nodes with more degree
Number of original events:

 55%|█████▌    | 11/20 [1:08:48<1:07:24, 449.43s/it]

Number of sentences in original article: 17
Number of sentences in summary: 6
Hubs: [('attempt', 47), ('the box', 45), ('Everton', 45), ('Arsenal', 44), ('the centre', 43)]
Hubs with sentences: {'attempt': 35, 'the box': 31, 'Everton': 57, 'Arsenal': 37, 'the centre': 22}
The text has 122 events
The semantic graph has 88 nodes
The summary should have 61 sentences with a compression factor of 0.5
There are 109 sentences in the 5 nodes with more degree
Number of original events: 122
Number of processed events: 61
Number of sentences in original article: 5
Number of sentences in summary: 5
Hubs: [('West Ham United', 44), ('Burnley', 32), ('attempt', 31), ('the box', 27), ('right footed shot', 23)]
Hubs with sentences: {'West Ham United': 48, 'Burnley': 35, 'attempt': 20, 'the box': 17, 'right footed shot': 9}
The text has 97 events
The semantic graph has 71 nodes
The summary should have 48 sentences with a compression factor of 0.5
There are 85 sentences in the 5 nodes with more degree
Nu

 60%|██████    | 12/20 [1:19:59<1:08:46, 515.80s/it]

Number of sentences in original article: 6
Number of sentences in summary: 5
Hubs: [('fc schalke', 57), ('the box', 42), ('leverkusen', 39), ('attempt', 37), ('the centre', 32)]
Hubs with sentences: {'fc schalke': 67, 'the box': 21, 'leverkusen': 26, 'attempt': 23, 'the centre': 13}
The text has 133 events
The semantic graph has 79 nodes
The summary should have 66 sentences with a compression factor of 0.5
There are 94 sentences in the 5 nodes with more degree
Number of original events: 133
Number of processed events: 66
Number of sentences in original article: 8
Number of sentences in summary: 4
Hubs: [('FC Bayern München', 38), ('the box', 37), ('fc bayern münchen', 35), ('attempt', 34), ('FC Nürnberg', 31)]
Hubs with sentences: {'FC Bayern München': 26, 'the box': 22, 'fc bayern münchen': 25, 'attempt': 21, 'FC Nürnberg': 24}
The text has 83 events
The semantic graph has 71 nodes
The summary should have 42 sentences with a compression factor of 0.5
There are 73 sentences in the 5 no

 65%|██████▌   | 13/20 [1:21:01<44:18, 379.73s/it]  

Number of sentences in original article: 5
Number of sentences in summary: 4
Hubs: [('Lazio', 61), ('Sampdoria', 44), ('the centre', 37), ('attempt', 35), ('the box', 35)]
Hubs with sentences: {'Lazio': 63, 'Sampdoria': 43, 'the centre': 19, 'attempt': 30, 'the box': 22}
The text has 134 events
The semantic graph has 86 nodes
The summary should have 67 sentences with a compression factor of 0.5
There are 108 sentences in the 5 nodes with more degree
Number of original events: 134
Number of processed events: 67
Number of sentences in original article: 8
Number of sentences in summary: 6
Hubs: [('Inter Milan', 39), ('Juventus', 37), ('the box', 37), ('attempt', 35), ('the centre', 31)]
Hubs with sentences: {'Inter Milan': 55, 'Juventus': 51, 'the box': 23, 'attempt': 28, 'the centre': 17}
The text has 123 events
The semantic graph has 67 nodes
The summary should have 62 sentences with a compression factor of 0.5
There are 116 sentences in the 5 nodes with more degree
Number of original e

 70%|███████   | 14/20 [1:22:53<29:57, 299.52s/it]

Number of sentences in original article: 8
Number of sentences in summary: 7
Hubs: [('Manchester United', 45), ('Arsenal', 40), ('attempt', 39), ('the box', 37), ('Bukayo Saka', 24)]
Hubs with sentences: {'Manchester United': 54, 'Arsenal': 46, 'attempt': 24, 'the box': 20, 'Bukayo Saka': 12}
The text has 125 events
The semantic graph has 76 nodes
The summary should have 62 sentences with a compression factor of 0.5
There are 106 sentences in the 5 nodes with more degree
Number of original events: 125
Number of processed events: 62
Number of sentences in original article: 37
Number of sentences in summary: 12
Hubs: [('Leicester City', 51), ('attempt', 34), ('the box', 30), ('Newcastle United', 30), ('the centre', 24)]
Hubs with sentences: {'Leicester City': 41, 'attempt': 12, 'the box': 11, 'Newcastle United': 25, 'the centre': 6}
The text has 83 events
The semantic graph has 70 nodes
The summary should have 42 sentences with a compression factor of 0.5
There are 67 sentences in the 5 

 75%|███████▌  | 15/20 [1:30:25<28:46, 345.28s/it]

Number of sentences in original article: 36
Number of sentences in summary: 5
Hubs: [('Manchester City', 47), ('brighton', 47), ('attempt', 38), ('the box', 37), ('hove', 37)]
Hubs with sentences: {'Manchester City': 45, 'brighton': 17, 'attempt': 21, 'the box': 19, 'hove': 9}
The text has 89 events
The semantic graph has 79 nodes
The summary should have 44 sentences with a compression factor of 0.5
There are 67 sentences in the 5 nodes with more degree
Number of original events: 89
Number of processed events: 44
Number of sentences in original article: 6
Number of sentences in summary: 3
Hubs: [('attempt', 43), ('Arsenal', 40), ('the box', 38), ('Burnley', 37), ('a cross', 29)]
Hubs with sentences: {'attempt': 25, 'Arsenal': 26, 'the box': 19, 'Burnley': 30, 'a cross': 8}
The text has 92 events
The semantic graph has 80 nodes
The summary should have 46 sentences with a compression factor of 0.5
There are 73 sentences in the 5 nodes with more degree
Number of original events: 92
Number

 80%|████████  | 16/20 [1:41:25<29:18, 439.72s/it]

Number of sentences in original article: 6
Number of sentences in summary: 3
Hubs: [('Paris Saint Germain', 50), ('attempt', 35), ('the box', 35), ('the centre', 34), ('Amiens', 29)]
Hubs with sentences: {'Paris Saint Germain': 59, 'attempt': 25, 'the box': 23, 'the centre': 18, 'Amiens': 37}
The text has 106 events
The semantic graph has 68 nodes
The summary should have 53 sentences with a compression factor of 0.5
There are 96 sentences in the 5 nodes with more degree
Number of original events: 106
Number of processed events: 53
Number of sentences in original article: 9
Number of sentences in summary: 5
Hubs: [('Paris Saint Germain', 45), ('the box', 43), ('the centre', 42), ('attempt', 39), ('St Etienne', 35)]
Hubs with sentences: {'Paris Saint Germain': 39, 'the box': 26, 'the centre': 20, 'attempt': 27, 'St Etienne': 26}
The text has 87 events
The semantic graph has 75 nodes
The summary should have 44 sentences with a compression factor of 0.5
There are 70 sentences in the 5 node

 85%|████████▌ | 17/20 [1:42:08<16:01, 320.52s/it]

Number of sentences in original article: 19
Number of sentences in summary: 5
Hubs: [('Barcelona', 51), ('the box', 40), ('the centre', 39), ('Huesca', 38), ('attempt', 37)]
Hubs with sentences: {'Barcelona': 54, 'the box': 25, 'the centre': 15, 'Huesca': 30, 'attempt': 26}
The text has 108 events
The semantic graph has 83 nodes
The summary should have 54 sentences with a compression factor of 0.5
There are 89 sentences in the 5 nodes with more degree
Number of original events: 108
Number of processed events: 54
Number of sentences in original article: 11
Number of sentences in summary: 5
Hubs: [('Valencia', 41), ('attempt', 33), ('Real Valladolid', 31), ('the box', 30), ('the centre', 25)]
Hubs with sentences: {'Valencia': 55, 'attempt': 20, 'Real Valladolid': 39, 'the box': 16, 'the centre': 13}
The text has 120 events
The semantic graph has 72 nodes
The summary should have 60 sentences with a compression factor of 0.5
There are 99 sentences in the 5 nodes with more degree
Number of 

 90%|█████████ | 18/20 [1:44:02<08:37, 258.60s/it]

Number of sentences in original article: 8
Number of sentences in summary: 5
Hubs: [('Bournemouth', 43), ('Burnley', 40), ('the box', 40), ('attempt', 39), ('the centre', 33)]
Hubs with sentences: {'Bournemouth': 46, 'Burnley': 38, 'the box': 21, 'attempt': 23, 'the centre': 14}
The text has 108 events
The semantic graph has 69 nodes
The summary should have 54 sentences with a compression factor of 0.5
There are 89 sentences in the 5 nodes with more degree
Number of original events: 108
Number of processed events: 54
Number of sentences in original article: 6
Number of sentences in summary: 5
Hubs: [('Crystal Palace', 43), ('attempt', 35), ('West Bromwich Albion', 33), ('the box', 27), ('the left', 21)]
Hubs with sentences: {'Crystal Palace': 41, 'attempt': 16, 'West Bromwich Albion': 31, 'the box': 11, 'the left': 6}
The text has 85 events
The semantic graph has 70 nodes
The summary should have 42 sentences with a compression factor of 0.5
There are 74 sentences in the 5 nodes with mo

 95%|█████████▌| 19/20 [1:54:45<06:13, 373.92s/it]

Number of sentences in original article: 5
Number of sentences in summary: 5
Hubs: [('D.C. United', 44), ('attempt', 40), ('FC Cincinnati', 37), ('the box', 35), ('the centre', 33)]
Hubs with sentences: {'D.C. United': 61, 'attempt': 39, 'FC Cincinnati': 31, 'the box': 30, 'the centre': 21}
The text has 110 events
The semantic graph has 74 nodes
The summary should have 55 sentences with a compression factor of 0.5
There are 100 sentences in the 5 nodes with more degree
Number of original events: 110
Number of processed events: 55
Number of sentences in original article: 7
Number of sentences in summary: 3
Hubs: [('Toronto FC', 44), ('D.C. United', 37), ('attempt', 35), ('the box', 33), ('Wayne Rooney', 27)]
Hubs with sentences: {'Toronto FC': 43, 'D.C. United': 38, 'attempt': 22, 'the box': 18, 'Wayne Rooney': 11}
The text has 97 events
The semantic graph has 71 nodes
The summary should have 48 sentences with a compression factor of 0.5
There are 85 sentences in the 5 nodes with more d

100%|██████████| 20/20 [2:10:03<00:00, 390.19s/it]

Number of sentences in original article: 7
Number of sentences in summary: 3





Saving summaries in /home/carlos/MasterDS/tfm/data/csv/summaries/key_events_summaries_3.csv
Saving mappings in /home/carlos/MasterDS/tfm/data/csv/summaries/key_events_summaries_map_3.csv
CPU times: user 2h 13min 24s, sys: 5min 12s, total: 2h 18min 37s
Wall time: 2h 10min 7s


## Resultados

__Resúmenes__

In [7]:
pd_df_sum = pd.read_csv(summaries_path)

In [8]:
pd_df_sum

Unnamed: 0,json_file,url,summary,article_sentences,article_sentences_ix,events_mapping
0,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49871134,Mario Balotelli scored his first Serie A goal ...,['Mario Balotelli scored his first Serie A goa...,[ 0 10 0 2 1 6 6 6 6 6 1 1 6 0 2 ...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 1..."
1,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49873763,Fiorentina condemned AC Milan to a third conse...,['Fiorentina condemned AC Milan to a third con...,[0 3 2 5 2 2 0 0 2 2 2 1 1 1 3 0 2 5 1 1 1 5 5...,"[2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, ..."
2,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49865134,Cristiano Ronaldo returned from injury with a ...,['Cristiano Ronaldo returned from injury with ...,[0 0 0 0 3 0 3 1 2 0 0 3 3 3 0 0 3 0 0 0 0 0 2...,"[5, 6, 7, 8, 11, 15, 16, 17, 19, 21, 23, 26, 2..."
3,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49866723,Alexis Sanchez scored and was sent off on his ...,['Alexis Sanchez scored and was sent off on hi...,[4 0 0 1 1 0 1 0 0 1 0 0 1 0 3 0 0 4 0 0 1 1 1...,"[3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18,..."
4,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49832309,On-loan Manchester United defender Chris Small...,['On-loan Manchester United defender Chris Sma...,[4 4 4 4 4 3 4 4 2 4 4 3 3 0 4 3 4 4 4 4 4 4 4...,"[2, 4, 9, 12, 14, 15, 16, 17, 18, 20, 21, 22, ..."
...,...,...,...,...,...,...
4371,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,After lacking the final product for much of 11...,['After lacking the final product for much of ...,[1 1 4 1 1 3 1 3 3 8 0 3 1 3 1 1 8 1 2 1 1 1 1...,"[2, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18..."
4372,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Jamiro Monteiro scored his second MLS goal on ...,['Jamiro Monteiro scored his second MLS goal\x...,[1 1 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 0 3 3 3 1 1...,"[2, 5, 7, 9, 10, 11, 13, 14, 16, 17, 22, 23, 2..."
4373,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Nelson Bonilla's goal in first-half stoppage t...,"[""Nelson Bonilla's goal in first-half stoppage...",[ 6 0 6 6 20 6 7 7 0 6 20 9 6 8 0 ...,"[2, 4, 6, 9, 12, 13, 14, 16, 17, 18, 19, 21, 2..."
4374,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Alphonso Davies quick-kicked the ball to Jonat...,"[""Haiti came back from a two-goal halftime def...",[ 9 9 10 8 3 10 9 4 3 9 6 3 3 3 6 ...,"[2, 3, 6, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1..."


__Relación evento-frase__

In [9]:
pd_df_map = pd.read_csv(map_path)

In [10]:
pd_df_map

Unnamed: 0,event,article_sentence
0,Attempt blocked. Mario Balotelli (Brescia) rig...,Mario Balotelli scored his first Serie A goal ...
1,"Corner, Napoli. Conceded by Stefano Sabelli.",Watford striker Stefano Okaka scored the winne...
2,Attempt missed. Mario Balotelli (Brescia) left...,Mario Balotelli scored his first Serie A goal ...
3,Attempt saved. Mario Balotelli (Brescia) right...,Brescia rallied after the break but Sandro Ton...
4,"Goal! Napoli 1, Brescia 0. Dries Mertens (Nap...",Dries Mertens opened the scoring with his four...
...,...,...
111401,Attempt saved. Rodolfo Zelaya (Los Angeles Foo...,"The Rapids' lone goal came on a corner kick, a..."
111402,Attempt blocked. Keegan Rosenberry (Colorado R...,The Colorado Rapids are on a roll.
111403,Attempt missed. Rodolfo Zelaya (Los Angeles Fo...,"The Rapids' lone goal came on a corner kick, a..."
111404,Attempt blocked. Mohamed El-Munir (Los Angeles...,The Colorado Rapids are on a roll.


In [71]:
pd_df_map.iloc[89985]['event']

'Goal!  Real Sociedad 1, Barcelona 0. Aritz Elustondo (Real Sociedad) left footed shot from the left side of the box to the bottom left corner following a set piece situation.'

In [72]:
pd_df_map.iloc[89985]['article_sentence']

'The champions fell behind following a thumping shot from Aritz Elustondo in the 12th minute from a set piece.'

In [37]:
pd_df_goal = pd_df_map[pd_df_map['event'].str.contains('goal') &
                       ~pd_df_map['event'].str.contains('attempt') &
                       ~pd_df_map['event'].str.contains('Attempt')]  

In [35]:
len(pd_df_goal)

1044

In [44]:
pd_df_goal.iloc[600]['event']

'Goal!  Watford 0, Chelsea 2. Christian Pulisic (Chelsea) right footed shot from very close range to the centre of the goal. Assisted by Tammy Abraham.'

In [45]:
pd_df_goal.iloc[600]['article_sentence']

"In blustery conditions, the visitors opened the scoring after five minutes when Jorginho's delicious pass allowed Tammy Abraham to lift in his 10th goal of the season.\n"

In [47]:
pd_df_goal.iloc[601]['event']

'Goal!  Newcastle United 1, Wolverhampton Wanderers 0. Jamaal Lascelles (Newcastle United) header from very close range to the high centre of the goal. Assisted by Federico Fernández with a cross.'

In [48]:
pd_df_goal.iloc[601]['article_sentence']

"Magpies captain Jamaal Lascelles scored the opener with a powerful near-post header from Federico Fernandez's cross to light up what had been a largely dour first half.\n"