In [1]:
MAIN_PATH = "/home/carlos/MasterDS/tfm"
JSON_DATA_PATH = '{}/data/json/'.format(MAIN_PATH)
CSV_DATA_PATH = '{}/data/csv/'.format(MAIN_PATH)

In [2]:
import sys
sys.path.insert(0, MAIN_PATH)

In [3]:
%load_ext autoreload
%autoreload 2
from scripts.text.semantic_graph import SemanticGraph
from scripts.text.article_text_processor import ArticleTextProcessor
from scripts.extractive_summary.key_events_graph import KeyEventsSummaryGraph

from scripts.conf import TEAMS

import pandas as pd

%reload_ext autoreload

# Extracción de eventos importantes usando grafos conceptuales

El objetivo de este experimento es utilizar los eventos de los partidos para crear un grafo conceptual, y extraer a partir de este los eventos 
más importantes (atendiendo al grado de los nodos, que serán palabras/entidades nombradas/sujetos nominales).

## Probando con eventos de un partido

In [4]:
processor = ArticleTextProcessor()
key_events = KeyEventsSummaryGraph(only_players=True)

In [5]:
all_files = processor.load_json()

In [6]:
season_file = 'premier_league_2019_2020.json'
league_season_teams = TEAMS[season_file.split('.')[0]]
# Solo para pruebas
key_events.league_season_teams = league_season_teams

In [7]:
match = all_files[season_file]['https://www.bbc.com/sport/football/49791610']

In [8]:
event_list = match['events']

In [10]:
event_list

['Foul by Matteo Guendouzi (Arsenal).',
 'Scott McTominay (Manchester United) wins a free kick in the defensive half.',
 'Granit Xhaka (Arsenal) wins a free kick in the defensive half.',
 'Foul by Jesse Lingard (Manchester United).',
 'Corner,  Arsenal. Conceded by Axel Tuanzebe.',
 'Hand ball by Bukayo Saka (Arsenal).',
 'Corner,  Manchester United. Conceded by Calum Chambers.',
 'Foul by Calum Chambers (Arsenal).',
 'Daniel James (Manchester United) wins a free kick on the left wing.',
 'Calum Chambers (Arsenal) is shown the yellow card for a bad foul.',
 'Offside, Manchester United. Ashley Young tries a through ball, but Harry Maguire is caught offside.',
 'Hand ball by Andreas Pereira (Manchester United).',
 'Offside, Arsenal. Bernd Leno tries a through ball, but Bukayo Saka is caught offside.',
 'Pierre-Emerick Aubameyang (Arsenal) wins a free kick on the left wing.',
 'Foul by Paul Pogba (Manchester United).',
 'Bukayo Saka (Arsenal) wins a free kick in the attacking half.',
 'Fo

In [9]:
semantic_graph = SemanticGraph(event_list)
g = semantic_graph.create_graph()

In [11]:
hubs_sentences = semantic_graph.get_n_hubs_sentences(n=10)

Hubs: [('Manchester United', 45), ('Arsenal', 40), ('attempt', 39), ('the box', 37), ('Bukayo Saka', 24), ('ball', 23), ('footed shot', 23), ('the centre', 23), ('Pierre-Emerick Aubameyang', 22), ('manchester united', 20)]


In [146]:
hubs_sentences

OrderedDict([('Manchester United',
              [(3,
                'Scott McTominay (Manchester United) wins a free kick in the defensive half.'),
               (5, 'Foul by Jesse Lingard (Manchester United).'),
               (8, 'Corner,  Manchester United. Conceded by Calum Chambers.'),
               (10,
                'Daniel James (Manchester United) wins a free kick on the left wing.'),
               (13, 'Hand ball by Andreas Pereira (Manchester United).'),
               (16, 'Foul by Paul Pogba (Manchester United).'),
               (18, 'Foul by Jesse Lingard (Manchester United).'),
               (20, 'Foul by Andreas Pereira (Manchester United).'),
               (22, 'Foul by Ashley Young (Manchester United).'),
               (24, 'Foul by Marcus Rashford (Manchester United).'),
               (25,
                'Marcus Rashford (Manchester United) is shown the yellow card for a bad foul.'),
               (27,
                'Axel Tuanzebe (Manchester United) 

In [9]:
count_vec_kwargs = {'ngram_range': (1, 2), 'strip_accents': 'unicode'}
n_hubs = 10
fc = 0.5
mode = 'homogeneous'

In [13]:
processed_events = key_events.process_match_events(event_list, n_hubs, fc, mode)

Hubs: [('bukayo saka', 5), ('pierre-emerick aubameyang', 4), ('joe willock', 4), ('scott mctominay', 3), ('ashley young', 2), ('nicolas pépé', 2), ('marcus rashford', 2), ('dani ceballos', 2), ('andreas pereira', 2), ('harry maguire', 1)]
Hubs with sentences: {'bukayo saka': 12, 'pierre-emerick aubameyang': 7, 'joe willock': 4, 'scott mctominay': 10, 'ashley young': 8, 'nicolas pépé': 2, 'marcus rashford': 6, 'dani ceballos': 4, 'andreas pereira': 2, 'harry maguire': 4}
The text has 119 events
The semantic graph has 17 nodes
The summary should have 60 sentences with a compression factor of 0.5
There are 47 sentences in the 10 nodes with more degree
Number of original events: 119
Number of processed events: 47


In [12]:
key_events.match_summary(match, count_vec_kwargs, save_relations=True, verbose=True, n_hubs=10, 
                         fc=fc, mode=mode)

Hubs: [('bukayo saka', 5), ('pierre-emerick aubameyang', 4), ('joe willock', 4), ('scott mctominay', 3), ('ashley young', 2), ('nicolas pépé', 2), ('marcus rashford', 2), ('dani ceballos', 2), ('andreas pereira', 2), ('harry maguire', 1)]
Hubs with sentences: {'bukayo saka': 12, 'pierre-emerick aubameyang': 7, 'joe willock': 4, 'scott mctominay': 10, 'ashley young': 8, 'nicolas pépé': 2, 'marcus rashford': 6, 'dani ceballos': 4, 'andreas pereira': 2, 'harry maguire': 4}
The text has 119 events
The semantic graph has 17 nodes
The summary should have 60 sentences with a compression factor of 0.5
There are 47 sentences in the 10 nodes with more degree
Number of original events: 119
Number of processed events: 47
Number of sentences in original article: 37
Number of sentences in summary: 8
Event:
Scott McTominay (Manchester United) wins a free kick in the defensive half.
Nearest sentence in article:
In a disappointing encounter that was a pale shadow of their mighty clashes of years gone b

{'article_summary': "Manchester United and Arsenal played out a grim stalemate at Old Trafford that provided compelling evidence to illustrate just how far away from a Premier League challenge both clubs are.\nIn a disappointing encounter that was a pale shadow of their mighty clashes of years gone by, Scott McTominay gave Manchester United the lead at the end of an attritional first-half with a rising drive from the edge of the area.\nArsenal equalised just before the hour courtesy of Pierre-Emerick Aubameyang's cool finish.It was originally ruled out for offside but the video assistant referee confirmed the striker had been played onside by Harry Maguire.\nGoalkeeper Bernd Leno excelled for Arsenal with fine saves from Maguire and Marcus Rashford's late free-kick, while Bukayo Saka's goalbound shot crucially struck Victor Lindelof and flew over the top.\nMcTominay also headed a great chance well over from Ashley Young's corner but neither side could force a winner.\nsigning Nicholas 

### Con todos

In [14]:
summaries_path = '{}summaries/key_events_summaries_graph_5.csv'.format(CSV_DATA_PATH)
map_path = '{}summaries/key_events_summaries_graph_5_map.csv'.format(CSV_DATA_PATH)
count_vec_kwargs = {'ngram_range': (1, 2), 'strip_accents': 'unicode'}
n_hubs = 10
fc = 0.25
mode = 'homogeneous'

In [15]:
map_path

'/home/carlos/MasterDS/tfm/data/csv/summaries/key_events_summaries_graph_4_map.csv'

In [15]:
key_events = KeyEventsSummaryGraph(only_players=True)

In [None]:
%%time
pd_df = key_events.run(save_events_sentences=True, path_csv=summaries_path, 
                       path_mapping=map_path, count_vec_kwargs=count_vec_kwargs,
                       n_hubs=n_hubs, fc=fc, mode=mode
                       )

  0%|          | 0/20 [00:00<?, ?it/s]

Hubs: [('sergio agüero', 5), ('riyad mahrez', 5), ('anthony knockaert', 4), ('glenn murray', 4), ('ilkay gündogan', 3), ('hove albion', 3), ('david silva', 3), ('bernardo silva', 2), ('raheem sterling', 2), ('kyle walker', 2)]
Hubs with sentences: {'sergio agüero': 9, 'riyad mahrez': 8, 'anthony knockaert': 9, 'glenn murray': 5, 'ilkay gündogan': 7, 'hove albion': 4, 'david silva': 7, 'bernardo silva': 3, 'raheem sterling': 6, 'kyle walker': 6}
The text has 83 events
The semantic graph has 22 nodes
The summary should have 21 sentences with a compression factor of 0.25
There are 49 sentences in the 10 nodes with more degree
Number of original events: 83
Number of processed events: 21
Number of sentences in original article: 6
Number of sentences in summary: 4
Hubs: [('chris wood', 6), ('ashley barnes', 5), ('alex iwobi', 4), ('joe willock', 3), ('dwight mcneil', 3), ('johann gudmundsson', 3), ('charlie taylor', 2), ('pierre-emerick aubameyang', 2), ('matthew lowton', 2), ('ashley westwo

In [8]:
pd_df['summary_events'].values[0]

'Corner,  Brighton and Hove Albion. Conceded by Aymeric Laporte. Offside, Brighton and Hove Albion. Anthony Knockaert tries a through ball, but Glenn Murray is caught offside. Foul by Anthony Knockaert (Brighton and Hove Albion). Attempt missed. Alireza Jahanbakhsh (Brighton and Hove Albion) right footed shot from outside the box is close, but misses to the left. Assisted by Anthony Knockaert following a fast break. Foul by Yves Bissouma (Brighton and Hove Albion). Foul by Glenn Murray (Brighton and Hove Albion). Glenn Murray (Brighton and Hove Albion) wins a free kick in the defensive half. Attempt blocked. Alireza Jahanbakhsh (Brighton and Hove Albion) right footed shot from outside the box is blocked. Corner,  Brighton and Hove Albion. Conceded by Raheem Sterling. Goal.  Brighton and Hove Albion 1, Manchester City 0. Glenn Murray (Brighton and Hove Albion) header from the left side of the six yard box to the bottom left corner. Assisted by Pascal Groß with a cross following a corner

## Resultados

__Resúmenes__

In [7]:
pd_df_sum = pd.read_csv(summaries_path)

In [8]:
pd_df_sum

Unnamed: 0,json_file,url,summary,article_sentences,article_sentences_ix,events_mapping
0,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49871134,Mario Balotelli scored his first Serie A goal ...,['Mario Balotelli scored his first Serie A goa...,[ 0 10 0 2 1 6 6 6 6 6 1 1 6 0 2 ...,"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 1..."
1,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49873763,Fiorentina condemned AC Milan to a third conse...,['Fiorentina condemned AC Milan to a third con...,[0 3 2 5 2 2 0 0 2 2 2 1 1 1 3 0 2 5 1 1 1 5 5...,"[2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, ..."
2,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49865134,Cristiano Ronaldo returned from injury with a ...,['Cristiano Ronaldo returned from injury with ...,[0 0 0 0 3 0 3 1 2 0 0 3 3 3 0 0 3 0 0 0 0 0 2...,"[5, 6, 7, 8, 11, 15, 16, 17, 19, 21, 23, 26, 2..."
3,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49866723,Alexis Sanchez scored and was sent off on his ...,['Alexis Sanchez scored and was sent off on hi...,[4 0 0 1 1 0 1 0 0 1 0 0 1 0 3 0 0 4 0 0 1 1 1...,"[3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18,..."
4,italian_serie_a_2019_2020.json,https://www.bbc.com/sport/football/49832309,On-loan Manchester United defender Chris Small...,['On-loan Manchester United defender Chris Sma...,[4 4 4 4 4 3 4 4 2 4 4 3 3 0 4 3 4 4 4 4 4 4 4...,"[2, 4, 9, 12, 14, 15, 16, 17, 18, 20, 21, 22, ..."
...,...,...,...,...,...,...
4371,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,After lacking the final product for much of 11...,['After lacking the final product for much of ...,[1 1 4 1 1 3 1 3 3 8 0 3 1 3 1 1 8 1 2 1 1 1 1...,"[2, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18..."
4372,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Jamiro Monteiro scored his second MLS goal on ...,['Jamiro Monteiro scored his second MLS goal\x...,[1 1 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 0 3 3 3 1 1...,"[2, 5, 7, 9, 10, 11, 13, 14, 16, 17, 22, 23, 2..."
4373,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Nelson Bonilla's goal in first-half stoppage t...,"[""Nelson Bonilla's goal in first-half stoppage...",[ 6 0 6 6 20 6 7 7 0 6 20 9 6 8 0 ...,"[2, 4, 6, 9, 12, 13, 14, 16, 17, 18, 19, 21, 2..."
4374,mls_2019_2020.json,https://matchcenter.mlssoccer.com/matchcenter/...,Alphonso Davies quick-kicked the ball to Jonat...,"[""Haiti came back from a two-goal halftime def...",[ 9 9 10 8 3 10 9 4 3 9 6 3 3 3 6 ...,"[2, 3, 6, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1..."


__Relación evento-frase__

In [9]:
pd_df_map = pd.read_csv(map_path)

In [10]:
pd_df_map

Unnamed: 0,event,article_sentence
0,Attempt blocked. Mario Balotelli (Brescia) rig...,Mario Balotelli scored his first Serie A goal ...
1,"Corner, Napoli. Conceded by Stefano Sabelli.",Watford striker Stefano Okaka scored the winne...
2,Attempt missed. Mario Balotelli (Brescia) left...,Mario Balotelli scored his first Serie A goal ...
3,Attempt saved. Mario Balotelli (Brescia) right...,Brescia rallied after the break but Sandro Ton...
4,"Goal! Napoli 1, Brescia 0. Dries Mertens (Nap...",Dries Mertens opened the scoring with his four...
...,...,...
111401,Attempt saved. Rodolfo Zelaya (Los Angeles Foo...,"The Rapids' lone goal came on a corner kick, a..."
111402,Attempt blocked. Keegan Rosenberry (Colorado R...,The Colorado Rapids are on a roll.
111403,Attempt missed. Rodolfo Zelaya (Los Angeles Fo...,"The Rapids' lone goal came on a corner kick, a..."
111404,Attempt blocked. Mohamed El-Munir (Los Angeles...,The Colorado Rapids are on a roll.


In [71]:
pd_df_map.iloc[89985]['event']

'Goal!  Real Sociedad 1, Barcelona 0. Aritz Elustondo (Real Sociedad) left footed shot from the left side of the box to the bottom left corner following a set piece situation.'

In [72]:
pd_df_map.iloc[89985]['article_sentence']

'The champions fell behind following a thumping shot from Aritz Elustondo in the 12th minute from a set piece.'

In [37]:
pd_df_goal = pd_df_map[pd_df_map['event'].str.contains('goal') &
                       ~pd_df_map['event'].str.contains('attempt') &
                       ~pd_df_map['event'].str.contains('Attempt')]  

In [35]:
len(pd_df_goal)

1044

In [44]:
pd_df_goal.iloc[600]['event']

'Goal!  Watford 0, Chelsea 2. Christian Pulisic (Chelsea) right footed shot from very close range to the centre of the goal. Assisted by Tammy Abraham.'

In [45]:
pd_df_goal.iloc[600]['article_sentence']

"In blustery conditions, the visitors opened the scoring after five minutes when Jorginho's delicious pass allowed Tammy Abraham to lift in his 10th goal of the season.\n"

In [47]:
pd_df_goal.iloc[601]['event']

'Goal!  Newcastle United 1, Wolverhampton Wanderers 0. Jamaal Lascelles (Newcastle United) header from very close range to the high centre of the goal. Assisted by Federico Fernández with a cross.'

In [48]:
pd_df_goal.iloc[601]['article_sentence']

"Magpies captain Jamaal Lascelles scored the opener with a powerful near-post header from Federico Fernandez's cross to light up what had been a largely dour first half.\n"