### Showcase: reading .conllu and saving to .csv + utilizing NER

##### Data available: https://nl.ijs.si/et/tmp/ParlaMint/Repo/

##### NoSketch Crystal: https://www.clarin.si/ske-beta/ (ask teamleads for user/pass)


##### Notebook requirements:
   - conllu
   - pandas
   - networkx (optional)
   - rapidfuzz (optional)

In [1]:
import networkx as nx
import pandas as pd
import os
from io import open
from conllu import parse_incr
from rapidfuzz import fuzz

In [19]:
def metadata_to_csv(directory):
    """Load every ``*meta.tsv`` file below *directory* into one DataFrame.

    The directory tree is walked recursively; each file whose name contains
    ``'meta.tsv'`` is read as a tab-separated table and the tables are
    stacked in the order they are encountered. Row indexes of the individual
    files are kept as they were read (they are not renumbered).

    Args:
        directory: Root folder to search.

    Returns:
        A DataFrame with the rows of all metadata files, or an empty
        DataFrame when no metadata file is found.
    """
    frames = []
    for subdir, _, files in os.walk(directory):
        for file in files:
            if 'meta.tsv' in file:
                frames.append(pd.read_csv(os.path.join(subdir, file), sep='\t'))

    # pd.concat raises on an empty list, so keep the "nothing found" result
    # explicit.
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of once per file: growing a frame inside the
    # loop copies all previously collected rows on every iteration.
    return pd.concat(frames)

In [32]:
# collect the plain text of every speech from the .conllu files
def speech_to_csv(directory, df):
    """Attach the text of each speech to the metadata table.

    Every sub-folder found while walking *directory* is scanned for files
    whose name contains ``'conllu'``. Within a file, a sentence carrying a
    ``newdoc id`` metadata entry starts a new speech; the ``text`` metadata
    of that sentence and of all following sentences (up to the next
    ``newdoc id``) is joined with single spaces to form the speech text.

    Args:
        directory: Root folder of the CoNLL-U files. The files are expected
            in sub-folders of this root (files placed directly in the root
            are not read).
        df: Metadata table with an ``ID`` column holding the speech ids.

    Returns:
        The same DataFrame object, with a ``speech`` column added in place.
        Rows whose ``ID`` was not found in any file get ``None``.
    """
    id_to_text = {}

    for path, years, _ in os.walk(directory):
        for year in years:
            # os.path.join works whether or not `directory` ends with a
            # path separator; plain string concatenation did not.
            year_dir = os.path.join(path, year)
            for file in os.listdir(year_dir):
                if 'conllu' not in file:
                    continue

                # The context manager closes the handle even if parsing fails.
                with open(os.path.join(year_dir, file), "r", encoding="utf-8") as conllu_file:
                    sentences = []
                    speech_id = None
                    for sentence in parse_incr(conllu_file):
                        if 'newdoc id' in sentence.metadata:
                            # A new speech begins: store the one collected so far.
                            if speech_id is not None:
                                id_to_text[speech_id] = ' '.join(sentences).rstrip()
                            speech_id = sentence.metadata['newdoc id']
                            sentences = []

                        sentences.append(sentence.metadata['text'])

                    # Store the last speech of the file; skip text that was
                    # never assigned to a speech id.
                    if speech_id is not None:
                        id_to_text[speech_id] = ' '.join(sentences).rstrip()

                print("Done with file: ", file)

    df['speech'] = df['ID'].apply(id_to_text.get)

    return df


In [33]:
# Relative path of the folder that is handed to both loaders below.
parliament = '../data/ParlaMint-RS.conllu'
# Collect the metadata tables into one frame, then add a 'speech' text column.
df = metadata_to_csv(parliament)
df = speech_to_csv(parliament, df)
df.head() # preview of the first rows; save this frame as .csv when done

Done with file:  ParlaMint-RS_1997-12-03-0.conllu
Done with file:  ParlaMint-RS_1997-12-29-0.conllu
Done with file:  ParlaMint-RS_1998-02-19-0.conllu
Done with file:  ParlaMint-RS_1998-02-20-0.conllu
Done with file:  ParlaMint-RS_1998-03-24-0.conllu
Done with file:  ParlaMint-RS_1998-04-06-0.conllu
Done with file:  ParlaMint-RS_1998-04-07-0.conllu
Done with file:  ParlaMint-RS_1998-05-26-0.conllu
Done with file:  ParlaMint-RS_1998-05-27-0.conllu
Done with file:  ParlaMint-RS_1998-09-28-0.conllu
Done with file:  ParlaMint-RS_1998-10-20-0.conllu
Done with file:  ParlaMint-RS_1998-11-17-0.conllu
Done with file:  ParlaMint-RS_1998-11-18-0.conllu
Done with file:  ParlaMint-RS_1998-12-07-0.conllu
Done with file:  ParlaMint-RS_1998-12-08-0.conllu
Done with file:  ParlaMint-RS_1998-12-28-0.conllu
Done with file:  ParlaMint-RS_1999-02-04-0.conllu
Done with file:  ParlaMint-RS_1999-03-23-0.conllu
Done with file:  ParlaMint-RS_1999-07-15-0.conllu
Done with file:  ParlaMint-RS_1999-11-09-0.conllu


Done with file:  ParlaMint-RS_2002-05-14-0.conllu
Done with file:  ParlaMint-RS_2002-05-15-0.conllu
Done with file:  ParlaMint-RS_2002-05-21-0.conllu
Done with file:  ParlaMint-RS_2002-05-22-0.conllu
Done with file:  ParlaMint-RS_2002-06-04-0.conllu
Done with file:  ParlaMint-RS_2002-06-05-0.conllu
Done with file:  ParlaMint-RS_2002-06-06-0.conllu
Done with file:  ParlaMint-RS_2002-06-11-0.conllu
Done with file:  ParlaMint-RS_2002-06-12-0.conllu
Done with file:  ParlaMint-RS_2002-06-18-0.conllu
Done with file:  ParlaMint-RS_2002-06-19-0.conllu
Done with file:  ParlaMint-RS_2002-07-02-0.conllu
Done with file:  ParlaMint-RS_2002-07-03-0.conllu
Done with file:  ParlaMint-RS_2002-07-09-0.conllu
Done with file:  ParlaMint-RS_2002-07-10-0.conllu
Done with file:  ParlaMint-RS_2002-07-11-0.conllu
Done with file:  ParlaMint-RS_2002-07-15-0.conllu
Done with file:  ParlaMint-RS_2002-07-16-0.conllu
Done with file:  ParlaMint-RS_2002-07-17-0.conllu
Done with file:  ParlaMint-RS_2002-11-05-0.conllu


Done with file:  ParlaMint-RS_2004-08-18-0.conllu
Done with file:  ParlaMint-RS_2004-08-19-0.conllu
Done with file:  ParlaMint-RS_2004-08-24-0.conllu
Done with file:  ParlaMint-RS_2004-08-25-0.conllu
Done with file:  ParlaMint-RS_2004-08-26-0.conllu
Done with file:  ParlaMint-RS_2004-10-06-0.conllu
Done with file:  ParlaMint-RS_2004-10-07-0.conllu
Done with file:  ParlaMint-RS_2004-10-13-0.conllu
Done with file:  ParlaMint-RS_2004-10-14-0.conllu
Done with file:  ParlaMint-RS_2004-10-18-0.conllu
Done with file:  ParlaMint-RS_2004-10-19-0.conllu
Done with file:  ParlaMint-RS_2004-10-21-0.conllu
Done with file:  ParlaMint-RS_2004-10-26-0.conllu
Done with file:  ParlaMint-RS_2004-10-27-0.conllu
Done with file:  ParlaMint-RS_2004-11-02-0.conllu
Done with file:  ParlaMint-RS_2004-11-03-0.conllu
Done with file:  ParlaMint-RS_2004-11-04-0.conllu
Done with file:  ParlaMint-RS_2004-11-05-0.conllu
Done with file:  ParlaMint-RS_2004-11-08-0.conllu
Done with file:  ParlaMint-RS_2004-11-09-0.conllu


Done with file:  ParlaMint-RS_2005-11-26-0.conllu
Done with file:  ParlaMint-RS_2005-11-27-0.conllu
Done with file:  ParlaMint-RS_2005-11-28-0.conllu
Done with file:  ParlaMint-RS_2005-11-29-0.conllu
Done with file:  ParlaMint-RS_2005-11-30-0.conllu
Done with file:  ParlaMint-RS_2005-12-01-0.conllu
Done with file:  ParlaMint-RS_2005-12-08-0.conllu
Done with file:  ParlaMint-RS_2005-12-12-0.conllu
Done with file:  ParlaMint-RS_2005-12-13-0.conllu
Done with file:  ParlaMint-RS_2005-12-14-0.conllu
Done with file:  ParlaMint-RS_2005-12-15-0.conllu
Done with file:  ParlaMint-RS_2005-12-20-0.conllu
Done with file:  ParlaMint-RS_2005-12-21-0.conllu
Done with file:  ParlaMint-RS_2005-12-22-0.conllu
Done with file:  ParlaMint-RS_2006-02-27-0.conllu
Done with file:  ParlaMint-RS_2006-03-27-0.conllu
Done with file:  ParlaMint-RS_2006-03-28-0.conllu
Done with file:  ParlaMint-RS_2006-03-29-0.conllu
Done with file:  ParlaMint-RS_2006-04-03-0.conllu
Done with file:  ParlaMint-RS_2006-04-04-0.conllu


Done with file:  ParlaMint-RS_2007-12-14-0.conllu
Done with file:  ParlaMint-RS_2007-12-17-0.conllu
Done with file:  ParlaMint-RS_2007-12-18-0.conllu
Done with file:  ParlaMint-RS_2007-12-19-0.conllu
Done with file:  ParlaMint-RS_2007-12-20-0.conllu
Done with file:  ParlaMint-RS_2007-12-21-0.conllu
Done with file:  ParlaMint-RS_2007-12-24-0.conllu
Done with file:  ParlaMint-RS_2007-12-25-0.conllu
Done with file:  ParlaMint-RS_2007-12-26-0.conllu
Done with file:  ParlaMint-RS_2007-12-27-0.conllu
Done with file:  ParlaMint-RS_2007-12-29-0.conllu
Done with file:  ParlaMint-RS_2008-02-15-0.conllu
Done with file:  ParlaMint-RS_2008-02-18-0.conllu
Done with file:  ParlaMint-RS_2008-02-29-0.conllu
Done with file:  ParlaMint-RS_2008-03-05-0.conllu
Done with file:  ParlaMint-RS_2008-06-11-0.conllu
Done with file:  ParlaMint-RS_2008-06-24-0.conllu
Done with file:  ParlaMint-RS_2008-06-25-0.conllu
Done with file:  ParlaMint-RS_2008-06-26-0.conllu
Done with file:  ParlaMint-RS_2008-06-27-0.conllu


Done with file:  ParlaMint-RS_2009-06-09-0.conllu
Done with file:  ParlaMint-RS_2009-06-10-0.conllu
Done with file:  ParlaMint-RS_2009-06-11-0.conllu
Done with file:  ParlaMint-RS_2009-06-12-0.conllu
Done with file:  ParlaMint-RS_2009-06-15-0.conllu
Done with file:  ParlaMint-RS_2009-06-16-0.conllu
Done with file:  ParlaMint-RS_2009-06-17-0.conllu
Done with file:  ParlaMint-RS_2009-06-18-0.conllu
Done with file:  ParlaMint-RS_2009-06-23-0.conllu
Done with file:  ParlaMint-RS_2009-07-08-0.conllu
Done with file:  ParlaMint-RS_2009-07-13-0.conllu
Done with file:  ParlaMint-RS_2009-07-15-0.conllu
Done with file:  ParlaMint-RS_2009-07-16-0.conllu
Done with file:  ParlaMint-RS_2009-07-17-0.conllu
Done with file:  ParlaMint-RS_2009-07-18-0.conllu
Done with file:  ParlaMint-RS_2009-07-20-0.conllu
Done with file:  ParlaMint-RS_2009-07-21-0.conllu
Done with file:  ParlaMint-RS_2009-07-22-0.conllu
Done with file:  ParlaMint-RS_2009-07-23-0.conllu
Done with file:  ParlaMint-RS_2009-07-24-0.conllu


Done with file:  ParlaMint-RS_2010-11-25-0.conllu
Done with file:  ParlaMint-RS_2010-11-30-0.conllu
Done with file:  ParlaMint-RS_2010-12-01-0.conllu
Done with file:  ParlaMint-RS_2010-12-02-0.conllu
Done with file:  ParlaMint-RS_2010-12-07-0.conllu
Done with file:  ParlaMint-RS_2010-12-08-0.conllu
Done with file:  ParlaMint-RS_2010-12-09-0.conllu
Done with file:  ParlaMint-RS_2010-12-15-0.conllu
Done with file:  ParlaMint-RS_2010-12-16-0.conllu
Done with file:  ParlaMint-RS_2010-12-17-0.conllu
Done with file:  ParlaMint-RS_2010-12-20-0.conllu
Done with file:  ParlaMint-RS_2010-12-22-0.conllu
Done with file:  ParlaMint-RS_2010-12-23-0.conllu
Done with file:  ParlaMint-RS_2010-12-24-0.conllu
Done with file:  ParlaMint-RS_2010-12-27-0.conllu
Done with file:  ParlaMint-RS_2010-12-28-0.conllu
Done with file:  ParlaMint-RS_2011-02-21-0.conllu
Done with file:  ParlaMint-RS_2011-02-22-0.conllu
Done with file:  ParlaMint-RS_2011-02-23-0.conllu
Done with file:  ParlaMint-RS_2011-02-24-0.conllu


Done with file:  ParlaMint-RS_2012-10-26-0.conllu
Done with file:  ParlaMint-RS_2012-10-30-0.conllu
Done with file:  ParlaMint-RS_2012-10-31-0.conllu
Done with file:  ParlaMint-RS_2012-11-01-0.conllu
Done with file:  ParlaMint-RS_2012-11-05-0.conllu
Done with file:  ParlaMint-RS_2012-11-06-0.conllu
Done with file:  ParlaMint-RS_2012-11-07-0.conllu
Done with file:  ParlaMint-RS_2012-11-08-0.conllu
Done with file:  ParlaMint-RS_2012-11-13-0.conllu
Done with file:  ParlaMint-RS_2012-11-14-0.conllu
Done with file:  ParlaMint-RS_2012-11-15-0.conllu
Done with file:  ParlaMint-RS_2012-11-20-0.conllu
Done with file:  ParlaMint-RS_2012-11-21-0.conllu
Done with file:  ParlaMint-RS_2012-11-22-0.conllu
Done with file:  ParlaMint-RS_2012-11-23-0.conllu
Done with file:  ParlaMint-RS_2012-11-26-0.conllu
Done with file:  ParlaMint-RS_2012-11-29-0.conllu
Done with file:  ParlaMint-RS_2012-11-30-0.conllu
Done with file:  ParlaMint-RS_2012-12-01-0.conllu
Done with file:  ParlaMint-RS_2012-12-03-0.conllu


Done with file:  ParlaMint-RS_2014-05-22-0.conllu
Done with file:  ParlaMint-RS_2014-05-23-0.conllu
Done with file:  ParlaMint-RS_2014-05-26-0.conllu
Done with file:  ParlaMint-RS_2014-05-27-0.conllu
Done with file:  ParlaMint-RS_2014-05-28-0.conllu
Done with file:  ParlaMint-RS_2014-05-29-0.conllu
Done with file:  ParlaMint-RS_2014-05-30-0.conllu
Done with file:  ParlaMint-RS_2014-06-04-0.conllu
Done with file:  ParlaMint-RS_2014-06-05-0.conllu
Done with file:  ParlaMint-RS_2014-06-09-0.conllu
Done with file:  ParlaMint-RS_2014-06-10-0.conllu
Done with file:  ParlaMint-RS_2014-06-12-0.conllu
Done with file:  ParlaMint-RS_2014-06-13-0.conllu
Done with file:  ParlaMint-RS_2014-06-17-0.conllu
Done with file:  ParlaMint-RS_2014-06-18-0.conllu
Done with file:  ParlaMint-RS_2014-06-19-0.conllu
Done with file:  ParlaMint-RS_2014-06-27-0.conllu
Done with file:  ParlaMint-RS_2014-06-29-0.conllu
Done with file:  ParlaMint-RS_2014-07-01-0.conllu
Done with file:  ParlaMint-RS_2014-07-02-0.conllu


Done with file:  ParlaMint-RS_2015-11-03-0.conllu
Done with file:  ParlaMint-RS_2015-11-04-0.conllu
Done with file:  ParlaMint-RS_2015-11-05-0.conllu
Done with file:  ParlaMint-RS_2015-11-10-0.conllu
Done with file:  ParlaMint-RS_2015-11-12-0.conllu
Done with file:  ParlaMint-RS_2015-11-16-0.conllu
Done with file:  ParlaMint-RS_2015-11-17-0.conllu
Done with file:  ParlaMint-RS_2015-11-18-0.conllu
Done with file:  ParlaMint-RS_2015-11-19-0.conllu
Done with file:  ParlaMint-RS_2015-11-20-0.conllu
Done with file:  ParlaMint-RS_2015-12-01-0.conllu
Done with file:  ParlaMint-RS_2015-12-02-0.conllu
Done with file:  ParlaMint-RS_2015-12-04-0.conllu
Done with file:  ParlaMint-RS_2015-12-08-0.conllu
Done with file:  ParlaMint-RS_2015-12-09-0.conllu
Done with file:  ParlaMint-RS_2015-12-10-0.conllu
Done with file:  ParlaMint-RS_2015-12-11-0.conllu
Done with file:  ParlaMint-RS_2015-12-12-0.conllu
Done with file:  ParlaMint-RS_2015-12-14-0.conllu
Done with file:  ParlaMint-RS_2015-12-15-0.conllu


Done with file:  ParlaMint-RS_2017-12-13-0.conllu
Done with file:  ParlaMint-RS_2017-12-14-0.conllu
Done with file:  ParlaMint-RS_2018-03-06-0.conllu
Done with file:  ParlaMint-RS_2018-03-07-0.conllu
Done with file:  ParlaMint-RS_2018-03-08-0.conllu
Done with file:  ParlaMint-RS_2018-03-09-0.conllu
Done with file:  ParlaMint-RS_2018-03-12-0.conllu
Done with file:  ParlaMint-RS_2018-03-13-0.conllu
Done with file:  ParlaMint-RS_2018-03-14-0.conllu
Done with file:  ParlaMint-RS_2018-03-22-0.conllu
Done with file:  ParlaMint-RS_2018-03-26-0.conllu
Done with file:  ParlaMint-RS_2018-03-27-0.conllu
Done with file:  ParlaMint-RS_2018-03-28-0.conllu
Done with file:  ParlaMint-RS_2018-03-29-0.conllu
Done with file:  ParlaMint-RS_2018-04-02-0.conllu
Done with file:  ParlaMint-RS_2018-04-03-0.conllu
Done with file:  ParlaMint-RS_2018-04-04-0.conllu
Done with file:  ParlaMint-RS_2018-04-05-0.conllu
Done with file:  ParlaMint-RS_2018-04-11-0.conllu
Done with file:  ParlaMint-RS_2018-04-12-0.conllu


Done with file:  ParlaMint-RS_2019-07-15-1.conllu
Done with file:  ParlaMint-RS_2019-07-16-0.conllu
Done with file:  ParlaMint-RS_2019-07-17-0.conllu
Done with file:  ParlaMint-RS_2019-07-18-0.conllu
Done with file:  ParlaMint-RS_2019-07-19-0.conllu
Done with file:  ParlaMint-RS_2019-07-22-0.conllu
Done with file:  ParlaMint-RS_2019-07-22-1.conllu
Done with file:  ParlaMint-RS_2019-07-23-0.conllu
Done with file:  ParlaMint-RS_2019-07-24-0.conllu
Done with file:  ParlaMint-RS_2019-07-25-0.conllu
Done with file:  ParlaMint-RS_2019-07-26-0.conllu
Done with file:  ParlaMint-RS_2019-09-04-0.conllu
Done with file:  ParlaMint-RS_2019-09-09-0.conllu
Done with file:  ParlaMint-RS_2019-09-10-0.conllu
Done with file:  ParlaMint-RS_2019-09-12-0.conllu
Done with file:  ParlaMint-RS_2019-09-13-0.conllu
Done with file:  ParlaMint-RS_2019-09-17-0.conllu
Done with file:  ParlaMint-RS_2019-09-17-1.conllu
Done with file:  ParlaMint-RS_2019-09-18-0.conllu
Done with file:  ParlaMint-RS_2019-09-19-0.conllu


Done with file:  ParlaMint-RS_2021-04-15-0.conllu
Done with file:  ParlaMint-RS_2021-04-20-0.conllu
Done with file:  ParlaMint-RS_2021-04-21-0.conllu
Done with file:  ParlaMint-RS_2021-04-22-0.conllu
Done with file:  ParlaMint-RS_2021-04-27-0.conllu
Done with file:  ParlaMint-RS_2021-04-28-0.conllu
Done with file:  ParlaMint-RS_2021-04-29-0.conllu
Done with file:  ParlaMint-RS_2021-05-05-0.conllu
Done with file:  ParlaMint-RS_2021-05-06-0.conllu
Done with file:  ParlaMint-RS_2021-05-11-0.conllu
Done with file:  ParlaMint-RS_2021-05-12-0.conllu
Done with file:  ParlaMint-RS_2021-05-13-0.conllu
Done with file:  ParlaMint-RS_2021-05-18-0.conllu
Done with file:  ParlaMint-RS_2021-05-19-0.conllu
Done with file:  ParlaMint-RS_2021-05-20-0.conllu
Done with file:  ParlaMint-RS_2021-05-25-0.conllu
Done with file:  ParlaMint-RS_2021-05-26-0.conllu
Done with file:  ParlaMint-RS_2021-05-27-0.conllu
Done with file:  ParlaMint-RS_2021-06-07-0.conllu
Done with file:  ParlaMint-RS_2021-06-09-0.conllu


Unnamed: 0,ID,Title,Date,Body,Term,Session,Meeting,Sitting,Agenda,Subcorpus,Speaker_role,Speaker_MP,Speaker_Minister,Speaker_party,Speaker_party_name,Party_status,Speaker_name,Speaker_gender,Speaker_birth,speech
0,ParlaMint-RS_1997-12-03-0.u1,"Term 4, Meeting 1",1997-12-03,Unicameralism,4,,1,2000-10-09,,Reference,Chairperson,MP,-,LK,LK,,"Tomić, Dragan",M,1958,"Poštovane dame i gospodo, poštovani narodni po..."
1,ParlaMint-RS_1997-12-03-0.u2,"Term 4, Meeting 1",1997-12-03,Unicameralism,4,,1,2000-10-09,,Reference,Regular,MP,-,SPO,Srpski pokret obnove,Opposition,"Miković, Milan",M,1932,"Gospodine predsedniče, dame i gospodo, naš juč..."
2,ParlaMint-RS_1997-12-03-0.u3,"Term 4, Meeting 1",1997-12-03,Unicameralism,4,,1,2000-10-09,,Reference,Chairperson,MP,-,LK,LK,,"Tomić, Dragan",M,1958,Zahvaljujem. Ima reč narodni poslanik Tomislav...
3,ParlaMint-RS_1997-12-03-0.u4,"Term 4, Meeting 1",1997-12-03,Unicameralism,4,,1,2000-10-09,,Reference,Regular,MP,-,SRS,Srpska radikalna stranka,Opposition,"Nikolić, Tomislav",M,1952,"Dame i gospodo, narodni poslanici, prvo vam če..."
4,ParlaMint-RS_1997-12-03-0.u5,"Term 4, Meeting 1",1997-12-03,Unicameralism,4,,1,2000-10-09,,Reference,Chairperson,MP,-,LK,LK,,"Tomić, Dragan",M,1958,Ovo je bio dogovor. Mislim da nema nikakvog ra...


In [11]:
# conllu utilization of NERs
def _add_mention_edge(graph, source, target):
    """Add 1 to the weight of the source -> target edge, creating it if needed."""
    if graph.has_edge(source, target):
        graph[source][target]['weight'] += 1
    else:
        graph.add_edge(source, target, weight=1)


def _iter_person_mentions(sentence):
    """Yield each person name in *sentence* as a space-joined string of lemmas.

    A name starts at a token tagged ``B-PER`` and is extended by following
    ``I-PER`` tokens. Tokens without a ``NER`` entry in ``misc`` are skipped
    and do not end a name. A name is emitted when a token with any other tag
    follows, when another name starts, or when the sentence ends.
    """
    person = None
    for token in sentence:
        misc = token['misc']
        if misc is None or 'NER' not in misc:
            continue
        ner = misc['NER']

        if ner == 'I-PER' and person is not None:
            person = person + ' ' + token['lemma']
            continue

        if person is not None:
            yield person
            person = None

        # An I-PER without a preceding B-PER is treated as the start of a
        # name instead of failing on `None + str`.
        if ner in ('B-PER', 'I-PER'):
            person = token['lemma']

    if person is not None:
        yield person


def mentions_network(directory, df):
    """Build a directed "who mentions whom" graph and write it to ``graph.gexf``.

    Nodes are all distinct values of ``Speaker_name`` in *df*, annotated with
    ``party_name``, ``gender`` and ``speeches`` (the number of that speaker's
    speeches that remain after filtering, see below). For every speech held
    by an MP who is not the chairperson, the person names tagged by NER in
    the CoNLL-U files are matched against the speaker names; each matched
    speaker adds 1 to the weight of the edge ``speaker -> mentioned`` at most
    once per speech.

    Args:
        directory: Root folder of the CoNLL-U files; the files are read from
            the sub-folders found while walking this root.
        df: Speech metadata with the columns ``ID``, ``Speaker_name``,
            ``Speaker_role``, ``Speaker_gender``, ``Speaker_party_name`` and
            either ``Speaker_type`` or ``Speaker_MP`` (value ``'MP'`` marks
            members of parliament).

    Returns:
        None. The graph is written to ``graph.gexf`` in the working directory.
    """
    all_speakers = df.groupby('Speaker_name').first()

    # Unfiltered table indexed by speech id: used to look at the speakers of
    # neighbouring speeches when a name is ambiguous.
    speech_metadata_all = df.set_index('ID')

    # Keep the original column name when it exists; otherwise use the
    # 'Speaker_MP' column that carries the same 'MP' marker.
    mp_column = 'Speaker_type' if 'Speaker_type' in df.columns else 'Speaker_MP'
    df = df[df[mp_column] == 'MP']
    df = df[df['Speaker_role'] != 'Chairperson']

    # set_index returns a new frame, so the result has to be kept.
    speech_metadata = df.set_index('ID')

    # One pass over the table instead of one boolean scan per speaker.
    speech_counts = df['Speaker_name'].value_counts()

    graph = nx.DiGraph()
    for speaker, metadata in all_speakers.iterrows():
        graph.add_node(
            speaker,
            party_name=metadata['Speaker_party_name'],
            gender=metadata['Speaker_gender'],
            # plain int, as len() produced before
            speeches=int(speech_counts.get(speaker, 0)),
        )

    # ---- READING THE CONLLU FILES AND ADDING NAMED ENTITY EDGES (MENTION NETWORKS)

    speaker_names = list(all_speakers.index)
    known_speakers = set(graph.nodes())
    candidates_by_mention = {}  # mention text -> speaker names matching it fully

    speaker_name = None
    speakers_mentioned_in_speech = set()
    for path, years, _ in os.walk(directory):
        for year in years:
            year_dir = os.path.join(path, year)
            for file in os.listdir(year_dir):
                if 'conllu' not in file:
                    continue

                with open(os.path.join(year_dir, file), "r", encoding="utf-8") as conllu_file:
                    process_speech = False  # True while inside a speech kept by the filter
                    for sentence in parse_incr(conllu_file):
                        if 'newdoc id' in sentence.metadata:
                            speech_id = sentence.metadata['newdoc id']
                            # Speeches missing from the filtered table are
                            # skipped (chairperson or non-MP).
                            process_speech = speech_id in speech_metadata.index
                            if process_speech:
                                speaker_name = speech_metadata.loc[speech_id, 'Speaker_name']
                                speakers_mentioned_in_speech = set()

                        if not process_speech:
                            continue

                        for person in _iter_person_mentions(sentence):
                            candidates = candidates_by_mention.get(person)
                            if candidates is None:
                                candidates = [
                                    name for name in speaker_names
                                    if fuzz.token_set_ratio(person, name) == 100
                                ]
                                candidates_by_mention[person] = candidates

                            if not candidates:
                                continue

                            if len(candidates) == 1:
                                ordered = candidates
                            else:
                                # Ambiguous name: prefer a candidate who spoke
                                # within 10 speeches of the current one.
                                speeches_before = list(
                                    speech_metadata_all.loc[:speech_id].tail(10)['Speaker_name'])
                                speeches_after = list(
                                    speech_metadata_all.loc[speech_id:].head(10)['Speaker_name'])
                                # NOTE(review): this keeps the original search
                                # order (following speeches from the furthest
                                # back to the current one, then preceding
                                # speeches oldest first) - confirm that this
                                # is the intended priority.
                                ordered = [
                                    name
                                    for name in list(reversed(speeches_after)) + speeches_before
                                    if name in candidates
                                ]

                            for match in ordered:
                                if match in known_speakers and match not in speakers_mentioned_in_speech:
                                    _add_mention_edge(graph, speaker_name, match)
                                    speakers_mentioned_in_speech.add(match)
                                    break

                print("Done with file: ", file)

    nx.readwrite.write_gexf(graph, 'graph.gexf')

