# Coreference task

This notebook processes text from biographies to execute a coreference task 
 
Pre-requirements:
- Text organised in sentences

Problem: missing entities in sentences, mainly due to pronoumns, reference to entities in previous paragraphs
This applies for people, places and time references
    
Our implementation:

    - Step 1
    - Step 2
    - ...
        
 
 
Input

Output

Requirements
- Coreferee (spacyDevelopment): https://github.com/richardpaulhudson/coreferee
    - !pip3 install coreferee
    - !python3 -m pip3 install coreferee
    - !python3 -m coreferee install en
    - !python -m spacy download en_core_web_trf
    - !python -m spacy download en_core_web_lg

In [1]:
#!pip3 install coreferee
#!python3 -m pip3 install coreferee
#!python3 -m coreferee install en
#!python -m spacy download en_core_web_trf
# !python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.5.0/en_core_web_lg-3.5.0-py3-none-any.whl (587.7 MB)
     |████████████████████████████████| 587.7 MB 3.6 kB/s             ��████████▉   | 528.5 MB 35.0 MB/s eta 0:00:02
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.5.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')


In [1]:
import spacy
from spacy import displacy
nlp = spacy.load('en_core_web_trf')
nlp.add_pipe('coreferee')

<coreferee.manager.CorefereeBroker at 0x7fe66c1a35e0>

In [2]:
import os
import pandas as pd
import re

### Identify complete and incomplete meetup sentences

In [4]:
# reading every CSV with indexed sentences
# return a list object of files in the given folder
files_list = [f for f in os.listdir('indexedSentences') if not f.startswith('.')]
# parse to dataframe
df_files = pd.DataFrame(files_list, columns=['file_name'])
# df_files.to_csv('totalBiographiesEntities.csv',index=False)

df_files.info()
df_files.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33309 entries, 0 to 33308
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  33309 non-null  object
dtypes: object(1)
memory usage: 260.4+ KB


Unnamed: 0,file_name
0,10002116.csv
1,10004137.csv
2,10006387.csv
3,1000684.csv
4,10009278.csv


In [5]:
## testing, using one biography
df_files = df_files.query("file_name=='10085.csv'")
df_files.to_csv('coreferenceBiographiesList.csv',index=False)
print(len(df_files))

1


In [8]:
# Reading the biographies in chunks of 50
for chunk in pd.read_csv('coreferenceBiographiesList.csv', chunksize=50):
    df_file_name = pd.DataFrame()
    df_file_name['file_name'] = chunk['file_name']
    for file_name_item in df_file_name.itertuples():
        file_exists = os.path.isfile('indexedSentences/'+file_name_item.file_name.replace(".txt",".csv"))
        if file_exists:
            print(file_name_item.file_name.replace(".txt",".csv"))
            # read the biography organised by sentences
            biography_df = pd.read_csv('indexedSentences/'+file_name_item.file_name.replace(".txt",".csv"))
            # Read the file with all the identified entities
            meetupCandidate_df = pd.read_csv('extractedEntities/'+file_name_item.file_name.replace(".txt",".csv"))
            
            # entities_df = biography_df.copy(deep=True)
            
            # for each sentence in the biography, identify if they can be a 
            # meetup: Y - YES all entities present, N - NO no entities, C - Candidate some entities
            sentencesEntities_df = evaluateCandidateMeetupsEntities(biography_df,meetupCandidate_df)
            
            # Now move to run the coreference task
            executeCoreference(sentencesEntities_df,meetupCandidate_df,file_name_item)
            # if len(temp_df) == 0 or candidate == True:
                #     # if zero, then no entities found
                #     # run a process to double check if there are not entities, use corereference library if there is any entity: type PERSON, GPE, DATE
                #     print("incomplete")
            
            
                    # # if incomplete or zero run nlp

10085.csv
0


AttributeError: 'Pandas' object has no attribute 'paragraph'

### Identify if the sentence can be a meetup

In [6]:
def evaluateCandidateMeetupsEntities(biography_df, meetupCandidate_df):
    column_entities = ["meetup","people","place","timeExpression"]
    entities_df = pd.DataFrame(columns=column_entities)
    
    for sentence_row in biography_df.itertuples():
        # print(sentence_row.paragraphIndex)
        # print(sentence_row.sentenceIndex)
        tempCandidate_df = meetupCandidate_df.query("paragraphIndex=={} & sentenceIndex=={}".format(sentence_row.paragraphIndex,
                                                                                                     sentence_row.sentenceIndex))
        # if count > 0 then, we found some entities, this is a meetup candidate
        # meetup: Y - YES, N - NO, C - Candidate
        meetup = "N"
        entPeople = 0
        entPlaces = 0
        entTimeExpressions = 0

        if len(tempCandidate_df) > 0:
            # Next, evaluate if the candidate is complete or incomplete
            # tag as COM INC
            entPP = pd.unique(tempCandidate_df['entType'])

            for i in entPP:
                if i == "person":
                    entPeople = len(tempCandidate_df[tempCandidate_df.entType == i])
                elif i == "place":
                    entPlaces = len(tempCandidate_df[tempCandidate_df.entType == i])
                elif i == "time":
                    entTimeExpressions = len(tempCandidate_df[tempCandidate_df.entType == i])

            if entPeople > 0 and entPlaces > 0 and entTimeExpressions > 0:
                meetup = "Y"
            else:
                meetup = "C"
            datarow = pd.Series(data={'meetup':meetup,'people':entPeople,'place':entPlaces,'timeExpression':entTimeExpressions})
            # dfnotfound = dfnotfound.append(datarow, ignore_index=True)
            entities_df = entities_df.append(datarow, ignore_index=True)
            # print(entities_df)

            # if complete then
            # continue
        else:
            meetup = "N"
            datarow = pd.Series(data={'meetup':meetup,'people':entPeople,'place':entPlaces,'timeExpression':entTimeExpressions})
            # dfnotfound = dfnotfound.append(datarow, ignore_index=True)
            entities_df = entities_df.append(datarow, ignore_index=True)

    sentencesEntities_df = biography_df.copy(deep=True)
    sentencesEntities_df = sentencesEntities_df.join(entities_df)
    return sentencesEntities_df

# def cleanPeopleEntities(entityList_df):
    
#     object_list = ["rdf:type dbo:PersonFunction","dct:subject dbc:Musical_terminology"]
#     # iterate over all the person entities found
#     for entity_row in entityList_df.itertuples():
#         role = False
#         for item in object_list:
#             # build query
#             query_text = "ASK  { <{}> {}}".format(entity_row.URI)
#             #read cache
            
#             # check if already in cache
#             # if in cache and response is true:
#             # role = True
#             # break
#             # else:
#             # query DBpedia
#             response = executeQueryDbpedia(query_text)
#             if response == 'true':
#                 role = True
#                 break
#         if role:
#             # delete the entity from the results
            
# #ASK  { <http://dbpedia.org/resource/Conducting> rdf:type dbo:PersonFunction } #true for composer, solicitor, conducting
# # ASK  { <https://dbpedia.org/page/Edward_Elgar> rdf:type dbo:Musical_terminology } #false PersonFunction, false Musical_terminology
# #ASK  { <http://dbpedia.org/resource/Contralto> dct:subject dbc:Musical_terminology } #true contralto, tempo

#     return newEntityList_df

# def executeQueryDbpedia(q, f='application/json'):
#     epr = "http://dbpedia.org/sparql"
#     try:
#         params = {'query': q}
#         resp = requests.get(epr, params=params, headers={'Accept': f})
#     #    return resp.text
#         return resp
#     except Exception as e:
#         # print(e, file=sys.stdout)
#         if hasattr(e, 'message'):
#             print(e.message)
#         else:
#             print(e)
#         raise

In [None]:
# http://dbpedia.org/resource/Composer
# http://dbpedia.org/resource/Solicitor
# http://dbpedia.org/resource/Conducting
# http://dbpedia.org/resource/Contralto
# http://dbpedia.org/resource/Tempo

dct:subject dbc:Occupations_in_music
<http://purl.org/dc/terms/subject> https://dbpedia.org/page/Category:Occupations_in_music
rdf:type dbo:PersonFunction
# <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/PersonFunction>

### Run the COREFERENCE task

In [7]:
def corefText(doc):
    doc._.coref_chains.print()
    tok_list = list(token.text for token in doc)
    c = 0
    for chain in doc._.coref_chains:
        for mention in chain:
            res1 = [doc._.coref_chains.resolve(doc[i]) for i in mention]
            res = list(filter((None).__ne__, res1))
            if len(res) != 0:
                if len(res[0]) == 1:
                    tok_list[mention[0] + c] = str(res[0][0])
                elif len(res[0]) > 1:
                    tok_list[mention[0] + c] = str(res[0][0])
                    for j in range(1, len(res[0])):
                        tok_list.insert(mention[0] + c + j, str(res[0][j]))
                        c = c + 1
    textres = " ".join(tok_list)
    print(textres)
    return textres

def executeCoreference(sentencesCandidates_df,meetupCandidate_df,file_name_item):
    # filter only INCOMPLETE meetups
    # to_check_df = sentencesCandidates_df.query("meetup!='Y'")
    
    # paragraphSentences_df = meetupCandidate_df.query("paragraphIndex=={}".format(sentence_row.paragraphIndex))
    paragraphSentences_df = pd.read_csv('indexedSentences/'+file_name_item.file_name.replace(".txt",".csv"))
    
    count = 0
     # iterate all the paragraphs
    for paragraph_row in paragraphSentences_df.itertuples():
        
        count+=1
        print(paragraph_row.paragraphIndex)
        doc = nlp(paragraph_row.paragraph)
        
        for entity in doc.ents:
            print(entity.text, entity.label_)
            
            if entity.label_ == "PERSON" or entity.label_ == "GPE":
                continue_next_bool = False
        crText_string = corefText(doc)
        
        if count == 3:
            break
#     # iterate all the sentences
#     for sentence_row in to_check_df.itertuples():
#         print(sentence_row.paragraphIndex)
#         print(sentence_row.sentenceIndex)
        
        
        
                                                         
#         doc = nlp(sentence_row.sentences)
#         continue_next_bool = True
        
#         # run a process to double check if there are not entities, use corereference library if there is any entity: type PERSON, GPE, DATE
#         for entity in doc.ents:
#             print(entity.text, entity.label_)
            
#             if entity.label_ == "PERSON" or entity.label_ == "GPE":
#                 crText_string = corefText(doc)
#                 continue_next_bool = False
                
    # return

In [133]:
sentencesEntities_df.info()
sentencesEntities_df.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 460 entries, 0 to 459
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   sentences       460 non-null    object
 1   sentenceIndex   460 non-null    int64 
 2   paragraphIndex  460 non-null    int64 
 3   section         440 non-null    object
 4   wikiId          460 non-null    int64 
 5   meetup          460 non-null    object
 6   people          460 non-null    object
 7   place           460 non-null    object
 8   timeExpression  460 non-null    object
dtypes: int64(3), object(6)
memory usage: 32.5+ KB


Unnamed: 0,sentences,sentenceIndex,paragraphIndex,section,wikiId,meetup,people,place,timeExpression
0,"Sir Edward William Elgar, 1st Baronet, ( (lis...",0,0,,10085,Y,2,1,2
1,Among his best-known compositions are orchestr...,1,0,,10085,N,0,0,0
2,"He also composed choral works, including The D...",2,0,,10085,N,0,0,0
3,He was appointed Master of the King's Musick i...,3,0,,10085,N,0,0,0
4,Although Elgar is often regarded as a typicall...,0,1,,10085,C,2,0,0
5,"He felt himself to be an outsider, not only mu...",1,1,,10085,N,0,0,0
6,"In musical circles dominated by academics, he ...",2,1,,10085,C,1,2,0
7,He nevertheless married the daughter of a seni...,3,1,,10085,N,0,0,0
8,"She inspired him both musically and socially, ...",4,1,,10085,C,0,1,1
9,"He followed the Variations with a choral work,...",5,1,,10085,C,0,2,1


In [143]:
# doc = nlp('Sir Edward William Elgar, 1st Baronet,  listen; 2 June 1857 – 23 February 1934) was an English composer, ')
# many of whose works have entered the British and international classical concert repertoire.
# doc = nlp("Although Elgar is often regarded as a typically English composer, most of his musical influences were not from England but from continental Europe.")
doc = nlp("Sir Edward William Elgar, 1st Baronet,  ( (listen); 2 June 1857 – 23 February 1934) was an English composer, many of whose works have entered the British and international classical concert repertoire. Among his best-known compositions are orchestral works including the Enigma Variations, the Pomp and Circumstance Marches, concertos for violin and cello, and two symphonies. He also composed choral works, including The Dream of Gerontius, chamber music and songs. He was appointed Master of the King's Musick in 1924.")


In [173]:
print("Chains:")
doc._.coref_chains.print()
print("[]:")
doc[25]._.coref_chains.print()
print("resolve:")
value = doc._.coref_chains.resolve(doc[25])
print(value)
print("Ents: ")
for entity in doc.ents:
    print(entity.text, entity.label_)
print("====")
# print("Print tokens: ")
# for i in doc:
#     print(i.text, i.label_)
for chain in doc._.coref_chains:
    print("Chain: ")
    print(chain)
    for mention in chain:
        print("Mention: ")
        print(mention)
        for i in mention:
            print("i: ")            
            print(i)
            value = doc._.coref_chains.resolve(doc[i])
            print(value)
    print("----")

Chains:
0: Elgar(1), his(13), He(25), himself(27), he(47), his(59), he(83), his(88), he(93), He(97), him(111), he(118), his(124), his(135), He(149), His(200)
1: Britain(57), Britain(81), Britain(145), Britain(181), Britain(196)
2: daughter(101), She(109)
3: Variations(137), Variations(152)
4: disquiet(175), it(184)
[]:
0: Elgar(1), his(13), He(25), himself(27), he(47), his(59), he(83), his(88), he(93), He(97), him(111), he(118), his(124), his(135), He(149), His(200)
resolve:
[Elgar]
Ents: 
Elgar PERSON
English NORP
England GPE
Europe LOC
Protestant NORP
Britain GPE
Roman Catholicism NORP
Victorian DATE
Edwardian NORP
Britain GPE
British Army ORG
his forties DATE
Enigma Variations WORK_OF_ART
1899 DATE
Britain GPE
Variations WORK_OF_ART
The Dream of Gerontius WORK_OF_ART
1900 DATE
Roman Catholic NORP
Anglican NORP
Britain GPE
Britain GPE
====
Chain: 
0: [1], [13], [25], [27], [47], [59], [83], [88], [93], [97], [111], [118], [124], [135], [149], [200]
Mention: 
[1]
i: 
1
None
Mention: 


In [189]:
print("Chains:")
doc._.coref_chains.print()
n_tokens_int = 0
for sent in doc.sents:
    print(sent)
    # sent.string.strip()
    # for token in sent:
        # print(token, token.idx)
    n_tokens_int = n_tokens_int + len(sent)
    print(len(sent))
    print(n_tokens_int)

Chains:
0: Elgar(1), his(13), He(25), himself(27), he(47), his(59), he(83), his(88), he(93), He(97), him(111), he(118), his(124), his(135), He(149), His(200)
1: Britain(57), Britain(81), Britain(145), Britain(181), Britain(196)
2: daughter(101), She(109)
3: Variations(137), Variations(152)
4: disquiet(175), it(184)
Although Elgar is often regarded as a typically English composer, most of his musical influences were not from England but from continental Europe.
25
25
He felt himself to be an outsider, not only musically, but socially.
15
40
In musical circles dominated by academics, he was a self-taught composer; in Protestant Britain, his Roman Catholicism was regarded with suspicion in some quarters; and in the class-conscious society of Victorian and Edwardian Britain, he was acutely sensitive about his humble origins even after he achieved recognition.
57
97
He nevertheless married the daughter of a senior British Army officer.
12
109
She inspired him both musically and socially, bu

In [171]:
doc = nlp("Although Elgar is often regarded as a typically English composer, most of his musical influences were not from England but from continental Europe. He felt himself to be an outsider, not only musically, but socially. In musical circles dominated by academics, he was a self-taught composer; in Protestant Britain, his Roman Catholicism was regarded with suspicion in some quarters; and in the class-conscious society of Victorian and Edwardian Britain, he was acutely sensitive about his humble origins even after he achieved recognition. He nevertheless married the daughter of a senior British Army officer. She inspired him both musically and socially, but he struggled to achieve success until his forties, when after a series of moderately successful works his Enigma Variations (1899) became immediately popular in Britain and overseas. He followed the Variations with a choral work, The Dream of Gerontius (1900), based on a Roman Catholic text that caused some disquiet in the Anglican establishment in Britain, but it became, and has remained, a core repertory work in Britain and elsewhere. His later full-length religious choral works were well received but have not entered the regular repertory.")




In [172]:
print("Chains:")
doc._.coref_chains.print()
print("[]:")
doc[25]._.coref_chains.print()
print("resolve:")
value = doc._.coref_chains.resolve(doc[25])
print(value)
print("====")
# print("Print tokens: ")
# for i in doc:
#     print(i.text, i.label_)
for chain in doc._.coref_chains:
    print("Chain: ")
    print(chain)
    for mention in chain:
        print("Mention: ")
        print(mention)
        for i in mention:
            print("i: ")            
            print(i)
            value = doc._.coref_chains.resolve(doc[i])
            print(value)
    print("----")

Chains:
0: Elgar(1), his(13), He(25), himself(27), he(47), his(59), he(83), his(88), he(93), He(97), him(111), he(118), his(124), his(135), He(149), His(200)
1: Britain(57), Britain(81), Britain(145), Britain(181), Britain(196)
2: daughter(101), She(109)
3: Variations(137), Variations(152)
4: disquiet(175), it(184)
[]:
0: Elgar(1), his(13), He(25), himself(27), he(47), his(59), he(83), his(88), he(93), He(97), him(111), he(118), his(124), his(135), He(149), His(200)
resolve:
[Elgar]
====
Chain: 
0: [1], [13], [25], [27], [47], [59], [83], [88], [93], [97], [111], [118], [124], [135], [149], [200]
Mention: 
[1]
i: 
1
None
Mention: 
[13]
i: 
13
[Elgar]
Mention: 
[25]
i: 
25
[Elgar]
Mention: 
[27]
i: 
27
[Elgar]
Mention: 
[47]
i: 
47
[Elgar]
Mention: 
[59]
i: 
59
[Elgar]
Mention: 
[83]
i: 
83
[Elgar]
Mention: 
[88]
i: 
88
[Elgar]
Mention: 
[93]
i: 
93
[Elgar]
Mention: 
[97]
i: 
97
[Elgar]
Mention: 
[111]
i: 
111
[Elgar]
Mention: 
[118]
i: 
118
[Elgar]
Mention: 
[124]
i: 
124
[Elgar]
Menti

In [70]:
doc._.coref_chains.print()

0: he(7), his(19)
1: composer(13), he(43), his(48), he(53)
2: Britain(17), Britain(41)
In musical circles dominated by academics , he was a self - taught composer ; in Protestant Britain , he Roman Catholicism was regarded with suspicion in some quarters ; and in the class - conscious society of Victorian and Edwardian Britain , composer was acutely sensitive about composer humble origins even after composer achieved recognition .


  res = list(filter((None).__ne__, res1))


In [67]:
doc._.coref_chains.resolve(doc[1])

In [71]:
# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

Noun phrases: ['musical circles', 'academics', 'he', 'a self-taught composer', 'Protestant Britain', 'his Roman Catholicism', 'suspicion', 'some quarters', 'the class-conscious society', 'Victorian and Edwardian Britain', 'he', 'his humble origins', 'he', 'recognition']
Verbs: ['dominate', 'teach', 'regard', 'achieve']
Protestant NORP
Britain GPE
Roman Catholicism NORP
Victorian DATE
Edwardian NORP
Britain GPE


In [72]:
spacy.explain('GPE')

'Countries, cities, states'

In [48]:
print(doc.ents)
for entity in doc.ents:
    print(entity)
    
doc_nouns = list(doc.noun_chunks)
print(doc_nouns)

(Elgar, English, England, Europe)
Elgar
English
England
Europe
[Elgar, a typically English composer, his musical influences, England, continental Europe]


In [7]:
[(i, i.ent_type_, i.is_stop) for i in doc]

[(Sir, '', False),
 (Edward, 'PERSON', False),
 (William, 'PERSON', False),
 (Elgar, 'PERSON', False),
 (,, '', False),
 (1st, 'ORDINAL', False),
 (Baronet, '', False),
 (,, '', False),
 ( , '', False),
 ((, '', False),
 ((, '', False),
 (listen, '', False),
 (), '', False),
 (;, '', False),
 (2, 'DATE', False),
 (June, 'DATE', False),
 (1857, 'DATE', False),
 (–, 'DATE', False),
 (23, 'DATE', False),
 (February, 'DATE', False),
 (1934, 'DATE', False),
 (), '', False),
 (was, '', True),
 (an, '', True),
 (English, 'NORP', False),
 (composer, '', False),
 (,, '', False),
 (many, '', True),
 (of, '', True),
 (whose, '', True),
 (works, '', False),
 (have, '', True),
 (entered, '', False),
 (the, '', True),
 (British, 'NORP', False),
 (and, '', True),
 (international, '', False),
 (classical, '', False),
 (concert, '', False),
 (repertoire, '', False),
 (., '', False)]

In [34]:
doc.to_json()

{'text': 'Although he was very busy with his work, Peter had had enough of it. He and his wife decided they needed a holiday. They travelled to Spain because they loved the country very much.',
 'ents': [{'start': 41, 'end': 46, 'label': 'PERSON'},
  {'start': 134, 'end': 139, 'label': 'GPE'}],
 'sents': [{'start': 0, 'end': 68},
  {'start': 69, 'end': 115},
  {'start': 116, 'end': 181}],
 'tokens': [{'id': 0,
   'start': 0,
   'end': 8,
   'tag': 'IN',
   'pos': 'SCONJ',
   'morph': '',
   'lemma': 'although',
   'dep': 'mark',
   'head': 2},
  {'id': 1,
   'start': 9,
   'end': 11,
   'tag': 'PRP',
   'pos': 'PRON',
   'morph': 'Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs',
   'lemma': 'he',
   'dep': 'nsubj',
   'head': 2},
  {'id': 2,
   'start': 12,
   'end': 15,
   'tag': 'VBD',
   'pos': 'AUX',
   'morph': 'Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin',
   'lemma': 'be',
   'dep': 'advcl',
   'head': 11},
  {'id': 3,
   'start': 16,
   'end': 20,
   'tag': 'R

In [18]:
# Nice display of sentence organisation
#displacy.serve(doc, style="dep")

In [8]:
# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

Noun phrases: ['he', 'his work', 'Peter', 'it', 'He', 'his wife', 'they', 'a holiday', 'They', 'Spain', 'they', 'the country']
Verbs: ['have', 'decide', 'need', 'travel', 'love']
Peter PERSON
Spain GPE


In [23]:
doc1 = nlp('My sister has a dog. She loves him.')
doc1._.coref_chains.print()
print(doc1._.coref_chains.resolve(doc1[6]))

0: sister(1), She(6)
1: dog(4), him(8)
[sister]


In [43]:
tok_list = list(token.text for token in doc1)
c = 0
for chain in doc1._.coref_chains:
    for mention in chain:
        res1 = [doc1._.coref_chains.resolve(doc1[i]) for i in mention]
        res = list(filter((None).__ne__, res1))
        if len(res) != 0:
            if len(res[0]) == 1:
                tok_list[mention[0] + c] = str(res[0][0])
            elif len(res[0]) > 1:
                tok_list[mention[0] + c] = str(res[0][0])
                for j in range(1, len(res[0])):
                    tok_list.insert(mention[0] + c + j, str(res[0][j]))
                    c = c + 1
textres = " ".join(tok_list)
print(textres)

My sister has a dog . sister loves dog .


  res = list(filter((None).__ne__, res1))
