## Libraries

In [2]:
import pandas as pd
import random
import spacy
import re

## Data Acquisition

In [3]:
# Read only the speech text and its date from the Hansard CSV export.
data = pd.read_csv(
    "hans.csv",
    usecols=["speech", "speech_date"],
    low_memory=False,
)

In [4]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,speech,speech_date
0,"Mr. Speaker-Elect, it is one of the happiest f...",1979-05-09
1,"Mr. Speaker-Elect, it is my very great privile...",1979-05-09
2,"On behalf of my right hon. and hon. Friends, M...",1979-05-09
3,"I too, wish to congratulate you, Mr. Speaker-E...",1979-05-09
4,"Sometimes, Mr. Speaker-Elect, the House alters...",1979-05-09


In [5]:
# (rows, columns) of the frame as loaded.
data.shape

(2286324, 2)

In [6]:
# Count the missing values in each column.
data.isnull().sum()

speech         3
speech_date    0
dtype: int64

In [7]:
# Discard every row that has a missing value; the original index labels are kept.
data = data.dropna()

In [8]:
# (rows, columns) after the rows with missing values were dropped.
data.shape

(2286321, 2)

In [216]:

# Pick one speech at random.
# `test` must be an index *label*: dropna() left gaps in the labels, so a random
# integer in [0, len(data) - 1] could name a removed row (KeyError) and could
# never reach the highest labels. Drawing a position and mapping it through
# data.index always yields a label that exists.
test = data.index[random.randrange(len(data))]
print(data.loc[test, 'speech'])

I assure my hon. Friend that I will not be distracted by light rail systems, but I hope that I will give them the consideration that they merit. Certainly, the fact that there will be a £1.2 billion investment programme in Network South-East over the coming three years suggests that we will not be distracted.



Anaphora (Level: Sentence and punctuation) 
Example for the randomly chosen speech

In [217]:

# Load the English pipeline by its package name: the "en" shortcut link only
# exists in spaCy v2 and was removed in v3.
# NOTE(review): assumes en_core_web_sm is the installed model; adjust if a
# different English model was linked as "en".
nlp = spacy.load("en_core_web_sm")

# Run the pipeline on the sampled speech (label-based lookup via .loc).
tokens = nlp(data.loc[test, 'speech'])

# Collect each detected sentence as a stripped string. Span.text replaces
# Span.string, which no longer exists in spaCy v3; after strip() both give the
# same result.
sentences = [sent.text.strip() for sent in tokens.sents]

In [218]:

# Sentence-level anaphora candidates: for each pair of consecutive sentences,
# record the run of leading words they share (lower-cased, punctuation removed).
cleaned = [re.sub(r'[^\w\s]', '', s.lower()).split() for s in sentences]
previous = cleaned[0]
anaphora = []
for current in cleaned[1:]:
    # Length of the common prefix of the two word lists.
    shared = 0
    for a, b in zip(current, previous):
        if a != b:
            break
        shared += 1
    if shared > 0:
        anaphora.append(current[:shared])
    previous = current

In [219]:
# Clause-level anaphora candidates: split every sentence on , : ; and compare
# each clause with the one before it. Results are appended to the existing
# `anaphora` list.
for sentence in sentences:
    # Lower-case, strip punctuation, tokenise, and ignore the connective "and".
    clauses = [
        [w for w in re.sub(r'[^\w\s]', '', chunk.lower()).split() if w not in ('and', '')]
        for chunk in re.split('[,:;]', sentence)
    ]
    for previous, current in zip(clauses, clauses[1:]):
        # Length of the common prefix of the two clauses.
        shared = 0
        for a, b in zip(current, previous):
            if a != b:
                break
            shared += 1
        if shared > 0:
            anaphora.append(current[:shared])
        
        
            
        
    
    

In [220]:
# Show every anaphora candidate collected so far (each one is a list of words).
print(anaphora)

[]


In [221]:
# Report how many anaphora candidates were collected.
print("Number of anaphora candidates: ", len(anaphora))

Number of anaphora candidates:  0


Epistrophe (Level: Sentence and punctuation) 
Example for the randomly chosen speech

In [222]:

# Sentence-level epistrophe candidates: for each pair of consecutive sentences,
# record the run of trailing words they share (lower-cased, punctuation removed),
# stored in reading order.
cleaned = [re.sub(r'[^\w\s]', '', s.lower()).split() for s in sentences]
previous = cleaned[0]
epistrophe = []
for current in cleaned[1:]:
    # Length of the common suffix, found by walking both lists from the end.
    shared = 0
    for a, b in zip(reversed(current), reversed(previous)):
        if a != b:
            break
        shared += 1
    if shared > 0:
        epistrophe.append(current[len(current) - shared:])
    previous = current

In [223]:
# Clause-level epistrophe candidates: split every sentence on , : ; and compare
# the ending of each clause with the ending of the clause before it. Results
# are appended to the existing `epistrophe` list.
for sentence in sentences:
    # Lower-case, strip punctuation, tokenise, and ignore the connective "and".
    clauses = [
        [w for w in re.sub(r'[^\w\s]', '', chunk.lower()).split() if w != 'and']
        for chunk in re.split('[,:;]', sentence)
    ]
    for previous, current in zip(clauses, clauses[1:]):
        # Length of the common suffix, found by walking both clauses from the end.
        shared = 0
        for a, b in zip(reversed(current), reversed(previous)):
            if a != b:
                break
            shared += 1
        if shared > 0:
            # Slice from the end so the words stay in reading order. The matches
            # are found back-to-front, and storing them in that order would
            # reverse every multi-word candidate, unlike the sentence-level
            # pass, which restores the order before appending.
            epistrophe.append(current[len(current) - shared:])

In [224]:
# Show every epistrophe candidate collected so far (each one is a list of words).
print(epistrophe)

[]


In [225]:
# Report how many epistrophe candidates were collected.
print("Number of epistrophe candidates: ", len(epistrophe))

Number of epistrophe candidates:  0


Epanalepsis (Level: Sentence and punctuation)
Example for the randomly chosen speech

In [226]:

# Sentence-level epanalepsis candidates: walk the current sentence backwards
# from its last word while walking the previous sentence forwards from its
# first word, and record the words that match position by position.
cleaned = [re.sub(r'[^\w\s]', '', s.lower()).split() for s in sentences]
previous = cleaned[0]
epanalepsis = []
for current in cleaned[1:]:
    # Number of positions at which reversed(current) agrees with previous.
    shared = 0
    for a, b in zip(reversed(current), previous):
        if a != b:
            break
        shared += 1
    if shared > 0:
        # Stored in the order the words appear at the end of the current sentence.
        epanalepsis.append(current[len(current) - shared:])
    previous = current

In [227]:
# Clause-level epanalepsis candidates: split every sentence on , : ; and match
# the end of each clause (read backwards) against the start of the clause
# before it. Results are appended to the existing `epanalepsis` list.
for sentence in sentences:
    # Lower-case, strip punctuation, tokenise, and ignore the connective "and".
    clauses = [
        [w for w in re.sub(r'[^\w\s]', '', chunk.lower()).split() if w not in ('and', '')]
        for chunk in re.split('[,:;]', sentence)
    ]
    for previous, current in zip(clauses, clauses[1:]):
        # Number of positions at which reversed(current) agrees with previous.
        shared = 0
        for a, b in zip(reversed(current), previous):
            if a != b:
                break
            shared += 1
        if shared > 0:
            # Stored in the order the words appear at the start of the previous
            # clause.
            # NOTE(review): the sentence-level pass stores the reverse of this
            # order; confirm which one is intended.
            epanalepsis.append(previous[:shared])

In [228]:
# Show every epanalepsis candidate collected so far (each one is a list of words).
print(epanalepsis)

[]


In [215]:
# Report how many epanalepsis candidates were collected.
print("Number of epanalepsis candidates: ", len(epanalepsis))

Number of epanalepsis candidates:  0
