# Import Dependencies

In [1]:
import os, re
import pandas as pd
from summarise.io import doc_to_csv
from summarise.io import pdf_to_text_file
from summarise.preprocess import clean_text
from summarise.visualise import highlight_str

In [None]:
# Helper functions
def write_text_to_file( text, filename ):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing content.

    Parameters
    ----------
    text : str
        The content to write.
    filename : str
        Path of the file to create or overwrite.
    """
    # The ``with`` block closes the file on exit (even on error),
    # so no explicit close() call is needed.
    with open( filename, "w", encoding="utf-8" ) as f:
        f.write( text )

# Import Raw Data

In [9]:
# Locations used throughout the notebook, all anchored at the current working directory
ROOT        = os.path.abspath('')
DATA_FOLDER = os.path.join( ROOT, 'data')
ALL_DATA    = os.path.join( DATA_FOLDER, 'all_data.csv' )
OUTPUT_FOLDER   = os.path.join( ROOT, 'output')

if not os.path.isfile( ALL_DATA ):
    # No cached CSV yet: build it from the contents of the data folder
    filenames   = pdf_to_text_file( DATA_FOLDER )
    df          = doc_to_csv( filenames, ALL_DATA )
else:
    # A CSV from an earlier run exists, so load it instead of rebuilding
    # NOTE(review): the displayed frame carries 'Unnamed: 0' columns, which suggests
    # the CSV was saved with its index -- consider index_col=0 here; confirm first.
    df          = pd.read_csv( ALL_DATA )
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Text
0,0,cyborg-supervision-speech-by-james-proudman.txt,\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \...
1,1,managing-machines-the-governance-of-artificial...,\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \...
2,2,supervisor-centred-automation-the-role-of-huma...,\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \...


# Clean

In [3]:
# Pull the raw text of every document out of the frame, then clean each one
documents   = df['Text'].values.tolist()
cleaned     = list(map(clean_text, documents))

# Summarise

In [13]:
from summarizer import Summarizer
def summarise_text(text, model=None, min_length=60):
    """Return an extractive summary of ``text`` as a single string.

    Parameters
    ----------
    text : str
        The document to summarise.
    model : callable, optional
        Object called as ``model(text, min_length=...)``. When omitted, one
        ``Summarizer()`` is created on first use and reused for later calls,
        rather than being rebuilt for every document.
    min_length : int, optional
        Forwarded to the model as ``min_length``; defaults to 60.

    Returns
    -------
    str
        The model's output concatenated with ``''.join``.
    """
    if model is None:
        # Constructing a Summarizer is presumably expensive (it loads a
        # pretrained model), so cache a single instance on the function.
        model = getattr(summarise_text, "_model", None)
        if model is None:
            model = summarise_text._model = Summarizer()
    result  = model(text, min_length=min_length)
    return ''.join(result)
# Run the summariser over every cleaned document and preview the first result
summarised = list(map(summarise_text, cleaned))
print(summarised[0])

Cyborg supervision – the application of advanced analytics in prudential supervision Speech given by James Proudman Executive Director, UK Deposit Takers Supervision Workshop on research on bank supervision Bank of England I am grateful to Sian Besley, David Bholat, Charlotte Bull, Stephen Denby, Ryan Lovelock, Clair Mills, Philip Sellar, Pete Thomas and Sam Woods for their assistance in preparing these remarks and conducting background research and analysis. Enabling a machine to teach itself to recognise a face requires sophisticated algorithms that can learn from data. In this speech I want to explore the impact of AI and advanced analytics more broadly, on the safety and soundness of the firms we supervise at the PRA, and how we are starting to apply such technology to the supervision of firms. Changing the nature of the risks we supervise Like many other firms, banks are looking to harness the power and speed of AI. No hard data on industry-wide uptake are available but intelligen

In [11]:
# Save results to file
def write_text_to_file( text, filename ):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing content.

    NOTE: this repeats the helper defined in the "Helper functions" cell near
    the top of the notebook; the later definition is the one in effect.

    Parameters
    ----------
    text : str
        The content to write.
    filename : str
        Path of the file to create or overwrite.
    """
    # The ``with`` block closes the file on exit (even on error),
    # so no explicit close() call is needed.
    with open( filename, "w", encoding="utf-8" ) as f:
        f.write( text )
# Make sure the output folder exists so the writes below do not fail
# with FileNotFoundError on a fresh checkout.
os.makedirs( OUTPUT_FOLDER, exist_ok=True )
# One file per summary, numbered in document order: BERT_summary00.txt, BERT_summary01.txt, ...
for i, doc in enumerate(summarised):
    filename    = os.path.join( OUTPUT_FOLDER, "BERT_summary{:02d}.txt".format(i) )
    write_text_to_file( doc, filename )
print('Written files')

Written files


# Visualise Result

In [14]:
import spacy
# Load the spaCy pipeline named "en_core_web_lg" and run it over the first summary.
# NOTE(review): the model package presumably has to be installed separately -- confirm setup docs.
nlp     = spacy.load("en_core_web_lg")
doc     = nlp(summarised[0])

In [15]:
# Materialise the sentence spans of the parsed summary into a list
sentences = [sent for sent in doc.sents]

In [16]:
# Start from the cleaned text of the first document and pass each summary sentence
# through highlight_str in turn, feeding the result of one call into the next.
# NOTE(review): highlight_str presumably wraps matches in ANSI colour codes (the cell
# output shows escape sequences around summary sentences) -- confirm in summarise.visualise.
text_highlighted = cleaned[0]
for sentence in sentences:    
    text_highlighted = highlight_str( text_highlighted, sentence.text )
# Print only the first 10000 characters of the highlighted text
print(text_highlighted[:10000])

[1;30m[1;43mCyborg supervision – the application of advanced analytics in prudential supervision Speech given by James Proudman Executive Director, UK Deposit Takers Supervision Workshop on research on bank supervision Bank of England[0m[0m [1;30m[1;43mI am grateful to Sian Besley, David Bholat, Charlotte Bull, Stephen Denby, Ryan Lovelock, Clair Mills, Philip Sellar, Pete Thomas and Sam Woods for their assistance in preparing these remarks and conducting background research and analysis.[0m[0m I. Introduction II. Recognising faces comes instinctively to humans. Until fairly recently, however, it proved beyond the ability of computers. Advances in artificial intelligence (AI) - the use of a machine to simulate human behaviour – and its subset, machine learning (ML) – in which a machine teaches itself to perform tasks – are now making facial recognition software much more widely available. You might even use it to access your bank account. Because it is so easy for us but so har