# What we're basically doing:

1. Loading a bunch of "earnings call" pdfs containing financial sentences and segmenting them into sentences.

2. Replacing words that are contained in lexicons/dictionaries with their lexicon title throughout all sentences.
    (example: "profits gone up 3% last quarter." -> " LagInd gone dir:up 3% last quarter.")
    
3. Training a word2vec embedding utilizing these transformed sentences (we feed it by paragraphs).

4. Building a function that transforms a 2d matrix of embeddings (representing a sentence) into a 1d vector (by averaging across all the word vectors). This way we obtain a sentence vector (a vector representing every sentence).

5. We need these sentence vectors because they allow us to classify on a per-sentence basis, and because it is convenient to layer various downstream classifiers on top of them, as the sentences are now represented in a very structured manner.


# Importing basic libraries 

In [1]:
import pandas as pd
import string
import os
import re

# Loading tagging dictionaries

In [2]:
#directionality
# Build labelled_direction as a list of (term, label) tuples, one per lexicon line.
# Each line is expected to hold a term and its label separated by a single space.
labelled_direction = []
with open("resources/tagging_lexicons/Directionality.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
for pairs in labelled_data:
    # strip() already removes the line terminator; slicing off the last character
    # first would eat a real character when the final line has no newline.
    fields = pairs.strip().split(" ")
    if len(fields) < 2:
        # Blank or label-less line: there is nothing to pair up, so skip it
        # rather than failing with an IndexError.
        continue
    labelled_direction.append((fields[0].strip(), fields[1].strip()))

In [3]:
#positive
# Read the LM_positive lexicon: every line becomes one lower-cased entry, and the
# resulting list is sorted.
with open("resources/tagging_lexicons/LM_positive.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# Each entry is lower-cased exactly once while the list is built (re-lowering the
# whole list on every iteration is quadratic). strip() removes the line terminator,
# so the last entry stays intact even when the file has no trailing newline.
labelled_positive = sorted(words.strip().lower() for words in labelled_data)

In [4]:
#negative
# Read the LM_negative lexicon: every line becomes one lower-cased entry, and the
# resulting list is sorted.
with open("resources/tagging_lexicons/LM_negative.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# Each entry is lower-cased exactly once while the list is built (re-lowering the
# whole list on every iteration is quadratic). strip() removes the line terminator,
# so the last entry stays intact even when the file has no trailing newline.
labelled_negative = sorted(words.strip().lower() for words in labelled_data)

In [5]:
#leading
# Read the "Leading Indicators" lexicon: every line becomes one entry (case is
# left untouched) and the resulting list is sorted.
with open("resources/tagging_lexicons/Leading Indicators.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# strip() removes the line terminator and surrounding whitespace; unlike slicing
# off the last character it cannot truncate an unterminated final line.
labelled_leading = sorted(words.strip() for words in labelled_data)

In [6]:
#lagging
# Read the "Lagging Indicators" lexicon: every line becomes one entry (case is
# left untouched) and the resulting list is sorted.
with open("resources/tagging_lexicons/Lagging Indicators.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# strip() removes the line terminator and surrounding whitespace; unlike slicing
# off the last character it cannot truncate an unterminated final line.
labelled_lagging = sorted(words.strip() for words in labelled_data)

In [7]:
#uncertainty
# Read the LM_uncertainty lexicon and flatten it into a list of lower-cased tokens.
with open("resources/tagging_lexicons/LM_uncertainty.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# One token list per line: drop the line's final character, trim, split on single spaces.
labelled_uncertainty = [line[:-1].strip().split(" ") for line in labelled_data]
# Flatten the per-line token lists in file order, lower-casing every token.
lowered_tokens = [token.lower() for tokens in labelled_uncertainty for token in tokens]
# NOTE(review): the very last token is discarded here - presumably a trailing
# artefact of the lexicon file; confirm that it is not a real entry.
final_labelled_uncertainty = lowered_tokens[:-1]

In [8]:
#constraining
# Read the LM_constraining lexicon and flatten it into a list of lower-cased tokens.
with open("resources/tagging_lexicons/LM_constraining.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
# One token list per line, split on single spaces. strip() removes the line
# terminator, so an unterminated final line no longer loses its last character.
labelled_constraining = [words.strip().split(" ") for words in labelled_data]
# Flatten the per-line token lists in file order, lower-casing every token.
final_labelled_constraining = [
    token.lower() for tokens in labelled_constraining for token in tokens
]

In [9]:
#Modal
# Build labelled_modal as a list of (lower-cased term, label) tuples, one per lexicon line.
# Each line is expected to hold a term and its label separated by a single space.
labelled_modal = []
with open("resources/tagging_lexicons/modal.txt", 'r', encoding="ISO-8859-1") as f:
    labelled_data = f.readlines()
for pairs in labelled_data:
    # strip() already removes the line terminator; slicing off the last character
    # first would eat a real character when the final line has no newline.
    fields = pairs.strip().split(" ")
    if len(fields) < 2:
        # Blank or label-less line: there is nothing to pair up, so skip it
        # rather than failing with an IndexError.
        continue
    labelled_modal.append((fields[0].strip().lower(), fields[1].strip()))

In [10]:
# Tag name -> lexicon entries carrying that tag. The direction and modal lexicons
# are (term, label) pairs, so they are split into one list per label value.
word_lists_dict = {
    'LM_pos': labelled_positive,
    'LM_neg': labelled_negative,
    'LagInd': labelled_lagging,
    'LeadInd': labelled_leading,
    'dir:UP': [term for term, label in labelled_direction if label == 'UP'],
    'dir:DOWN': [term for term, label in labelled_direction if label == 'DOWN'],
    'constrain': final_labelled_constraining,
    'uncertain': final_labelled_uncertainty,
    'mod:weak': [term for term, label in labelled_modal if label == 'WEAK'],
    'mod:medium': [term for term, label in labelled_modal if label == 'MEDIUM'],
    'mod:strong': [term for term, label in labelled_modal if label == 'STRONG'],
}

In [11]:
# need to modify existing dictionary for new tagging function
def new_tag_data_structure(tags_dict):
    """Index every lexicon by the first word of its entries.

    Args:
        tags_dict: mapping of tag name -> iterable of lexicon entries, where
            each entry is a string of one or more words separated by single
            spaces.

    Returns:
        A dict mapping each tag name to ``{first_word: continuations}``.
        ``continuations`` is a list with one item per entry starting with
        ``first_word``; each item is the list of words that follow it. An
        empty item ``[]`` means the first word is a complete entry on its own.
    """
    new_tags = {}
    for category, entries in tags_dict.items():
        by_first_word = {}
        # Single pass over the entries instead of rescanning the whole list
        # once per distinct first word.
        for entry in entries:
            first_word, *continuation = entry.split(' ')
            continuations = by_first_word.setdefault(first_word, [])
            if continuation:
                continuations.append(continuation)
            elif [] not in continuations:
                # Record the stand-alone word once, without throwing away the
                # multi-word continuations already collected for this word.
                continuations.append([])
        new_tags[category] = by_first_word
    return new_tags

In [12]:
# Re-bind word_lists_dict to the first-word-indexed structure returned by
# new_tag_data_structure.
# NOTE(review): the flat lexicons are overwritten in place, so re-running this
# cell would feed the already-converted structure back into the function;
# rebuild word_lists_dict first.
word_lists_dict = new_tag_data_structure(word_lists_dict)

In [15]:
# dumping lexicons on pkl format
import pickle
# Serialise word_lists_dict to disk. The context manager closes the file even
# if pickle.dump raises, so the handle can never leak.
with open("resources/pickle/word_list_dict.pkl", "wb") as pickling_on_:
    pickle.dump(word_lists_dict, pickling_on_)

## Importing Libraries for Sentence2Vec Training

In [15]:
import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas(desc="progress-bar")
from gensim.models import Doc2Vec
from sklearn import utils
from sklearn.model_selection import train_test_split
import gensim
import re
import string
import spacy
import operator
# NOTE(review): the wildcard imports hide where names come from; pdf_parse, used
# further down, is presumably provided by ml_models.pdf_parser - confirm.
from ml_models.pdf_parser import *
from ml_models.pre_processing import *
# Load the spaCy pipeline named "en_core_web_sm" once and keep it in `nlp`.
nlp = spacy.load("en_core_web_sm")

## Load real Financial Earnings Call: Training Data for Sent2Vec

link to google drive containing pdfs:
https://drive.google.com/drive/folders/19t-40BOKHhtgZcGYJKbewygx0YF-vo2n?usp=sharing

In [16]:
## Put the earnings-call pdf files from the drive into the "resources/train_pdfs/"
## folder (path is relative to the working directory) before running this cell.
import os
pdf_dir = "resources/train_pdfs/"
# One pdf_parse result per pdf file, in the order os.listdir returns them.
raw_parsed_output = []
for filename in os.listdir(pdf_dir):
    if not filename.endswith(".pdf"):
        # Anything that is not a pdf is ignored.
        continue
    # Echo the file name so the output shows which pdf is being parsed.
    print(filename)
    parsed_pdf = pdf_parse(os.path.join(pdf_dir, filename))
    raw_parsed_output.append(parsed_pdf)

BNS-Q3F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Our first question comes from Meny Grauman with Cormark Securities
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(co

ERROR:root:Cannot extract name and title from A [ph] Thanks (41:37).
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Our next question comes from Scott Chan with Canaccord Genuity
Traceback (most recent call last):
  File "/

TD-Q4F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll take our first question from Gabriel Dechaine. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll take our first question from Gabriel Dechaine. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, 

ERROR:root:Cannot extract name and title from Operator: [Operator Instructions] We'll take our next question from Doug Young. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Ladies and gentlemen
Traceback (m

RY-Q3F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you, Mr. Hughes. [Operator Instructions] Our first question is from Robert Sedran with CIBC. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
 

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Nick Stogdill with Credit Suisse. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next questio

TD-Q1F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And we'll take our first question from Ebrahim Poonawala with Bank of America Merrill Lynch
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_

ERROR:root:Cannot extract name and title from Operator: We'll go next to Sumit Malhotra with Scotia Capital.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: We'll go next to Robert Sedran with CIBC
Traceback (most recent cal

ERROR:root:Cannot extract name and title from Operator: That does conclude today's conference. We thank you for your participation
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: That does conclude today's conference. We thank you for your participation.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_t

BMO-Q1F18 copy.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Nick Stogdill with Credit Suisse. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Nick Stogdill with Credit Suisse. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. We have one more question in queue. The last question will be from Mario Mendonca with TD Securities. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title 

BNS-Q4F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take our first question from Robert Sedran of CIBC Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take our first question from Robert Sedran of CIBC Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: We will now take our next question from Scott Chan of Canaccord Genuity. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: We will now take our next

TD-Q3F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And we'll first hear from Ebrahim Poonawala of Bank of America
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role

ERROR:root:Cannot extract name and title from Operator: Next we'll hear from Doug Young from Desjardians Capital Markets.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Next we'll hear from Sumit Malhotra of Scotia Capital


CM Q1-F19.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from John Aiken with Barclays. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from John Aiken with Barclays. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, 

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. I would now like to turn the meeting back over to Victor
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. I would now like to turn the meetin

RY-Q4F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from Ebrahim Poonawala from BoA Merrill Lynch
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Gabriel Dechaine from National Bank Financial. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. It is from – the next question
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. It is from – the next question, Nigel D'Souza from Veritas In

CM Q1-F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Ebrahim Poonawala with Bank of America Merrill Lynch. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models

ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Sumit Malhotra with Scotia Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Sumit Malhotra with Scotia Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content

ERROR:root:Cannot extract name and title from Operator: Thank you. This concludes our question-and-answer session. I would now like to turn the meeting back to over to Mr. Dodig
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. This concludes our question-and-answer session. I would now like to turn the meeting back to over to Mr. Dodig.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/

BMO-Q2F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] Thank you for your patience. The first question is from Meny Grauman from Cormark Securities. Please go ahead
Traceback (most recent call 

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Steve Theriault from Eight Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Steve Theriault from Eight Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content

RY-Q1F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone line. [Operator Instructions] Our first question is from Sumit Malhotra from Scotiabank. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha

ERROR:root:Cannot extract name and title from Operator: Thank you. Our following question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our following question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has n

CM Q4-F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Gabriel Dechaine with National Bank Financial. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Oper

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Sohrab Movahedi with BMO Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time and we thank you for your participation
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from O

BNS-Q1F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. The first question comes from Gabriel Dechaine
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The first question comes from Gabriel Dechaine, National Bank Financial. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analys

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: It's from Doug Young in Desjardins Capital Markets
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: It's from Doug Young in Desjardins Capital Markets.
Traceback (

CM Q3-F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError:

ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Sohrab Movahedi with BMO Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next ques

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has n

BMO-Q3F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines [Operator Instructions]  The first question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines [Operator Instructions]  The first question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Meny Grauman from Cormark Securities. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time. And we thank you for your participation
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from 

TD-Q2F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll now take a question from Meny Grauman from Cormark Securities
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll now take a question from Meny Grauman from Cormark Securities.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in

ERROR:root:Cannot extract name and title from Operator: We'll now take a question from Sohrab Movahedi from BMO Capital Markets
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: We'll now take a question from Sohrab Movahedi from BMO Capital Markets.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, e

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method


BMO-Q1F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Sumit Malhotra from Scotia Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Sumit Malhotra from Scotia Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Nick Stogdill from Credit Suisse. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is 

BMO-Q4F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initia

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is Mario Mendonca with TD Securities. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is

BNS-Q2F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take our first question from Robert Sedran from CIBC Capital Management. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take our first question from Robert Sedran from CIBC Capital Management. Please go ahead, your line is open.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/m

ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question comes from Mario Mendonca. Please go ahead your line is open.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question 

CM Q2-F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError:

ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Nigel D'Souza with Veritas Investment. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: I do apologize
Traceback (m

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Victor G. Dodig President, Chief Executive Officer & Director, Canadian Imperial Bank of Commerce Thanks very much, operator. So, let me just close with a few remarks. Our strong results this quarter reflect the continuing success of our client-focused strategy and our ability to generate sustainable organic growth. I want to emphasize that. We're going to build on this momentum going forward with our overriding focus on put

RY-Q2F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you, Mr. Hepworth. We will now take questions from the telephone lines. [Operator Instructions] Our first question is from Ebrahim Poonawala with Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309,

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Scott Chan with Canaccord Genuity. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next questi

ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our next question is from Nigel D'Souza with Veritas Investment. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Tha

BMO-Q1F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Nick Stogdill with Credit Suisse. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Nick Stogdill with Credit Suisse. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. We have one more question in queue. The last question will be from Mario Mendonca with TD Securities. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title 

TD Q3-F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And we'll first hear from Ebrahim Poonawala of Bank of America
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role

ERROR:root:Cannot extract name and title from Operator: Next we'll hear from Doug Young from Desjardians Capital Markets.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Next we'll hear from Sumit Malhotra of Scotia Capital


BMO-Q1F19.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Robert Sedran with CIBC Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from Robert Sedran with CIBC Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_mod

ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Steve Theriault with Eight Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question i

BMO-Q4F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'None

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Nick Stogdill from Credit Suisse. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is 

ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time. And we thank you for your participation
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time. And we thank you for your participation.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisd

CM Q2-F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from John Aiken of Barclays. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from John Aiken of Barclays. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in q

ERROR:root:Cannot extract name and title from Operator: Thank you. The following question is from Doug Young of Desjardins Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The following qu

ERROR:root:Cannot extract name and title from Operator: The conference has now ended. Please disconnect your lines at this time. We thank you for your participation.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method


CM Q3-F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And the first question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And the first question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davista

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Sumit Malhotra from Scotia Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question i

ERROR:root:Cannot extract name and title from Victor G. Dodig President, Chief Executive Officer & Director, Canadian Imperial Bank of Commerce Thank you very much, operator, and thanks everyone for being on the call. One hour and 15 minutes, I think that's a record for us, at least a recent record. Before we wrap, I wanted to just do a couple of things. I wanted to announce that we're going to hold our next Investor Day on the 13th of December in Toronto. We look forward to this opportunity to introduce you to our new leadership team. And what we'd like to do is provide you with a perspective on what we told you almost two and a half years ago in terms of what we're going to deliver and give you a perspective going forward on what we intend to deliver for you as our shareholders.  And for those of you who are able to join us, we're gearing up for another successful CIBC Run for the Cure on the 1st of October. It's a cause our team across our country and our clients very much are passi

BMO-Q3F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] The first question is from Steve Theriault with Eight Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", l

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Nigel D'Souza with Veritas. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Nigel D'Souza with Veritas. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_an

TD-Q2F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And we'll take our first question from John Aiken from Barclays
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in rol

ERROR:root:Cannot extract name and title from Operator: And we'll take our next question from Sumit Malhotra with Scotia Capital.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: [Operator Instructions] We'll take our next qu

ERROR:root:Cannot extract name and title from Greg Braca Chief Operating Officer, TD Bank [indiscernible] (53:33)
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Bharat B. Masrani Group President, CEO & Non-Independent Director, The T

BNS-Q4F17.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Certainly
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError:

ERROR:root:Cannot extract name and title from Operator: Next question is from Mario Mendonca from TD Securities
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Next question is from Mario Mendonca from TD Securities.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users

TD-Q3F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] And we will hear first from Ebrahim Poonawala of Bank of America Merrill Lynch
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", l

ERROR:root:Cannot extract name and title from Operator: And we will hear next from the line of Sumit Malhotra with Scotia Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: And we will hear next from the line of Sumit Malhotra with Scotia Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_con

RY-Q4F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] Our first question is from Robert Sedran with CIBC World Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] Our first question is from Robert Sedran with CIBC World Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA 

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Mario Mendonca with TD Securities. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next questi

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you, gentlemen. The conference has now ended. Please disconnect your lines at this time, and we th

CM Q1-F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Meny Grauman from Cormark Securities. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Meny Grauman from Cormark Securities. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA te

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The following question is from Doug Young from Desjardins Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. T

BMO-Q2F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you, sir. [Operator Instructions] First question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, ex

ERROR:root:Cannot extract name and title from Operator: Thank you. The last question for today is from Doug Young from Desjardins Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The last question for today is from Doug Young from Desjardins Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line

RY-Q1F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] We thank you for your patience.  Our first question is from Ebrahim Poonawala from Bank of America Merrill Lynch. Please go ahead
Tracebac

ERROR:root:Cannot extract name and title from Operator: Thank you. Our following question is from Robert Sedran from CIBC Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our following question is from Robert Sedran from CIBC Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_sec

ERROR:root:Cannot extract name and title from Operator: Thank you. We have no further questions registered at this time. I would now like to turn the meeting back over to Mr. McKay.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Davi

CM Q4-F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from John Aiken with Barclays.  Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] Our first question is from John Aiken with Barclays.  Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next question is from Darko Mihelic with RBC Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Our next q

BNS-Q1F18.pdf


ERROR:root:Cannot extract name and title from Operator: Yes. Our first question comes from Robert Sedran of CIBC Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Yes. Our first question comes from Robert Sedran of CIBC Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_conte

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Your next question comes from Nick Stogdill of Credit Suisse. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Your next question comes from Nick S

BNS-Q1F19.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Yes
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min()

ERROR:root:Cannot extract name and title from Operator: Yes, sir. Next question comes from Sohrab Movahedi with BMO Capital Markets.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Yes
Traceback (most recent call last):
  Fi

RY-Q1F19.pdf


ERROR:root:Cannot extract name and title from Operator: Certainly. Thank you. [Operator Instructions] The first question is from Ebrahim Poonawala with Bank of America Merrill Lynch. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Certainly. Thank you. [Operator Instructions] The first question is from Ebrahim Poonawala with Bank of America Merrill Lynch. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "

ERROR:root:Cannot extract name and title from A Please re-queue. Yeah. Thanks.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is from Gabriel Dechaine with National Bank Financial. Please go ahe

ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is a follow-up from Sohrab Movahedi with BMO Capital Markets. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. The next question is a follow-up from Sohrab Movahedi with BMO Capital Markets. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line

ERROR:root:Cannot extract name and title from Operator: Thank you. The conference has now ended. Please disconnect your lines at this time and we thank you for your participation.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method


BNS-Q3F18.pdf


ERROR:root:company initial used as company name to divide sections
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 195, in management_discussion
    [re.search(".*(?=" + company_name + ")", paragraph).group(0).strip()]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:unable to seperate speaker title with text in second format
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 206, in management_discussion
    re.search("^(.*?)" + company_initial, paragraph).group(0).strip()[: -len(company_initial)]]
AttributeError: 'NoneType' object has no attribute 'group'
ERROR:root:Cannot extract name and title from Operator: Thank you. Our first question comes from Meny Grauman from Cormark Securities. Please go ahead. Your line is open
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", l

ERROR:root:Cannot extract name and title from Operator: Our next question comes from Steve Theriault from Eight Capital.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Our next question comes from Scott Chan from Canaccord


TD-Q4F17.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll go first to Meny Grauman with Cormark Securities
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll go first to Meny Grauman with Cormark Securities.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_conte

ERROR:root:Cannot extract name and title from Operator: And we'll go next to Doug Young with Desjardins Capital Markets
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: And we'll go next to Doug Young with Desjardins Capital Markets.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)

RY-Q3F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Ebrahim Poonawala from BoA (sic) [BofA] (00:20:44) Merrill Lynch
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. We will now take questions from the telephone lines. [Operator Instructions] The first question is from Ebrahim Poonawala from BoA (sic) [BofA] (00:20:44) Merrill Lynch, Bank of America. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERR

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. Next question is from Scott Chan from Canaccord Genuity. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. Next question is fr

TD-Q1F18.pdf


ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll take our first question from Ebrahim Poonawala with Bank of America Merrill Lynch
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Thank you. [Operator Instructions] We'll take our first question from Ebrahim Poonawala with Bank of America Merrill Lynch.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdon

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: And we'll take our next question from Steve Theriault with Eight Capital
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: And we'll take our next question from Ste

TD-Q1F19.pdf


ERROR:root:Cannot extract name and title from Operator: Certainly. Thank you. [Operator Instructions] The first question is from Sumit Malhotra with Scotia Capital. Please go ahead
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from Operator: Certainly. Thank you. [Operator Instructions] The first question is from Sumit Malhotra with Scotia Capital. Please go ahead.
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 263, in first_occur
    type_pos = min(first_occuring.items(), key=lambda x: x[1])
ValueError: min() arg is an empty sequence
ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthi

ERROR:root:unble to extract qna information from the QnA text block
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 309, in qna_section
    qna_content += [qna_text_analysis(qna_text, executive_titles)]
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 289, in qna_text_analysis
    company = text_qna[text_qna.find(",") + 1: type_index[1]].strip()
TypeError: slice indices must be integers or None or have an __index__ method
ERROR:root:Cannot extract name and title from Operator: Thank you. This concludes the question-and-answer session. I would now like to turn the meeting back over to Bharat Masrani
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/pdf_parser.py", line 163, in role
    name_title = (text[:min(counter)].strip(), text[min(counter):].strip())
ValueError: min() arg is an empty sequence
ERROR:root:Cannot extract name and title from 

In [17]:
# Number of parsed PDFs that will be used to train the word2vec model
len(raw_parsed_output)

45

In [18]:
# Flatten every parsed transcript into (text, label) pairs.
# Labels: 'mds' = management discussion section, 'question' / 'answer' = Q&A
# turns, 'cr' = the final_remarks text (presumably "closing remarks").
files = []
sections = []
for file in raw_parsed_output:
    files.append(file)
    sections += [(md['Text'], 'mds') for md in file["management_discussion_section"]]
    sections += [(qna['Question_text'], 'question') for qna in file['question_and_answers']]
    sections += [
        (answer['A_text'], 'answer')
        for qna in file['question_and_answers']
        for answer in qna['Answer']
    ]
    # Final remarks are optional: skip transcripts where the key is missing
    # (KeyError) or the value is not a mapping, e.g. None (TypeError).
    # Anything else is a real bug and should surface instead of being hidden.
    try:
        sections += [(file['final_remarks']['Text'], 'cr')]
    except (KeyError, TypeError):
        pass

In [19]:
# Tabulate the collected (text, label) pairs: one row per transcript passage.
sections_df = pd.DataFrame(
    data=sections,
    columns=["text", "section"],
)
sections_df.head()

Unnamed: 0,text,section
0,"Thank you, Adam, and good morning, everyone. ...",mds
1,"Thanks, Brian. I will begin on slide 6, which...",mds
2,"Thank you, Sean. I'll start on slide 13. We c...",mds
3,"Thank you, Daniel. I'd like to close by highl...",mds
4,"Thanks, Brian. That concludes our prepared re...",mds


In [20]:
# (rows, columns) of the assembled text/section table
sections_df.shape

(3542, 2)

In [22]:
import pickle

# Persist only the raw text column; this is the corpus later passed to s2v.train().
training_s2v_dataset = sections_df['text']
# The context manager closes the file even if pickle.dump raises.
with open("resources/pickle/sent2vec_training_data.pkl", "wb") as pickle_file:
    pickle.dump(training_s2v_dataset, pickle_file)

# Training Sent2Vec

In [1]:
# Imported here so this cell can be run first in a fresh kernel.
import pickle

# NOTE(review): the cell that writes this file uses "resources/pickle/..." without
# the leading "../"; confirm which working directory this cell is meant to run from.
# Only unpickle files produced by this notebook: pickle.load can execute
# arbitrary code when fed untrusted data.
with open("../resources/pickle/sent2vec_training_data.pkl", "rb") as pickle_in:
    training_s2v_dataset = pickle.load(pickle_in)

In [23]:
from ml_models.sent2vec import Sent2Vec
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [24]:
# Sent2Vec defaults (per the original note): size=50, window=5, min_count=5,
# workers=4, sg=1, epochs=50.
# Overridden below: window, min_count, workers and epochs.
s2v = Sent2Vec(
    size=50,
    window=7,
    min_count=2,
    workers=8,
    sg=1,
    epochs=10,
    stopwords=set(stopwords.words('english')),
    stemmer=PorterStemmer(),
)

In [25]:
# Per the original note, train() fits the model and also saves it to pickle
# (the save location is not shown in this cell -- check Sent2Vec.train).
s2v.train(training_s2v_dataset)


ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexico

ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexicon word
Traceback (most recent call last):
  File "/Users/davistanugraha/Desktop/getthisdone/ml_models/apriori.py", line 68, in replace_words_with_tags
    if tokenize_text[inner_counter] == word:
IndexError: list index out of range
ERROR:root:could not find a match with word in lexicon, checking next lexico

In [26]:
# Smoke test: embed a multi-sentence passage with the trained model. The recorded
# output is a single 1-D float32 array with 50 components, matching size=50.
s2v.transform_text("""That's liquidity we hold for a rainy day. So it's just excess liquidity we keep from a risk management perspective.
I wouldn't expect that to grow at a higher rate than the overall balance sheet going forward.
Thank you, operator, and thank you, all, for your questions. I will close with a quick comment to reinforce the confidence that we've got in the momentum in all of our businesses. The bank is strong, diversified, and growing. And as you heard today, we're executing against the strategies that position us to grow in our target markets which is leading to accelerated growth and improving efficiency. So we're on track to achieve the financial targets we set out at the beginning of the year and sustainable growth over the longer term.  Thank you, everyone, for your time on the call today we look forward to speaking to you again at our Investor Day in October.""")

array([-0.11223785, -0.11779752, -0.3735351 , -0.28228015,  0.17414626,
        0.11614864,  0.03636776,  0.32569128, -0.30084515,  0.21774977,
       -0.04691575,  0.1563455 , -0.00620037, -0.20708863, -0.06807388,
       -0.1224912 , -0.2824294 , -0.09484794, -0.21068986,  0.12078916,
        0.02331301,  0.19357456,  0.04604859,  0.1278233 ,  0.24560714,
       -0.2698469 ,  0.06597026, -0.16282628, -0.21102527, -0.23552911,
       -0.11553819,  0.3524071 , -0.0782838 , -0.02429141, -0.05367053,
       -0.1419052 , -0.5174197 , -0.19730406,  0.18576044,  0.2095271 ,
        0.28321105, -0.03676943,  0.20812783, -0.16983698, -0.3047352 ,
       -0.30179802,  0.05991584, -0.21690767,  0.11209837, -0.08246278],
      dtype=float32)

In [None]:
# Store the trained sentence2vec model.
# The saved Sent2Vec instance will also be used for the word2vec transformation
# in the CNN method.
# The context manager closes the file even if pickle.dump raises.
with open("../resources/pickle/sent2vec.pkl", "wb") as pickle_file:
    pickle.dump(s2v, pickle_file)