In [1]:
# Run on first instance to install required libraries
%pip install smart_open minecart textract-trp

Note: you may need to restart the kernel to use updated packages.


In [2]:
import time 
import re
import os
import trp
import boto3
import minecart
import json
import logging 

import numpy as np
import pandas as pd

from smart_open import open
from sagemaker.session import Session

# AWS Asynchronous Textract Script (requesting Job)
**Content adapted from the Amazon Textract code-samples repository (see the [source script](https://github.com/aws-samples/amazon-textract-code-samples/blob/master/python/12-pdf-text.py)).**

In [3]:
def startJob(s3BucketName:str, objectName:str) -> str:
    """
    Submit an asynchronous Textract document-analysis job for an S3 object.
    ------------------------------------------------------------------------------------------
    Input
        :param s3BucketName: (type str)
            Name of the S3 bucket that holds the document
        :param objectName: (type str)
            Key of the document inside the bucket

    Output
        :return: type str
            The "JobId" field of the start_document_analysis response
    """
    # where Textract should read the document from
    document = {'S3Object': {'Bucket': s3BucketName, 'Name': objectName}}

    textract = boto3.client('textract')

    # request both FORMS (key-value pairs) and TABLES from the analysis
    job = textract.start_document_analysis(
        DocumentLocation=document,
        FeatureTypes=['FORMS', 'TABLES'],
    )

    return job["JobId"]

In [4]:
def isJobComplete(jobId:str) -> str:
    """
    Block until the Textract job leaves the "IN_PROGRESS" state.

    The status is printed after every poll. The first poll happens after a
    1 second pause, every following poll after a 5 second pause.
    ------------------------------------------------------------------------------------------
    Input
        :param jobId: (type str)
            Identifier returned when the job was started

    Output
        :return: type str
            The first "JobStatus" value observed that is not "IN_PROGRESS"
    """
    def _poll() -> str:
        # fetch and report the current status of the job
        current = textract.get_document_analysis(JobId=jobId)["JobStatus"]
        print("Job status: {}".format(current))
        return current

    # short initial pause before the first status request
    time.sleep(1)
    textract = boto3.client('textract')

    job_state = _poll()
    while job_state == "IN_PROGRESS":
        # longer pause between subsequent status requests
        time.sleep(5)
        job_state = _poll()

    return job_state

In [5]:
def getJobResults(jobId:str) -> list:
    """
    Collect every paginated response of a Textract document-analysis job.
    ------------------------------------------------------------------------------------------
    Input
        :param jobId: (type str)
            Identifier of the Textract job to read

    Output
        :return: type list
            The get_document_analysis responses in the order they were received,
            one list element per result-set page
    """
    collected = []
    textract = boto3.client('textract')

    # the first request carries no token; later ones follow the NextToken chain
    request = {'JobId': jobId}
    while True:
        chunk = textract.get_document_analysis(**request)
        collected.append(chunk)
        print("Resultset page recieved: {}".format(len(collected)))

        # a missing (or empty) NextToken marks the last result-set page
        token = chunk.get('NextToken')
        if not token:
            return collected

        request = {'JobId': jobId, 'NextToken': token}

In [6]:
def runJob(bucket:str, key:str) -> list:
    """
    Run a Textract document-analysis job end to end (start, wait, fetch results).
    ------------------------------------------------------------------------------------------
    Input
        :param bucket: (type str)
            Name of the S3 bucket that holds the document
        :param key: (type str)
            Key of the document inside the bucket

    Output
        :return: type list
            The paginated responses returned by getJobResults. Results are fetched
            whatever the final job status is; callers inspect
            response[0]['JobStatus'] to detect a failed job.
    """
    jobId = startJob(bucket, key)
    print("Started job with id: {}".format(jobId))

    # isJobComplete returns a (non-empty) status string once polling stops, so it
    # is used purely to block; branching on its truthiness would always be taken
    # and would leave the result unbound on the other path
    isJobComplete(jobId)

    return getJobResults(jobId)

# AWS Extraction Scripts (Key-Value Pairs)
**Adapted from the AWS documentation example for extracting key-value pairs from form documents; the Textract Block objects are stored in maps keyed by block Id (refer to [URL](https://docs.aws.amazon.com/textract/latest/dg/examples-extract-kvp.html)).**

In [7]:
def find_value_block(key_block, value_map):
    """
    Look up the VALUE block that a KEY block points to.
    ------------------------------------------------------------------------------------------
    Input
        :param key_block: (type dict)
            A KEY block; its 'Relationships' list may hold an entry of type 'VALUE'
        :param value_map: (type dict)
            Maps block Id -> VALUE block

    Output
        :return: type dict
            The VALUE block referenced by the key. When several Ids are listed the
            last one wins (a key is expected to reference a single value). An empty
            dict is returned when the key has no 'Relationships' entry or no
            relationship of type 'VALUE', so that get_text yields '' for it.
    """
    # empty-dict default instead of an unbound local when no VALUE link exists
    value_block = {}

    # a key block normally carries a VALUE and a CHILD relationship
    for relationship in key_block.get('Relationships') or []:

        if relationship['Type'] == 'VALUE':

            # normally a single Id; keep the last one if there are several
            for value_id in relationship['Ids']:
                value_block = value_map[value_id]

    return value_block

In [8]:
def get_kv_relationship(key_map, value_map, block_map):
    """
    Build a {key text: value text} dictionary from Textract FORM blocks.
    ------------------------------------------------------------------------------------------
    Input
        :param key_map: (type dict)
            Maps block Id -> KEY block
        :param value_map: (type dict)
            Maps block Id -> VALUE block
        :param block_map: (type dict)
            Maps block Id -> block, used to resolve the text of child blocks

    Output
        :return: type dict
            Key text mapped to value text (e.g. {'Total Assets':'$ 189,232'});
            if two keys share the same text the later one overwrites the earlier
    """
    pairs = {}

    # the block ids themselves are not needed, only the key blocks
    for key_block in key_map.values():

        # resolve the value block this key points to
        matched_value = find_value_block(key_block, value_map)

        # turn both blocks into plain text
        label = get_text(key_block, block_map)
        content = get_text(matched_value, block_map)

        pairs[label] = content

    return pairs

In [9]:
def get_text(result, blocks_map):
    """
    Concatenate the text of a block's CHILD blocks.
    ------------------------------------------------------------------------------------------
    Input
        :param result: (type dict)
            Block whose 'Relationships' (if present) list the child block Ids
            e.g. 'Relationships' : [{'Type' : 'CHILD', 'Ids': ['e2b3b12f-...']}]
        :param blocks_map: (type dict)
            Maps block Id -> block

    Output
        :return: type str
            Every WORD child contributes its text followed by a space; every
            SELECTION_ELEMENT child whose status is SELECTED contributes 'X '.
            An empty string is returned when the block has no 'Relationships'.
    """
    # nothing to resolve without a relationships entry
    if 'Relationships' not in result:
        return ''

    pieces = []
    for link in result['Relationships']:

        # only CHILD relationships point at text-bearing blocks
        if link['Type'] != 'CHILD':
            continue

        for child_id in link['Ids']:
            child = blocks_map[child_id]
            kind = child['BlockType']

            if kind == 'WORD':
                pieces.append(child['Text'] + ' ')

            # a ticked option button / check box is rendered as an X
            elif kind == 'SELECTION_ELEMENT' and child['SelectionStatus'] == 'SELECTED':
                pieces.append('X ')

    return ''.join(pieces)

# OCR Wrapper Functions
**These functions run an OCR job with AWS Textract and return well-formatted data.**

In [10]:
def trp2df(table:trp.Table) -> pd.DataFrame:
    """
    Function designed to convert a trp table into a dataframe (visits every cell once)
    ------------------------------------------------------------------------------------------
    Input
        :param table: (type trp.Table)
            A trp table object parsed from a pdf using AWS Textract  
    
    Output
        :return: type pandas.DataFrame
            One row per table row and one column per cell of the FIRST row, holding
            the stripped cell text. A table without rows gives an empty DataFrame;
            rows shorter than the first are padded with '' and longer rows are
            truncated to the width of the first row.
    """
    rows = table.rows

    # a table without rows has no first row to size the matrix from
    if not rows:
        return pd.DataFrame()

    # the first row fixes the number of columns of the matrix
    width = len(rows[0].cells)

    matrix = []
    for row in rows:

        # strip the text of each cell, never reading past the table width
        texts = [cell.text.strip() for cell in row.cells[:width]]

        # pad short rows so the matrix stays rectangular (no-op for full rows)
        texts.extend([''] * (width - len(texts)))
        matrix.append(texts)

    return pd.DataFrame(matrix)

In [11]:
def readTable(response:list) -> tuple:
    """
    Function to transform AWS Textract object to a dataframe, by searching for tables
    ------------------------------------------------------------------------------------------
    Input
        :param response: (type list)
            The list of AWS Textract responses (result-set pages) of one document
    
    Output
        :return: type tuple (or None)
            A 3-element tuple:
              [0] the concatenated dataframe of every table flagged as balance sheet
              [1] the trp page objects on which those tables were found
              [2] the 0-based indices of those pages within the document
            The tuple is returned as soon as a flagged table also contains a
            "Liabilities"/"Liability" line item. None is returned when a table
            with more than 3 non-empty columns is met (treated as a parsing
            error) or when no such liability table is found in the document,
            even if other tables were flagged before.
    """
    
    catDF = []          # in the event multiple tables are flagged we concat them
    page_series = []    # page objects where a balance sheet table was flagged
    page_nums = []      # 0-based indices of those pages
    
    # format the Textract response type 
    doc = trp.Document(response)
    
    # iterate through document pages, tracking the 0-based page index
    for page_count, page in enumerate(doc.pages):
        
        # iterate through page tables
        for table in page.tables: 
            
            # convert trp-table into dataframe object
            df = trp2df(table)
            
            # remove columns that are completely empty
            empty_cols = [col for col in df.columns if (df[col] == '').all()]
            df = df.drop(empty_cols, axis=1)
  
            # renumber the remaining columns 0..n-1
            n = df.columns.size
            df.columns = np.arange(n)
            
            ##############################################################
            #                           NOTES
            #          a good dataframe should have 2-3 columns
            #   more than 3 aborts the whole read, fewer than 2 is skipped
            ##############################################################
            
            # more than 3 columns most likely signals an issue in parsing
            if n > 3:
                return None
            
            # a single (or no) column cannot hold line items and values
            if n < 2:
                continue
                
            ##############################
            # Balance Sheet Assumptions
            ##############################
            
            # first column holds the line items (e.g. Cash, Total Assets, Total Liabilities)
            line_items = df[df.columns[0]]

            # "cash" at the beginning of a line item, ignoring case (asset check)
            assetCheck = line_items.str.contains('^Cash', regex=True, flags=re.IGNORECASE)

            # "Liabilities"/"Liability" at the beginning or end of a line item, ignoring case
            debtCheck1 = line_items.str.contains('Liabilities$|^Liabilities', regex=True, flags=re.IGNORECASE)
            debtCheck2 = line_items.str.contains('Liability$|^Liability', regex=True, flags=re.IGNORECASE)
            
            # a '$' followed by at least one character in the second column; used to
            # avoid reading the table of contents, which was flagged in prior reads
            # (raw string: same pattern as before without invalid escape sequences)
            dollarCheck = df[df.columns[1]].str.contains(r'\$[^\]]+', regex=True, flags=re.IGNORECASE)
            
            ##############################
            # Balance Sheet Determination
            ##############################
            
            has_line_item = bool((assetCheck | debtCheck1 | debtCheck2).any())
            has_dollar = bool(dollarCheck.any())
            has_liability = bool(debtCheck1.any() or debtCheck2.any())
            
            # an asset or liability term together with a $ sign flags the table
            if not (has_line_item and has_dollar):
                continue
                
            # we append tables since asset and liability tables are often separate
            catDF.append(df)                
            
            # record each flagged page only once
            if page not in page_series:
                page_series.append(page)
                page_nums.append(page_count)
                
            # once the liability side is seen the balance sheet is taken as complete
            if has_liability:
                return (pd.concat(catDF), page_series, page_nums)
    
    # no liability table was found in the whole document
    return None
        

In [12]:
def readPNG(pages:list, png_path:str, bucket='ran-s3-systemic-risk') -> pd.DataFrame:
    """
    Function to run AWS Textract on per-page PNG images and search them for balance sheet tables
    ------------------------------------------------------------------------------------------
    Input
        :param pages: (type list)
            Page indices; each one is formatted into the PNG file name
            '<subfolder>-p<index>.png'
        :param png_path: (type str)
            S3 folder of the PNG files, ending with '/'; the last folder name
            is used as the file-name prefix
        :param bucket: (type str)
            S3 bucket that stores the PNG files
    
    Output
        :return: type pandas.DataFrame (or None)
            The concatenation of every table flagged as balance sheet, returned as
            soon as a flagged table also contains a "Liabilities"/"Liability" line
            item. None is returned when a table with more than 3 non-empty columns
            is met (treated as a parsing error) or when no such liability table is
            found in any of the images.
    """
    subfolder = png_path.split('/')[-2]      # subfolder where PNG files are stored
    
    # construct PNG paths for the relevant pages
    textract_paths = [png_path + subfolder + '-p{}.png'.format(idx) for idx in pages]
    
    catDF = []          # in the event multiple tables are flagged we concat them
    
    # path iterates through each png image matching the page numbers found in PDFs
    for path in textract_paths:
        
        try:
            res = runJob(bucket, path)
            
            # a failed Textract job has nothing to extract, move to the next image
            if res[0]['JobStatus'] == 'FAILED':
                continue

            # format the Textract response type 
            doc = trp.Document(res)

            for page in doc.pages:
                for table in page.tables: 

                    # convert trp-table into dataframe object
                    df = trp2df(table)
                    
                    # remove columns that are completely empty
                    empty_cols = [col for col in df.columns if (df[col] == '').all()]
                    df = df.drop(empty_cols, axis=1)

                    # renumber the remaining columns 0..n-1
                    n = df.columns.size
                    df.columns = np.arange(n)
                    
                    ##############################################################
                    #                           NOTES
                    #          a good dataframe should have 2-3 columns
                    #   more than 3 aborts the whole read, fewer than 2 is skipped
                    ##############################################################

                    # more than 3 columns most likely signals an issue in parsing
                    if n > 3:
                        return None

                    # a single (or no) column cannot hold line items and values
                    if n < 2:
                        continue

                    ##############################
                    # Balance Sheet Assumptions
                    ##############################

                    # first column holds the line items (e.g. Cash, Total Assets, Total Liabilities)
                    line_items = df[df.columns[0]]

                    # "cash" at the beginning of a line item, ignoring case
                    assetCheck = line_items.str.contains('^Cash', regex=True, flags=re.IGNORECASE)

                    # "Liabilities"/"Liability" at the beginning or end of a line item, ignoring case
                    debtCheck1 = line_items.str.contains('Liabilities$|^Liabilities', 
                                                         regex=True, flags=re.IGNORECASE)
                    debtCheck2 = line_items.str.contains('Liability$|^Liability', 
                                                         regex=True, flags=re.IGNORECASE)

                    # a '$' followed by at least one character in the second column; used to
                    # avoid reading the table of contents, which was flagged in prior reads
                    # (raw string: same pattern as before without invalid escape sequences)
                    dollarCheck = df[df.columns[1]].str.contains(r'\$[^\]]+', regex=True, flags=re.IGNORECASE)

                    ##############################
                    # Balance Sheet Determination
                    ##############################

                    has_line_item = bool((assetCheck | debtCheck1 | debtCheck2).any())
                    has_dollar = bool(dollarCheck.any())
                    has_liability = bool(debtCheck1.any() or debtCheck2.any())

                    # an asset or liability term together with a $ sign flags the table
                    if not (has_line_item and has_dollar):
                        continue

                    # we append tables since asset and liability tables are often separate
                    catDF.append(df)                

                    # once the liability side is seen the balance sheet is taken as complete
                    if has_liability:
                        return pd.concat(catDF)
        
        # best effort: a Textract/parsing error on one image must not stop the others,
        # but it is reported instead of being silently discarded (and, unlike a bare
        # except, KeyboardInterrupt/SystemExit are not swallowed)
        except Exception as exc:
            logging.warning('Textract PNG parse failed for %s: %r', path, exc)
    
    # default return None
    return None

In [13]:
def readForm(doc_pages:list) -> dict:
    """
    Function to turn the KEY_VALUE_SET blocks of Textract pages into a key -> value text dictionary
    ------------------------------------------------------------------------------------------
    Input
        :param doc_pages: (type list)
            TRP page object(s); each page exposes its raw Textract blocks as page.blocks
    
    Output
        :return: type dict
            A python dictionary that maps KEYS (line items) with VALUES (corresponding records),
            as built by get_kv_relationship (e.g. {'Cash and cash equivalents' : '$ 12,513'})
    """
    key_map, value_map, block_map = {}, {}, {}

    for page in doc_pages:
        for block in page.blocks: 

            # every block is indexed by id so child text can be resolved later
            identifier = block['Id']
            block_map[identifier] = block

            # only key-value sets are split into the KEY / VALUE maps
            if block['BlockType'] != "KEY_VALUE_SET":
                continue

            # an entity type of KEY marks a key block, anything else is treated as a value
            target = key_map if 'KEY' in block['EntityTypes'] else value_map
            target[identifier] = block
    
    # convert block objects to text dictionary map
    return get_kv_relationship(key_map, value_map, block_map)

In [14]:
def readText(doc_pages:list) -> dict:
    """
    Function to map the text of every LINE block to the confidence Textract reported for it
    ------------------------------------------------------------------------------------------
    Input
        :param doc_pages: (type list)
            TRP page object(s); each page exposes its raw Textract blocks as page.blocks
    
    Output
        :return: type dict
            A python dictionary that maps line TEXT to its confidence figure
            (e.g. {'Cash and cash equivalents' : 99.97891}); when the same text
            occurs more than once the confidence of the last occurrence is kept
    """
    # only LINE blocks are kept; later duplicates overwrite earlier ones
    return {
        block['Text']: block['Confidence']
        for page in doc_pages
        for block in page.blocks
        if block['BlockType'] == "LINE"
    }

## Extract Balance Sheet information

In [15]:
def textractParse(pdf_path:str, png_path:str, bucket:str) -> tuple:
    """
    Function runs a Textract job on a PDF and extracts its Balance Sheet information
    ------------------------------------------------------------------------------------------
    Input
        :param pdf_path: (type str)
            S3 key of the PDF to analyse
        :param png_path: (type str)
            S3 folder (ending with '/') holding the per-page PNG images of the same filing
        :param bucket: (type str)
            S3 bucket that stores both the PDF and the PNG files

    Output
        :return: type tuple
            A 5-element tuple (df1, df2, forms_data, text_data, error):
              df1        - balance sheet dataframe read from the PDF
              df2        - balance sheet dataframe read from the PNG(s), may be None
              forms_data - key -> value dictionary of the balance sheet page(s)
              text_data  - line text -> confidence dictionary of the balance sheet page(s)
              error      - None on success, otherwise an error message; in that
                           case the four other elements are None
    """
    res = runJob(bucket, pdf_path)
    
    # a failed Textract job has nothing to extract
    if res[0]['JobStatus'] == 'FAILED':
        return (None, None, None, None, 'Could not parse, JOB FAILED')

    # perform OCR and return balance sheet with corresponding page object(s)
    tb_response = readTable(res)           
    
    # readTable gives None when no balance sheet is found or parsing went wrong
    if not isinstance(tb_response, tuple):
        return (None, None, None, None, 'No Balance Sheet found, or parsing error')
        
    # deconstruct the table response tuple into dataframe and page object parts
    df1, page_obj, page_num = tb_response
    print('\nPage number(s) for extraction in PNG are {}\n'.format(page_num))
    
    # try to extract from the PNGs of the same bucket (we can still get a None here);
    # the bucket is forwarded so readPNG does not fall back on its hard-coded default
    df2 = readPNG(page_num, png_path, bucket=bucket)
    
    # provided balance sheet page object(s) we select FORM and TEXT data
    forms_data = readForm(page_obj)      
    text_data = readText(page_obj)        
    
    print('\nTextract-PDF dataframe')
    print(df1)
    
    print('\nTextract-PNG dataframe')
    print(df2)
    
    return (df1, df2, forms_data, text_data, None)

## Main File Execution

In [16]:
if __name__ == "__main__":
    # Batch driver: runs Textract on every PDF subset that has no CSV output yet,
    # uploads the extracted balance sheets to S3 and keeps three JSON checkpoint
    # files (FORMS, TEXT, ERROR) in the Temp/ folder of the bucket up to date.

    # Amazon Textract client and Sagemaker session
    textract = boto3.client('textract')
    s3 = boto3.client('s3')
    session = Session()
    
    # initiate s3 bucket and corresponding data/output folder
    bucket = 'ran-s3-systemic-risk'
    
    data_png_folder = 'Input/X-17A-5-PNG-SUBSETS/'
    data_pdf_folder = 'Input/X-17A-5-PDF-SUBSETS/'
    
    output_png_folder = 'Output/X-17A-5-PNG-RAW/'
    output_pdf_folder = 'Output/X-17A-5-PDF-RAW/'
    
    temp_folder = 'Temp/'
    
    # csv directory where we store balance sheet information 
    output_png_csvs = np.array(session.list_s3_files(bucket, output_png_folder))
    output_pdf_csvs = np.array(session.list_s3_files(bucket, output_pdf_folder))
    
    # temp directory where JSON files is stored
    temp = np.array(session.list_s3_files(bucket, temp_folder))
    
    # pdf directory where we store the broker-dealer information
    # NOTE(review): [1:] drops the first listed key, presumably the folder
    # placeholder object itself; confirm against the bucket layout
    pdf_files = np.array(session.list_s3_files(bucket, data_pdf_folder))[1:]
    png_files = np.array(session.list_s3_files(bucket, data_png_folder))[1:]
    png_file_directory = list(set((map(lambda x: '/'.join(x.split('/')[:-1]), png_files))))
    
    # ===========================================================================
    # Load in Temp JSON files if present (FORM, TEXT, ERROR)
    # ===========================================================================
    
    def _load_checkpoint(s3_key, local_name):
        """Return the JSON dictionary stored under s3_key, or {} when the key is not listed in temp."""
        if s3_key not in temp:
            return {}
        
        # retrieve the file from the s3 bucket, read it and remove the local copy
        s3.download_file(bucket, s3_key, local_name)
        with open(local_name, 'r') as f:
            payload = json.loads(f.read())
        os.remove(local_name)
        return payload
    
    # KEY-VALUE dictionary (i.e Textract FORMS)
    forms_dictionary = _load_checkpoint('Temp/X17A5-FORMS.json', 'temp1.json')
    
    # TEXT-Confidence dictionary
    text_dictionary = _load_checkpoint('Temp/X17A5-TEXT.json', 'temp2.json')
    
    # errors derived from Textract
    error_dictionary = _load_checkpoint('Temp/ERROR-TEXTRACT.json', 'temp3.json')
    
    # ===========================================================================
    # Perform Textract analysis on PDFs and PNGs
    # ===========================================================================
    
    # e.g. ['Input/X-17A-5-PDF-SUBSETS/42352-2012-02-29-subset.pdf'] otherwise pdf_files (full sample)
    # NOTE: select_sample is only a hand-picked subset; the loop below runs on pdf_files
    select_sample = ['Input/X-17A-5-PDF-SUBSETS/782124-2014-03-05-subset.pdf', 
                     'Input/X-17A-5-PDF-SUBSETS/853784-2003-02-28-subset.pdf',
                     'Input/X-17A-5-PDF-SUBSETS/853784-2004-03-01-subset.pdf',
                     'Input/X-17A-5-PDF-SUBSETS/853784-2005-02-28-subset.pdf']

    for pdf_paths in pdf_files:
        
        # baseFile name to name export .csv file e.g. 1224385-2004-03-01.csv
        basefile = pdf_paths.split('/')[-1].split('-subset')[0]
        fileName = basefile + '.csv'
        print('\nPerforming OCR for {}'.format(fileName))
        
        # if file is not found in directory we extract the balance sheet
        # WE LOOK TO AVOID RE-RUNNING OLD TEXTRACT PARSES TO SAVE TIME
        if (output_pdf_folder + fileName not in output_pdf_csvs):
            
            # run Textract OCR job and extract the parsed data 
            png_paths = data_png_folder + basefile + '/'
            df1, df2, forms_data, text_data, error = textractParse(pdf_paths, png_paths, bucket)

            # if no error is reported we save FORMS, TEXT, DataFrame
            if error is None:

                # store accompanying information for JSONs
                forms_dictionary[basefile] = forms_data
                text_dictionary[basefile]  = text_data
                print(text_data)
                
                # writing data frame to .csv file
                df1.to_csv(fileName, index=False)

                # save contents to AWS S3 bucket
                with open(fileName, 'rb') as data:
                    s3.put_object(Bucket=bucket, Key=output_pdf_folder + fileName, Body=data)
                
                # writing data frame to .csv file extracted from PNG
                # (the same local file name is reused, only the S3 folder differs)
                if df2 is not None:
                    df2.to_csv(fileName, index=False)
                    
                    with open(fileName, 'rb') as data:
                        s3.put_object(Bucket=bucket, Key=output_png_folder + fileName, Body=data)
    
                # remove local file after it has been created
                os.remove(fileName)

                print('-----------------------------------------------------')
                print('Saved {} file to s3 bucket'.format(fileName))
            
            else:
                error_dictionary[basefile] = error
                
        else:
            print('{} has been downloaded'.format(fileName))
    
    # ===========================================================================
    # Save JSON files for updated figures (FORM, TEXT, ERROR)
    # ===========================================================================
    
    checkpoints = [('X17A5-FORMS.json', forms_dictionary),      # FORMS (key-value pairs)
                   ('X17A5-TEXT.json', text_dictionary),        # TEXT (line -> confidence)
                   ('ERROR-TEXTRACT.json', error_dictionary)]   # ERROR messages
    
    for json_name, payload in checkpoints:
        
        # write the dictionary to a local JSON file (the with-block closes it)
        with open(json_name, 'w') as file: 
            json.dump(payload, file)
        
        # save contents to AWS S3 bucket
        with open(json_name, 'rb') as data: 
            s3.upload_fileobj(data, bucket, 'Temp/' + json_name)
        
        # remove local file for JSON
        os.remove(json_name)
        


Performing OCR for 1146184-2004-03-01.csv
1146184-2004-03-01.csv has been downloaded

Performing OCR for 1146184-2005-03-02.csv
Started job with id: 7e6f794f696b1a1075d08427f87ee3affbf2889b399ce0e1c349b763f84470f0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Performing OCR for 1146184-2006-03-01.csv
1146184-2006-03-01.csv has been downloaded

Performing OCR for 1146184-2007-02-26.csv
1146184-2007-02-26.csv has been downloaded

Performing OCR for 1146184-2008-02-29.csv
1146184-2008-02-29.csv has been downloaded

Performing OCR for 1146184-2009-03-02.csv
1146184-2009-03-02.csv has been downloaded

Performing OCR for 1146184-2010-02-25.csv
1

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Performing OCR for 1591458-2017-01-23.csv
Started job with id: b392b2dc0165cb637534120bf4ab2186ac89a46482a1ce8f6161f454ecdea14c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Performing OCR for 1591458-2018-02-28.csv
1591458-2018-02-28.csv has been downloaded

Performing OCR for 1591458-2019-02-28.csv
1591458-2019-02-28.csv has been downloaded

Performing OCR for 1591458-2020-02-28.csv
159

{'DAIWA SECURITIES AMERICA INC.': 99.59651184082031, '(A Wholly Owned Subsidiary of Daiwa America Corporation)': 99.5159683227539, 'Statement of Financial Condition': 99.93001556396484, 'March 31, 2005': 99.1182632446289, '(In thousands, except per share data)': 99.90766143798828, 'Assets': 99.9847640991211, 'Cash and cash equivalents': 99.9422378540039, '$': 99.88455200195312, '100,697': 99.94585418701172, 'Cash and securities segregated under Federal and other regulations': 99.93540954589844, '79,264': 99.89991760253906, 'Securities purchased under agreements to resell': 99.96813201904297, '13,339,723': 99.77059173583984, 'Securities borrowed': 99.96603393554688, '3,769,187': 99.8650131225586, 'Receivable from brokers, dealers, and clearing organizations': 99.91917419433594, '1,122,806': 99.68282318115234, 'Securities owned, at market value': 99.96402740478516, '925,963': 99.90121459960938, 'Investment in limited partnership': 99.94544982910156, '33,979': 99.8828125, 'Receivable from

-----------------------------------------------------
Saved 26617-2006-05-26.csv file to s3 bucket

Performing OCR for 26617-2007-05-29.csv
Started job with id: 3c2066b0427fdda51255a842788999508ef3f7cbfb237f2d36a59d414e5a61cc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: cdd96781954109e6998cba2287cae49294dd9e42848708c92556bb5352178da8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents      $ 83,284
1   Cash and securities segregated for regulatory ...       139,973
2        

Started job with id: be9a5c87ff524cb2870b138ed21a35455dd01546f88fb1310e47964f500d029a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: a04fd765652880550ff881a0c0ae8b224b23c7da75745a2b43cd04905abe7943
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equ

-----------------------------------------------------
Saved 26617-2008-05-29.csv file to s3 bucket

Performing OCR for 26617-2009-05-29.csv
Started job with id: 6b1689da4c7957974d8883830b3d11220e4bb56c486a582c709be5e7c935d49a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 8d58a9c870158d37a429f969173fc324d0e1f86135ebb85cd24ba41149a41307
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                     

-----------------------------------------------------
Saved 26617-2009-05-29.csv file to s3 bucket

Performing OCR for 26617-2009-06-08.csv
Started job with id: 8990314015cef500e42bff4256d4bb4c2765b4882208195024952c4f0c24fa2d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 582f9db13261f035304424bed33ab9d6e27244181efed3be69730c89a265bfb5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 183,472
1   Cash and securities segre

Started job with id: 50cf854943329fe67278c290d98958909760d7ef4c2a96f05a992c0afe138f1c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: b9f0691c63384ac2939d497495bac2c95460c3207901ecd5f529496b358534b5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 123,493
1   Cash and securities segregated for regulatory ..

-----------------------------------------------------
Saved 26617-2010-05-28.csv file to s3 bucket

Performing OCR for 26617-2011-05-31.csv
Started job with id: 9f637e68a14b4ca1c0f8e3dcf73078b1f7e6e5e5b7eed69c5e9e4e10acb8d7f8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 2a238972faf8a025c8e1668b81378191ed79d9e021605238cc845289e8752b2c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                     

-----------------------------------------------------
Saved 26617-2011-05-31.csv file to s3 bucket

Performing OCR for 26617-2012-05-30.csv
Started job with id: 02779a027820c88ee8053b40a1fc048f51ff0ca88267689281cb662ef545351e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: c5202dd7b1dac6db9b3829c9ec20bd46bf85e85d781529ff894d934061d43d56
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 351,311
1             Cash segregated

-----------------------------------------------------
Saved 26617-2012-05-30.csv file to s3 bucket

Performing OCR for 26617-2013-05-30.csv
Started job with id: 70c8cac57d7eab2537503e5a55facfb2290af8ec24f996e22bd1d9b8cb1758f1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: d4fd9c514150fc0b1c13e4ed53dfbf99198baf633a2aeaea55230416a5bd3abb
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                

-----------------------------------------------------
Saved 26617-2013-05-30.csv file to s3 bucket

Performing OCR for 26617-2014-05-29.csv
Started job with id: 636577719c498ca25048111e5b2d39313dae7218c527f40a67e8250764229aac
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: 1391a3474883615a7f273bb67015f5d23c2c5abfea9e8d6c1520473bee3aeeab
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Res

-----------------------------------------------------
Saved 26617-2014-05-29.csv file to s3 bucket

Performing OCR for 26617-2015-06-01.csv
Started job with id: 600f9e0d72d3037ba6aca4f67b1d52f012aba3c98c7f47f3599fdc7224a72455
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: ddb9e3458c1c71f4157aee4337fd2ed04d4bc7e3bc55171126f7c19889e3b724
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRES

Started job with id: 19a3efc1af903da5895e2f3405297b0de539f8b861923cb62101d3b9a8bda4ea
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [0]

Started job with id: 6deee48344af772596c59885dc50554aa4b683bdf4a9f1f5244e3388ccb418fc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 567,798
1             Cash segregated for regulatory purposes       102,572
2     Securities purchased under agreements to resell    23,505,748
3          Financial instruments owned, at fair value 

-----------------------------------------------------
Saved 26617-2016-05-31.csv file to s3 bucket

Performing OCR for 26617-2017-05-30.csv
Started job with id: 713f7390cef8ccd8013c9606ac9a23bfd7ad7efdc9fcc400a8e626538dba9d1b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: fd3acae83dd7c84477a892842867b1c8dd4919e92dafa48d269407d3cb7b9edf
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page reciev

Started job with id: a0b2f65b55914a40d619b30f2ac86024be9f4c6e9538148354bfcad3ab830fcd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: 44f97f7316b7cc931183b5dc2ba1c6694bcae8e7508e0444fccb7e0a38c48cc8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 887,735
1             Cash segregated for regulatory purposes       121,783
2     Securities purchased under agreements to resell    25,881,985
3          Financial instrumen

Started job with id: b039fe5cc7af4ccd8342a48a890185a567ba2e1426659d5d22587a7719718c75
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [2]

Started job with id: b71dd41578d5e6197b466a4c15a702f8cb1c4df0305e9dea7f8b2ae8873306e6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash   $ 1,085,088
1             Cash segregated for regulatory purposes       125,169
2     Securities purchased under agreements to resell    30,230,922
3          Financial instruments owned, at fair value    15,938,122
4            

-----------------------------------------------------
Saved 26617-2020-06-01.csv file to s3 bucket

Performing OCR for 29648-2003-03-03.csv
Started job with id: 79c3d92a94f36b69a6e9b05b60ae79b2af36061a2ab73e5b5e22ce01015dc35a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [5]

Started job with id: 7f730d395c0c70fb00b39613df7cb62db6f11bc426d7fcd747b61a8c676374d6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF data

-----------------------------------------------------
Saved 29648-2003-03-03.csv file to s3 bucket

Performing OCR for 29648-2004-03-01.csv
Started job with id: fbb8415555c6fd42d2c5d64c5b0a8b939c51fd8bfed0c7f35fac7c8fe137bfc8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: f72032fda5ea9962f175d0a7e7d87730ba525cd90e41327d567b6fd4e9a7db1f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
J

-----------------------------------------------------
Saved 29648-2005-03-01.csv file to s3 bucket

Performing OCR for 29648-2006-03-01.csv
Started job with id: 4bf5674b061b12ce7ff02d558f005b5457833986426cc0671045843b9dfbb2e4
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 4dbc134ea7486f35d0ce34c95a721f1a1bdd35b14ef52c1c3d5cb7217ff734ba
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                                              Assets          
1                           Cash and cash equivalents     $ 590
2   Cash and securities 

{'Pershing LLC': 99.92916870117188, '(An indirect wholly owned subsidiary of The Bank of New York Company, Inc.)': 99.83995819091797, 'Statement of Financial Condition': 99.80061340332031, 'December 31, 2006': 99.78964233398438, '(Dollars in millions)': 99.4906997680664, 'Assets': 99.76245880126953, 'Cash and cash equivalents': 99.92443084716797, '$': 99.78955841064453, '730': 99.9144058227539, 'Cash and securities segregated for regulatory purposes': 99.92858123779297, '1,272': 99.79054260253906, 'Collateralized financing agreements:': 99.75645446777344, 'Securities purchased under agreements to resell': 99.9334487915039, '279': 99.89633178710938, 'Securities borrowed': 99.85332489013672, '1,086': 99.9043960571289, 'Receivables:': 99.0147933959961, 'Customers': 99.73745727539062, '5,704': 99.65400695800781, 'Brokers, dealers and clearing organizations': 99.79139709472656, '1,921': 99.76179504394531, 'Due from affiliates': 99.79991149902344, '53': 99.87824249267578, 'Intangible assets'

-----------------------------------------------------
Saved 29648-2008-02-29.csv file to s3 bucket

Performing OCR for 29648-2008-04-07.csv
Started job with id: 1c3afff6b2f9ba63c0245f9c88cf5654d2a1df6dfebc9cf723d972f6316d11c8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 859b50bbb376894a4b60b8df112c9f3f90e74cb2ae35873858b64336ac7eadce
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1


Started job with id: f408233e32dc0fc9811809cb38affda85262d53456180d7b4982f940e731220c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: a04f3c10050a1d4d00b2b7525c7fca355544ddb54b232d9314ba06bba718e31a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                         

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: ccfbca0868f59e65a7b21841f4db96cb62d1ae939641feea7f020c427a1320f7
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents   $ 1,056
1   Cash and securities segregated for regulatory ...     3,909
2          Financial instrument

Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 350b7a76e99b62d3f1c567af0baa141f36978df9070eac24227af003d4e8defa
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 978
1   Cash and securities segregated for regulatory ...     4,772
2          Financial instruments owned, at fair value       192
3                Collateralized financing agreements:          
4     Securities purchased under agreements to resell       762
5                                 Securities borrowed     2,637
6                                        Receivables:          
7                                           Customers     5,469
8         Brokers, dealers and clearin

-----------------------------------------------------
Saved 29648-2011-03-01.csv file to s3 bucket

Performing OCR for 29648-2011-03-30.csv
Started job with id: 6ee7c57b56a02741eb89562f34e9d7ba50603f74afb6a06639685b007ed90435
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: fa8ff002f60f1fb06f98360aae5f6078795c3d4338fb71b2a4221c48d2c649f1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
J

Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 39dbdd6fdc25056f8985b2cd8b20da45217cacb96407701cbda1d99ff0093cd1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 244
1   Cash and securities segregated for regulatory ...     4,454
2          Financial instruments owned, at fair value       216
3                Collateralized financing agreements:          
4                                 Securities borrowed     3,459
5     Securities purchased under agreements to resell       425
6                                        Receivables:          
7  

-----------------------------------------------------
Saved 29648-2013-03-01.csv file to s3 bucket

Performing OCR for 29648-2014-02-28.csv
Started job with id: 0ef645930013f8e547a214d8b49195a6da640b97207edd69cb39ef87fe6585ca
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 9769efbc5753606181d5daed69703bbd2a4a2c163c103ea8697621775226caf9
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                        

-----------------------------------------------------
Saved 29648-2015-03-02.csv file to s3 bucket

Performing OCR for 29648-2016-02-29.csv
Started job with id: 9b701e6866240d985e60bec33bc71e5b1397c1ed430dc78336950b00c6e76e3e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: 9d47178c1d5d16add331d9e4b143d45411f7e49403bfb3a4f78f23cf1e1c3389
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                     

Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: 4f53de41f396a8e65cc167883c8d609da8b324cf14ae27390ec2b3ef46d9d05a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 459
1   Cash and qualified securities segregated for r...     5,437
2                Collateralized financing agreements:          
3                                 Securities borrowed     7,149
4     Securities purchased under agreements to resell     1,780
5                            

-----------------------------------------------------
Saved 29648-2018-03-01.csv file to s3 bucket

Performing OCR for 29648-2019-03-01.csv
Started job with id: 9fa1f1d67cf9292a377c77508a2b81fb7e8628b298da87eb37089700054913a8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: bef8090edde152e6ef091ca44217e65bdc4d310735cd62f42bb27770d6b55fab
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 703
1   Cash and qualified securities segregated for r...     4,2

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 302
1   Cash and qualified securities segregated for r...     9,124
2                Collateralized financing agreements:          
3                                 Securities borrowed     8,889
4     Securities purchased under agreements to resell     1,171
5                                        Receivables:          
6                                           Customers    12,947
7           Broker-dealers and clearing organizations     2,794
8                                          Affiliates       216
9                                   Intangible assets         5
10         Financial instruments owned, at fair value       113
11                                     

Started job with id: 8e4f5e0d861d123eb6f82b3bd1d39875032ffb210b4795d22cdcbe8c692d15a4
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: FAILED
Resultset page recieved: 1

Performing OCR for 356628-1998-02-23.csv
Started job with id: 25ae66479aa0cbbe14fbe0b95a6cb003e4da3d09557ca9da5f918de4af849787
Job status: IN_PROGRESS
Job status: FAILED
Resultset page recieved: 1

Performing OCR for 356628-2002-02-25.csv
356628-2002-02-25.csv has been downloaded

Performing OCR for 356628-2003-02-24.csv
356628-2003-02-24.csv has been downloaded

Performing OCR for 356628-2004-03-01.csv
356628-2004-03-01.csv has been downloaded

Performing OCR for 356628-2005-03-01.csv
356628-2005-03-01.csv has been downloaded

Performing OCR for 356628-2006-03-02.csv
356628-2006-03-02.csv has been downloaded

Performing OCR for 356628-2007-03-01.csv
356628-2007-03-01.csv has been downloaded

Performing OCR for 356628-2008-02-29.csv
3566

Started job with id: 84d297f0c38f84c73402e3b63f843ac17af6105d4987fe64cdcb29463f6746b5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: b90a61be2968a1219a49d918f8a21ff306950b1f10bbffcb3f9395427d9b3ba2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0                                                Cash     $ 11,025
1   Securities deposite

Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 427d4349bcadb66215b561e97e7390c1f7d1af4779207180830a4b26c63744f1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0                                                Cash      $ 8,326
1   Securities deposited in special reserve bank a...       19,901
2   Receivable from brokers, dealers, and clearing...      548,580
3                           Receivable from customers        4,854
4                          Receivable from affiliates        2,974
5   Securities and other financial instruments own...      461,907
6   Securities owned, pledged to creditors or clea...    2,106,362
7     Securities purchased un

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCC

-----------------------------------------------------
Saved 803012-2005-01-03.csv file to s3 bucket

Performing OCR for 803012-2006-01-03.csv
Started job with id: 79737a2a947e8d9df84ec6d2a3fbf3e6ee4d278946d57a5ae95f6b713c768c03
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: ff90230b3afc415b70bdfa2551138247adeae75ac52ca042be3c74f878818f78
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                       1
0                                                      (dollars in thousands)
1                                 

-----------------------------------------------------
Saved 803012-2006-01-03.csv file to s3 bucket

Performing OCR for 803012-2007-01-29.csv
Started job with id: 0ec30711223f8f43de7009bbbaa9a87585d37fdaba6249e435848ac66b70ab79
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 568695ab304f9accb285962bf9d4adc192cd9e60a35f8d94d9e51ace1ac71e33
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

-----------------------------------------------------
Saved 803012-2007-01-29.csv file to s3 bucket

Performing OCR for 803012-2007-12-28.csv
Started job with id: 32d4d21c325eabdb579c9e43d1f0dd9065192d8bf0e0c0a8d07e17e0218f50e2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job 

-----------------------------------------------------
Saved 803012-2007-12-28.csv file to s3 bucket

Performing OCR for 803012-2008-12-30.csv
Started job with id: d78f47b3dd0526fd04ef596d0c0ab87511f5662cda908eff2ff861402ff91c53
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: 9d52dfa05d745971ebe4ef7e3e4813b8c40904dfeba749b69701514837ba5e3e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0  

{'TD Securities (USA) LLC': 99.87708282470703, 'Consolidated Statement of Financial Condition': 99.95140075683594, 'October 31, 2009': 99.83422088623047, '(In Thousands)': 99.68466186523438, 'Assets': 99.94576263427734, 'Cash': 99.98241424560547, '$': 98.27133178710938, '73,479': 99.94873046875, 'Receivable from brokers, dealers and clearing organizations': 99.93914794921875, '68,765': 99.92769622802734, 'Receivable from affiliates': 99.9591064453125, '24,284': 99.92536163330078, 'Securities and other financial instruments owned, at fair value': 99.97520446777344, '4,990,106': 99.76342010498047, 'Securities purchased under agreements to resell': 99.97535705566406, '3,986,820': 99.87214660644531, 'Securities borrowed': 99.89753723144531, '2,198,080': 99.83653259277344, 'Fixed assets (net of accumulated depreciation of $24,354)': 99.92532348632812, '13,396': 99.91541290283203, 'Deferred tax asset': 99.98057556152344, '28,221': 99.78042602539062, 'Interest receivable': 99.96958923339844, 

-----------------------------------------------------
Saved 803012-2010-12-27.csv file to s3 bucket

Performing OCR for 803012-2011-12-21.csv
Started job with id: 8d631ff87dc64b32614a5cfbe4551f7ebe380d8206e22510d86dbed713a4d16e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [4]

Started job with id: ac41a2d039e99891a936d9df50a54987bf72be8a87bbd7e9893f319e2c91b8e3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 203,801
1   Receivable from brokers, dealers and clearing ...       238,949
2            

-----------------------------------------------------
Saved 803012-2012-12-26.csv file to s3 bucket

Performing OCR for 803012-2013-12-26.csv
Started job with id: 6abed56fef4c30b6103fc4de8e8cfa3819ceafba0e12bc62dc6c34acd24d511b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 293f7bd057728a80e9f96ef0c4b1faa590e07738bb28f8204bd153ece610eebc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 118,831
1   Securities segregated under federal and other ...        29,981
2            


Page number(s) for extraction in PNG are [4]

Started job with id: ad6b90503ce0a58f0bc3fcc8ec600bada87b174fbde24073903bbed55d0fad99
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              Assets              
1                                                Cash     $ 145,533
2   Securities segregated under federal and other ...        39,999
3                Collateralized financing agreements:              
4                                 Securities borrowed     1,166,148
5     Securities purchased under agreements to resell    19,566,111
6   Receivable from brokers, dealers and clearing ...       660,953
7                          Receivable from affiliates        98,394
8   Financial instruments owned, at fair value (in...    13,749,584
9                    Commodities owned, at fair value       325,91

-----------------------------------------------------
Saved 803012-2015-12-22.csv file to s3 bucket

Performing OCR for 803012-2016-12-22.csv
Started job with id: 908638caf17ebef0efc74b3dd151917745f182851e6e94c6f7e069622c40303a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: 0a9f7b6bdaf9c84eedb99b1cb9282128d6e4b98e36c7a90a6855189c14d209cc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [2]

Started job with id: 6cf8cc517ff63d9d3df56aaeb9e87d3d10620696bc3e845eb16967fdc38e49f1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 160,853
1     Securities segregated under federal regulations        51,471
2                Collateralized financing agreements:              
3                         

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 10bb7bb807b0d92a53bc1c5fdc83994003bf287a7001b9eb26923260c46415a0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 259,102
1     Securities segregated under federal regulations        64,188
2                Collateralized financing agreements:              
3                                 Securities borrowed     1,737,809
4     Securities purchased under agreements to resell    20,570,010
5   Receivable from customers, brokers, dealers an...       320,819
6             

Started job with id: 1c8f1e62fbb3987838ade06fe32a794b4f2fd4e692799b96b6d380a28dd17976
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Performing OCR for 867626-2003-04-25.csv
Started job with id: cabb134254dc032db987af9edaa939ab6ac3c0a69949798c521913d1b184b81c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 

-----------------------------------------------------
Saved 867626-2004-04-28.csv file to s3 bucket

Performing OCR for 867626-2005-04-29.csv
Started job with id: 87808360dd1cf09c0f23f4ce4892f86f0f3a4003f4e7178e70313253c0ea9b0c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [4]

Started job with id: e8a7b1e2737a277f90dce7f75cc84b4f9e7023a6e1fd78664cd636252bea27dc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

Started job with id: 8613c02d26a5a584084336d7bb05fccca6598f5f2463c627d8e184a0bb67d652
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: d832748516ed22f73b3a298e888244a9bff1047746226cb1b6254604888fc6b0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                1
0                           Cash and cash equivalents     $ 12,043,817
1   Cash and securities deposited with clearing or...        1,500,000
2   segregated under federal and other regulations...                 


Started job with id: 48a1bc67d3e520bc739b5d49be5bf2b718c433378fbf39dbb3cdb0c409a95dab
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 69886903f38e974cf445c8c6feb8e5363b9baedf3ab77f90f418ca0710b3ec04
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                1
0                                              ASSETS                 
1                           Cash and cash equivalents     $ 52,678,

-----------------------------------------------------
Saved 867626-2007-04-26.csv file to s3 bucket

Performing OCR for 867626-2008-02-29.csv
Started job with id: 53506d1a93fb2a1feb67704f447a6c1bc9b232ecc8aade270ec7d003ebf3463a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: f034e3e6030e75f1a4d0c0b4df5e0018982da554b6c3d5f9109d510ba56438ff
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                           

-----------------------------------------------------
Saved 867626-2008-02-29.csv file to s3 bucket

Performing OCR for 867626-2009-03-02.csv
Started job with id: 4447c6ea260c5f0f0066f1bd8ba332ff50fcdade1841ab42f571f4c535bc3e17
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: aeede7fafdfa8fa8b1ebb25ddde15ab34c9ec97242f6a50323354690690b3a2a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                   

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: e782d2b003fb8e88dcc9aa6c54aa90577ea68db89875796cb78a4a8f6d1a3344
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                 1
0                              

-----------------------------------------------------
Saved 867626-2011-03-01.csv file to s3 bucket

Performing OCR for 867626-2012-02-29.csv
Started job with id: b725e3fd471863402291bd1b2d096502ae3623a367fc02e2135834e1ff6afd45
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: dc95ce68a0f65eedc30e54852cca278ec3139d3de169f3891d43bd6981ff62c9
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                   

Started job with id: da545c37b5474736d3f06af509ec8e3a39960605ca335ea47bd3a10bd9c226b0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 867626-2014-03-04.csv
Started job with id: 5a62fa381e3a027343b382f7efe45b17143dfa14262987958233919ecb9badac
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset pag

-----------------------------------------------------
Saved 867626-2014-03-04.csv file to s3 bucket

Performing OCR for 867626-2015-02-27.csv
Started job with id: 3037e4f066f015591b25ad13cb3e3dfb7542d76375d8536a502cd161c9b17502
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 867626-2016-02-26.csv
Started job with id: 56c2f6bd86719b4d5d7bf38406b2eb9228d765fe70c9c2193101ca2fbeaee703
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
J

-----------------------------------------------------
Saved 867626-2016-02-26.csv file to s3 bucket

Performing OCR for 867626-2017-02-28.csv
Started job with id: a3f4343a93fe1f84d0f940cf6f86cab3fa0a067edb2463ccdcff9d8f146112d9
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: f3afbf9b91b8ef0f08421e5f45031746a8b1432e3587be9f9a7cd624495cfd9c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents      $ 86,143
1   Cash segregated unde

-----------------------------------------------------
Saved 867626-2017-02-28.csv file to s3 bucket

Performing OCR for 867626-2018-02-27.csv
Started job with id: eed367e31e3c9dbe650159a32ef85704d7132fc7758eda92619bde6e612503a2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: 500ebbc3803ecb6546c7c75084425034e0ffb6ac0de6f854e4066d29504b148c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PR

Started job with id: 3affaaa4e961b7ad577c496c1413086553cba763158b5019c645896f03b1634f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8
Resultset page recieved: 9

Page number(s) for extraction in PNG are [2]

Started job with id: 2ccdba449619f26c10e9af9774bc08dc9f296547fbb29b51ef73d4b9b2da8f3a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                             ASSETS:              
1                           Cash and cash equivalents      $ 67,384
2   Cash segregated under federal and other regula...        17,000
3                          

Started job with id: 6a5d5d73f1548337a51922c0bd89d665a9f11fa26408eb37f2cdbbb3a3a5510c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8
Resultset page recieved: 9

Page number(s) for extraction in PNG are [2]

Started job with id: 22c058b86d0627cd99c5aab1bfc9a35cbf60abd23c287aa1d95885c65e68ed86
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0      

-----------------------------------------------------
Saved 867626-2020-02-27.csv file to s3 bucket

Performing OCR for 867626-2021-02-26.csv
Started job with id: 45720fbad1665a02cf9d0371345d5fda821c2d79b129f125f39d3bca16b03e61
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8
Resultset page recieved: 9

Page number(s) for extraction in PNG are [2]

Started job with id: 22a374997addad53cd18349a745dcc4d6a541f3aed04bcacc00320fdb961af52
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN

Started job with id: 396c746451e020e16e1994a99607d86ccf3ff5e95caad6c12f39297e6dea4256
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 87634-2005-02-28.csv
87634-2005-02-28.csv has been downloaded

Performing OCR for 87634-2006-02-28.csv
87634-2006-02-28.csv has been downloaded

Performing OCR for 87634-2006-09-21.csv
87634-2006-09-21.csv has been downloaded

Performing OCR for 87634-2007-03-01.csv
87634-2007-03-01.csv has been downloaded

Performing OCR for 87634-2008-03-03.csv
87634-2008-03-03.csv has been downloaded

Performing OCR for 87634-2009-02-27.csv
87634-2009-02-27.csv has been downlo

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Performing OCR for 91154-2007-03-01.csv
91154-2007-03-01.csv has been downloaded

Performing OCR for 91154-2008-02-29.csv
91154-2008-02-29.csv has been downloaded

Performing OCR for 91154-2009-03-02.csv
91154-2009-03-02.csv has been downloaded

Performing OCR for 91154-2010-03-02.csv
91154-2010-03-02.csv has been downloaded

Performing OCR for 91154-2011-03-01.csv
91154-2011-03-01.csv has been downloaded

Performing OCR for 91154-2012-02-29.csv
91154-2012-02-29.csv has been downloaded

Performing OCR for 91154-2013-03-01.csv
91154-2013-03-01.csv has been downloaded

Performing OCR for 91154-2014-03-04.csv
91154-2014-03-04.csv has be

In [None]:
# # Single-file run for testing and debugging Textract results (e.g. 853784-2002-03-01); uncomment to use
# textractParse('Input/X-17A-5-PDF-SUBSETS/230611-2011-03-01-subset.pdf', 
#               'Input/X-17A-5-PNG-SUBSETS/230611-2011-03-01/', 'ran-s3-systemic-risk')