In [1]:
# Run on first instance to install required libraries
%pip install smart_open minecart textract-trp

Collecting smart_open
  Downloading smart_open-5.1.0-py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 3.2 MB/s eta 0:00:011
[?25hCollecting minecart
  Downloading minecart-0.3.0-py3-none-any.whl (23 kB)
Collecting textract-trp
  Downloading textract_trp-0.1.3-py3-none-any.whl (5.8 kB)
Collecting pdfminer3k
  Downloading pdfminer3k-1.3.4-py3-none-any.whl (100 kB)
[K     |████████████████████████████████| 100 kB 6.0 MB/s eta 0:00:01
Installing collected packages: pdfminer3k, textract-trp, smart-open, minecart
Successfully installed minecart-0.3.0 pdfminer3k-1.3.4 smart-open-5.1.0 textract-trp-0.1.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
import time 
import re
import os
import trp
import boto3
import minecart
import json
import logging 

import numpy as np
import pandas as pd

from smart_open import open
from sagemaker.session import Session

# AWS Asynchronous Textract Script (requesting Job)
**Content adapted from the Amazon Textract code samples repository (see [12-pdf-text.py](https://github.com/aws-samples/amazon-textract-code-samples/blob/master/python/12-pdf-text.py)).**

In [3]:
def startJob(s3BucketName:str, objectName:str) -> str:
    """
    Submits an asynchronous Textract document-analysis job for a document stored in s3

    Input
        :param s3BucketName: (type str)
            Name of the s3 bucket that holds the document
        :param objectName: (type str)
            Key (file name) of the document inside the bucket

    Output
        :return: type str
            The "JobId" field of the start_document_analysis response
    """
    # where Textract should read the document from
    document_location = {'S3Object': {'Bucket': s3BucketName, 'Name': objectName}}

    # ask for both FORMS (key-values) and TABLES in the analysis
    textract_client = boto3.client('textract')
    job = textract_client.start_document_analysis(
        DocumentLocation=document_location,
        FeatureTypes=['FORMS', 'TABLES'],
    )

    # the job id is what the polling / result helpers need
    return job["JobId"]

In [4]:
def isJobComplete(jobId:str) -> str:
    """
    Polls a Textract job until it is no longer IN_PROGRESS and returns the final status

    Input
        :param jobId: (type str)
            Job id returned when the Textract job was started

    Output
        :return: type str
            The last "JobStatus" value read from get_document_analysis
            (any value other than "IN_PROGRESS" ends the polling)
    """
    # short pause before the first status request
    time.sleep(1)

    textract_client = boto3.client('textract')

    while True:
        # read and report the current job status
        job_status = textract_client.get_document_analysis(JobId=jobId)["JobStatus"]
        print("Job status: {}".format(job_status))

        # anything other than IN_PROGRESS means the job has stopped running
        if job_status != "IN_PROGRESS":
            return job_status

        # wait 5 seconds between status checks while the job is running
        time.sleep(5)

In [5]:
def getJobResults(jobId:str) -> list:
    """
    Collects every result page of a Textract job by following the NextToken pointers

    Input
        :param jobId: (type str)
            Job id of the Textract job whose results are read

    Output
        :return: type list
            The get_document_analysis responses, in the order they were received
    """
    textract_client = boto3.client('textract')

    collected = []                  # one entry per response received
    request = {'JobId': jobId}      # the first request carries no NextToken

    while True:
        result_page = textract_client.get_document_analysis(**request)
        collected.append(result_page)
        print("Resultset page recieved: {}".format(len(collected)))

        # a NextToken in the response points at the following result page
        token = result_page['NextToken'] if 'NextToken' in result_page else None
        if not token:
            # no further pointer, every page has been gathered
            return collected

        request['NextToken'] = token

In [6]:
def runJob(bucket:str, key:str) -> list:
    """
    Function designed to run an AWS Textract job end to end (uses the helper functions above)

    Input
        :param bucket: (type str)
            Name of the s3 bucket that holds the document
        :param key: (type str)
            Key (file name) of the document inside the bucket

    Output
        :return: type list
            The result pages returned by getJobResults for the job. Results are fetched
            whatever the final job status is; callers in this notebook inspect
            response[0]['JobStatus'] to detect a FAILED job
    """
    jobId = startJob(bucket, key)
    print("Started job with id: {}".format(jobId))

    # isJobComplete blocks until the job is no longer IN_PROGRESS. Its return value is the
    # status string (e.g. SUCCEEDED or FAILED), not a boolean, so it is not used as a
    # condition here: guarding on it left `response` unbound whenever it was falsy
    isJobComplete(jobId)

    return getJobResults(jobId)

# AWS Extraction Scripts (Key-Value Pairs)
**The content was adapted from the AWS Textract developer guide to extract key-value pairs from form documents, using Block objects that are stored in maps (see the [AWS key-value pair extraction example](https://docs.aws.amazon.com/textract/latest/dg/examples-extract-kvp.html)).**

In [7]:
def find_value_block(key_block, value_map):
    """
    Retrieving value block from AWS textract job, this contains the value text

    Input
        :param key_block: (type dict)
            A KEY block; its 'Relationships' list is searched for entries of Type 'VALUE'
        :param value_map: (type dict)
            Maps block ids to VALUE blocks

    Output
        :return: type dict
            The value block referenced by the key block. If several ids are listed the last
            one is returned. An empty dictionary is returned when the key block has no
            'Relationships' entry or no VALUE relationship (get_text turns an empty block
            into an empty string), instead of failing on an unassigned variable
    """
    # default for key blocks that carry no VALUE relationship
    value_block = {}

    # iterate through the key block relationships (normally one VALUE and one CHILD type)
    for relationship in key_block.get('Relationships', []):

        # only the VALUE relationship points at the value block
        if relationship['Type'] == 'VALUE':

            # normally a single id is stored in the list (the last id wins otherwise)
            for value_id in relationship['Ids']:
                value_block = value_map[value_id]

    return value_block

In [8]:
def get_kv_relationship(key_map, value_map, block_map):
    """
    Builds the text-to-text mapping of every KEY block and its VALUE block (FORM OCR Textract)

    Input
        :param key_map: (type dict)
            Maps block ids to KEY blocks
        :param value_map: (type dict)
            Maps block ids to VALUE blocks
        :param block_map: (type dict)
            Maps block ids to every block, used to resolve the text of child blocks

    Output
        :return: type dict
            Key text mapped to value text (e.g. {'Total Assets':'$ 189,232'}); if two keys
            produce the same text the later one overwrites the earlier one
    """
    pairs = {}

    # the block ids are not needed here, only the key blocks themselves
    for key_block in key_map.values():

        # value block that belongs to this key block
        matched_value = find_value_block(key_block, value_map)

        # text of the key, then text of the value
        label = get_text(key_block, block_map)
        pairs[label] = get_text(matched_value, block_map)

    return pairs

In [9]:
def get_text(result, blocks_map):
    """
    Assembles the text of a block from its CHILD blocks

    Input
        :param result: (type dict)
            Block whose 'Relationships' list (if present) is searched for CHILD entries
            e.g. 'Relationships' : [{'Type' : 'CHILD', 'Ids': ['e2b3b12f-ebb7-4f6e-914f-97b315672530']}]
        :param blocks_map: (type dict)
            Maps block ids to blocks, used to look up each child id

    Output
        :return: type str
            Every WORD child text followed by a space, and 'X ' for every SELECTION_ELEMENT
            child whose status is SELECTED, in child order; '' when there is nothing to read
    """
    # a block without relationships has no text to assemble
    if 'Relationships' not in result:
        return ''

    fragments = []

    for relationship in result['Relationships']:

        # only CHILD relationships reference the blocks that hold text
        if relationship['Type'] != 'CHILD':
            continue

        for child_id in relationship['Ids']:
            child = blocks_map[child_id]
            kind = child['BlockType']

            if kind == 'WORD':
                # words are separated (and terminated) by a single space
                fragments.append(child['Text'] + ' ')

            elif kind == 'SELECTION_ELEMENT' and child['SelectionStatus'] == 'SELECTED':
                # a ticked option button / mark is written as an X
                fragments.append('X ')

    return ''.join(fragments)

# OCR Wrapper Functions
**These functions run an OCR job through AWS Textract and return well-formatted data.**

In [10]:
def trp2df(table:trp.Table) -> pd.DataFrame:
    """
    Function designed to convert a trp table into a dataframe (every cell is visited once)
    ------------------------------------------------------------------------------------------
    Input
        :param table: (type trp.Table)
            A trp table object parsed from a pdf using AWS Textract  
    
    Output
        :return: type pandas.DataFrame
            A DataFrame with one row per table row, holding the stripped text of each cell.
            The number of columns is taken from the first row of the table
    """
    # the first row decides how many cells are read from every row
    n_cols = len(table.rows[0].cells)

    # build the (rows x columns) text matrix row by row, stripping surrounding whitespace
    matrix = [
        [row.cells[col].text.strip() for col in range(n_cols)]
        for row in table.rows
    ]

    return pd.DataFrame(matrix)

In [11]:
def readTable(response:list) -> tuple:
    """
    Function to transform AWS Textract object to a dataframe, by searching for balance sheet tables
    ------------------------------------------------------------------------------------------
    Input
        :param response: (type list)
            The AWS Textract response pages of one document (passed as is to trp.Document)
    
    Output
        :return: type tuple (or None)
            A 3-element tuple (dataframe, pages, page numbers): the concatenated dataframe of all
            tables flagged as balance sheet, the trp page objects those tables were found on, and
            the 0-based positions of those pages in the document.
            None is returned when a table with more than 3 non-empty columns is encountered, or
            when the document ends before a table with a liability line item has been flagged
    """
    
    catDF = []          # in the event multiple tables are flagged (concat them)
    page_series = []    # keep track of page objects where balance sheet was flagged
    page_nums = []      # positions (0-based) of the pages stored in page_series
    
    # format the Textract response type 
    doc = trp.Document(response)
    
    # iterate through document pages, page_count is the position of the page in the document
    for page_count, page in enumerate(doc.pages):
        
        # iterate through page tables
        for table in page.tables: 
            
            # convert trp-table into dataframe object
            df = trp2df(table)
            
            # remove columns that are completely empty
            empty_cols = [col for col in df.columns if (df[col] == '').all()]
            df = df.drop(empty_cols, axis=1)
  
            # number of columns left, and renumbered column names (0, 1, ...)
            n = df.columns.size
            df.columns = np.arange(n)
            
            ##############################################################
            #                           NOTES
            #          a good dataframe should have 2-3 columns
            ##############################################################
            
            # more than 3 columns is treated as a parsing issue and stops the whole search
            # NOTE(review): the note above suggests such tables could simply be skipped,
            # the original early return is kept so results do not change
            if n > 3:
                return None
            
            # a single column (or nothing) cannot hold line items with figures, skip the table
            if n < 2:
                continue
            
            ##############################
            # Balance Sheet Assumptions
            ##############################
            
            # this is the column with all line items (e.g. Cash, Total Assets, Total Liabilities)
            lineItems = df[df.columns[0]]

            # line item starting with "cash", ignoring case sensitivity (asset check)
            assetCheck = lineItems.str.contains('^Cash', regex=True, flags=re.IGNORECASE)

            # line item starting or ending with "Liabilities" / "Liability" (liability checks)
            debtCheck1 = lineItems.str.contains('Liabilities$|^Liabilities', regex=True, flags=re.IGNORECASE)
            debtCheck2 = lineItems.str.contains('Liability$|^Liability', regex=True, flags=re.IGNORECASE)
            
            # the second column should show at least one $ sign, this avoids reading the table
            # of contents (raw string: '\$' is not a valid escape in a normal string literal)
            dollarCheck = df[df.columns[1]].str.contains(r'\$[^\]]+', regex=True, flags=re.IGNORECASE)
            
            ##############################
            # Balance Sheet Determination
            ##############################
            
            hasLineItem = (assetCheck | debtCheck1 | debtCheck2).any()   # asset or liability term found
            hasDollar = dollarCheck.any()                                # '$' sign found
            hasDebt = debtCheck1.any() or debtCheck2.any()               # liability term found
            
            # the table is only kept when a line item term AND a $ sign were found
            if not (hasLineItem and hasDollar):
                continue
            
            # we append tables since asset and liability tables are often separate
            # there is no loss of generality if asset and liability terms are in one table
            catDF.append(df)                
            
            # keep track of pages that have been deemed as balance sheet (each page once)
            if page not in page_series:
                page_series.append(page)
                page_nums.append(page_count)
                
            # once a liability table is found the balance sheet is considered complete
            if hasDebt:
                return (pd.concat(catDF), page_series, page_nums)
    
    # no liability table was flagged anywhere in the document
    return None
        

In [12]:
def readPNG(pages:list, png_path:str, bucket='ran-s3-systemic-risk') -> pd.DataFrame:
    """
    Function to run AWS Textract on the PNG image of each given page and return the balance sheet
    tables found in them as one dataframe
    ------------------------------------------------------------------------------------------
    Input
        :param pages: (type list)
            Page numbers, each one is used to build a file name '<subfolder>-p<number>.png'
        :param png_path: (type str)
            s3 folder holding the PNG files, ending with '/'; the last folder name is the
            file name prefix (e.g. 'Input/PNG/1101180-2002-02-28/')
        :param bucket: (type str)
            Name of the s3 bucket the PNG files are read from
    
    Output
        :return: type pandas.DataFrame (or None)
            The concatenated dataframe of the tables flagged as balance sheet, returned as soon as
            a table with a liability line item is found. None is returned when a table with more
            than 3 non-empty columns is encountered, or when no liability table is found
    """
    subfolder = png_path.split('/')[-2]      # subfolder where PNG files are stored
    
    # construct PNG directories with relevant pages
    textract_paths = [png_path + subfolder + '-p{}.png'.format(idx) for idx in pages]
    
    catDF = []          # in the event of multiple tables/pages we concat them
    
    # path iterates through each png image matching the page numbers found in PDFs
    for path in textract_paths:
        
        try:
            # Textract response pages for this image
            res = runJob(bucket, path)
            
            # a failed Textract job has nothing to extract, move on to the next image
            if res[0]['JobStatus'] == 'FAILED':
                continue

            # format the Textract response type 
            doc = trp.Document(res)

            # iterate through document pages
            for page in doc.pages:
                
                # iterate through page tables
                for table in page.tables: 

                    # convert trp-table into dataframe object
                    df = trp2df(table)
                    
                    # remove columns that are completely empty
                    empty_cols = [col for col in df.columns if (df[col] == '').all()]
                    df = df.drop(empty_cols, axis=1)

                    # number of columns left, and renumbered column names (0, 1, ...)
                    n = df.columns.size
                    df.columns = np.arange(n)
                    
                    ##############################################################
                    #                           NOTES
                    #          a good dataframe should have 2-3 columns
                    ##############################################################

                    # more than 3 columns is treated as a parsing issue and stops the whole search
                    if n > 3:
                        return None

                    # a single column (or nothing) cannot hold line items with figures
                    if n < 2:
                        continue

                    ##############################
                    # Balance Sheet Assumptions
                    ##############################

                    # this is the column with all line items (e.g. Cash, Total Assets, Total Liabilities)
                    lineItems = df[df.columns[0]]

                    # line item starting with "cash", ignoring case sensitivity 
                    assetCheck = lineItems.str.contains('^Cash', regex=True, flags=re.IGNORECASE)

                    # line item starting or ending with "Liabilities" / "Liability"
                    debtCheck1 = lineItems.str.contains('Liabilities$|^Liabilities', 
                                                        regex=True, flags=re.IGNORECASE)
                    debtCheck2 = lineItems.str.contains('Liability$|^Liability', 
                                                        regex=True, flags=re.IGNORECASE)

                    # the second column should show at least one $ sign, this avoids reading the table
                    # of contents (raw string: '\$' is not a valid escape in a normal string literal)
                    dollarCheck = df[df.columns[1]].str.contains(r'\$[^\]]+', regex=True, flags=re.IGNORECASE)

                    ##############################
                    # Balance Sheet Determination
                    ##############################

                    hasLineItem = (assetCheck | debtCheck1 | debtCheck2).any()   # asset or liability term
                    hasDollar = dollarCheck.any()                                # '$' sign found

                    # the table is only kept when a line item term AND a $ sign were found
                    if not (hasLineItem and hasDollar):
                        continue

                    # we append tables since asset and liability tables are often separate
                    catDF.append(df)                

                    # once a liability table is found the balance sheet is considered complete
                    if debtCheck1.any() or debtCheck2.any():
                        return pd.concat(catDF)
        
        # Textract / parsing errors on one image must not stop the remaining images, but they
        # are reported instead of silently dropped; KeyboardInterrupt and SystemExit are no
        # longer swallowed
        except Exception as exc:
            logging.warning('Textract PNG extraction failed for %s: %r', path, exc)
    
    # default return None
    return None

In [13]:
def readForm(doc_pages:list) -> dict:
    """
    Function to turn the blocks of the given pages into a dictionary of key value pairs
    ------------------------------------------------------------------------------------------
    Input
        :param doc_pages: (type list)
            Page objects exposing a `blocks` list of AWS Textract block dictionaries
    
    Output
        :return: type dict
            The result of get_kv_relationship for the collected maps, i.e. KEYS (line items)
            mapped to VALUES (corresponding records), e.g. {'Cash and cash equivalents' : '$ 12,513'}
    """
    # KEY blocks, VALUE blocks and every block, all indexed by block id
    key_map, value_map, block_map = {}, {}, {}

    for page in doc_pages:
        for block in page.blocks:

            # every block is stored so its information can be retrieved later
            block_id = block['Id']
            block_map[block_id] = block

            # only key-value sets are split further into KEY and VALUE maps
            if block['BlockType'] != "KEY_VALUE_SET":
                continue

            # a block labelled with the KEY entity type is a key, any other one is a value
            target = key_map if 'KEY' in block['EntityTypes'] else value_map
            target[block_id] = block

    # convert block objects to text dictionary map
    return get_kv_relationship(key_map, value_map, block_map)

In [14]:
def readText(doc_pages:list) -> dict:
    """
    Function to map the text of every LINE block on the given pages to its confidence
    ------------------------------------------------------------------------------------------
    Input
        :param doc_pages: (type list)
            Page objects exposing a `blocks` list of AWS Textract block dictionaries
    
    Output
        :return: type dict
            LINE text mapped to the 'Confidence' value of its block
            (e.g. {'Cash and cash equivalents' : 99.97891}); when the same text appears
            more than once the confidence of the last occurrence is kept
    """
    # only LINE blocks carry the text/confidence pairs of interest
    return {
        block['Text']: block['Confidence']
        for page in doc_pages
        for block in page.blocks
        if block['BlockType'] == "LINE"
    }

## Extract Balance Sheet information

In [15]:
def textractParse(pdf_path:str, png_path:str, bucket:str) -> tuple:
    """
    Function runs a Textract job on a PDF and extracts the Balance Sheet information from it
    ------------------------------------------------------------------------------------------
    Input
        :param pdf_path: (type str)
            s3 key of the PDF file to be analysed
        :param png_path: (type str)
            s3 folder holding the PNG images of the PDF pages (passed to readPNG)
        :param bucket: (type str)
            Name of the s3 bucket that holds both the PDF and the PNG files

    Output
        :return: type tuple
            A 5-element tuple (df1, df2, forms_data, text_data, error):
            the balance sheet read from the PDF, the balance sheet read from the PNG images
            (may be None), the FORM key-value dictionary, the TEXT confidence dictionary and
            None as error. When the job failed or no balance sheet was found the first four
            items are None and the last item is the error message
    """
    # Textract response pages for the PDF
    res = runJob(bucket, pdf_path)
    
    # a failed Textract job cannot be parsed
    if res[0]['JobStatus'] == 'FAILED':
        error = 'Could not parse, JOB FAILED'
        return (None, None, None, None, error)

    # perform OCR and return balance sheet with corresponding page object(s)
    tb_response = readTable(res)           
    
    # readTable returns None when no balance sheet was found or a parsing issue occurred
    if not isinstance(tb_response, tuple):
        error = 'No Balance Sheet found, or parsing error'
        return (None, None, None, None, error)
    
    # deconstruct the table response tuple into dataframe and page object parts
    df1, page_obj, page_num = tb_response
    print('\nPage number(s) for extraction in PNG are {}\n'.format(page_num))
    
    # try to extract from the PNG images of the same bucket (we can still get a None here);
    # the bucket is passed on explicitly, otherwise readPNG falls back to its default bucket
    df2 = readPNG(page_num, png_path, bucket)
    
    # provided balance sheet page objects we select FORM and TEXT data
    forms_data = readForm(page_obj)      
    text_data = readText(page_obj)        
    
    print('\nTextract-PDF dataframe')
    print(df1)
    
    print('\nTextract-PNG dataframe')
    print(df2)
    
    return (df1, df2, forms_data, text_data, None)

## Main File Execution

In [20]:
if __name__ == "__main__":

    def _load_temp_json(s3_client, bucket_name:str, s3_key:str, listed_keys, local_name:str) -> dict:
        """
        Returns the dictionary stored in the JSON file at s3_key, or an empty dictionary when
        s3_key is not among listed_keys (the file is downloaded to local_name and removed again)
        """
        if s3_key not in listed_keys:
            return {}

        # retrieving downloaded file from s3 bucket
        s3_client.download_file(bucket_name, s3_key, local_name)

        with open(local_name, 'r') as f:
            contents = json.load(f)

        # remove local file for JSON
        os.remove(local_name)
        return contents

    def _save_temp_json(s3_client, bucket_name:str, contents:dict, local_path:str, s3_key:str) -> None:
        """
        Writes contents as JSON to local_path and uploads that file to s3_key (local file is kept)
        """
        with open(local_path, 'w') as file: 
            json.dump(contents, file)

        # save contents to AWS S3 bucket
        with open(local_path, 'rb') as data: 
            s3_client.upload_fileobj(data, bucket_name, s3_key)

    # Amazon Textract client and Sagemaker session
    textract = boto3.client('textract')
    s3 = boto3.client('s3')
    session = Session()
    
    # initiate s3 bucket and corresponding data/output folder
    bucket = 'ran-s3-systemic-risk'
    
    data_png_folder = 'Input/X-17A-5-PNG-SUBSETS/'
    data_pdf_folder = 'Input/X-17A-5-PDF-SUBSETS/'
    
    output_png_folder = 'Output/X-17A-5-PNG-RAW/'
    output_pdf_folder = 'Output/X-17A-5-PDF-RAW/'
    
    temp_folder = 'Temp/'
    
    # csv directory where we store balance sheet information 
    output_png_csvs = np.array(session.list_s3_files(bucket, output_png_folder))
    output_pdf_csvs = np.array(session.list_s3_files(bucket, output_pdf_folder))
    
    # temp directory where JSON files are stored
    temp = np.array(session.list_s3_files(bucket, temp_folder))
    
    # pdf directory where we store the broker-dealer information (the first listed key is skipped)
    pdf_files = np.array(session.list_s3_files(bucket, data_pdf_folder))[1:]
    png_files = np.array(session.list_s3_files(bucket, data_png_folder))[1:]
    png_file_directory = list(set((map(lambda x: '/'.join(x.split('/')[:-1]), png_files))))
    
    # ===========================================================================
    # Load in Temp JSON files if present (FORM, TEXT, ERROR)
    # ===========================================================================
    
    # KEY-VALUE dictionary (i.e Textract FORMS), keyed by base file name
    forms_dictionary = _load_temp_json(s3, bucket, 'Temp/X17A5-FORMS.json', temp, 'temp1.json')
    
    # TEXT-Confidence dictionary, keyed by base file name
    text_dictionary = _load_temp_json(s3, bucket, 'Temp/X17A5-TEXT.json', temp, 'temp2.json')
    
    # errors derived from Textract, keyed by base file name
    error_dictionary = _load_temp_json(s3, bucket, 'Temp/ERROR-TEXTRACT.json', temp, 'temp3.json')
    
    # ===========================================================================
    # Perform Textract analysis on PDFs and PNGs
    # ===========================================================================
    
    # e.g. ['Input/X-17A-5-PDF-SUBSETS/42352-2012-02-29-subset.pdf'] otherwise pdf_files (full sample)
    # NOTE(review): select_sample is defined but the loop below iterates over pdf_files
    select_sample = ['Input/X-17A-5-PDF-SUBSETS/1146184-2010-02-25-subset.pdf']

    for pdf_paths in pdf_files:
        
        # baseFile name to name export .csv file e.g. 1224385-2004-03-01.csv
        basefile = pdf_paths.split('/')[-1].split('-subset')[0]
        fileName = basefile + '.csv'
        print('\nPerforming OCR for {}'.format(fileName))
        
        # if file is not found in directory we extract the balance sheet
        # WE LOOK TO AVOID RE-RUNNING OLD TEXTRACT PARSES TO SAVE TIME
        if (output_pdf_folder + fileName not in output_pdf_csvs):
            
            # run Textract OCR job and extract the parsed data 
            png_paths = data_png_folder + basefile + '/'
            df1, df2, forms_data, text_data, error = textractParse(pdf_paths, png_paths, bucket)

            # if no error is reported we save FORMS, TEXT, DataFrame
            if error is None:

                # store accompanying information for JSONs
                forms_dictionary[basefile] = forms_data
                text_dictionary[basefile]  = text_data
                print(text_data)
                
                # writing data frame to .csv file
                df1.to_csv(fileName, index=False)

                # save contents to AWS S3 bucket
                with open(fileName, 'rb') as data:
                    s3.put_object(Bucket=bucket, Key=output_pdf_folder + fileName, Body=data)
                
                # writing data frame to .csv file extracted from PNG (reuses the same local file)
                if df2 is not None:
                    df2.to_csv(fileName, index=False)
                    
                    with open(fileName, 'rb') as data:
                        s3.put_object(Bucket=bucket, Key=output_png_folder + fileName, Body=data)
    
                # remove local file after it has been created
                os.remove(fileName)

                print('-----------------------------------------------------')
                print('Saved {} file to s3 bucket'.format(fileName))
            
            else:
                error_dictionary[basefile] = error
                
        else:
            print('{} has been downloaded'.format(fileName))
    
    # ===========================================================================
    # Save JSON files for updated figures (FORM, TEXT, ERROR)
    # ===========================================================================
    
    # NOTE(review): absolute SageMaker path kept from the original notebook, the folder must exist
    local_temp_folder = '/home/ec2-user/SageMaker/SEC_X17A5/temp/'
    
    # each dictionary is written to a local JSON file and uploaded to the Temp/ folder on s3
    for json_name, contents in [('X17A5-FORMS.json', forms_dictionary),
                                ('X17A5-TEXT.json', text_dictionary),
                                ('ERROR-TEXTRACT.json', error_dictionary)]:
        _save_temp_json(s3, bucket, contents, local_temp_folder + json_name, temp_folder + json_name)



Performing OCR for 1101180-2002-02-28.csv
Started job with id: 8ca5c5e4726fc433877d25ddeb5c4be8994a18ad4eceeb98853cb42deec7cdc6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4

Page number(s) for extraction in PNG are [4]

Started job with id: b79ec6080f8c505aea39732c3152c869c3ef119a252601c75df662dfed509212
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                1
0                           Cash and cash equivalents   

-----------------------------------------------------
Saved 1101180-2003-02-28.csv file to s3 bucket

Performing OCR for 1101180-2004-02-25.csv
Started job with id: 65bb7064b1d7623c52a1dbfb27bbe420d9ad7cb54ebe62e6bc67835f76f2d0cb
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4

Page number(s) for extraction in PNG are [4]

Started job with id: f6bd95e5ea2018d6407b6423da43e55743d310d3d0f4c5080bba5839d40858f0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0                 1
0                           Cash and cash equivalents       $ 6,810,264
1   Cash and securities segregated under federal a...        27,217,169
2                   Securities owned, at market va

-----------------------------------------------------
Saved 1101180-2005-03-02.csv file to s3 bucket

Performing OCR for 1101180-2006-03-01.csv
Started job with id: 97ddde725182ae11b95b9f27018d6e6647069906e96c3a0b32cf064599e22bff
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [4]

Started job with id: 011acd525959f392bacdccb844edc553e6150570dbcf95e15a345dd01911f69c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                          

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents      $ 60,672
1   Cash and securities segregated under Federal a...       159,345
2     Securities purchased under agreements to resell    18,840,294
3                                 Securities borrowed    18,685,059
4                Deposits with clearing organizations         6,905
5                          Receivables from customers         5,177
6   Receivables from broker-dealers and clearing o...     1,109,963
7                     Securities owned, at fair value       129,866
8                    Securities pledged as collateral     2,363,404
9                   Interest and dividends receivable       151,753
10                                       Other assets        28,961
11                                                     $ 41,541

-----------------------------------------------------
Saved 1101180-2009-05-12.csv file to s3 bucket

Performing OCR for 1101180-2009-07-20.csv
Started job with id: 8a26d056e229262e1c993bc28a40d858e38d7175ef567863969419c587b949af
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: 9ee04adf895dc30a8301ab0ee3c48acc9400fe2ed41fa1d24dcdfa3a1f950d9b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents      $ 21,898
1   

-----------------------------------------------------
Saved 1101180-2009-07-20.csv file to s3 bucket

Performing OCR for 1101180-2010-03-02.csv
Started job with id: f67490856de54307e130b88dc36e299d81e958dbe527d314edf9ba7eb7174c83
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 

Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Performing OCR for 1146184-2006-03-01.csv
1146184-2006-03-01.csv has been downloaded

Performing OCR for 1146184-2007-02-26.csv
1146184-2007-02-26.csv has been downloaded

Performing OCR for 1146184-2008-02-29.csv
1146184-2008-02-29.csv has been downloaded

Performing OCR for 1146184-2009-03-02.csv
1146184-2009-03-02.csv has been downloaded

Performing OCR for 1146184-2010-02-25.csv
1146184-2010-02-25.csv has been downloaded

Performing OCR for 1146184-2010-03-09.csv
Started job with id: 74ff01feeccfa97e136333219e2b35a07905b5e579ec400ad6a2c703c091ccdf
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3

Performing OCR for 1146184-2011-02-25.csv
1146184-2011-02-25.csv has been downloaded

Performing OCR for 1

Started job with id: 8c2def11d74ea2df136ceb8da299b93dcfda09f84a9721cb3fd6842be6484f3e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Performing OCR for 1591458-2017-01-23.csv
Started job with id: 3110c55b84d228b1f23435368a97c49a29aea166551cecbd0f207065ac86041f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Performing OCR for 1591458-2018-02-28.csv
1591458-2018-02-28.csv has been downloade

-----------------------------------------------------
Saved 1675365-2021-03-01.csv file to s3 bucket

Performing OCR for 200565-2002-03-01.csv
Started job with id: ecc2a82a39520b668af60374986d420bd22bf8bb9169e47a65a8cfa0e2da18f4
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 5a176475a215a87ddf75892a344d288a6a84a957502662dbe588d403504ca878
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0                    Assets C

-----------------------------------------------------
Saved 200565-2002-03-01.csv file to s3 bucket

Performing OCR for 200565-2003-03-03.csv
Started job with id: 14e025bf675fce55131205f4db190c56b0867868df63e0e8a368ead05ce559ed
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 78d84bf3a2f9ff2e66e4cc4175e7d2f986e2cf5a90d75beaa70dbbbef5f1371a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 298,003
1   Ca

-----------------------------------------------------
Saved 200565-2003-03-03.csv file to s3 bucket

Performing OCR for 200565-2004-03-01.csv
Started job with id: 71e73587be70ce25e7a0b97666b8835b7319bf970a11cb749a02f695fbe2cf23
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: ad13cfe6e8d70cc6b72148a30578e184721bf248651868339d41d9e48c6e95a3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS

-----------------------------------------------------
Saved 200565-2004-03-01.csv file to s3 bucket

Performing OCR for 200565-2005-03-01.csv
Started job with id: 4acdf6aa87f08bd2915f22e111cbab31418e37bf322e7bf7250f5a06a1ade418
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 5dc781722af7d571ae2b41bb22d6f84da588693c7136dba487fe9e7a89d0e33a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivale

-----------------------------------------------------
Saved 200565-2005-03-01.csv file to s3 bucket

Performing OCR for 200565-2006-03-03.csv
Started job with id: 4cb1d8281ef2653f52c49e031b1f0d563a29f1fa5fc3163facf63482e1c1a362
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [6]

Started job with id: dbd9fc04dbe7b225b59d97c9c42a01099b43826596f39198c04ef6fa944c7c4c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

-----------------------------------------------------
Saved 200565-2006-03-03.csv file to s3 bucket

Performing OCR for 200565-2007-03-01.csv
Started job with id: 9eea6f8fb70894c64b95f340c78c38fa0b0aa5e395657be6bf3f39699a4ec908
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: 857683042245ec86394373c858a35c34b75cd99af31f0470b5a1cb684a30f0a9
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivale

-----------------------------------------------------
Saved 200565-2007-03-01.csv file to s3 bucket

Performing OCR for 200565-2008-03-03.csv
Started job with id: 2c5181a68a7d09533abd91bc95080910481abf17688287f9d5f0aaed99d7eb9e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 930f6e8631ad300a69082119294987b14e45ff3d55baeb49611727fb049201da
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                      

-----------------------------------------------------
Saved 200565-2008-03-03.csv file to s3 bucket

Performing OCR for 200565-2008-04-03.csv
Started job with id: ec3c43d7651a9fcccca0957cdb79f882fee5f8bd27728019a3a69e36a5dc86f3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 0e2d5dd50b0a99d102494b5e77217a2ba2ab24b31841d16f8038d8ed6d8da322
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0                           Cash and cash equivalen

-----------------------------------------------------
Saved 200565-2008-04-03.csv file to s3 bucket

Performing OCR for 200565-2009-03-02.csv
Started job with id: 7ba8d9aa703856f5c6c0fcd283d81129e31bd48cf356d8aea12290f67a47c409
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 18662fe8e9128c4c20b881d1b6f380010119d162899d1fafdb505e118c545631
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivale

Started job with id: aac83d6de6c26f1de3063d0f2ca3985167b3b689b5585e7cdff95e02faecb7d3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: e2f2284d077c1274e305343af84fe58c6b95296cbcd399fbb93975d51e1bbca2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 882,675
1   Cash and securities segregated and on deposit ...     1,819,488
2          Financial instruments owned, at fair value     1,000,221
3     Securi

Started job with id: 9f62d4d710162515c74df52cc3f2a65ab85c6485d843b88c94d05a5e7f84ae99
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 816817faec2be694faefbf32fb71a616a7453444a9e2d4ecbe7b1c52c351c528
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              Assets     $ 433,458
1                           Cash and cash equivalents              
2   Cash and securities segregated and on deposit ...     2,459,092
3              

-----------------------------------------------------
Saved 200565-2011-03-02.csv file to s3 bucket

Performing OCR for 200565-2012-02-29.csv
Started job with id: 09410b59354832e1717a168c61473a619b1637cb79d2117f76cd3a34da09614b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [6]

Started job with id: 04e1a6f71d0325bd4cc8e4086da207d132f08dfbe6d8f603e808503dcf7f66e1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

-----------------------------------------------------
Saved 200565-2012-02-29.csv file to s3 bucket

Performing OCR for 200565-2013-03-05.csv
Started job with id: 42a73ac0ca3a5dd6093af21e35f2781ed87a243da5e78b1d689ce1326c65dc5b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [6]

Started job with id: bd524a66ec33beea7bcfad4ffceccf150f88e1fe4164e7abbb8c329ef188adc8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                         

-----------------------------------------------------
Saved 200565-2013-03-05.csv file to s3 bucket

Performing OCR for 200565-2014-03-04.csv
Started job with id: 7fde97f2ca59e474ebfd9164474b876b9b018d2af87b3de5570903ba2fe1b62c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 8918328de37f46656847e3d908e4bdec9f581738d31de9322258076cdf554092
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 682,077
1   Cash and securities segreg

-----------------------------------------------------
Saved 200565-2014-03-04.csv file to s3 bucket

Performing OCR for 200565-2015-03-02.csv
Started job with id: a71a00cba76465031752e5b0498a937f9ed4f9f6ce99195fe8d785a3fd9c9d2f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: a351f415fab6d7bd17a48381d8904826031ad5671a404110c792da1d60a1f4fc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0 

-----------------------------------------------------
Saved 200565-2015-03-02.csv file to s3 bucket

Performing OCR for 200565-2016-04-07.csv
Started job with id: abf0239fde83faefb42b0f93adb410d6099ee098b22737df3f0d22ff0d245209
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [1]

Started job with id: b22f3448096345147efc51361ea7e243639d270c8d081fe1e0bd7191d6284144
Job status: IN_PROGRESS
Job status: IN_PROGRESS

-----------------------------------------------------
Saved 200565-2016-04-07.csv file to s3 bucket

Performing OCR for 200565-2017-03-03.csv
Started job with id: f9c4ce6b6d0f4ef48b207f3c5c0fa967b0846f27c5a29e20af9f6c2072a77dff
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 16e475ac17e23c5d0d35a679b1d33ba9481f026f38c57ae717eedd5ff179192e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 925,934
1   Ca

-----------------------------------------------------
Saved 200565-2017-03-03.csv file to s3 bucket

Performing OCR for 200565-2018-03-02.csv
Started job with id: 8989dd4e4603098ff9fcf103d7d6343b35c7f4904d1880ce9a6fa5820dcd8107
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5]

Started job with id: 3026f4cb40b95f812eaaa1cdf13dd177b4ec6f7ce6b3bddeecda91b57df66e76
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0 

Started job with id: 5fc4ee7e7191a5b72852d6ac319c3646bca5ed6f514a3d9323a977360b462e95
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 200565-2020-03-03.csv
Started job with id: 6a61e043e222c993c6d3be369f6b24b05268c68b447c6dc021abf8002b38222f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [0]

Start

Started job with id: 06abafe8753d20706e8f7a05683d15ac1dcb652fe33f3b20e461fe42de0ff0d0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [0]

Started job with id: a0d4ae5eef432df7cfc7f64e5e4b5d2a8ba80db0473ea7d9606c14b9ebc97ecb
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              Assets              
1                           Cash and cash equivalents   $ 1,162,440
2   Cash and securities segregated 

-----------------------------------------------------
Saved 200565-2021-03-01.csv file to s3 bucket

Performing OCR for 230611-2002-03-01.csv
230611-2002-03-01.csv has been downloaded

Performing OCR for 230611-2003-03-03.csv
230611-2003-03-03.csv has been downloaded

Performing OCR for 230611-2004-03-01.csv
230611-2004-03-01.csv has been downloaded

Performing OCR for 230611-2005-03-01.csv
230611-2005-03-01.csv has been downloaded

Performing OCR for 230611-2006-03-01.csv
230611-2006-03-01.csv has been downloaded

Performing OCR for 230611-2007-03-01.csv
230611-2007-03-01.csv has been downloaded

Performing OCR for 230611-2008-02-29.csv
230611-2008-02-29.csv has been downloaded

Performing OCR for 230611-2009-03-02.csv
230611-2009-03-02.csv has been downloaded

Performing OCR for 230611-2009-03-09.csv
230611-2009-03-09.csv has been downloaded

Performing OCR for 230611-2010-03-01.csv
Started job with id: d34c53eab77b9a302d04976c6e6eb7da86f4d689b7286ac1c3590577fbdb25ec
Job status: IN_P

-----------------------------------------------------
Saved 276523-2002-03-01.csv file to s3 bucket

Performing OCR for 276523-2003-03-03.csv
Started job with id: 0a95d9d65c98427a5c3d5dfe673fcce80cc23173211597caa249544cc60f01ef
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4

Page number(s) for extraction in PNG are [4]

Started job with id: 5dcdf196e8310b4d8e513a8b76418f71bf63c1e630f5731530a82fb844363e75
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0           

-----------------------------------------------------
Saved 276523-2004-03-04.csv file to s3 bucket

Performing OCR for 276523-2005-03-01.csv
Started job with id: 5dc32dca0b08f7e302fa5feb3fb022a5b5cbf17edd22b606008e080db2a60053
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3

Page number(s) for extraction in PNG are [4]

Started job with id: b81942b128d6c8ac9e5991db54183aff3cfccd051b23f4f3b8688269f0df34d4
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents  $ 84,753
1   Receivable from brokers, dealers, and clearing...    11,363
2          Securities owned, at market value (note 4)  

{'WELLS FARGO SECURITIES, LLC': 99.55668640136719, 'Statement of Financial Condition': 99.76056671142578, 'December 31, 2006': 99.70559692382812, '(In thousands)': 99.75387573242188, 'Assets': 99.73333740234375, 'Cash and cash equivalents': 99.91542053222656, '$': 99.83255004882812, '56,812': 99.69587707519531, 'Receivable from brokers, dealers, and clearing organizations (note 3)': 99.23706817626953, '24,194': 99.2357406616211, 'Securities owned, at market value': 99.89978790283203, '86': 99.94348907470703, 'Furniture, equipment, and leasehold improvements, at cost, less accumulated': 99.23908996582031, 'depreciation and amortization of $1,719 (note 4)': 99.58783721923828, '795': 99.93586730957031, 'Receivable from affiliated parties (note 5)': 98.70177459716797, '1,397': 99.8542251586914, 'Other receivables': 99.88062286376953, '21': 99.86229705810547, 'Goodwill (note 10)': 99.5582046508789, '76,312': 99.79216003417969, 'Other assets': 99.9430923461914, '448': 99.61892700195312, 'Tot

-----------------------------------------------------
Saved 276523-2009-03-02.csv file to s3 bucket

Performing OCR for 29648-2003-03-03.csv
29648-2003-03-03.csv has been downloaded

Performing OCR for 29648-2004-03-01.csv
29648-2004-03-01.csv has been downloaded

Performing OCR for 29648-2005-03-01.csv
29648-2005-03-01.csv has been downloaded

Performing OCR for 29648-2006-03-01.csv
29648-2006-03-01.csv has been downloaded

Performing OCR for 29648-2007-03-01.csv
29648-2007-03-01.csv has been downloaded

Performing OCR for 29648-2008-02-29.csv
29648-2008-02-29.csv has been downloaded

Performing OCR for 29648-2008-04-07.csv
29648-2008-04-07.csv has been downloaded

Performing OCR for 29648-2008-05-13.csv
29648-2008-05-13.csv has been downloaded

Performing OCR for 29648-2009-03-02.csv
29648-2009-03-02.csv has been downloaded

Performing OCR for 29648-2010-03-01.csv
29648-2010-03-01.csv has been downloaded

Performing OCR for 29648-2011-03-01.csv
29648-2011-03-01.csv has been downloade

Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4

Performing OCR for 42352-2011-03-01.csv
42352-2011-03-01.csv has been downloaded

Performing OCR for 42352-2012-02-29.csv
42352-2012-02-29.csv has been downloaded

Performing OCR for 42352-2013-03-01.csv
42352-2013-03-01.csv has been downloaded

Performing OCR for 42352-2014-07-31.csv
42352-2014-07-31.csv has been downloaded

Performing OCR for 42352-2015-03-10.csv
42352-2015-03-10.csv has been downloaded

Performing OCR for 42352-2016-02-23.csv
42352-2016-02-23.csv has been downloaded

Performing OCR for 42352-2017-03-01.csv
42352-2017-03-01.csv has been downloaded

Performing OCR for 42352-2018-02-27.csv
42352-2018-02-27.csv has been downloaded

Performing OCR for 42352-2019-03-01.csv
42352-2019-03-01.csv has been downloaded

Performing OCR for 42352-2020-02-28.csv
Started job with id: c347fb9b4b060b7493ad749ed0d5a59ce7dd148f21ef44f96d5bd2ad28629929
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job st

-----------------------------------------------------
Saved 48966-2002-03-04.csv file to s3 bucket

Performing OCR for 48966-2003-03-03.csv
Started job with id: 52145660ef837c42ec39d35d33da7467ea21855080d0920b517229b3400d067c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 1dc10263d8de451e7b6520cc2c7a52dd55592bd726e6bcf37f491535f8551072
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0           1  \
0                                                Cash               
1   Cash 

Started job with id: 6dc5f8b1c5565cce93d3efa1eaa061df6f7939c0f635973798de0c7e0fceadbd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [5]

Started job with id: 145e70bd9a483e16a5cf62a3418eebeb5451ff5704a0f63645ee56988fd1d877
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1  \
0                                                Cash                
1   Cash segregated under Federal and other regula...                
2                     Securities owned, at fair value  $ 1,246,875   
3              Securities owned

-----------------------------------------------------
Saved 48966-2004-03-01.csv file to s3 bucket

Performing OCR for 48966-2005-03-08.csv
Started job with id: 59172818a6baf621a3cad70e097f0fecbcac640fc261ac7a8c33ff5a52b51900
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [3]

Started job with id: 4fa32961cc8cdc98d27c951f57bf4fbe13424e9c01049ff6c68ee734f9610065
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                                                Cash  

-----------------------------------------------------
Saved 48966-2006-03-02.csv file to s3 bucket

Performing OCR for 48966-2007-03-01.csv
Started job with id: c4965518e985dc4d5a315ba2d5d0f35183b29e422eff4e7f40c61802ba17aae3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: 71fb885b0ed79662260b6251a56d42e8ad1d45ff1e5aba5b5447d4b24699984d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
J

-----------------------------------------------------
Saved 48966-2008-02-29.csv file to s3 bucket

Performing OCR for 48966-2009-03-02.csv
Started job with id: 77a8cd472837ef220622eddd3a9984aaa890ef4ed20b568c296c3c1f5c6d3a37
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: b68d7fc4b29c42e0363dc5caff3219f3f5c18e1e356fdd57f7ec2eacc6123f90
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0      

-----------------------------------------------------
Saved 48966-2010-03-01.csv file to s3 bucket

Performing OCR for 48966-2010-03-02.csv
Started job with id: e35d58430a970e6d48b796e39b5c125e392feb6d074b79bc08eaa4cbfb0952fe
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3

Performing OCR for 48966-2011-03-01.csv
Started job with id: 3956e16044154545d42e80baa53b4bd04af966f70844c5985b2dadff98d14b15
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_P

-----------------------------------------------------
Saved 48966-2012-03-01.csv file to s3 bucket

Performing OCR for 48966-2013-03-04.csv
Started job with id: d65c2f502eafce411d5fa79a81d8ed4f5ae18a244ead785da6a2e1cf4eed0337
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: 2ae19494db85a56d8a105df903ae1b2aecc581fa75968167e2f25a5c546aa8f4
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                           Cash and cash equivalents  

-----------------------------------------------------
Saved 48966-2014-03-04.csv file to s3 bucket

Performing OCR for 48966-2015-02-27.csv
Started job with id: e2703ba5805af43070cd4247561ae55802620af69d5eee9ff88ec64422638d1e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: f437501f00906c867b2e42f086c7305ea6b976c77a9f3a0855150869f72a5da6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                     

-----------------------------------------------------
Saved 48966-2016-02-29.csv file to s3 bucket

Performing OCR for 48966-2017-02-28.csv
Started job with id: ed12984ed9cffca4cd08e4e1b236275fec3bdc83e3b57d3a3ab0873ce3cb5510
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: df8cea5f955188ac8f1fe29df99ee64eb315a05ca6d0e81234648a203d6ebde2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                                              Assets           
1                           Cash an

-----------------------------------------------------
Saved 48966-2018-03-01.csv file to s3 bucket

Performing OCR for 48966-2018-03-06.csv
Started job with id: d8e4b0d41c2f3865df7be0e9ecd2397612e3235b6ad0c00ee8b8282e596cf29d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: 0d5a2fc91bd59ea6a9653e32f027b7d6ca6fd4e088ce199d564dfe9518a19edd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                           

-----------------------------------------------------
Saved 48966-2019-03-01.csv file to s3 bucket

Performing OCR for 48966-2020-02-28.csv
Started job with id: 3888ad429a65e65a1af7297a22719b8f061526ea3fa4f29143a3fbfa51074b4d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: 134be7760383b33691fb532e6045ccd74c92c35deae1a0e55b092af956aab0f9
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                                              Assets           
1          

-----------------------------------------------------
Saved 48966-2021-02-26.csv file to s3 bucket

Performing OCR for 58056-2002-03-01.csv
58056-2002-03-01.csv has been downloaded

Performing OCR for 58056-2003-03-03.csv
58056-2003-03-03.csv has been downloaded

Performing OCR for 58056-2004-02-27.csv
58056-2004-02-27.csv has been downloaded

Performing OCR for 58056-2004-03-26.csv
58056-2004-03-26.csv has been downloaded

Performing OCR for 58056-2005-02-25.csv
58056-2005-02-25.csv has been downloaded

Performing OCR for 58056-2006-03-24.csv
58056-2006-03-24.csv has been downloaded

Performing OCR for 58056-2007-03-01.csv
58056-2007-03-01.csv has been downloaded

Performing OCR for 58056-2008-02-29.csv
58056-2008-02-29.csv has been downloaded

Performing OCR for 58056-2009-03-16.csv
58056-2009-03-16.csv has been downloaded

Performing OCR for 58056-2010-06-04.csv
58056-2010-06-04.csv has been downloaded

Performing OCR for 58056-2012-02-29.csv
58056-2012-02-29.csv has been downloaded

-----------------------------------------------------
Saved 703004-2003-02-28.csv file to s3 bucket

Performing OCR for 703004-2004-02-27.csv
Started job with id: 5404cd12e58e55fcc6820cc29e74c96422ec2b2a2343ce33508ea7886d4d0ab1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [7]

Started job with id: f9f6d88c0bbb5bfa6c0e6209c8d69d847f90200b32cc015e10f83bee9ae3f620
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0           Cash segregated under federal regulations

-----------------------------------------------------
Saved 703004-2005-03-01.csv file to s3 bucket

Performing OCR for 703004-2006-03-02.csv
Started job with id: c073a62f1f981a2d78bb71220607e37e2800230db36bee8c6d744eb9d8030efe
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Res

-----------------------------------------------------
Saved 703004-2007-03-01.csv file to s3 bucket

Performing OCR for 703004-2008-02-29.csv
Started job with id: a081827281dff7fd6cae6692d49c220106c84919e229a5867423b39dfc90dd0c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: 6827f58aac16ade28ad989b1caa93d827b3684384b8d0b1ab8cbe01ea91fac49
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash      $ 30,472
1   Cash segregated in complia

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [4]

Started job with id: 81b6bfb2403d46d0851b58e7b76b016fdb85bf71e80201a2dbc1771ded07e409
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash      $ 11,416
1     Securities purchased under agreements to resell    22,705,542
2                                 Securities borrowed    17,441,704
3                  Securities

-----------------------------------------------------
Saved 753835-2003-03-04.csv file to s3 bucket

Performing OCR for 753835-2004-03-01.csv
Started job with id: f02ce7d0f6fd1f620c96d2fda9fa12f1ee315cb53e3913f8a287d18ae00c8c4b
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5

Page number(s) for extraction in PNG are [4]

Started job with id: f1e6fb2c4022b08dfe902a65003b9c84541e6a14bb981e4b2160d051eeee278e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash           $ 3
1   Cash and securities purchased under agreements...    

{'BNP Paribas Securities Corp.': 99.55541229248047, '2': 99.70420837402344, '(An indirectiy wholly owned subsidiary of BNP PARIBAS)': 98.00608825683594, 'Statement of Financial Condition': 99.86306762695312, 'December 31, 2004': 99.07044219970703, '(in thousands)': 99.49676513671875, 'Assets': 99.9417495727539, 'Cash and securities purchased under agreements to resell': 99.97238159179688, 'segregated under Federal and other regulations': 99.9727554321289, '$': 99.86287689208984, '21,302': 99.92894744873047, 'Securities purchased under agreements to resell': 99.97116088867188, '35,875,394': 99.82748413085938, 'Securities borrowed': 99.94378662109375, '38,054,385': 99.66825866699219, 'Securities owned - at market value': 99.35249328613281, '(including securities owned, pledged to counterparties of $ 7,178,755)': 98.02511596679688, '13,319,219': 99.84170532226562, 'Securities failed to deliver': 99.9756088256836, '2,555,602': 99.89102172851562, 'Receivable from brokers, dealers, and clear

-----------------------------------------------------
Saved 753835-2006-03-10.csv file to s3 bucket

Performing OCR for 753835-2007-03-01.csv
Started job with id: d45cc86b5743037846ecc4dd5ee1a48f78495168a03501a58d8c0fd46ce615d5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: a4126fb013e002072b734d7ccee5698c41b09f65477f1271c02a7e68df64777e
Job status: IN_PROGRESS
Job status: IN_PROGR

Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                                Cash      $ 420,442
1   Cash and securities segregated under federal a...        511,066
2     Securities purchased under agreements to resell     28,979,166
3                                 Securities borrowed     95,037,760
4   Securities owned - at fair value (including se...     57,306,386
5   Receivable from brokers, dealers, and clearing...      4,029,549
6                        Securities failed to deliver      2,574,243
7                           Receivable from customers        111,732
8           Exchange memberships (fair value $15,927)            441
9                                        Other assets        872,351
10                                       Total assets  $ 189,843,136
11               Liabilities and stockholder's equity               
12    

-----------------------------------------------------
Saved 753835-2009-03-02.csv file to s3 bucket

Performing OCR for 753835-2010-03-01.csv
Started job with id: 26d088facc960ef01effd42aada9f4e55ab8184619aad6cbb578cf3d7c3b592f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: ddf810d3bf5c1ec5bb3abf9ed06dbebf0270a1b0cac2d54a3ace7a3fbba00366
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGR

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: d98ff8a90966ea322d200db7c4190a8cb2c78f785d313f111fe9b0f50ec0fe0a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                                Cash       $ 13,637
1   Cash deposited with clearing organizations or ...        375,888
2     Securities purchased under agreements to resell     63,934,266
3                                 Securities borrowed     33,479,982
4                    Securities owned - at fair value              

-----------------------------------------------------
Saved 753835-2012-02-29.csv file to s3 bucket

Performing OCR for 753835-2013-03-01.csv
Started job with id: 6f4b0bb40c3a27a5def62876969171b02b60145c1933f396501fd2bb075b53bc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: 1592f0ea33a62f0b78943bd0625019429e50b38a5f0e16b8ac0f87720615e627
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              

-----------------------------------------------------
Saved 753835-2013-03-01.csv file to s3 bucket

Performing OCR for 753835-2014-03-04.csv
Started job with id: d88471831ee4bf087b1790d11b72374df036845f70f63fc63c0393154fdbee6a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 7529fd5145ba4bac28b6b71392286433f8f2377c502537d321bd09e5418f886f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGR

-----------------------------------------------------
Saved 753835-2014-03-04.csv file to s3 bucket

Performing OCR for 753835-2015-03-02.csv
Started job with id: 368692d99539579dfb4c9310aa09569221983b7d8b663c05097fa33ec31a3e1f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: e0da214b40ff5e1c68133ca9109e528685014267219e94896df92b6565f63081
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                                Cash       $ 37,785
1   Cash deposited with c

Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 5eedcdd5b2824049783a5e306d34289981fc3deef8c72517354c58cdc6d329f2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash      $ 31,090
1   Cash deposited with clearing organizations or ...       564,708
2     Securities purchased under agreements to resell    49,513,123
3                    Securities owned - at fair value              
4   (including securities owned, pledged to counte...    16,028,229
5                                 Securities borrowed     3,807,301
6   Receivable from brokers, dealers, and clearing...     1,690,427
7   Securities received as collateral - at 

-----------------------------------------------------
Saved 753835-2017-03-01.csv file to s3 bucket

Performing OCR for 753835-2018-03-14.csv
Started job with id: 00c222043abe7593245c352d0a5c3268ce0d51cf90024d60b7c4b2127f396dc8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: cab6f7994947db9f17407eef2e719f85883fa683591f7d6fa3eea28150b96af3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0           

-----------------------------------------------------
Saved 753835-2019-03-01.csv file to s3 bucket

Performing OCR for 753835-2020-02-26.csv
Started job with id: 8cc0b38f1b47cf31add0fe0e11abb2d86c19ffe12ee47202443c634f658d68f9
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: 24efd09b7ac1d1b6ddc502a15ee22f34c6479cbf800725372c2f714598159e76
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 411,008
1                   Cash se

Started job with id: 7a6dc64d6444a21c40b05a49387e269ea0516eb86d237ea20b5b439d27c95d73
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Performing OCR for 754542-2002-03-08.csv
Started job with id: 4387224cec49a98049ba06b9552a124aafc35517a522ec3a96c740d44e9e36fd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [3]

St

-----------------------------------------------------
Saved 754542-2003-03-03.csv file to s3 bucket

Performing OCR for 754542-2004-03-01.csv
Started job with id: aaa7c8c3418ae2582dd5fe33b76c25ae404bfb5f387447426fae3652baad8cc6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 14eb85bf0d6dbccfe435039b81b4393fabb96f2b900c3b32f849fa53f8febb09
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0   in thousands of dol

Textract-PNG dataframe
                                                    0             1
0                           Cash and cash equivalents     $ 123,201
1             Cash segregated for regulatory purposes       354,563
2   Receivables from brokers, dealers, and clearin...     1,037,163
3                   Securities received as collateral     1,493,066
4                          Receivables from customers        96,036
5     Securities purchased under agreements to resell     1,553,541
6                                 Securities borrowed    18,699,187
7                                    Securities owned         4,590
8              Securities owned, pledged to creditors     3,278,341
9                                        Other assets       383,584
10                                       Total assets  $ 27,023,272
11               Liabilities and stockholders' equity              
12                                       Liabilities:              
13                       

-----------------------------------------------------
Saved 754542-2006-03-01.csv file to s3 bucket

Performing OCR for 754542-2007-03-01.csv
Started job with id: c0d6c0966981bcca7c63d69c314276f8529a7a9cea961757804175b7cbd71de6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [4]

Started job with id: da9e0b2c5f7abeb37a411e6998787b62a3f7ef0ee126eb1345979b13708ed970
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                              

-----------------------------------------------------
Saved 754542-2008-02-29.csv file to s3 bucket

Performing OCR for 754542-2009-03-02.csv
Started job with id: 7ff3f4587a90fd868aa30419813cb40baea5ea00f016d0e33acee2c513e24019
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Performing OCR for 782124-2002-01-29.csv
782124-2002-01-29.csv has been downloaded

Performing OCR for 782124-2003-01-30.csv
782124-2003-01-30.csv has been downloaded

Performing OCR for 782124-2004-01-30.csv
782124-2004-01-30.csv has been downloaded

Performing OCR for 782124-2004-05-07.csv
782124-2004-05-07.csv has been downloaded

Performing OCR for 782124-2005-01-31.csv
782124-2005-01-31.csv has been downloaded

Performing OCR for 782124-2006-01-30.csv
782124-2006-01-30.csv has been downloaded

Performing OCR for 782124-2007-01-29.csv
782124-2007-01-29.csv has been downloaded

Performing OCR for 782124-2008-01-31.csv
782124-2008-01-31.csv

-----------------------------------------------------
Saved 806135-2002-03-01.csv file to s3 bucket

Performing OCR for 806135-2004-03-01.csv
Started job with id: e1cffd2bcbdd063f1d34dba03e2c82e44bb3673688b20025ed6ac8524302ec7f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: c4a4b86d0cc0ba925548558e5bbaa35dd93363bbc61a7ea022a0ddf2f5208db8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                     

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: e37a4b6e05141429bedceaa11cdfcc7e37320a3c613174a8c232f3681ec2812d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                                Cash      $ 537,027
1   Receivable from brokers, dealers and clearing ...      2,557,374
2                           Receivable from customers      5,611,948
3                         Receivable from noncustomer        174,840
4        Securities purchased under resale agreements    119,718,511
5   (included $1,068 mill

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: b8b23a386031f9711f59b25765508bc53037c7feb73bb629fae6f89f99f23f6e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents      $ 52
1   Cash and securities

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 144
1        Cash and securities segregated under federal          
2                               and other regulations        73
3   Receivables from brokers, dealers and other in...     3,285
4   Securities purchased under agreements to resel...    39,383
5                                      Trading assets          
6   Securities and other financial instruments own...    19,109
7                                Derivative contracts       171
8                         Accrued interest receivable       169
9                                        Other assets        33
10                                       Total Assets  $ 62,367
11               LIABILITIES AND STOCKHOLDER'S EQUITY          
12                     

-----------------------------------------------------
Saved 808379-2004-03-01.csv file to s3 bucket

Performing OCR for 808379-2005-02-28.csv
Started job with id: 0a8db7104e5e98777fc4669b75cf5de6bde2ed550facb8d150e3ee7dda4b1097
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [3]

Started job with id: 8461f242bb4602117b2584f9563a0e70751256abe505b30bfa1dd0adfa8f7de3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equiva

Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [3]

Started job with id: 8f31d05a3b7cac915d8978ba4259813783912051a7c5b437a91ac05ab808c5c6
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents       $ 4
1   Cash and securities segregated under federal a...       295
2   Receivables from brokers, dealers and other in...     3,407
3   Securities purchased under agreements to resel...    50,135
4                                      Trading assets          
5   Securities and other financial instruments own...    36,447
6                                Derivative contracts       111
7                         Accrued interest receivable       350
8                                        Oth

Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 32dee82c83c285ddbc3342eef42aeb11662bf9744c8cd9e29e7a55fc1ccbaf68
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0          1
0                           Cash and cash equivalents       $ 26
1   Cash and securities segregated under federal a...        482
2   Receivables from brokers, dealers and other in...      2,489
3   Securities purchased under agreements to resel...     64,586
4   Financial instruments owned, at fair value ($6...     64,773
5                         Accrued interest receivable        944
6                                        Other assets        222
7                                        Total Assets  $ 133,522
8                LIABILITI

-----------------------------------------------------
Saved 808379-2010-03-01.csv file to s3 bucket

Performing OCR for 808379-2011-06-14.csv
Started job with id: 56251faf97d50cacd41d41e3977b14d0d5eb629a41b92abc8ada2ea9ee1f937c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [3]

Started job with id: 799de2dc10f774bfbd0554c9397f8a9216bd26cced56d22bcbb760835dbfc603
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PR

-----------------------------------------------------
Saved 808379-2012-03-05.csv file to s3 bucket

Performing OCR for 808379-2013-03-01.csv
Started job with id: 5f2ba2e439e28678dc8a8732d66cd9be55783e96e48675a1e7f62dffd7497af7
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [3]

Started job with id: 17ee289bb512c5ede76fc15fb9c5ea89a647f4b3ab8f0eda3f566cd837b26093
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [4]

Started job with id: 147e2e03e46486b92b89fb3cb9325a989176ff831fa16795cd78b5a358219a9a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0       1
0                                                          $m
1                                              ASSETS        
2                           Cash and cash equivalents     194
3   Cash and securities segregated under federal a...     817
4   Receivables from brokers, dealers and other in...     771
5                          Receivables from customers      95
6   Securities purchased u

-----------------------------------------------------
Saved 808379-2015-03-02.csv file to s3 bucket

Performing OCR for 808379-2016-02-29.csv
Started job with id: eb0052e2ee88cc4406d664a958c2a24513100ad38edb5578e8b047af3c400ae0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [6]

Started job with id: e8c7f9d10e627c3be8bcafe0b9eeffbec00b42a902b929460ff72f9f15b845f3
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGR

-----------------------------------------------------
Saved 808379-2017-03-01.csv file to s3 bucket

Performing OCR for 808379-2018-03-01.csv
Started job with id: cefea40d243f038fc5a210f1b4dfdb5761f88192490583455d41014e4bacbddf
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [12]

Started job with id: 5e8c84cfd85c5af1b4d1bf1edbfd91639d6f01973f9a688f2e52658cd377a479
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page reciev

-----------------------------------------------------
Saved 808379-2019-02-28.csv file to s3 bucket

Performing OCR for 808379-2020-02-20.csv
Started job with id: 9c7a48333c106e010dee378a839494613fe7713f0d02d678a200acc62f8e4e40
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8
Resultset page recieved: 9
Resultset page recieved: 10
Resultset page recieved: 11
Resultset page recieved: 12
Resultset page recieved: 13

Page number(s) for extraction in PNG are [4]

Started job with id: 5c7a47dd5edae35cdd261d4c3b3a6611d34f61e39f32bbe240e775ae51b67d3a
Job status: IN_PROGRESS
Job status: IN_PROGRES

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 9953ed4478d648f9028951942a16eeba1e79df757735310cf72b18fa7ee0cb3d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0            1
0                                                Cash    $ 107,081
1   Cash and securities segregated under federal r...    2,659,000
2     Securities purchased under agreements to resell    3,280,166
3                                    Securities own

Started job with id: bce97c0fb3e0f283d85cc57e0004693a5f00eca44def907384c1bb2f139e4025
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: da7d3f1fb8de371436d573b6699726c2d88dd0580f111a4a628cf7399010e25c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRE

-----------------------------------------------------
Saved 860220-2003-03-03.csv file to s3 bucket

Performing OCR for 860220-2004-03-01.csv
Started job with id: 6d25b5a271de7f37e81e34c8d1e8d65f48e1e96b703d3e83041d2e4c8d925c6d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [6]

Started job with id: a1817aff3fed7b82d4a6beb5325dc7c2496fe85449d06938727a41f78051b716
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                   

-----------------------------------------------------
Saved 860220-2004-03-01.csv file to s3 bucket

Performing OCR for 860220-2005-03-01.csv
Started job with id: 7b741d65838e83d605795477b5ff51a36065b35fcca43c7e5d2430b3e522b227
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: e8484004debf29e1cdd605e445dd076a78d9072ccf5f1b382885e4b3b11f0c83
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 105,207
1   Ca

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: 3a5ec3f42c00f2f95e84e620b80f72f7c1dd48296b525da894e035f8fcf3ae0e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 361,760
1   Cash and securities segregated unde

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5]

Started job with id: 3ce5e8b0775a8ca0ea2ebfe1566447ee1f80874787188c9cc2d625f69b05af99
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              Assets              
1                                                Cash     $ 329,003
2   Cash and securities segregated under federal r...     5,749,724
3     Securities purchased under agreements to 

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [4]

Started job with id: ae4abc848b646b190aee241dd603f17bb28485bb1f15b4835e68fd0355c1fbb5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                              Assets              
1                                                Cash     $ 475,657
2   Cash and securities segregated under federal r...     5,651,240
3     Securities purchased under agreements to resell    90,973,137
4   Securities borrowed and

Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [4]

Started job with id: 2c753b9ac2190a397d13f5aa5b9063e36674b0edc159db6d540940b0a6a0781f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                                                Cash     $ 410,558
1   Cash and securities segregated under federal r...     2,235,398
2     Securities purchased under agreements to resell    95,857,621
3   Securities borrowed and securities received as...    40,343,937
4   Securities owned, at fair value (includes $16,...    58,086,803
5         Receivable from brokers, dealers and others     6,024,213
6                           Receivable from customers     2,417,399
7            

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0         1
0                           Cash and cash equivalents     $ 143
1   Cash and securities segregated under federal r...       691
2   Securities purchased under agreements to resel...    85,326
3   Securities borrowed and securities received as...    42,902
4   Securities owned, at fair value (includes $11,...    54,348
5         Receivable from brokers, dealers and others     7,324
6                           Receivable from customers     2,744
7                         Accrued interest receivable       580
8                  Investment banking fees receivable       127
9                                            Goodwill       985
10                                       Other assets       175
11                                       Total assets  $195,345
12                    Liabilities and Member's 

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 867626-2011-03-01.csv
867626-2011-03-01.csv has been downloaded

Performing OCR for 867626-2012-02-29.csv
867626-2012-02-29.csv has been downloaded

Performing OCR for 867626-2013-02-28.csv
Started job with id: e8f018e97a843b08ce278bda2ed6ef69140e5a64429e3e89389457cf2d860c8e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: I

-----------------------------------------------------
Saved 874362-2002-01-29.csv file to s3 bucket

Performing OCR for 874362-2003-01-30.csv
Started job with id: 35a17c9445c1256333221dfcef9b09a47b37fe29759a0edae259cf349a4aba06
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: b99c04894a7289e7cb593fb2753e89b1007ce9333b51942861dd8cbdda24d606
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents   $ 4,864,897
1  

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: c6bb12ceb8b744237067de25ea109cfa968c60ccd2d3190de288b3196747f703
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0             1
0                           Cash and cash equivalents   $ 2,393,129
1   Cash and securities deposited with clearing or...              
2              in compliance with federal regulations     8,532,447
3     Securities purchased under agreements to resell     5,175,620
4                                 Securities borrow

-----------------------------------------------------
Saved 874362-2004-01-30.csv file to s3 bucket

Performing OCR for 874362-2005-01-31.csv
Started job with id: 9cda8e8dcee90168fc4bc46800edae0b2136162534b7a7a88195939fd58a6b2a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: d6bc2532a1d91abfa2f021391f723388c3930b83ce896ee0f6e7ceb9a6b0884a
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                     

-----------------------------------------------------
Saved 874362-2005-01-31.csv file to s3 bucket

Performing OCR for 874362-2006-01-30.csv
Started job with id: 1a829b3bc257354761bfb93c69a1b314f8c40f84f8fdacf7a4d7d0c07cc70407
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [3]

Started job with id: 4a300d987112945be5db4d7a72a239cc62a14ad297864be733a9e2356215e7ed
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                     

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [2]

Started job with id: b2111de148cbd2308908046585c2ca133783cb1a1fa9a2c7a5439e76e04556d7
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Tex

-----------------------------------------------------
Saved 874362-2007-01-29.csv file to s3 bucket

Performing OCR for 874362-2008-01-31.csv
Started job with id: 05c5e608ced5776bb95c357f2d6258c31b049b1631e6b93894e827d3370e4972
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [4]

Started job with id: 8e19a8e212e219eb508a39a718d008cbc9e1c5d2b6fdacce73da843fce7139df
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                  

Started job with id: 220282d73bd646905f04b9d4af84f6dbe24896d8e3321f71455cd5e20f21dfd5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [5]

Started job with id: 535ac0b633264f9b8463c1e941a1f49103a03abf38386511aa244e55750caf01
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                        

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                                Cash      $ 488,656
1   Cash and securities segregated under federal a...      1,539,875
2                                 Securities borrowed     76,236,512
3                   Securities received as collateral      7,766,915
4                                        Receivables:               
5                                           Customers     19,005,709
6   Brokers, dealers, clearing organizations and o...      3,639,428
7                              Interest and dividends         58,122
8                                            Goodwill        496,500
9                                        Other assets          2,465
10                                       Total assets  $ 109,234,182
11 

{'J.P. Morgan Securities LLC and Subsidiaries': 99.7786636352539, '(An Indirect subsidiary of JPMorgan Chase & Co.)': 95.0886459350586, 'Consolidated Statement of Financial Condition': 99.91539764404297, '(Dollars in millions)': 99.83312225341797, 'Year ended December 31, 2010': 99.70030975341797, 'Assets': 99.89960479736328, 'Cash': 99.93684387207031, '$': 99.73211669921875, '555': 98.07274627685547, 'Cash and securities segregated under federal and other regulations': 99.87763977050781, '1,583': 95.39354705810547, 'Securities purchased under resale agreements': 99.87109375, '129,951': 99.78358459472656, '(included $423 at fair value at December 31, 2010)': 99.73541259765625, 'Securities borrowed': 99.8533935546875, '78,954': 99.80045318603516, 'Securities received as collateral': 99.88258361816406, '5,379': 99.87892150878906, 'Receivable from brokers, dealers and clearing organizations': 98.81902313232422, '5,712': 99.9258804321289, 'Receivable from customers': 99.85543823242188, '32

-----------------------------------------------------
Saved 874362-2012-02-29.csv file to s3 bucket

Performing OCR for 874362-2013-03-01.csv
Started job with id: a1b224a023e10473dde6f7c1c038056595ea4a08e3703444d5655db4958f9097
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [6]

Started job with id: c61f16a2777bf410ddbe0e0eec9f4bcf5835001f3582fb2d3375b0ef20efe3ab
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                              Assets               
1   

-----------------------------------------------------
Saved 874362-2014-03-05.csv file to s3 bucket

Performing OCR for 874362-2015-02-27.csv
Started job with id: 0c7d0379d1f67c7ed813992058c8ffffca570f0ee61116215a139cfd83f8ea75
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [5]

Started job with id: 4d65327e569b45ae08adf3b427f876f4d61ec15713dcf4b11fb4c734439edd06
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PR

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8
Resultset page recieved: 9
Resultset page recieved: 10

Page number(s) for extraction in PNG are [1]

Started job with id: 260679252ae0878fab5d723eff8abc35da6b3ce5ffb4d7f3a65bfb7ee4056d74
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                                                    0              1
0                                              Assets               
1                                                Cash      $ 349,147
2   Cash and securities segregated under federal a... 

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Performing OCR for 87634-2005-02-28.csv
87634-2005-02-28.csv has been downloaded

Performing OCR for 87634-2006-02-28.csv
87634-2006-02-28.csv has been downloaded

Performing OCR for 87634-2006-09-21.csv
87634-2006-09-21.csv has been downloaded

Performing OCR for 87634-2007-03-01.csv
87634-2007-03-01.csv has been downloaded

Performing OCR for 87634-2008-03-03.csv
87634-2008-03-03.csv has been downloaded

Performing OCR for 87634-2009-02-27.csv
87634-2009-02-27.csv has been downloaded

Performing OCR for 87634-2010-03-01.csv
87634-2010-03-01.csv has been downloaded

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6

Page number(s) for extraction in PNG are [5, 6]

Started job with id: 524a0cd6b2e8fa8300afa5d83d9079225c93e421f8253c4f78908253c679fbf0
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Started job with id: 077fbecd16e2fc82e0f2cea7914aba34e0620e631013073c770a9146efa36e89
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1

Textract-PDF dataframe
                

-----------------------------------------------------
Saved 89562-2002-01-29.csv file to s3 bucket

Performing OCR for 89562-2003-01-29.csv
Started job with id: 20382f817539e7f6ce40f102707ad8d476d1ddf496f26d4bfe8b6e0b7f05eef7
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7

Page number(s) for extraction in PNG are [5, 6]

Started job with id: d37ac3f89453a011aa6fd56349ab3b26ef567a5b97d7d040193c4eece78bb53e
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Started job with id: 6923267d33bc77c06369f

-----------------------------------------------------
Saved 89562-2003-01-29.csv file to s3 bucket

Performing OCR for 89562-2004-01-29.csv
Started job with id: 9ba2ca7e7342b4ef240df57a785f31ec84bff575a3a287a279c6c1b3b0d70dfb
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Page number(s) for extraction in PNG are [5, 6]

Started job with id: 55e8b10bbbe1ca54edd9a5ae0f088b75b193edbedec9a8453cf9d58c014ecf8f
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Started job with id: 6fa4aa482a9f84e44e41fe22ea914627db1ef4360778e5c1267adab43977a274
J

-----------------------------------------------------
Saved 89562-2004-01-29.csv file to s3 bucket

Performing OCR for 89562-2005-01-31.csv
Started job with id: afb562a91a98d6aca0e54efbf380d576fd6dd69fef12ca4064d6f2ef411da2d8
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
Resultset page recieved: 5
Resultset page recieved: 6
Resultset page recieved: 7
Resultset page recieved: 8

Performing OCR for 89562-2006-01-30.csv
Started job with id: 863b3723c9b1b35f83ae5745721094cef5156a3e27319c18a37e87c2293d5f80
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Res

In [18]:
# # Single-document run for testing and debugging Textract results, e.g. 853784-2002-03-01 (uncomment the call below to use)
# textractParse('Input/X-17A-5-PDF-SUBSETS/1146184-2004-03-01-subset.pdf', 
#               'Input/X-17A-5-PNG-SUBSETS/1146184-2004-03-01/', 'ran-s3-systemic-risk')