In [1]:
%%bash
pip install --upgrade pip
pip install smart_open minecart
pip install textract-trp
pip install jupyterthemes

Collecting pip
  Using cached pip-20.2.4-py2.py3-none-any.whl (1.5 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.0.2
    Uninstalling pip-20.0.2:
      Successfully uninstalled pip-20.0.2
Successfully installed pip-20.2.4
Collecting smart_open
  Downloading smart_open-3.0.0.tar.gz (113 kB)
Collecting minecart
  Downloading minecart-0.3.0-py3-none-any.whl (23 kB)
Collecting pdfminer3k
  Downloading pdfminer3k-1.3.4-py3-none-any.whl (100 kB)
Building wheels for collected packages: smart-open
  Building wheel for smart-open (setup.py): started
  Building wheel for smart-open (setup.py): finished with status 'done'
  Created wheel for smart-open: filename=smart_open-3.0.0-py3-none-any.whl size=107097 sha256=2d5ae760b8f1843440d6dcdce008e2e297297be8fbf9d557589e7e812426f3a3
  Stored in directory: /home/ec2-user/.cache/pip/wheels/88/2a/d4/f2e9023989d4d4b3574f268657cb6cd23994665a038803f547
Successfully built smart-open
Installing coll

In [2]:
# create a darker background template (better for my eyes)
from jupyterthemes.stylefx import set_nb_theme
set_nb_theme('chesterish')

In [3]:
import time 
import re
import os
import trp
import random 
import boto3
import minecart
import numpy as np
import pandas as pd

from smart_open import open
from sagemaker.session import Session
from io import BytesIO

%matplotlib inline

In [4]:
# initiate s3 bucket and corresponding data folder
# bucket = "ran-s3-systemic-risk"
# data_folder ="Input/X-17A-5/"

bucket = "ran-s3-systemic-risk"
data_folder ="FOCUS-OCR/SubsetTest/"

# balance_sheet_idx = 3  # the page of the report you care about

# Amazon Textract client and Sagemaker session
textract = boto3.client('textract')
s3 = boto3.client('s3')
session = Session()

# discover all of the pdfs that you want to parse
paths = np.array(session.list_s3_files(bucket, data_folder))

# AWS Asynchronous Textract Script (requesting Job)
**Content modified from Amazon AWS Textract repository (refer to [URL](https://github.com/aws-samples/amazon-textract-code-samples/blob/master/python/12-pdf-text.py) below)** 

In [5]:
def startJob(s3BucketName:str, objectName:str) -> str:
    """
    Starts a Textract job on AWS server 
    """
    response = None
    client = boto3.client('textract')
    
    # issue response to AWS to start Textract job for table analysis 
    response = client.start_document_analysis(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': objectName
            }
        },
        FeatureTypes=['TABLES']
    )
    
    # return response job ID for service
    return response["JobId"]

In [6]:
def isJobComplete(jobId:str) -> str:
    """
    Tracks the completion status of the Textract job when qued
    """
    time.sleep(5)
    client = boto3.client('textract')
    response = client.get_document_analysis(JobId=jobId)
    status = response["JobStatus"]
    print("Job status: {}".format(status))
    
    # check current status of AWS job (ask server every 5 seconds for data)
    while(status == "IN_PROGRESS"):
        time.sleep(5)                   # lag before reporting status
        response = client.get_document_analysis(JobId=jobId)
        status = response["JobStatus"]
        print("Job status: {}".format(status))
    
    return status

In [7]:
def getJobResults(jobId:str) -> list:
    """
    Returns the contents of the Textract job, after completion status met
    """
    pages = []          # initialize list object to track pages

    client = boto3.client('textract')
    response = client.get_document_analysis(JobId=jobId)
    
    pages.append(response)
    print("Resultset page recieved: {}".format(len(pages)))
    
    nextToken = None
    if('NextToken' in response):
        nextToken = response['NextToken']
    
    # iterate through the pages and append to response figure
    while(nextToken):
        response = client.get_document_analysis(JobId=jobId, NextToken=nextToken)
        pages.append(response)
        print("Resultset page recieved: {}".format(len(pages)))
        nextToken = None
        if('NextToken' in response):
            nextToken = response['NextToken']

    return pages

In [8]:
def runJob(bucket:str, key:str):
    """
    Function designed to call an AWS Textract 
    """
    # S3 storage for files on AWS site   
    jobId = startJob(bucket, key)   # intialize Textract job 
    print("Started job with id: {}".format(jobId))

    # if job is complete from AWS return response object 
    if(isJobComplete(jobId)):
        response = getJobResults(jobId)
        
    return response

# OCR Wrapper Functions
**The scripts perform an OCR job from AWS Textract, converting tabular data into dataframes**

In [9]:
def trp2df(table:trp.Table) -> pd.DataFrame:
    """
    Function designed to convert a trp table into a dataframe
    :param table: a trp table object parsed from a pdf  
    :return: a DataFrame object housing a textracted trp table
    
    Complexity -> O(n^2)
    """
    N = len(table.rows)               # number of rows in table
    M = len(table.rows[0].cells)      # number of columns in table
    arr = [0]*N
    
    # itterate through each row within the provided table
    for row in range(N):
        
        # strip the text from the cell references to construct (N X M) matrix
        arr[row] = [table.rows[row].cells[col].text.strip() for col in range(M)]
        
    return pd.DataFrame(arr)

In [10]:
def readPDF(response:list) -> pd.DataFrame:
    """
    Function to transform AWS Textract object to a pdf
    :param response: AWS Textract response object
    """
    # in the event multiple tables detected on one page (concat them)
    catDF = []
    
    # format the Textract response type 
    doc = trp.Document(response)
    
    # iterate through document pages
    for page in doc.pages:
        # itterate through page tables
        for table in page.tables: 
            # convert trp-table into dataframe object
            df = trp2df(table)
            
            # remove columns that are completly empty (column 0 = line items)
            empty_cols = [col for col in df.columns if (df[col] == '').all()]
            df = df.drop(empty_cols, axis=1)
            
            # make sure dataframe is not empty  
            if df.values.size > 0:
                colIndex = df.columns[0]

                # check for the word "cash" in a string at the begining, ignoring case sensitivity
                assetCheck = df[colIndex].str.contains('^Cash', regex=True, flags=re.IGNORECASE)

                # check for the word "Liabilities" in a string at the end, ignoring case sensitivity
                debtCheck1 = df[colIndex].str.contains('Liabilities$|^Liabilities', 
                                                      regex=True, flags=re.IGNORECASE)
                debtCheck2 = df[colIndex].str.contains('Liability$|^Liability', 
                                                      regex=True, flags=re.IGNORECASE)

                # check if the key words have been found 
                check1 = df[assetCheck | debtCheck1 | debtCheck2].empty
                check2 = debtCheck1[debtCheck1 == True].empty
                check3 = debtCheck2[debtCheck2 == True].empty

                # if figure matched exactly, we assume this is balance sheet 
                if not check1:
                    catDF.append(df)

                    if check2 == False or check3 == False:  
                        return pd.concat(catDF)
                

## Extract Balance Sheet information

In [11]:
subsetFolder = np.array(session.list_s3_files(bucket, 'Input/SubSets/'))[1:]

In [12]:
subsetFolder

array(['Input/SubSets/1000146-02_subset.pdf',
       'Input/SubSets/1000146-03_subset.pdf',
       'Input/SubSets/1000146-04_subset.pdf', ...,
       'Input/SubSets/99947-17_subset.pdf',
       'Input/SubSets/99947-18_subset.pdf',
       'Input/SubSets/99947-19_subset.pdf'], dtype='<U35')

In [13]:
print('Total files needed to be converted {}'.format(subsetFolder.size))

Total files needed to be converted 83129


In [None]:
# script to perform OCR (using Textract) for X-17A-5 subsets
outFolder = 'Output/BalanceSheet/'
csvDirectory = np.array(session.list_s3_files(bucket, outFolder))
errorLog = []

# iterate through X-17A-5 subsets stored in s3 
for i, key in enumerate(subsetFolder[9800:20000]):
    # baseFile name (CIK)-{Year}
    baseFile = key.split('/')[-1].split('_')[0]
    fileName = baseFile+'.csv'
    print('\nFile {}'.format(i))
    # if file is not found in directory 
    if outFolder+fileName not in csvDirectory:

        # temporary data frame object for balance sheet information
        res = runJob("ran-s3-systemic-risk", key)
        
        if res[0]['JobStatus'] != 'FAILED':
            tempDF = readPDF(res)
            print(tempDF)
            # checks for type of return 
            if type(tempDF) == pd.DataFrame:
                # writing data frame to .csv file
                tempDF.to_csv(fileName)

                # the folder used to export to
                xFolder = 'Output/BalanceSheet/'+fileName

                # save contents to AWS S3 bucket
                with open(fileName, 'rb') as data:
                    s3.put_object(Bucket=bucket, Key=xFolder, Body=data)

                # remove local file after it has been created
                os.remove(fileName)
                print('\tSaved {} file to s3 bucket'.format(baseFile+'.csv'))
            else:
                print('No Balance Sheet found in {}'.format(baseFile))
                errorLog.append(baseFile)
        else:
            print('{} could not be parsed'.format(baseFile))
            errorLog.append(baseFile)
    else:
        print('{} has been downloaded'.format(fileName))

print('==========================\nOCR is completed')


File 0
1072353-12.csv has been downloaded

File 1
1072353-13.csv has been downloaded

File 2
1072353-14.csv has been downloaded

File 3
1072353-15.csv has been downloaded

File 4
1072353-16.csv has been downloaded

File 5
1072353-17.csv has been downloaded

File 6
1072353-18.csv has been downloaded

File 7
107238-02.csv has been downloaded

File 8
107238-03.csv has been downloaded

File 9
107238-04.csv has been downloaded

File 10
107238-05.csv has been downloaded

File 11
107238-06.csv has been downloaded

File 12
107238-07.csv has been downloaded

File 13
Started job with id: 92e99e7efd9eec29fdf93523adb8d2703ae4a303c06bd1d5e945d29276bd2835
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
None
No Balance Sheet found in 107238-08

File 14
Started job with id: bcd04e7eee2720411ba8e6979c9e0bf4182ae

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
None
No Balance Sheet found in 1072624-13

File 114
1072624-14.csv has been downloaded

File 115
1072624-15.csv has been downloaded

File 116
1072624-16.csv has been downloaded

File 117
Started job with id: 33ea84fb10e30601a318542be066891df8690d00aed4bfff30d173a2bde5f075
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
None
No Balance Sheet found in 1072624-17

File 118
Started job with id: d48aac992997fd499d29c1bb979984ecdc8f05ae6c9055911494a75f339f6c54
Job s

Started job with id: 860fcb101ccbd66f4ec0735bbc8d71bd68cadddb8b80c13e1bde725cad694491
Job status: FAILED
Resultset page recieved: 1
1073993-03 could not be parsed

File 275
1073993-04.csv has been downloaded

File 276
1073993-05.csv has been downloaded

File 277
1073993-06.csv has been downloaded

File 278
1073993-07.csv has been downloaded

File 279
1073993-08.csv has been downloaded

File 280
1073993-09.csv has been downloaded

File 281
1073995-02.csv has been downloaded

File 282
1073996-02.csv has been downloaded

File 283
1073996-03.csv has been downloaded

File 284
1073996-04.csv has been downloaded

File 285
1073998-02.csv has been downloaded

File 286
1073998-03.csv has been downloaded

File 287
1073998-04.csv has been downloaded

File 288
1074060-02.csv has been downloaded

File 289
1074060-03.csv has been downloaded

File 290
1074060-04.csv has been downloaded

File 291
1074060-05.csv has been downloaded

File 292
1074060-06.csv has been downloaded

File 293
1074060-07.csv ha

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
None
No Balance Sheet found in 1075679-02

File 410
Started job with id: 5ea05d3246ea793d55e1e1fcb6c84d924e3672576aa6ba31f6ea0c295b46af5c
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
None
No Balance Sheet found in 1075679-03

File 411
1075679-04.csv has been downloaded

File 412
1075679-05.csv has been downloaded

File 413
1075679-06.csv has been downloaded

File 414
Started job with id: d3cc81979dd5f2684d2878115956c563031a3d9e2ca9bccb55137f792411fe79
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Jo

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
None
No Balance Sheet found in 1075922-09

File 489
1075922-10.csv has been downloaded

File 490
1075922-11.csv has been downloaded

File 491
1075922-12.csv has been downloaded

File 492
1075923-02.csv has been downloaded

File 493
1075923-03.csv has been downloaded

File 494
1075923-04.csv has been downloaded

File 495
1075923-05.csv has been downloaded

File 496
1075923-06.csv has been downloaded

File 497
1075923-07.csv has been downloaded

File 498
1075923-08.csv has been downloaded

File 499
1075923-09.csv has been downloaded

File 500
1075924-02.csv has been downloaded

Fi


File 658
1077125-03.csv has been downloaded

File 659
1077125-04.csv has been downloaded

File 660
1077127-02.csv has been downloaded

File 661
1077127-03.csv has been downloaded

File 662
1077127-04.csv has been downloaded

File 663
1077127-05.csv has been downloaded

File 664
1077127-06.csv has been downloaded

File 665
1077127-07.csv has been downloaded

File 666
1077127-08.csv has been downloaded

File 667
1077127-09.csv has been downloaded

File 668
1077127-10.csv has been downloaded

File 669
1077127-11.csv has been downloaded

File 670
1077127-12.csv has been downloaded

File 671
1077127-13.csv has been downloaded

File 672
1077127-14.csv has been downloaded

File 673
1077127-15.csv has been downloaded

File 674
1077127-16.csv has been downloaded

File 675
1077127-17.csv has been downloaded

File 676
1077128-02.csv has been downloaded

File 677
1077128-03.csv has been downloaded

File 678
1077128-04.csv has been downloaded

File 679
1077128-05.csv has been downloaded

File 680


Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
None
No Balance Sheet found in 1077687-04

File 776
1077687-05.csv has been downloaded

File 777
1077687-06.csv has been downloaded

File 778
1077687-07.csv has been downloaded

File 779
1077687-08.csv has been downloaded

File 780
1077687-09.csv has been downloaded

File 781
1077687-10.csv has been downloaded

File 782
1077687-11.csv has been downloaded

File 783
1077687-12.csv has been downloaded

File 784
1077687-13.csv has been downloaded

File 785
1077687-14.csv has been downloaded

File 786
1077687-15.csv has been downloaded

File 787
1077687-16.csv has been downloaded

File 788
1077687-17.csv has been downloaded

File 789
1077687-18.csv has been downloaded

File 790
1077687-19.csv has been downloaded

File 791
1078015-02.csv has been downloaded

File 792
1078015-03.csv has been downloaded

File 793
1078015-04.csv has been downloaded

File 794
1078015-05.csv has been downloaded

File 795
1078015-06.csv has b

Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                                                  0          1
0                         Cash and Cash Equivalents    $ 2,509
1                          Receivables From Brokers    195,118
2                                 Securities Owned:           
3                      Marketable - at market value     41,804
4  Not Readily Marketable - At Estimated Fair Value     25,709
5         Prepaid Expenses and Other Current Assets        250
6                                                             
7                                                      265,390
8                             Other Assets De

	Saved 1079506-07.csv file to s3 bucket

File 933
Started job with id: 0713bf30a5b572583907bbcea814aab806f3f0c7b97c1410b00d160b64f0b2cd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
                                                    3          4
0                                                Cash   $ 87,516
1                      Receivables from broker-dealer    487,820
2   Furniture and equipment (net of accumulated de...     86,887
3                                        Other assets     91,887
4                                                      $ 754,110
5                     LIABILITIES AND MEMBERS' EQUITY           
6                                         Liabilities           
7               Accounts payable and accrued expenses  $ 285,175
8                            

Started job with id: a668266153ed4532b3e48402da4bcb42d97c94f73eece08a03b2384b2ef3fbb1
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
                                                    0           1
0                                                Cash    $ 27,560
1                              Commissions receivable  11,442 502
2                           Other accounts receivable         845
3                                    Prepaid expenses            
4                                                        $ 40,349
5                Liabilities and Stockholder's Equity            
6                                         Liabilities            
7                                 Commissions payable     $ 6,632
8                                Income taxes payable         731
9                                   Total liabilities       7,363
10                               Stockholder's Equity            
1

Started job with id: ce8335536cf625c98b8439f8f471f7d7b51796a33f9d7803fe2ef8d9e02d95dd
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
                                                    0         1         2
0                                                          2012      2011
1                                              ASSETS                    
2                           Cash and cash equivalents  $ 12,412  $ 23,995
3                              Commissions receivable     8,507     7,743
4                             Refundable income taxes         -         -
5                              

	Saved 1079773-17.csv file to s3 bucket

File 950
Started job with id: f8cee0b96f7a6d99515832f29d29af1a3e09914f4258c313f22ab992f21e0985
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                                                    0         1
0                                      CURRENT ASSETS          
1                           Cash and cash equivalents  $ 25,505
2                              Commissions receivable     5,688
3                                    Prepaid expenses       390
4                                                        31,583
5                 LIABILITIES AND STOCKHOLDERS EQUITY          
6                           

	Saved 1080325-04.csv file to s3 bucket

File 954
Started job with id: 44439e8d80260b81916d0c962ca6e8e73b9d970d5162b0c50de09a4c07183111
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                                                    0          1
0                           Cash and cash equivalents  $ 263,623
1              Marketable securities, at market value    259,822
2   Accounts receivable, less allowance for doubtf...     89,354
3                                   Other receivables     10,183
4                 Receivable from officer/shareholder      3,500
5                                    Prepaid expenses     42,647
6                             Income taxes receivable     12,599
7                                      Deferred taxes      1,300
8                                Total current assets    683,028
9                    Property and equipmen

Started job with id: bd71818686cacfaf932580da787cb2c033762f7b0d1f9dfeb3fe55ed2aae22e5
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
                                                    0          1
0                                                               
1                           Cash and cash equivalents  $ 535,179
2                                     Due from broker    257,534
3   Equipment and organization expense - net of ac...           
4                                          of $12,926          -
5                                        Other assets      3,333
6                                        Total assets  $ 796,046
7                LIABILITIES AND STOCKHOLDERS' EQUITY           
8                                        Liabilities:           
9               A

Started job with id: bff232cf3fdde6d4c7cc248a04ff53b3789876a14d02746119a12be1ddff7ebc
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
                                                    0          1
0                                              ASSETS           
1                           Cash and cash equivalents      $ 112
2                              Commissions receivable     13,605
3                                 Deposit from 

	Saved 1080570-15.csv file to s3 bucket

File 969
Started job with id: 877ba3bf08036bddd84e0259581cbd2df9ad684ea7f282f197456b8033fdd949
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                                                    0          1
0                           Cash and cash equivalents   $ 31,741
1                                     Due from broker    350,000
2   Office equipment, net of accumulated depreciat...          -
3                                        Other assets      1,549
4                                        Total assets  $ 383,290
5                                                               
6                LIABILITIES AND STOCKHOLDERS' EQUITY           
7                                        Liabilities:           
8               Accounts payable and accrued expenses   $ 23,457
9                                   Total 

Started job with id: 2bed08e2bbe30580d0079a1ea7091558abfec6e10685290a4f9d449942fa3a5d
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                                                    0             1
0                                                Cash   $ 1,905,099
1   Receivable from clearing brokers, including cl...       537,655
2                Receivable from NeoNet Securities AB     4,736,225
3                      Receivable from broker-dealers     3,069,050
4                         Property and equipment, net        55,762
5                                        Other assets       156,274
6                                        Total assets  $ 10,460,065
7                Liabilities and Stockholder's Equity 

Started job with id: 5f45579904f79ceba4a9c95e91bb497593ad39b9c4264bba7133ced987edc626
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
Resultset page recieved: 4
                                                    0             1
0                                              Assets              
1                                                Cash   $ 1,060,025
2                     Securities owned, at fair value     5,232,849
3   Receivable from clearing brokers, including cl...       345,958
4           Receivable from NeoNet Securities AB, net     5,420,619
5                      Receivable from broker-dealers     2,308,

	Saved 1080572-12.csv file to s3 bucket

File 982
Started job with id: 1c10cbaf7cf4afed93dae64a532ddaa2cf04bdedc8a54da4a19f4ca551d3e1f2
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
                            0         1
0        CURRENT LIABILITIES:          
1         ACCRUED LIABILITIES     $ 135
2  DUE TO OWNER (Notes 1 & 2)     1,138
3                                 1,273
4       STOCKHOLDER'S EQUITY:          
5  ADDITIONAL PAID-IN CAPITAL    34,002
6       DEFICIT (Notes 1 & 2)  (19,010)
7  TOTAL STOCKHOLDER'S EQUITY    14,992
8                              $ 16,265
	Saved 1080574-02.csv file to s3 bucket

File 983
Started job with id: 1a9b13b1df79470e1e5c534ab710155527ce61c684a68d224e127aeda7c51ae4
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: 

Started job with id: cae78ed326bf6c6e5b83cbeb8f07c23cc0f0301f11e4ba6a1ce23397e8953a06
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: IN_PROGRESS
Job status: SUCCEEDED
Resultset page recieved: 1
Resultset page recieved: 2
Resultset page recieved: 3
                                        0          1
0                                  Assets           
1                                    Cash   $ 31,521
2                         Due from parent    162,819
3                        Prepaid expenses      6,313
4                            Total assets  $ 200,653
5         Liabilities and member's equity           
6                             Liabilities           
7                        Accrued expenses    $18,287
8                    Accrued income taxes      6,500
9                       Total liabilities     24,787
10                        Member's equity    175,866
11  Total liabilities and membe

In [None]:
np.array(errorLog)