In [1]:
%pip install fuzzywuzzy
%pip install python-Levenshtein

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0
Note: you may need to restart the kernel to use updated packages.
Collecting python-Levenshtein
  Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 3.2 MB/s eta 0:00:011
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25ldone
[?25h  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.2-cp36-cp36m-linux_x86_64.whl size=155932 sha256=af338f00ba765a62f9b6e87c8e7bb7aa0d37491075d6215ecc927f42ef62d643
  Stored in directory: /home/ec2-user/.cache/pip/wheels/4a/a4/bf/d761b0899395c75fa76d003d607b3869ee47f5035b8afc30a2
Successfully built python-Levenshtein
Installing collected packages: python-Levenshtein
Successfully installed python-Levenshtein-0.12.2
Note: you may need to restart the kernel 

In [2]:
import os
import re
import botocore
import boto3
import json
import time
import requests
import datetime

import pandas as pd
import numpy as np

from sagemaker.session import Session
from difflib import SequenceMatcher
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz

## Accounting term matching
**Check to see if we report totals (e.g. Total Liability & Shareholder Equity) or sub-totals (e.g. Total Financial Instruments), these figure are not needed for construction of the unstructured database (avoid classification issue)**

In [3]:
# numpy exception for handling invalid log10 RunTime error (we opt to not show)
# switch 'ignore' to 'warn', if you would like to flag the RunTime error 
np.seterr(invalid = 'ignore') 

def multiple_check(x1:float, x2:float):
    """
    Determine whether the two values are the same number scaled by 10
    """
    # prevent zero division error since x1 is the denominator and log10 zero division error
    if (x1 == 0) or (x2 == 0): return (x1, False)
    else:
        # if our backward sum is a multiple of 10, we return True 
        # (e.g. Total Assets (x1) 745.2322 vs Backward Sum (x2) 7452322)
        check1 = np.log10(x2 / x1).is_integer()

        # if our backward sum is a substring of a line item, with a difference of one in length, we return True 
        # (e.g. Total Assets (x1) 174182935 vs Backward Sum (x2) 74182935)
        check2 = (str(x2) in str(x1) ) & (len(str(x2)) == len(str(x1)) - 1)

        if check1 or check2:
            return (x2, True)
        else: 
            return (x1, False)

def epsilon_error(x1:float, x2:float, tol:float=0.01):
    """
    Determine whether the two values are within a similar epsilon bound. We default our error tolerance
    to 0.01, implying that if two numbers are within a specified toleracnce (default 1% ) of one another 
    """
    if (x1 == 0) or (x2 == 0): return False
    else:
        # first we convert the numeric quantities into strings
        current = str(x1)
        lookback = str(x2)

        # we only want to check against the relative difference if one element in the number is read wrong
        if len(current) == len(lookback):
            
            # we iterate linearly through each string and check to see the positional match 
            # if we catch a mismatch we flag it with a 1, othewise skip with 0
            changes = [0 if (current[i] == lookback[i]) else 1 for i in range(len(current))]

            # check set differences produce a set with exactly 1 in length
            if sum(changes) == 1:

                diff = abs(x1 - x2)      # compute numeric differences

                # check to see whether an accounting condition was met wihtin a boundary condition
                if abs(diff / x1) <= tol:
                    return True

        return False

In [4]:
def totals_check(df:pd.DataFrame) -> tuple:
    """
    Checks to see if a line row meets the conditon of a total, if true we remove these rows as we make 
    have checked the terms before have meet our conditions (these include major and minor totals)
    ------------------------------------------------------------------------------------
    :param: (type pandas.DataFrame)
        A DataFrame that represents the Asset or Liability & Equity portion of the balance sheet
        
    :return: (type tuple)
        Return a cleaned DataFrame that strips the rows that represent totals
    """
    m, n = df.shape                  # unpack the shape of dataframe
    data_col = df.columns[1]         # the values column for balance sheet
    
    total_flag = 2       # default 2 (no measure found), 1 (sum is correct), 0 (sum is not correct)
    total_amt = np.nan
    
    # iterate through each of the line items
    for i in range(m):
        
        # check the value of line items at a given index (forward index)
        item1 = df.loc[i].values[1]
        name = df.loc[i].values[0]
        
        # ------------------------------------------------------------------
        # Perform regex search to determine "special" total rows
        # ------------------------------------------------------------------
        a_check = re.search('total assets', name, flags=re.I)
        le_check = re.search('(?=.*(liability|liabilities))(?=.*(equity|deficit|capital))', 
                             name, flags=re.I)
        # ------------------------------------------------------------------
        
        # if we find either total measure we re-write indicators
        if a_check is not None or le_check is not None:
            total_flag = 0; total_amt = item1;
        
        # compute backward sum (lookback index) 
        for j in range(i):
            
            # check whether dataframe empty (if so we skip to avoid fitting errors)
            # NOTE: Index position (i-1)   = line above current line
            #                      (i-j-1) = trailing look up line 'j' lines above the line above current line
            lookback = df.loc[i-j-1:i-1][data_col]
            
            # we check whether the lookback period is empty (if so we most likely deleted the row)
            if not lookback.empty:
                # backward sum for line items (index minus j-periods before)
                item2 = lookback.sum()

                # if we achieve this then we strip totals and break, no need to continue backward sum
                check1 = item1 == item2
                val, check2 = multiple_check(item1, item2)
                check3 = epsilon_error(item1, item2, tol=0.01)
                
                if check1 or check2 or check3:
                    df = df.drop(index=i)
                    
                    # if we drop the "Total" line-item then we re-assign flag to 1
                    if a_check is not None or le_check is not None:
                        total_flag = 1
                        total_amt = val
                    
                    # Error Handling for row deletions (uncomment for when not in use)
                    print('\tWe dropped row {}, {}, with lookback window of {}.'.format(i, name, j+1))
                    print('\t\tOur row is valued at {}, our lookback sum is {}'.format(item1, item2))
                    
                    # we break from inner loop to avoid key error flag 
                    break     
                
    return (df, total_flag, total_amt)

## Merging PDFs and PNGs 
**Functions to combine PDFs and PNGs where rows may be omitted**

In [36]:
def special_merge(df1:pd.DataFrame, df2:pd.DataFrame, col:str) -> pd.DataFrame:
    """
    Special type of merge for dataframes, combining all unique row items for a specified column. 
    This is designed to combine PDF and PNG balance sheets that differ in one or more rows.
    ------------------------------------------------------------------------------------
    :param: (type pandas.DataFrame)
        A DataFrame that represents either the PDF or PNG retreived from Balance Sheet
    :param: (type pandas.DataFrame)
        A DataFrame that represents either the PDF or PNG retreived from Balance Sheet
    :param: (type str)
        A shared column name that exists in both pandas.DataFrames (i.e. df1, df2)
        
    :return: (type pandas.DataFrame)
        Return a cleaned DataFrame that merges any row that was omitted, changed or missing
    """
    arr1 = df1[col].values
    arr2 = df2[col].values
    concat_pdf = []
    
    # find the sequences that match between either lineitems
    sm = SequenceMatcher(a=arr1, b=arr2)
    
    # The SequenceMathcer returns a 5-tupled for each correspond "obj"
    # 'replace'     a[i1:i2] should be replaced by b[j1:j2].
    # 'delete'      a[i1:i2] should be deleted. Note that j1 == j2 in this case.
    # 'insert'      b[j1:j2] should be inserted at a[i1:i1]. Note that i1 == i2 in this case.
    # 'equal'       a[i1:i2] == b[j1:j2] (the sub-sequences are equal)
    for (obj, i1, i2, j1, j2) in sm.get_opcodes():
        
        # implies that we want to "replace" the left side elment with the corresponding
        # right side element at the same index position (we perseve both)
        if obj == 'replace':
            
            # check the value of a fuzzy match, only insert both rows if they vastly different
            left_names = arr1[i1:i2]
            right_names = arr2[j1:j2]
            
            # iterate through each of the checks
            for it, (left, right) in enumerate(zip(left_names, right_names)):
                
                # compute the fuzz match between string (how close are these values)
                score = fuzz.partial_ratio(left.lower(), right.lower())

                # if not close in match then we append both values
                if score < 90:
                    concat_pdf.append(df1.iloc[i1:i1+it+1])
                    concat_pdf.append(df2.iloc[j1:j1+it+1])
                else:
                    print(df1.iloc[i1:i1+it+1])
                    concat_pdf.append(df1.iloc[i1:i1+it+1])
        
        # implies that we want to "delete" the left side element (we preserve this side)
        elif obj == 'delete':
            concat_pdf.append(df1.iloc[i1:i2])
        
        # implied that we want to "insert" the right side element (we preserve this side)
        elif obj == 'insert':
            concat_pdf.append(df2.iloc[j1:j2])
            
        elif obj == 'equal':
            concat_pdf.append(df1.iloc[i1:i2])
    
    # return concated pandas.DataFrame and reset index, removing old index
    return pd.concat(concat_pdf).reset_index(drop=True)
    

## Unstructured Database construction
**We develop our unstructured database from each of the non-total rows (concating the line items)**

In [6]:
def unstructured_data(df, filing_d, fiscal_y, cik, cik2name:dict) -> pd.DataFrame:
    """
    Forms unstructured row for larger database to be stored in s3 bucket
    ------------------------------------------------------------------------------------------
    Input:
        :param: df (type pandas.DataFrame)
            The balance sheet for a particular  
        :paran: filing_d (type str)
            The filing date for release of X-17A-5 filings for a broker dealer e.g. 2013-03-21
        :paran: fiscal_y (type str)
            The fiscal year for the balance sheet to cover e.g. 2012 (usually 1-year prior to filing date)
        :paran: cik (type str)
            The CIK number for a broker dealer e.g. 887767
        :paran: cik2name (type dict)
            A dictionary that maps CIK to Broker Deale names 
    Output:  
        :return: (type pandas.DataFrame)
             Return a transposed dataframe with additional columns corresponding to filing data
    """
    
    # intialize the first column (line items)
    first_column = df.columns[0]
    
    # clean dataframe should be of size greater than 1
    if len(df.columns) > 1:
        
        # transpose split balance sheet figure (our line items are now columns for DataFrame)
        # we first groupby the first column (this become index) and sum to group congruent names
        row = df.groupby(first_column).sum(min_count=1).T
        
        # creating additional columns in row
        row['CIK'] = cik                                  # CIK number for firm 
        row['Filing Date'] = filing_d                     # Filing Date for firm filing
        row['Filing Year'] = fiscal_y                     # Year for balance sheet filing
        row['Name'] = cik2name['broker-dealers'][cik]     # returns the name of associated with the CIK
        
        return row
    
    else:
        print('{}-{}.csv - encountered issue reading PDF'.format(cik, filing_d))
        return None
    

In [7]:
def extra_cols(csv_name:str):
    """
    Check to see whether their exists the presence of a total term from the line items
    ------------------------------------------------------------------------------------------
    Input:
        :param: csv_name (type str)
            The file directory on the s3 where data is stored (e.g. )
    Output:  
        :return: (type tuple)
            Returns a tuple for corresponding (file_name, filing_date, fiscal_year, cik)
    """
    
    file_name = csv_name.split('/')[-1]        # e.g. '1224385-2005-03-01.csv'
    csv_strip = file_name[:-4]                 # ignore last four elements from the back (i.e. .csv)

    # construct a string measure of important data measures 
    data_split = csv_strip.split('-')              
    filing_date = '-'.join(data_split[1:])         # join YYYY-mm-dd component for filing date
    fiscal_year = int(data_split[1]) - 1           # fiscal year are generally the previous year of filing
    cik = data_split[0]                            # extract the CIK number  
    
    return (file_name, filing_date, fiscal_year, cik)   

In [8]:
def reorder_columns(df:pd.DataFrame, col_preserve:list) -> pd.DataFrame:
    """
    Re-order the completed DataFrame by ordering the CIK, Name, Filing Data and Filing Year 
    ------------------------------------------------------------------------------------------
    Input:
        :param: df (type pandas.DataFrame)
            The unstructured database for balance sheet figures
    Output:  
        :return: (type pandas.DataFrame)
            Return a dataframe with dimensions less than or equal to input dataframe (MxN) -> (MxK), 
            where K <= N
    """
    # re-order the CIK and Year columns to appear as the first two columns
    remap = df.columns[~np.isin(df.columns, col_preserve)]                             
    df = df[np.insert(remap,                                       # pass all other columns, not in preserve list
                      np.zeros(len(col_preserve), dtype=int),      # map the location to the first index (i.e. 0)
                      col_preserve)]                               # insert columns we wished to preserve 

    filterNaN = df.isnull().all()                      # find if any column is all NaN 
    cleanCols = filterNaN[filterNaN == False].index    # select columns with at least one value

    # clean dataframe for unstructured asset terms
    return df[cleanCols]

## Final Main Execution

In [9]:
if __name__ == "__main__":
    
    # initiate s3 bucket and corresponding data folder
    bucket = "ran-s3-systemic-risk"
    
    pdf_asset_folder = "Output/X-17A-5-SPLIT-PDFS/Assets/"
    pdf_liable_folder = "Output/X-17A-5-SPLIT-PDFS/Liability & Equity/"
    
    png_asset_folder = "Output/X-17A-5-SPLIT-PNGS/Assets/"
    png_liable_folder = "Output/X-17A-5-SPLIT-PNGS/Liability & Equity/"
    
    out_folder = "Output/"

    # Amazon Textract client and Sagemaker session
    s3 = boto3.client('s3')
    session = Session()
    
    # ==============================================================================
    # ALL TEMPORARY FILE INFORMATION 
    # ==============================================================================
    # retrieving CIK-Dealers JSON file from s3 bucket
    s3.download_file(bucket, 'Temp/CIKandDealers.json', 'temp.json')
    with open('temp.json', 'r') as f: cik2brokers = json.loads(f.read())

    # remove local file after it has been created (variable is stored in memory)
    os.remove('temp.json')
    # ==============================================================================
    
    # s3 paths where asset and liability paths are stored
    asset_paths = session.list_s3_files(bucket, pdf_asset_folder)
    liable_paths = session.list_s3_files(bucket, pdf_liable_folder)
    
    # intialize list to store dataframes for asset and liability & equity
    asset_concat = [0] * len(asset_paths)
    liable_concat = [0] * len(liable_paths)
    
    # --------------------------------------------
    # Asset Unstructured Database
    # --------------------------------------------
    print('Assets Unstructured Database')
    for idx, csv in enumerate(asset_paths):
        
        # decompose csv name into corresponding terms
        fileName, filing_date, fiscal_year, cik = extra_cols(csv)
        
        # first load in both the PNG and PDF split balance sheets
        # NOTE: All these balance sheets are cleaned numerical values
        try:
            s3.download_file(bucket, csv, 'temp.csv')
            pdf_df = pd.read_csv('temp.csv')
            s3.download_file(bucket, png_asset_folder + fileName, 'temp.csv')
            png_df = pd.read_csv('temp.csv')
            os.remove('temp.csv')

            print('Working on {}-{}'.format(cik, filing_date))

            # do a special merge that combines unique line items names between PDF & PNG
            temp_df1 = special_merge(pdf_df, png_df, '0')

            # run accounting check to remove sub-totals for each respective line-item
            df, total_flag, total_amt = totals_check(temp_df1)
            
            # construct row for the unstructured data frame 
            export_df = unstructured_data(df, filing_date, fiscal_year, cik, cik2brokers)
            
            # we have that no "total asset" figure was found
            if total_flag == 2:
                export_df["Total asset check"] = "Total asset not found"
                export_df["Total asset"] = total_amt

            # we have that "total asset" was found and matches
            elif total_flag == 1:
                export_df["Total asset check"] = "Total asset found & match"
                export_df["Total asset"] = total_amt

            # we have that "total asset" was found, but did not match correctly
            elif total_flag == 0:
                export_df["Total asset check"] = "Total asset found & no match"

            # stores the reported data frame 
            asset_concat[idx] = export_df
        
        # in the event we can't download file from s3 (i.e. does not exist, we ignore the )
        except botocore.exceptions.ClientError:
            
            # assign an empty DataFrame and print out error
            asset_concat[idx] = pd.DataFrame()
            
            print('\nCLIENT-ERROR: WE COULD NOT DOWNLOAD SPLIT DATA FOR {}\n'.format(fileName))
     
    print('\n\n\n\n')
        
    # --------------------------------------------
    # Liability & Equity Unstructured Database
    # --------------------------------------------
    print('\nLiability & Equity Unstructured Database')
    for idx, csv in enumerate(liable_paths):
        
        # decompose csv name into corresponding terms
        fileName, filing_date, fiscal_year, cik = extra_cols(csv)
        
        try:
            # first load in both the PNG and PDF split balance sheets
            # NOTE: All these balance sheets are cleaned numerical values
            s3.download_file(bucket, csv, 'temp.csv')
            pdf_df = pd.read_csv('temp.csv')
            s3.download_file(bucket, png_liable_folder + fileName, 'temp.csv')
            png_df = pd.read_csv('temp.csv')
            os.remove('temp.csv')

            print('Working on {}-{}'.format(cik, filing_date))

            # do a special merge that combines unique line items names between PDF & PNG
            temp_df1 = special_merge(pdf_df, png_df, '0')

            # run accounting check to remove sub-totals for each respective line-item
            df, total_flag, total_amt = totals_check(temp_df1)
            
            # construct row for the unstructured data frame 
            export_df = unstructured_data(df, filing_date, fiscal_year, cik, cik2brokers)
            
            # we have that no "total asset" figure was found
            if total_flag == 2:
                export_df["Total liabilities & shareholder's equity check"] = "Total liabilities & shareholder's equity not found"
                export_df["Total liabilities & shareholder's equity"] = total_amt

            # we have that "total asset" was found and matches
            elif total_flag == 1:
                export_df["Total liabilities & shareholder's equity check"] = "Total liabilities & shareholder's equity found & match"
                export_df["Total liabilities & shareholder's equity"] = total_amt
        
            # we have that "total asset" was found, but did not match correctly
            elif total_flag == 0:
                export_df["Total liabilities & shareholder's equity check"] = "Total liabilities & shareholder's equity found & no match"
                
            # stores the reported data frame 
            liable_concat[idx] = export_df
        
        # in the event we can't download file from s3 (i.e. does not exist, we ignore the )
        except botocore.exceptions.ClientError:
            
            # assign an empty DataFrame and print out error
            liable_concat[idx] = pd.DataFrame()
            
            print('\nCLIENT-ERROR: WE COULD NOT DOWNLOAD SPLIT DATA FOR {}\n'.format(fileName))
    
    # --------------------------------------------
    # Database exportation
    # --------------------------------------------
    
    # writing data frame to .csv file
    asset_df = pd.concat(asset_concat)        # asset dataframe combining all rows from 
    asset_df = reorder_columns(asset_df,      # re-order columns for dataframe
                               col_preserve=['CIK', 'Name', 'Filing Date', 'Filing Year', 
                                             'Total asset check'])      
    
    filename1 = 'unstructured_assets.csv'
    asset_df.to_csv(filename1, index=False)
    with open(filename1, 'rb') as data:
        s3.put_object(Bucket=bucket, Key=out_folder + filename1, Body=data)
    
    
    # writing data frame to .csv file
    liable_df = pd.concat(liable_concat)     
    liable_df = reorder_columns(liable_df, 
                                col_preserve=['CIK', 'Name', 'Filing Date', 'Filing Year', 
                                              "Total liabilities & shareholder's equity check"])    
    
    filename2 = 'unstructured_liable.csv'
    liable_df.to_csv(filename2, index=False)
    with open(filename2, 'rb') as data:
        s3.put_object(Bucket=bucket, Key=out_folder + filename2, Body=data)
    
    # remove local file after it has been created
    os.remove(filename1)
    os.remove(filename2)
    
    print('\nWe created an unstructured asset and liability & equity')

Assets Unstructured Database
Working on 1146184-2004-03-01
	We dropped row 8, Total assets, with lookback window of 8.
		Our row is valued at 1987636228.0, our lookback sum is 1987636228.0
Working on 1146184-2006-03-01
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 5769615468.0, our lookback sum is 5769615468.0
Working on 1146184-2007-02-26
	We dropped row 8, Total assets, with lookback window of 8.
		Our row is valued at 5691263000.0, our lookback sum is 5691263000.0
Working on 1146184-2008-02-29
	We dropped row 8, Total assets, with lookback window of 8.
		Our row is valued at 6762481000.0, our lookback sum is 6762481000.0
Working on 1146184-2009-03-02
	We dropped row 10, Total assets, with lookback window of 10.
		Our row is valued at 12991558000.0, our lookback sum is 12991558000.0

CLIENT-ERROR: WE COULD NOT DOWNLOAD SPLIT DATA FOR 1146184-2010-02-25.csv


CLIENT-ERROR: WE COULD NOT DOWNLOAD SPLIT DATA FOR 1146184-2011-02-25.csv

Working on 11

	We dropped row 16, Total assets, with lookback window of 16.
		Our row is valued at 31397420000.0, our lookback sum is 31397420000.0
Working on 1261467-2008-02-29
	We dropped row 9, Total securities owned, at fair value, with lookback window of 2.
		Our row is valued at 8289560000.0, our lookback sum is 8289560000.0
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 26950162000.0, our lookback sum is 26950162000.0
Working on 1261467-2009-03-02
	We dropped row 9, Total securities owned, at fair value, with lookback window of 2.
		Our row is valued at 2950385000.0, our lookback sum is 2950385000.0
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 11333700000.0, our lookback sum is 11333700000.0
Working on 1261467-2010-03-01
	We dropped row 9, Total securities owned, at fair value, with lookback window of 2.
		Our row is valued at 6662217000.0, our lookback sum is 6662217000.0
	We dropped row 11, Total assets, with look

Working on 230611-2021-03-01
	We dropped row 9, Total financial instruments, at fair value, with lookback window of 2.
		Our row is valued at 9424271000.0, our lookback sum is 9424271000.0
Working on 26617-2002-05-30
	We dropped row 10, Total assets, with lookback window of 10.
		Our row is valued at 12964589.0, our lookback sum is 12964589.0
Working on 26617-2003-06-02
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 14870817000.0, our lookback sum is 14870817000.0
Working on 26617-2004-05-27
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 17911077000.0, our lookback sum is 17911077000.0
Working on 26617-2005-05-26
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 19437929000.0, our lookback sum is 19437929000.0
Working on 26617-2006-05-26
	We dropped row 10, Total assets, with lookback window of 10.
		Our row is valued at 23998473000.0, our lookback sum is 23998473000.0
Working

Working on 318336-2011-03-01
	We dropped row 16, Total assets, with lookback window of 16.
		Our row is valued at 312685000000.0, our lookback sum is 312685000000.0
Working on 318336-2012-02-29
	We dropped row 16, Total assets, with lookback window of 16.
		Our row is valued at 309495000000.0, our lookback sum is 309495000000.0
Working on 318336-2013-03-01
	We dropped row 16, Total assets, with lookback window of 16.
		Our row is valued at 292165000000.0, our lookback sum is 292165000000.0
Working on 318336-2014-03-05
	We dropped row 13, Total assets, with lookback window of 13.
		Our row is valued at 275929000000.0, our lookback sum is 275929000000.0
Working on 318336-2015-03-02
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 272892000000.0, our lookback sum is 272892000000.0
Working on 318336-2016-02-29
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 204426000000.0, our lookback sum is 204426000000.0
Working on

Working on 42352-2016-02-23
	We dropped row 8, Total assets, with lookback window of 8.
		Our row is valued at 454751000000.0, our lookback sum is 454751000000.0
Working on 42352-2017-03-01
	We dropped row 7, Total assets, with lookback window of 7.
		Our row is valued at 424849000000.0, our lookback sum is 424649000000.0
Working on 42352-2018-02-27
	We dropped row 7, Total assets, with lookback window of 7.
		Our row is valued at 453927000000.0, our lookback sum is 453927000000.0
Working on 42352-2019-03-01
	We dropped row 7, Total assets, with lookback window of 7.
		Our row is valued at 416665000000.0, our lookback sum is 416665000000.0
Working on 42352-2021-02-25
	We dropped row 7, Total assets, with lookback window of 7.
		Our row is valued at 507647000000.0, our lookback sum is 507647000000.0
Working on 58056-2002-03-01
Working on 58056-2003-03-03
Working on 58056-2004-02-27
Working on 58056-2004-03-26
Working on 58056-2005-02-25
	We dropped row 15, Total assets, with lookback wi

Working on 72267-2007-05-29
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 87542832000.0, our lookback sum is 87542832000.0
Working on 72267-2008-05-30
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 16266635000.0, our lookback sum is 16266635000.0
Working on 72267-2009-06-03
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 14279792000.0, our lookback sum is 14279792000.0
Working on 72267-2010-06-01
	We dropped row 4, Trading assets ($7,567,059 were pledged to, with lookback window of 2.
		Our row is valued at 36294531000.0, our lookback sum is 36294531000.0
Working on 72267-2012-03-15
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 104827762000.0, our lookback sum is 104827762000.0
Working on 72267-2012-05-30
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 114059495000.0, our lookback sum is 11405949500

Working on 803012-2014-12-30
	We dropped row 13, Total Assets, with lookback window of 13.
		Our row is valued at 35918709000.0, our lookback sum is 35918709000.0
Working on 803012-2015-12-22
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 34818830000.0, our lookback sum is 34818830000.0
Working on 803012-2016-12-22
	We dropped row 12, Total assets, with lookback window of 12.
		Our row is valued at 37497845000.0, our lookback sum is 37497845000.0
Working on 803012-2018-03-22
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 46381560000.0, our lookback sum is 46381560000.0
Working on 803012-2019-12-20
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 32682640000.0, our lookback sum is 32682640000.0
Working on 803012-2020-12-22
	We dropped row 11, Total assets, with lookback window of 11.
		Our row is valued at 39653089000.0, our lookback sum is 39653089000.0
Working on 851376-2002

Working on 867626-2009-03-02
	We dropped row 10, TOTAL ASSETS, with lookback window of 10.
		Our row is valued at 6536706704.0, our lookback sum is 6536706704.0
Working on 867626-2011-03-01
	We dropped row 11, TOTAL ASSETS, with lookback window of 11.
		Our row is valued at 20895600220.0, our lookback sum is 20895600220.0
Working on 867626-2012-02-29
	We dropped row 11, TOTAL ASSETS, with lookback window of 11.
		Our row is valued at 29545945000.0, our lookback sum is 29545945000.0
Working on 867626-2014-03-04
	We dropped row 13, TOTAL ASSETS, with lookback window of 13.
		Our row is valued at 29086436000.0, our lookback sum is 29086436000.0
Working on 867626-2016-02-26
	We dropped row 13, TOTAL ASSETS, with lookback window of 13.
		Our row is valued at 34167171000.0, our lookback sum is 34167171000.0
Working on 867626-2017-02-28
	We dropped row 13, TOTAL ASSETS, with lookback window of 13.
		Our row is valued at 29251885000.0, our lookback sum is 29251885000.0
Working on 867626-2018-0

Working on 890203-2007-05-30
	We dropped row 7, Total securities owned, with lookback window of 4.
		Our row is valued at 10830472.0, our lookback sum is 10830472.0
	We dropped row 10, TOTAL ASSETS, with lookback window of 10.
		Our row is valued at 12382376.0, our lookback sum is 12382376.0
Working on 890203-2008-05-30
	We dropped row 7, Total securities owned, with lookback window of 4.
		Our row is valued at 11752295.0, our lookback sum is 11752295.0
	We dropped row 10, TOTAL ASSETS, with lookback window of 10.
		Our row is valued at 13270613.0, our lookback sum is 13270613.0
Working on 890203-2009-05-28
	We dropped row 7, Total securities owned, with lookback window of 3.
		Our row is valued at 8963865.0, our lookback sum is 8963865.0
	We dropped row 10, TOTAL ASSETS, with lookback window of 10.
		Our row is valued at 14087251.0, our lookback sum is 14087251.0
Working on 890203-2010-05-28
	We dropped row 5, Total securities owned, with lookback window of 2.
		Our row is valued at 4

	We dropped row 21, Total assets, with lookback window of 21.
		Our row is valued at 314356000000.0, our lookback sum is 314356000000.0
Working on 91154-2011-03-01
	We dropped row 4, Trading account assets (approximately $20 billion were pledged to various parties at December 31, 2010):, with lookback window of 2.
		Our row is valued at 156385000000.0, our lookback sum is 156385000000.0
	We dropped row 18, Property, equipment and leasehold improvements, net of, with lookback window of 3.
		Our row is valued at 22540000000.0, our lookback sum is 22540000000.0
	We dropped row 23, Total assets, with lookback window of 23.
		Our row is valued at 288409000000.0, our lookback sum is 288409000000.0
Working on 91154-2012-02-29
	We dropped row 4, Trading account assets (approximately $18 billion were pledged to, with lookback window of 2.
		Our row is valued at 157348000000.0, our lookback sum is 157348000000.0
	We dropped row 14, Receivables:, with lookback window of 9.
		Our row is valued at 

Working on 1146184-2006-03-01
	We dropped row 9, Total liabilities, with lookback window of 9.
		Our row is valued at 5195563754.0, our lookback sum is 5195563754.0
	We dropped row 11, Total liabilities and members' capital, with lookback window of 11.
		Our row is valued at 5769615468.0, our lookback sum is 5769615468.0
Working on 1146184-2007-02-26
	We dropped row 6, Total liabilities, with lookback window of 6.
		Our row is valued at 5027462000.0, our lookback sum is 5027462000.0
	We dropped row 8, Total liabilities and members' capital, with lookback window of 8.
		Our row is valued at 5691263000.0, our lookback sum is 5691263000.0
Working on 1146184-2008-02-29
	We dropped row 4, Total liabilities, with lookback window of 4.
		Our row is valued at 5902899000.0, our lookback sum is 5902899000.0
	We dropped row 6, Total liabilities and members' capital, with lookback window of 6.
		Our row is valued at 6762481000.0, our lookback sum is 6762481000.0
Working on 1146184-2009-03-02
	We d

Working on 1215680-2019-03-01
	We dropped row 4, Total liabilities, with lookback window of 4.
		Our row is valued at 35140321140.0, our lookback sum is 35140321140.0
	We dropped row 6, Total liabilities and member's equity, with lookback window of 6.
		Our row is valued at 35316268917.0, our lookback sum is 35316268917.0
Working on 1215680-2020-03-02
	We dropped row 5, Total liabilities, with lookback window of 5.
		Our row is valued at 39573830474.0, our lookback sum is 39573830474.0
	We dropped row 7, Total liabilities and member's equity, with lookback window of 7.
		Our row is valued at 39779281667.0, our lookback sum is 39779281667.0
Working on 1215680-2021-03-02
	We dropped row 5, Total liabilities, with lookback window of 5.
		Our row is valued at 35583947619.0, our lookback sum is 35583947619.0
	We dropped row 7, Total liabilities and member's equity, with lookback window of 7.
		Our row is valued at 35824538684.0, our lookback sum is 35824538684.0
Working on 1224385-2004-03-0

	We dropped row 14, Total liabilities and member's equity, with lookback window of 14.
		Our row is valued at 141834542000.0, our lookback sum is 141834542000.0
Working on 1224385-2021-02-26
	We dropped row 9, Total liabilities, with lookback window of 9.
		Our row is valued at 118977964000.0, our lookback sum is 118977964000.0
	We dropped row 13, Total member's equity, with lookback window of 2.
		Our row is valued at 9130317000.0, our lookback sum is 9130317000.0
	We dropped row 14, Total liabilities and member's equity, with lookback window of 14.
		Our row is valued at 132758281000.0, our lookback sum is 132758281000.0
Working on 1261467-2005-03-08
	We dropped row 9, Total liabilities, with lookback window of 9.
		Our row is valued at 28302848000.0, our lookback sum is 28302848000.0
	We dropped row 11, Total liabilities and member's equity, with lookback window of 11.
		Our row is valued at 28698599000.0, our lookback sum is 28698599000.0
Working on 1261467-2006-03-01
	We dropped r

Working on 230611-2008-02-29
Working on 230611-2009-03-02
Working on 230611-2009-03-09
Working on 230611-2011-03-01
Working on 230611-2012-02-29
Working on 230611-2014-03-06
Working on 230611-2015-03-02
Working on 230611-2016-02-29
Working on 230611-2017-03-03
Working on 230611-2018-03-01
Working on 230611-2020-03-03
Working on 230611-2021-03-01
Working on 26617-2002-05-30
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 12853568.0, our lookback sum is 12853568.0
Working on 26617-2003-06-02
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 14770817000.0, our lookback sum is 14770817000.0
	We dropped row 11, Accumulated deficit, with lookback window of 1.
		Our row is valued at -4402000.0, our lookback sum is 4402000.0
Working on 26617-2004-05-27
	We dropped row 9, Total liabilities, with lookback window of 9.
		Our row is valued at 17813189000.0, our lookback sum is 17813189000.0
	We dropped row 13, Total stoc

Working on 29648-2003-03-03
	We dropped row 8, Other, with lookback window of 1.
		Our row is valued at 2000000.0, our lookback sum is 2000000.0
	We dropped row 12, Total liabilities, with lookback window of 12.
		Our row is valued at 9112000000.0, our lookback sum is 9110000000.0
	We dropped row 16, Total member's equity, with lookback window of 2.
		Our row is valued at 904000000.0, our lookback sum is 904000000.0
	We dropped row 17, Total Liabilities and Member's Equity, with lookback window of 17.
		Our row is valued at 10048000000.0, our lookback sum is 10046000000.0
Working on 29648-2004-03-01
	We dropped row 7, Total liabilities, with lookback window of 7.
		Our row is valued at 13044000000.0, our lookback sum is 13044000000.0
	We dropped row 11, Total member's equity, with lookback window of 2.
		Our row is valued at 549000000.0, our lookback sum is 549000000.0
	We dropped row 12, Total liabilities and member's equity, with lookback window of 12.
		Our row is valued at 14323000

	We dropped row 12, Total liabilities and member's equity, with lookback window of 12.
		Our row is valued at 33005000000.0, our lookback sum is 33005000000.0
Working on 29648-2021-02-26
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 32742000000.0, our lookback sum is 32742000000.0
	We dropped row 11, Total member's equity, with lookback window of 2.
		Our row is valued at 3484000000.0, our lookback sum is 3484000000.0
	We dropped row 12, Total liabilities and member's equity, with lookback window of 12.
		Our row is valued at 36226000000.0, our lookback sum is 36226000000.0
Working on 318336-2003-03-03
	We dropped row 14, Total Liabilities, with lookback window of 14.
		Our row is valued at 181259000000.0, our lookback sum is 181259000000.0
	We dropped row 18, Total member's equity, with lookback window of 3.
		Our row is valued at 4436000000.0, our lookback sum is 4436000000.0
	We dropped row 19, Total Liabilities and Member's Equity, with loo

Working on 318336-2018-03-01
	We dropped row 2, Securities loaned, with lookback window of 1.
		Our row is valued at 12876000000.0, our lookback sum is 12776000000.0
	We dropped row 15, Total member's equity, with lookback window of 3.
		Our row is valued at 11000000000.0, our lookback sum is 11000000000.0
	We dropped row 16, Total liabilities and member's equity, with lookback window of 5.
		Our row is valued at 109794000000.0, our lookback sum is 109794000000.0
Working on 318336-2019-03-08
	We dropped row 11, Total liabilities, with lookback window of 11.
		Our row is valued at 76332000000.0, our lookback sum is 76332000000.0
	We dropped row 15, Total member's equity, with lookback window of 3.
		Our row is valued at 11120000000.0, our lookback sum is 11120000000.0
	We dropped row 16, Total liabilities and member's equity, with lookback window of 16.
		Our row is valued at 87452000000.0, our lookback sum is 87452000000.0
Working on 318336-2020-02-28
	We dropped row 11, Total liabilit

Working on 42352-2002-01-30
Working on 42352-2003-01-28
Working on 42352-2004-01-27
Working on 42352-2005-01-25
	We dropped row 11, Total partners' capital, with lookback window of 2.
		Our row is valued at 4210751000.0, our lookback sum is 4210751000.0
	We dropped row 12, Total liabilities and partners' capital, with lookback window of 12.
		Our row is valued at 348590977000.0, our lookback sum is 348590977000.0
Working on 42352-2006-01-24
	We dropped row 11, Total partners' capital, with lookback window of 2.
		Our row is valued at 4535899000.0, our lookback sum is 4535899000.0
	We dropped row 12, Total liabilities and partners' capital, with lookback window of 12.
		Our row is valued at 475872139000.0, our lookback sum is 475872139000.0
Working on 42352-2007-01-23
	We dropped row 12, Total partners' capital, with lookback window of 2.
		Our row is valued at 4685687000.0, our lookback sum is 4685687000.0
	We dropped row 13, Total liabilities and partners' capital, with lookback windo

Working on 58056-2010-06-04
	We dropped row 7, Total liabilities, with lookback window of 7.
		Our row is valued at 232082804000.0, our lookback sum is 232082804000.0
	We dropped row 10, Total liabilities and stockholder's equity, with lookback window of 10.
		Our row is valued at 249693066000.0, our lookback sum is 249693066000.0
Working on 58056-2012-02-29
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 225973793000.0, our lookback sum is 225973793000.0
	We dropped row 11, Total liabilities and stockholder's equity, with lookback window of 11.
		Our row is valued at 244079398000.0, our lookback sum is 244079398000.0
Working on 58056-2013-03-01
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 221339000000.0, our lookback sum is 221339000000.0
	We dropped row 11, Total liabilities and stockholder's equity, with lookback window of 11.
		Our row is valued at 240046000000.0, our lookback sum is 240046000000.0
W

Working on 68136-2012-02-29
	We dropped row 7, Total financial instruments sold, not yet purchased, at fair value, with lookback window of 5.
		Our row is valued at 32850811000.0, our lookback sum is 32850811000.0
	We dropped row 16, Total liabilities, with lookback window of 16.
		Our row is valued at 332970945000.0, our lookback sum is 332970945000.0
	We dropped row 20, Total Morgan Stanley & Co. LLC member's equity, with lookback window of 2.
		Our row is valued at 9293431000.0, our lookback sum is 9293431000.0
	We dropped row 22, Total member's equity, with lookback window of 4.
		Our row is valued at 9297181000.0, our lookback sum is 9297181000.0
	We dropped row 23, Total liabilities and member's equity, with lookback window of 23.
		Our row is valued at 352293126000.0, our lookback sum is 352293126000.0
Working on 68136-2013-03-01
	We dropped row 7, Total financial instruments sold, not yet purchased, at fair value, with lookback window of 5.
		Our row is valued at 34047352000.0,

Working on 72267-2010-06-01
	We dropped row 13, Total liabilities, with lookback window of 13.
		Our row is valued at 67030133000.0, our lookback sum is 67030133000.0
	We dropped row 17, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 1072171000.0, our lookback sum is 1072171000.0
	We dropped row 18, Total liabilities and stockholder's equity, with lookback window of 18.
		Our row is valued at 68102304000.0, our lookback sum is 68102304000.0
Working on 72267-2012-03-15
	We dropped row 13, Total liabilities, with lookback window of 11.
		Our row is valued at 102085358000.0, our lookback sum is 102085358000.0
	We dropped row 17, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 2742404000.0, our lookback sum is 2742404000.0
	We dropped row 18, Total liabilities and stockholder's equity, with lookback window of 16.
		Our row is valued at 104827762000.0, our lookback sum is 104827762000.0
Working on 72267-2012-05-30
	We dropped row 

Working on 782124-2010-03-01
	We dropped row 10, Total liabilities, with lookback window of 10.
		Our row is valued at 320976000000.0, our lookback sum is 320976000000.0
	We dropped row 13, Total liabilities and stockholder's equity, with lookback window of 13.
		Our row is valued at 329205000000.0, our lookback sum is 329205000000.0
Working on 782124-2011-03-02
	We dropped row 10, Total liabilities (*), with lookback window of 10.
		Our row is valued at 351776000000.0, our lookback sum is 351776000000.0
	We dropped row 12, Total liabilities and member's equity, with lookback window of 12.
		Our row is valued at 362019000000.0, our lookback sum is 362019000000.0
Working on 782124-2012-02-29
	We dropped row 10, Total liabilities (A), with lookback window of 10.
		Our row is valued at 360040000000.0, our lookback sum is 360040000000.0
	We dropped row 12, Total liabilities and member's equity, with lookback window of 12.
		Our row is valued at 371293000000.0, our lookback sum is 371293000

Working on 803012-2013-12-26
	We dropped row 8, Total Liabilities, with lookback window of 8.
		Our row is valued at 32334728000.0, our lookback sum is 32334728000.0
	We dropped row 11, Total liabilities and member's equity, with lookback window of 11.
		Our row is valued at 33822818000.0, our lookback sum is 33822818000.0
Working on 803012-2014-12-30
	We dropped row 7, Total Liabilities, with lookback window of 7.
		Our row is valued at 34444714000.0, our lookback sum is 34444714000.0
	We dropped row 10, Total Liabilities and Member's Equity, with lookback window of 10.
		Our row is valued at 35918709000.0, our lookback sum is 35918709000.0
Working on 803012-2015-12-22
	We dropped row 9, Total liabilities, with lookback window of 9.
		Our row is valued at 34138609000.0, our lookback sum is 34138609000.0
	We dropped row 11, Total liabilities and member's equity, with lookback window of 11.
		Our row is valued at 34818830000.0, our lookback sum is 34818830000.0
Working on 803012-2016-12

Working on 851376-2019-03-01
	We dropped row 11, Total liabilities, with lookback window of 11.
		Our row is valued at 98750000000.0, our lookback sum is 98750000000.0
	We dropped row 16, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 6916000000.0, our lookback sum is 6916000000.0
	We dropped row 17, Total liabilities and stockholder's equity, with lookback window of 17.
		Our row is valued at 105666000000.0, our lookback sum is 105666000000.0
Working on 851376-2020-03-06
	We dropped row 10, Total liabilities, with lookback window of 10.
		Our row is valued at 91805000000.0, our lookback sum is 91805000000.0
	We dropped row 15, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 7362000000.0, our lookback sum is 7362000000.0
	We dropped row 16, Total liabilities and stockholder's equity, with lookback window of 16.
		Our row is valued at 99167000000.0, our lookback sum is 99167000000.0
Working on 851376-2021-03-01
	We dropped row

Working on 867626-2005-04-29
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 1593624686.0, our lookback sum is 1593624686.0
	We dropped row 12, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 76335710.0, our lookback sum is 76335710.0
	We dropped row 13, TOTAL LIABILITIES AND STOCKHOLDER'S EQUITY, with lookback window of 13.
		Our row is valued at 1669960396.0, our lookback sum is 1669960396.0
Working on 867626-2006-04-28
	We dropped row 8, Total liabilities, with lookback window of 8.
		Our row is valued at 4362115901.0, our lookback sum is 4362115901.0
	We dropped row 11, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 79592321.0, our lookback sum is 79592321.0
	We dropped row 12, TOTAL LIABILITIES AND STOCKHOLDER'S EQUITY, with lookback window of 12.
		Our row is valued at 4441708222.0, our lookback sum is 4441708222.0
Working on 867626-2007-04-26
	We dropped row 7, Total liabilities,

Working on 87634-2006-09-21
	We dropped row 6, Total liabilities, with lookback window of 6.
		Our row is valued at 26837000000.0, our lookback sum is 26837000000.0
	We dropped row 9, Common stock - 7,000,000 shares authorized; $.10 par value per share; 2,823,000 shares issued and outstanding, with lookback window of 1.
		Our row is valued at 0.0, our lookback sum is 0.0
	We dropped row 12, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 1697000000.0, our lookback sum is 1697000000.0
	We dropped row 13, Total, with lookback window of 13.
		Our row is valued at 28754000000.0, our lookback sum is 28754000000.0
Working on 87634-2007-03-01
	We dropped row 6, Total liabilities, with lookback window of 6.
		Our row is valued at 23124000000.0, our lookback sum is 23124000000.0
	We dropped row 9, per share; 2,823,000 shares issued and outstanding, with lookback window of 1.
		Our row is valued at 0.0, our lookback sum is 0.0
	We dropped row 12, Total stockholder's

Working on 87634-2021-02-26
	We dropped row 2, Total liabilities, with lookback window of 2.
		Our row is valued at 67246000000.0, our lookback sum is 67246000000.0
	We dropped row 4, Common stock - 7,000,000 shares authorized; $.10 par value per share; 2,823,000 shares issued and outstanding, with lookback window of 1.
		Our row is valued at 0.0, our lookback sum is 0.0
	We dropped row 7, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 7673000000.0, our lookback sum is 7673000000.0
	We dropped row 8, Total liabilities and stockholder's equity, with lookback window of 8.
		Our row is valued at 74919000000.0, our lookback sum is 74919000000.0
Working on 877559-2002-03-01
	We dropped row 7, Total liabilities, with lookback window of 7.
		Our row is valued at 7841718000.0, our lookback sum is 7841718000.0
	We dropped row 14, Total shareholder's equity, with lookback window of 5.
		Our row is valued at 212367000.0, our lookback sum is 212367000.0
	We dropped r

Working on 890203-2003-02-28
	We dropped row 1, Total liabilities, with lookback window of 1.
		Our row is valued at 176877.0, our lookback sum is 176877.0
	We dropped row 5, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 11548020.0, our lookback sum is 11548020.0
	We dropped row 6, Total liabilities and stockholder's equity, with lookback window of 6.
		Our row is valued at 11724897.0, our lookback sum is 11724897.0
Working on 890203-2004-05-03
	We dropped row 2, Total liabilities, with lookback window of 2.
		Our row is valued at 344602.0, our lookback sum is 344602.0
	We dropped row 6, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 11280622.0, our lookback sum is 11280622.0
	We dropped row 7, Total liabilities and stockholder's equity, with lookback window of 7.
		Our row is valued at 11625224.0, our lookback sum is 11625224.0
Working on 890203-2005-02-22
	We dropped row 1, Total liabilities, with lookback window of 1.
		

Working on 890203-2021-03-01
	We dropped row 10, Total liabilities, with lookback window of 10.
		Our row is valued at 40998286.0, our lookback sum is 40998286.0
	We dropped row 14, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 258566.0, our lookback sum is 258565.0
	We dropped row 15, Total liabilities and stockholder's equity, with lookback window of 15.
		Our row is valued at 41256852.0, our lookback sum is 41256852.0
Working on 895502-1998-11-13
	We dropped row 10, 35,000 shares; issued and outstanding 200 shares, with lookback window of 1.
		Our row is valued at 0.0, our lookback sum is 0.0
	We dropped row 13, Total shareholder's equity, with lookback window of 2.
		Our row is valued at 103485000.0, our lookback sum is 103485000.0
	We dropped row 14, Total liabilities and shareholder's equity, with lookback window of 14.
		Our row is valued at 1582979000.0, our lookback sum is 1582979000.0
Working on 895502-2002-12-30
	We dropped row 13, Total liabi

Working on 91154-2003-03-03
	We dropped row 4, Financial instruments sold, not yet purchased,, with lookback window of 2.
		Our row is valued at 114159000000.0, our lookback sum is 114159000000.0
	We dropped row 11, Payables and accrued liabilities:, with lookback window of 6.
		Our row is valued at 18012000000.0, our lookback sum is 18012000000.0
	We dropped row 18, Total liabilities, with lookback window of 18.
		Our row is valued at 189981000000.0, our lookback sum is 189981000000.0
	We dropped row 23, Total stockholder's equity, with lookback window of 3.
		Our row is valued at 7136000000.0, our lookback sum is 7126000000.0
	We dropped row 24, Total liabilities and stockholder's equity, with lookback window of 24.
		Our row is valued at 197117000000.0, our lookback sum is 197117000000.0
Working on 91154-2004-02-27
	We dropped row 10, Payables and accrued liabilities:, with lookback window of 6.
		Our row is valued at 24756000000.0, our lookback sum is 24756000000.0
	We dropped row 

Working on 91154-2016-03-01
	We dropped row 13, Total liabilities, with lookback window of 13.
		Our row is valued at 234329000000.0, our lookback sum is 234329000000.0
	We dropped row 18, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 9142000000.0, our lookback sum is 9132000000.0
	We dropped row 19, Total liabilities and stockholder's equity, with lookback window of 19.
		Our row is valued at 253416000000.0, our lookback sum is 253416000000.0
Working on 91154-2017-03-02
	We dropped row 14, Total liabilities, with lookback window of 14.
		Our row is valued at 252450000000.0, our lookback sum is 252450000000.0
	We dropped row 18, Total stockholder's equity, with lookback window of 2.
		Our row is valued at 9194000000.0, our lookback sum is 9184000000.0
	We dropped row 19, Total liabilities and stockholder's equity, with lookback window of 19.
		Our row is valued at 261644000000.0, our lookback sum is 261644000000.0
Working on 91154-2018-03-05
	We dropped 

Working on 922792-2019-03-01
	We dropped row 9, Total other payables, with lookback window of 4.
		Our row is valued at 354000000.0, our lookback sum is 354000000.0
	We dropped row 10, Total liabilities, with lookback window of 10.
		Our row is valued at 45591000000.0, our lookback sum is 45591000000.0
	We dropped row 12, Total liabilities and members' capital, with lookback window of 12.
		Our row is valued at 50852000000.0, our lookback sum is 50852000000.0
Working on 922792-2020-02-28
	We dropped row 9, Total other payables, with lookback window of 4.
		Our row is valued at 282000000.0, our lookback sum is 282000000.0
	We dropped row 10, Total liabilities, with lookback window of 10.
		Our row is valued at 54636000000.0, our lookback sum is 54636000000.0
	We dropped row 12, Total liabilities and members' capital, with lookback window of 12.
		Our row is valued at 60395000000.0, our lookback sum is 60395000000.0
Working on 922792-2021-02-26
	We dropped row 9, Total other payables, wi

In [9]:
# work on combining columns that are issued seperately
s3 = boto3.client('s3')
session = Session()
bucket = "ran-s3-systemic-risk"

# e.g. file name = 1224385-2004-03-01, 42352-2003-01-28
s3.download_file(bucket, 'Output/X-17A-5-SPLIT-PDFS/Assets/72267-2004-05-28.csv', 'temp.csv')
pdf_df = pd.read_csv('temp.csv')
s3.download_file(bucket, 'Output/X-17A-5-SPLIT-PNGS/Assets/72267-2004-05-28.csv', 'temp.csv')
png_df = pd.read_csv('temp.csv')
os.remove('temp.csv')

# do a special merge that combines unique line items names between PDF & PNG
temp_df1 = special_merge(pdf_df, png_df, '0')

# run accounting check to remove sub-totals for each respective line-item
df, total_flag, total_amt = totals_check(temp_df1)


	We dropped row 4, Financial instruments owned (approximately $10,900,474 were, with lookback window of 2.
		Our row is valued at 44343153000.0, our lookback sum is 44343153000.0
	We dropped row 9, Receivables:, with lookback window of 4.
		Our row is valued at 17731470000.0, our lookback sum is 17731470000.0
	We dropped row 13, Furniture, equipment and leasehold improvements, net of, with lookback window of 3.
		Our row is valued at 535815000.0, our lookback sum is 535815000.0


In [35]:
pdf_df

Unnamed: 0,0,1
0,Cash and cash equivalents,918203000.0
1,Cash and securities segregated pursuant to fed...,365178000.0
2,Securities purchased under agreements to resell,25660250000.0
3,Securities borrowed,18682900000.0
4,Financial instruments owned (approximately $10...,44343150000.0
5,Equity securities,7965816000.0
6,U.S. government and agency securities,7277881000.0
7,Corporate debt and collateralized mortgage obl...,2446369000.0
8,Options,41404000.0
9,Receivables:,17731470000.0


In [37]:
special_merge(pdf_df, png_df, '0')

['U.S. government and agency securities'] ['U. S. government and agency securities']
                                       0             1
6  U.S. government and agency securities  7.277881e+09
['Furniture, equipment and leasehold improvements, net of'
 'accumulated depreciation and amortization of $40,523'] ['Furniture, equipment and leasehold improvements, net of accumulated depreciation and amortization of $40,523']
                                                    0            1
13  Furniture, equipment and leasehold improvement...  535815000.0


Unnamed: 0,0,1
0,Cash and cash equivalents,918203000.0
1,Cash and securities segregated pursuant to fed...,365178000.0
2,Securities purchased under agreements to resell,25660250000.0
3,Securities borrowed,18682900000.0
4,Financial instruments owned (approximately $10...,44343150000.0
5,Equity securities,7965816000.0
6,U.S. government and agency securities,7277881000.0
7,Corporate debt and collateralized mortgage obl...,2446369000.0
8,Options,41404000.0
9,Receivables:,17731470000.0


In [38]:
a = ['Furniture, equipment and leasehold improvements, net of', 'accumulated depreciation and amortization of $40,523'] 
b = ['Furniture, equipment and leasehold improvements, net of accumulated depreciation and amortization of $40,523']

In [42]:
for idx, i in enumerate(zip(a, b)):
    print(idx, i)

0 ('Furniture, equipment and leasehold improvements, net of', 'Furniture, equipment and leasehold improvements, net of accumulated depreciation and amortization of $40,523')


In [43]:
i

('Furniture, equipment and leasehold improvements, net of',
 'Furniture, equipment and leasehold improvements, net of accumulated depreciation and amortization of $40,523')