# Current progress (8/25/2020)
Currently working on extracting the Principal and Interest Remittance to Trust data, as well as the Swap Due to Administrator and Swap Due to Provider amounts, from the old-format reports
NEXT STEP: extract WAC data, which comes from "Pool Detail" in the old-format reports and from "Remittance Summary Group" in the new-format reports

In [1]:
import xlwings as xw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
# NOTE(review): this assigns an attribute on the xw.App class itself, not on a running
# Excel instance; xlwings exposes alert suppression as the per-instance `display_alerts`
# property -- confirm this line actually suppresses Excel's alert dialogs
xw.App.DisplayAlerts = False

## Stage 1: Data extraction functions

In [2]:
# The core data extraction function for January 2007 to May 2013
def extract_jan07_may13():
    """Extract monthly remittance and swap figures from the old-format investor reports.

    Lists the files in the '2007 Through May 2013' reports folder, sorts them by
    the year and month embedded in each filename, opens each workbook with
    xlwings and fills in Remittance Interest, Remittance Principal, Swap Due to
    Administrator and Swap Due to Provider for 'Group I', 'Group II' and 'Total'.
    The April 2007 report is populated from hard-coded values rather than from
    cell reads. Each processed filename is printed as a progress indicator.

    Returns:
        pandas.DataFrame: the monthly frames concatenated, indexed by a
        MultiIndex with levels ['Date', 'Group'].

    Raises:
        ValueError: if a report other than April 2007 contains neither a
            'Cash Reconciliation Summary' nor a 'Cash Recon Summary' sheet.
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + \
        '2007 Through May 2013' + '/'
    april_2007_report = 'bear-stearns-2006-he10-investor-report-04-25-2007.XLS'
    # The same sheet appears under either of these names depending on the report
    crs_sheet_names = ('Cash Reconciliation Summary', 'Cash Recon Summary')
    groups = ['Group I', 'Group II', 'Total']
    columns = ['Remittance Interest', 'Remittance Principal', 'Swap Due to Administrator',
               'Swap Due to Provider', 'Date']

    reports_dir = os.listdir(reports_loc)
    # Filenames end in 'MM-DD-YYYY.XLS': order by the year first, then by the month
    reports_dir.sort(key=lambda z: z[-8:]+z[-14:-12])

    # Monthly dataframes, concatenated at the end
    data_list = []
    # NOTE(review): the last entry in sorted order is left out, exactly as before;
    # presumably it is a non-report file (e.g. .DS_Store) -- confirm before changing
    for report_name in reports_dir[:-1]:
        filename = reports_loc + report_name
        print(report_name)
        # Dataframe holding one month's numbers
        one_month = pd.DataFrame(columns=columns, index=groups)
        investor_rpt = xw.Book(filename)
        try:
            if report_name == april_2007_report:
                # Deal with the unique format of the April 2007 report
                one_month.loc['Group I', 'Remittance Interest'] = 2154697.44
                one_month.loc['Group II', 'Remittance Interest'] = 2468357.43 + 1645800.26 + 1069999.1
                one_month.loc['Total', 'Remittance Interest'] = 7338854.24

                one_month.loc['Group I', 'Remittance Principal'] = 7260221.35
                one_month.loc['Group II', 'Remittance Principal'] = 4816007.18 + 3664736.1 + 5157047.63
                one_month.loc['Total', 'Remittance Principal'] = 20898012.26

                one_month.loc['Group I', 'Swap Due to Administrator'] = 99276.31
                one_month.loc['Group II', 'Swap Due to Administrator'] = 258327.77
                one_month.loc['Total', 'Swap Due to Administrator'] = 99276.31+258327.77

                one_month.loc['Group I', 'Swap Due to Provider'] = 0
                one_month.loc['Group II', 'Swap Due to Provider'] = 0
                one_month.loc['Total', 'Swap Due to Provider'] = 0
            else:
                # The Pool Non-Pool Funds sheet supplies the totals and the swap amounts
                pool_nonpool = investor_rpt.sheets['Pool Non-Pool Funds']
                # Look the sheet up afresh for every workbook so that a sheet object
                # from a previous (already closed) report can never be reused
                crs_sheet = next(
                    (sheet for sheet in investor_rpt.sheets if sheet.name in crs_sheet_names),
                    None,
                )
                if crs_sheet is None:
                    raise ValueError(
                        'No Cash Reconciliation Summary sheet found in ' + report_name
                    )

                # Group II is the sum of three separate sections of the sheet
                one_month.loc['Group I', 'Remittance Interest'] = crs_sheet.range('K14').value
                one_month.loc['Group II', 'Remittance Interest'] = (
                    crs_sheet.range('K54').value
                    + crs_sheet.range('K94').value
                    + crs_sheet.range('K134').value
                )
                one_month.loc['Total', 'Remittance Interest'] = pool_nonpool.range('D17').value

                one_month.loc['Group I', 'Remittance Principal'] = crs_sheet.range('K32').value
                one_month.loc['Group II', 'Remittance Principal'] = (
                    crs_sheet.range('K72').value
                    + crs_sheet.range('K112').value
                    + crs_sheet.range('K152').value
                )
                one_month.loc['Total', 'Remittance Principal'] = pool_nonpool.range('I21').value

                # Swap totals are not read from the sheet; they are the sum of the two groups
                admin_group_1 = pool_nonpool.range('O15').value
                admin_group_2 = pool_nonpool.range('O22').value
                one_month.loc['Group I', 'Swap Due to Administrator'] = admin_group_1
                one_month.loc['Group II', 'Swap Due to Administrator'] = admin_group_2
                one_month.loc['Total', 'Swap Due to Administrator'] = admin_group_1 + admin_group_2

                provider_group_1 = pool_nonpool.range('O16').value
                provider_group_2 = pool_nonpool.range('O23').value
                one_month.loc['Group I', 'Swap Due to Provider'] = provider_group_1
                one_month.loc['Group II', 'Swap Due to Provider'] = provider_group_2
                one_month.loc['Total', 'Swap Due to Provider'] = provider_group_1 + provider_group_2
        finally:
            # Always release the workbook, even when a sheet or cell read fails
            investor_rpt.close()

        # Add on the date, taken from the 'MM-DD-YYYY' part of the filename
        report_date = pd.to_datetime(filename[-14:-4])
        for group in groups:
            one_month.loc[group, 'Date'] = report_date
        data_list.append(one_month)

    output_df = pd.concat(data_list)
    # Set up the (Date, Group) MultiIndex of the January 2007-May 2013 output dataframe
    output_df.index.rename('Group', inplace=True)
    output_df.set_index('Date', append=True, inplace=True)
    output_df = output_df.reorder_levels(['Date', 'Group'])

    return output_df

In [3]:
# The function that grabs data from a single investor report
def extract_new_format_month(reports_loc, filename):
    """Read one new-format investor report and return its remittance figures.

    Opens the workbook with xlwings and reads Remittance Interest and
    Remittance Principal for 'Group I', 'Group II' and 'Total' from the
    'Remittance Summary Group' sheet. The cell addresses depend on the report
    date, which is taken from the 'MM-DD-YYYY' portion at the end of the
    filename (just before the four-character extension).

    Args:
        reports_loc: Directory containing the report. It is concatenated
            directly with ``filename``, so it must end with a path separator.
        filename: Name of the report file, ending in 'MM-DD-YYYY' plus a
            four-character extension such as '.XLS'.

    Returns:
        pandas.DataFrame: rows 'Group I', 'Group II' and 'Total' (index named
        'Group') with columns 'Remittance Interest', 'Remittance Principal'
        and 'Date'.

    Raises:
        ValueError: if the year or month cannot be parsed from ``filename``.
    """
    # Parse the date pieces once, before Excel is touched, so that a malformed
    # filename fails without leaving a workbook open
    year_month = (int(filename[-8:-4]), int(filename[-14:-12]))

    # Find the correct locations in the sheet, depending on the date of the investor report being accessed
    if year_month < (2014, 3):
        # June 2013 - February 2014
        interest_dict = {'Group I':'D36','Group II':'E36','Total':'C36'}
        principal_dict = {'Group I':'D26','Group II':'E26','Total':'C26'}
    elif year_month < (2015, 2):
        # March 2014 - January 2015
        interest_dict = {'Group I':'G54','Group II':'H54','Total':'F54'}
        principal_dict = {'Group I':'G35','Group II':'H35','Total':'F35'}
    else:
        # February 2015 - March 2020
        interest_dict = {'Group I':'F33','Group II':'G33','Total':'E33'}
        principal_dict = {'Group I':'F23','Group II':'G23','Total':'E23'}

    # Set up a dataframe to hold the entire month's numbers
    one_month = pd.DataFrame(columns=['Remittance Interest','Remittance Principal'])

    investor_rpt = xw.Book(reports_loc + filename)
    try:
        remit_summary_group = investor_rpt.sheets['Remittance Summary Group']
        # Grab the Remittance Interest numbers from the sheet, then the Remittance Principal numbers
        for column, cells in (('Remittance Interest', interest_dict),
                              ('Remittance Principal', principal_dict)):
            for group in ('Group I', 'Group II', 'Total'):
                one_month.loc[group, column] = remit_summary_group.range(cells[group]).value
    finally:
        # Always release the workbook, even when the sheet or a cell read fails
        investor_rpt.close()

    one_month['Date'] = pd.to_datetime(filename[-14:-4])
    one_month.index.rename('Group',inplace=True)
    return one_month

In [4]:
# The wrapper function for getting all data from the new-format reports
def extract_new_format_all():
    """Extract the remittance figures from every new-format investor report.

    Lists the 'June 2013 Through March 2020' reports folder, orders the entries
    by the year and month embedded in each filename, runs
    ``extract_new_format_month`` on every entry except the last one in that
    order, and stacks the monthly results.

    Returns:
        pandas.DataFrame: the concatenated monthly frames, indexed by a
        MultiIndex with levels ['Date', 'Group'].
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + \
        'June 2013 Through March 2020' + '/'

    # Year (with extension) first, then month, so the entries come out chronologically
    report_names = sorted(os.listdir(reports_loc), key=lambda name: name[-8:] + name[-14:-12])

    # One dataframe per report; the final entry in sorted order is not processed
    monthly_frames = [
        extract_new_format_month(reports_loc, name)
        for name in report_names[:-1]
    ]

    # Stack the months, then move 'Date' into the index ahead of 'Group'
    combined = pd.concat(monthly_frames)
    combined = combined.set_index('Date', append=True)
    return combined.reorder_levels(['Date', 'Group'])

# Stage 2: Get the data

In [5]:
# Run the old-format extraction; each report's filename is printed as it is processed
old_format_data = extract_jan07_may13()

bear-stearns-2006-he10-investor-report-01-25-2007.XLS
bear-stearns-2006-he10-investor-report-02-26-2007.XLS
bear-stearns-2006-he10-investor-report-03-26-2007.XLS
bear-stearns-2006-he10-investor-report-04-25-2007.XLS
bear-stearns-2006-he10-investor-report-05-25-2007.XLS
bear-stearns-2006-he10-investor-report-06-25-2007.XLS
bear-stearns-2006-he10-investor-report-07-25-2007.XLS
bear-stearns-2006-he10-investor-report-08-27-2007.XLS
bear-stearns-2006-he10-investor-report-09-25-2007.XLS
bear-stearns-2006-he10-investor-report-10-25-2007.XLS
bear-stearns-2006-he10-investor-report-11-26-2007.XLS
bear-stearns-2006-he10-investor-report-12-26-2007.XLS
bear-stearns-2006-he10-investor-report-01-25-2008.XLS
bear-stearns-2006-he10-investor-report-02-25-2008.XLS
bear-stearns-2006-he10-investor-report-03-25-2008.XLS
bear-stearns-2006-he10-investor-report-04-25-2008.XLS
bear-stearns-2006-he10-investor-report-05-27-2008.XLS
bear-stearns-2006-he10-investor-report-06-25-2008.XLS
bear-stearns-2006-he10-inves

In [6]:
# Run the new-format extraction over the 'June 2013 Through March 2020' reports folder
new_format_data = extract_new_format_all()

# Stage 3: Merge the two dataframes, and save the result

In [7]:
# Stack the old-format and new-format results into the final remittance-to-trust dataframe.
# The swap columns exist only in the old-format data, so they are NaN for the new-format rows.
remit_trust_with_swap = pd.concat([old_format_data, new_format_data],sort=True)

In [8]:
# Save the merged remittance/swap dataframe as a pickle file
with open('../../data/mbs_data_pickled/remit_trust_with_swap.pkl','wb') as f:
    pickle.dump(remit_trust_with_swap,f)