# Current progress (9/2/2020)
Prepared to extract the dollar value of prepayments for each month of the deal
NEXT STEP: extract WAC data, which comes from "Pool Detail" in the old-format reports and from "Remittance Summary Group" in the new-format reports

In [1]:
import xlwings as xw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
# NOTE(review): presumably meant to silence Excel alert dialogs while workbooks are
# opened and closed below. This statement sets an attribute named `DisplayAlerts` on
# the xlwings `App` class itself; xlwings documents alert suppression as the
# `display_alerts` property of an App *instance* -- confirm this line has any effect.
xw.App.DisplayAlerts = False

## Stage 1: Data extraction functions

In [2]:
# The core data extraction function for January 2007 to May 2013
def extract_jan07_may13():
    """Extract monthly prepayment amounts from the old-format investor reports.

    Lists the '2007 Through May 2013' report directory, orders the file names
    by the year and month embedded in them, and reads the prepayment figures
    for 'Group I', 'Group II' and 'Total' from each workbook via xlwings.
    The April 2007 report is not read from Excel; its figures are hard-coded.

    Returns:
        A DataFrame with a single 'Prepayments' column and a ('Date', 'Group')
        MultiIndex, one row per group per report.

    Raises:
        ValueError: if a report contains neither a 'Cash Reconciliation Summary'
            nor a 'Cash Recon Summary' sheet.
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + \
        '2007 Through May 2013' + '/'
    april_2007_report = reports_loc + 'bear-stearns-2006-he10-investor-report-04-25-2007.XLS'
    # The summary sheet appears under either of these two names
    crs_sheet_names = ('Cash Reconciliation Summary', 'Cash Recon Summary')
    groups = ['Group I', 'Group II', 'Total']

    reports_dir = os.listdir(reports_loc)
    # File names end in 'MM-DD-YYYY.XLS': z[-8:] starts with the year and z[-14:-12]
    # is the month, so this orders the reports by year, then month
    reports_dir.sort(key=lambda z: z[-8:]+z[-14:-12])

    # Collect the monthly dataframes, which we'll concatenate at the end
    data_list = []
    # NOTE(review): the last entry in sorted order is never processed. Presumably it is
    # a non-report file (e.g. a hidden system file that sorts after the reports) --
    # confirm; if it is a real report, it is being dropped.
    for report_name in reports_dir[:-1]:
        filename = reports_loc + report_name
        # Set up a dataframe to hold an entire month's numbers
        one_month = pd.DataFrame(columns=['Prepayments','Date'],index=groups)

        if filename == april_2007_report:
            # Deal with the unique format of the April 2007 report: the values are
            # hard-coded, so the workbook does not need to be opened
            one_month.loc['Group I','Prepayments'] = 6179640.39
            one_month.loc['Group II','Prepayments'] = 4681732.34 + 3545799.35 + 5081073.23
            one_month.loc['Total','Prepayments'] = 19488245.31
        else:
            investor_rpt = xw.Book(filename)
            try:
                # Pick the first sheet (in workbook order) carrying one of the known names.
                # Looking it up fresh for every report guarantees that a sheet object from
                # a previously processed workbook is never reused by accident.
                CRS = next(
                    (sheet for sheet in investor_rpt.sheets if sheet.name in crs_sheet_names),
                    None,
                )
                if CRS is None:
                    raise ValueError(
                        'No Cash Reconciliation Summary sheet found in ' + filename
                    )
                # Read each cell once; K27 feeds Group I, the other three feed Group II
                k27, k67, k107, k147 = (
                    CRS.range(cell).value for cell in ('K27', 'K67', 'K107', 'K147')
                )
            finally:
                # Close the workbook even when a sheet or cell lookup fails
                investor_rpt.close()

            one_month.loc['Group I','Prepayments'] = k27
            one_month.loc['Group II','Prepayments'] = k67 + k107 + k147
            one_month.loc['Total','Prepayments'] = k27 + k67 + k107 + k147

        # Add on the date, parsed from the 'MM-DD-YYYY' part of the file name
        report_date = pd.to_datetime(filename[-14:-4])
        for group in groups:
            one_month.loc[group, 'Date'] = report_date

        data_list.append(one_month)

    output_df = pd.concat(data_list)
    # Set up the MultiIndex of the January 2007-May 2013 output dataframe
    output_df.index.rename('Group',inplace=True)
    output_df.set_index('Date',append=True,inplace=True)
    output_df = output_df.reorder_levels(['Date','Group'])

    return output_df

In [3]:
# The wrapper function for getting all data from the new-format reports
def extract_new_format_all():
    """Collect the prepayment data from every new-format investor report.

    Lists the 'June 2013 Through March 2020' report directory, orders the file
    names by the year and month embedded in them, calls
    extract_new_format_month on each one and stacks the results.

    Returns:
        A DataFrame indexed by a ('Date', 'Group') MultiIndex.
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + \
        'June 2013 Through March 2020' + '/'

    # name[-8:] starts with the year and name[-14:-12] is the month, so the
    # reports come out ordered by year, then month
    ordered_reports = sorted(
        os.listdir(reports_loc),
        key=lambda name: name[-8:] + name[-14:-12],
    )

    # NOTE(review): the last entry in sorted order is left out -- presumably a
    # non-report file; confirm.
    monthly_frames = [
        extract_new_format_month(reports_loc, report_name)
        for report_name in ordered_reports[:-1]
    ]

    combined = pd.concat(monthly_frames)
    # Move 'Date' into the index next to 'Group', then put 'Date' first
    combined.set_index('Date', append=True, inplace=True)
    return combined.reorder_levels(['Date', 'Group'])

In [4]:
# The function that grabs data from a single investor report
def extract_new_format_month(reports_loc, filename):
    """Read one new-format investor report and return its prepayment figures.

    Args:
        reports_loc: Directory containing the report, including the trailing
            separator (it is concatenated directly with ``filename``).
        filename: Report file name ending in 'MM-DD-YYYY' plus a four-character
            extension; the year and month select the cell layout and the full
            date is stored in the output.

    Returns:
        A DataFrame with rows 'Group I', 'Group II' and 'Total' (index named
        'Group') and columns 'Prepayments' and 'Date'.
    """
    # The report date determines where the numbers sit on the sheet
    report_period = (int(filename[-8:-4]), int(filename[-14:-12]))
    if report_period < (2014, 3):
        # June 2013 - February 2014
        prepayments_dict = {'Group I':'D22','Group II':'E22','Total':'C22'}
    elif report_period < (2015, 2):
        # March 2014 - January 2015
        prepayments_dict = {'Group I':'G28','Group II':'H28','Total':'F28'}
    else:
        # February 2015 - March 2020
        prepayments_dict = {'Group I':'F19','Group II':'G19','Total':'E19'}

    investor_rpt = xw.Book(reports_loc + filename)
    try:
        remit_summary_group = investor_rpt.sheets['Remittance Summary Group']
        # Grab the Prepayments numbers from the sheet
        prepayment_values = {
            group: remit_summary_group.range(cell).value
            for group, cell in prepayments_dict.items()
        }
    finally:
        # Close the workbook even if the sheet or a cell cannot be read
        investor_rpt.close()

    # Set up a dataframe to hold the entire month's numbers
    one_month = pd.DataFrame(columns=['Prepayments','Date'])
    for group in ('Group I', 'Group II', 'Total'):
        one_month.loc[group, 'Prepayments'] = prepayment_values[group]

    one_month['Date'] = pd.to_datetime(filename[-14:-4])
    one_month.index.rename('Group',inplace=True)
    return one_month

# Stage 2: Get the data

In [5]:
# Run the old-format extraction; the result is a DataFrame indexed by (Date, Group)
old_format_data = extract_jan07_may13()

In [6]:
# Run the new-format extraction; the result is a DataFrame indexed by (Date, Group)
new_format_data = extract_new_format_all()

# Stage 3: Merge the two dataframes, and save the result

In [7]:
# Create the final prepayments dataframe by stacking the old-format rows on top of
# the new-format rows. sort=True asks pandas to sort the non-concatenation axis
# (here, the columns) if the two frames are not already aligned.
prepayments = pd.concat([old_format_data, new_format_data],sort=True)

In [8]:
# Save the final prepayments dataframe as a pickle file.
# The path is relative, so it is resolved against the current working directory;
# open() does not create missing directories, so the target folder must already exist.
with open('../../data/mbs_data_pickled/prepayments.pkl','wb') as f:
    pickle.dump(prepayments,f)