In [1]:
import xlwings as xw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
# Presumably meant to stop Excel from prompting while workbooks are opened and closed below.
# NOTE(review): this assigns an attribute named DisplayAlerts on the xw.App *class*; xlwings
# documents alert suppression as the instance property `app.display_alerts` -- confirm that
# this line has the intended effect.
xw.App.DisplayAlerts = False

In [2]:
# Special function needed for April 2007 because that report was only available as a PDF.
# I did a PDF-to-Excel conversion in Adobe, but it led to crazy formatting, so I had to set up
# a special function to select the correct cell ranges in the resulting sheet.
def april_2007_bond_pmts(subfolder):
    """Read the 'Bond Payments' sheet of the April 2007 investor report.

    Parameters
    ----------
    subfolder : str
        Folder (under the hard-coded reports directory) that contains the workbook.

    Returns
    -------
    pandas.DataFrame
        The ranges A2:L21 and A24:L43 stacked on top of each other, without the rows
        labelled 'Total', plus a 'Date' column holding the date text sliced from the
        file name ('04-25-2007').
    """
    report_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/'
    filename = report_loc + subfolder + '/bear-stearns-2006-he10-investor-report-04-25-2007.xls'
    investor_rpt = xw.Book(filename)
    try:
        bond_pmts = investor_rpt.sheets['Bond Payments']
        group_I_df = bond_pmts.range('A2:L21').options(pd.DataFrame).value
        group_II_df = bond_pmts.range('A24:L43').options(pd.DataFrame).value
    finally:
        # Close the workbook even when the sheet or a range cannot be read, so a failed
        # run does not leave the file open in Excel.
        investor_rpt.close()
    output_df = pd.concat([group_I_df, group_II_df], sort=False)
    # The last 10 characters before the extension are the report date (MM-DD-YYYY).
    output_df['Date'] = filename[-14:-4]
    return output_df.drop('Total')

In [3]:
# The core data extraction function for January 2007 to May 2013.
# This reads the Bond Payments sheet from every investor report in the given subfolder of my Office directory.
def extract_old_layout(subfolder):
    """Collect the 'Bond Payments' sheet of every old-layout report into one frame.

    Parameters
    ----------
    subfolder : str
        Folder (under the hard-coded reports directory) holding the ``.xls`` reports.

    Returns
    -------
    pandas.DataFrame
        Rows of all reports stacked in (year, month) order of the file names and indexed
        by a ``['Date', 'Class']`` MultiIndex. 'Original Face Value (1)' and
        'Interest Payment (2)' are renamed to drop their footnote markers; the
        'Interest Adjustment', 'Deferred Interest' and 'Interest Payment(2)' columns
        are removed.

    Raises
    ------
    ValueError
        If the folder contains no ``.xls`` workbook.
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + subfolder + '/'
    april_2007_name = 'bear-stearns-2006-he10-investor-report-04-25-2007.xls'
    class_order = ['I-A-1', 'I-A-2', 'I-A-3', 'I-M-1', 'I-M-2', 'I-M-3', 'I-M-4', 'I-M-5', 'I-M-6',
                   'I-M-7', 'I-M-8', 'I-M-9', 'I-CE', 'I-P', 'I-R-1', 'I-R-2', 'I-R-3', 'I-RX',
                   'II-1A-1', 'II-1A-2', 'II-1A-3', 'II-2A', 'II-3A', 'II-M-1', 'II-M-2', 'II-M-3',
                   'II-M-4', 'II-M-5', 'II-M-6', 'II-M-7', 'II-M-8', 'II-M-9', 'II-CE', 'II-P',
                   'II-R-1', 'II-RX']

    # Keep only workbooks. The earlier version instead skipped whichever directory entry
    # sorted last (presumably a stray non-report file such as .DS_Store), which silently
    # loses the newest report whenever no such file is present.
    report_names = [name for name in os.listdir(reports_loc)
                    if name.lower().endswith('.xls') and not name.startswith(('.', '~$'))]
    if not report_names:
        raise ValueError('No .xls investor reports found in ' + reports_loc)
    # File names end in MM-DD-YYYY.xls: order by year, then month. The extension is left
    # out of the key so that '.XLS' and '.xls' files sort together.
    report_names.sort(key=lambda z: z[-8:-4] + z[-14:-12])

    data_list = []
    for name in report_names:
        if name.lower() == april_2007_name:
            # The PDF-converted April 2007 workbook has its own cell layout.
            data_list.append(april_2007_bond_pmts(subfolder))
            continue
        filename = reports_loc + name
        investor_rpt = xw.Book(filename)
        try:
            bond_pmts = investor_rpt.sheets['Bond Payments']
            # Overwrite the header cell so this column gets the same name in every report.
            # The workbook is closed below without being saved.
            bond_pmts.range("J10").value = "Interest Payment (2)"
            group_I_df = bond_pmts.range('A10:L33').options(pd.DataFrame).value
            group_II_df = bond_pmts.range('A48:L71').options(pd.DataFrame).value
        finally:
            # Always release the workbook, even if a sheet or range is missing.
            investor_rpt.close()
        # Drop (by label) the three rows that follow each header row.
        group_I_df = group_I_df.drop(list(group_I_df.index[:3]))
        group_II_df = group_II_df.drop(list(group_II_df.index[:3]))
        both_groups = pd.concat([group_I_df, group_II_df], sort=False).drop('Total')
        # reindex returns a new frame; the result must be kept for the ordering to apply.
        both_groups = both_groups.reindex(class_order)
        # Set the date after reindexing so every row carries it.
        both_groups['Date'] = filename[-14:-4]
        data_list.append(both_groups)

    data_df = pd.concat(data_list)
    # Fill gaps in 'Interest Payment (2)' from the differently spelled 'Interest Payment(2)'
    # column. Assigning the result back (instead of inplace on a column selection) keeps
    # this working under pandas Copy-on-Write.
    data_df['Interest Payment (2)'] = data_df['Interest Payment (2)'].fillna(data_df['Interest Payment(2)'])
    data_df = data_df.rename(columns={"Original Face Value (1)": "Original Face Value",
                                      "Interest Payment (2)": "Interest Payment"})
    data_df = data_df.drop(['Interest Adjustment', 'Deferred Interest', 'Interest Payment(2)'], axis=1)
    data_df['Date'] = pd.to_datetime(data_df['Date'])
    data_df = data_df.set_index('Date', append=True).reorder_levels(['Date', 'Class'])
    return data_df

In [4]:
# The core data extraction function for June 2013 to March 2020.
# This reads the 'Payment Stmt Pg1' sheet from every investor report in the given subfolder of my Office directory.
def extract_new_layout(subfolder):
    """Collect the 'Payment Stmt Pg1' sheet of every new-layout report into one frame.

    Parameters
    ----------
    subfolder : str
        Folder (under the hard-coded reports directory) holding the ``.xls`` reports.

    Returns
    -------
    pandas.DataFrame
        Rows of all reports stacked in (year, month) order of the file names and indexed
        by a ``['Date', 'Class']`` MultiIndex, with the columns 'CUSIP',
        'Original Face Value', 'Beginning Certificate Balance', 'Principal Payment',
        'Principal Adjustment or Loss', 'Ending Certificate Balance',
        'Interest Payment' and 'Pass-Through Rate'.

    Raises
    ------
    ValueError
        If the folder contains no ``.xls`` workbook.
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + subfolder + '/'
    class_order = ['I-A-1', 'I-A-2', 'I-A-3', 'I-M-1', 'I-M-2', 'I-M-3', 'I-M-4', 'I-M-5', 'I-M-6',
                   'I-M-7', 'I-M-8', 'I-M-9', 'I-CE', 'I-P', 'I-R-1', 'I-R-2', 'I-R-3', 'I-RX',
                   'II-1A-1', 'II-1A-2', 'II-1A-3', 'II-2A', 'II-3A', 'II-M-1', 'II-M-2', 'II-M-3',
                   'II-M-4', 'II-M-5', 'II-M-6', 'II-M-7', 'II-M-8', 'II-M-9', 'II-CE', 'II-P',
                   'II-R-1', 'II-RX']
    # Assigned to the rows by position, so the entries must line up with class_order.
    cusip_list = ['07389RAA4', '07389RAB2', '07389RAC0', '07389RAD8', '07389RAE6', '07389RAF3',
                  '07389RAG1', '07389RAH9', '07389RAJ5', '07389RAK2', '07389RAL0', '07389RAM8',
                  '07389RBE5', '07389RBD7', '07389RBG0', '07389RBH8', '07389RBJ4', '07389RBF2', '07389RAN6',
                  '07389RAP1', '07389RAQ9', '07389RAR7', '07389RAS5', '07389RAT3', '07389RAU0',
                  '07389RAV8', '07389RAW6', '07389RAX4', '07389RAY2', '07389RAZ9', '07389RBA3',
                  '07389RBB1', '07389RBK1', '07389RBN5', '07389RBL9', '07389RBM7']

    # Keep only workbooks. The earlier version instead skipped whichever directory entry
    # sorted last (presumably a stray non-report file such as .DS_Store), which silently
    # loses the newest report whenever no such file is present.
    report_names = [name for name in os.listdir(reports_loc)
                    if name.lower().endswith('.xls') and not name.startswith(('.', '~$'))]
    if not report_names:
        raise ValueError('No .xls investor reports found in ' + reports_loc)
    # File names end in MM-DD-YYYY.xls: order by year, then month.
    report_names.sort(key=lambda z: z[-8:-4] + z[-14:-12])

    data_list = []
    for name in report_names:
        filename = reports_loc + name
        investor_rpt = xw.Book(filename)
        try:
            bond_pmts = investor_rpt.sheets['Payment Stmt Pg1']
            from_excel = bond_pmts.range('A13:J51').options(pd.DataFrame).value
        finally:
            # Always release the workbook, even if the sheet or range is missing.
            investor_rpt.close()
        # Rows whose first cell is empty come back with a None label; name them 'Total'
        # and drop them together with the rows labelled 'Class'.
        from_excel = (from_excel.rename_axis('Class')
                                .rename(index={None: 'Total'})
                                .drop(['Class', 'Total']))
        # reindex returns a new frame; keeping the result guarantees that the positional
        # CUSIP assignment below lands on the right class.
        from_excel = from_excel.reindex(class_order).rename_axis('Class')
        from_excel['Date'] = filename[-14:-4]
        from_excel['CUSIP'] = cusip_list
        data_list.append(from_excel)

    data_df = pd.concat(data_list)
    # Blank out "N/A" entries and flip the sign of everything else in one step
    # ("" * -1 is still ""). This gives the same result as the old two-step chained
    # assignment, but also works under pandas Copy-on-Write, where assigning through
    # data_df[col][mask] never reaches the frame.
    loss_amount = data_df['Loss Amount']
    data_df['Loss Amount'] = loss_amount.mask((loss_amount == "N/A").to_numpy(), "").multiply(-1)
    # NOTE(review): for rows whose 'Loss Amount' was "N/A" this sum only works when
    # 'Realized Loss' is a string there as well -- confirm against the sheets.
    data_df['Principal Adjustment or Loss'] = data_df['Realized Loss'] + data_df['Loss Amount']
    # 'Loss Amount' (the recovered-loss column, per the original notes) is dropped here in
    # its negated form; negate it again first if the raw values are ever wanted.
    data_df = data_df.drop(["Realized Loss", "Loss Amount", "Total"], axis=1)
    data_df = data_df.rename(columns={'Through': 'Pass-Through Rate', 'Beginning': 'Beginning Certificate Balance',
                                      'Principal': 'Principal Payment', 'Interest': 'Interest Payment',
                                      'Original': 'Original Face Value', 'Ending': 'Ending Certificate Balance'})
    data_df['Date'] = pd.to_datetime(data_df['Date'])
    data_df = data_df.set_index('Date', append=True).reorder_levels(['Date', 'Class'])
    return data_df[['CUSIP', 'Original Face Value', 'Beginning Certificate Balance', 'Principal Payment',
                    'Principal Adjustment or Loss', 'Ending Certificate Balance', 'Interest Payment',
                    'Pass-Through Rate']]

In [5]:
# Read every new-layout report (one workbook per month) from the named subfolder.
data_new_format = extract_new_layout("June 2013 Through March 2020")

In [6]:
# Read every old-layout report, including the specially handled April 2007 workbook,
# from the named subfolder.
data_old_format = extract_old_layout("2007 Through May 2013")

In [7]:
# Show the old-layout frame as the cell output (pandas truncates it to the first and last rows).
data_old_format

Unnamed: 0_level_0,Unnamed: 1_level_0,CUSIP,Original Face Value,NaN,Beginning Certificate Balance,Principal Payment,Principal Adjustment or Loss,Ending Certificate Balance,Interest Payment,Pass-Through Rate
Date,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01-25,I-A-1,07389RAA4,1.475150e+08,,1.475150e+08,2576941.66,0.0,1.449381e+08,604073.92,0.0546
2007-01-25,I-A-2,07389RAB2,6.774700e+07,,6.774700e+07,0.00,0.0,6.774700e+07,281996.89,0.0555
2007-01-25,I-A-3,07389RAC0,1.121300e+07,,1.121300e+07,0.00,0.0,1.121300e+07,47010.50,0.0559
2007-01-25,I-M-1,07389RAD8,1.723500e+07,,1.723500e+07,0.00,0.0,1.723500e+07,73550.36,0.0569
2007-01-25,I-M-2,07389RAE6,1.981300e+07,,1.981300e+07,0.00,0.0,1.981300e+07,84700.58,0.057
...,...,...,...,...,...,...,...,...,...,...
2013-05-28,II-M-9,07389RBB1,1.120500e+07,,0.000000e+00,0.00,0.0,0.000000e+00,0.00,0.024502
2013-05-28,II-CE,07389RBK1,8.300254e+08,N,3.152241e+08,0.00,0.0,3.108594e+08,0.00,
2013-05-28,II-P,07389RBN5,1.000000e+02,,1.000000e+02,0.00,0.0,1.000000e+02,0.00,0.0
2013-05-28,II-R-1,07389RBL9,0.000000e+00,,0.000000e+00,0.00,0.0,0.000000e+00,0.00,0.0


In [8]:
# Stack the two eras (old layout first, column order taken from the old layout) and
# persist the result as a pickle for later use.
combined = pd.concat(
    (data_old_format, data_new_format),
    sort=False,
)
with open('../../data/mbs_data_pickled/bond_pmts.pkl', mode='wb') as f:
    pickle.dump(combined, f)

In [9]:
# EXAMPLE QUERY: find the data for the last month in which security I-M-4 still had any
# principal outstanding (non-zero beginning balance).
(
    combined
    .xs('I-M-4', level=1)
    .loc[lambda history: history['Beginning Certificate Balance'] != 0]
    .iloc[-1]
)

CUSIP                            07389RAG1
Original Face Value              7732000.0
NaN                                    NaN
Beginning Certificate Balance     54582.22
Principal Payment                      0.0
Principal Adjustment or Loss      54582.22
Ending Certificate Balance             0.0
Interest Payment                     30.66
Pass-Through Rate                 0.006741
Name: 2013-09-25 00:00:00, dtype: object