In [1]:
import xlwings as xw
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
# NOTE(review): this assigns an attribute named DisplayAlerts on xw.App itself (accessed
# straight from the module, not from a running app object). xlwings documents alert
# suppression as the per-instance `display_alerts` property, so this line presumably has
# no effect on Excel — TODO confirm and, if so, set it on the app instance actually used.
xw.App.DisplayAlerts = False

# CURRENT PROGRESS 4/17/2020
All credit ratings data has been extracted and concatenated into a single DataFrame.

The data is saved as `credit_ratings.pkl`.

In [2]:
# Special function needed for April 2007 because that report was only available as a PDF.
# The PDF was converted to Excel in Adobe, which produced irregular formatting, so this
# function reads a hand-picked cell range from the resulting sheet.
def april_2007_ratings(subfolder):
    """Read the April 2007 ratings table from the PDF-converted investor report.

    Parameters
    ----------
    subfolder : str
        Name of the folder, inside the investor-reports directory, that holds the
        April 2007 workbook.

    Returns
    -------
    pandas.DataFrame
        Indexed by (Date, Class) with the columns CUSIP, Moody's Original,
        S&P Original, Moody's Current and S&P Current. The Fitch and DBRS columns
        are dropped.
    """
    report_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/'
    filename = report_loc + subfolder + '/bear-stearns-2006-he10-investor-report-04-25-2007.xls'
    investor_rpt = xw.Book(filename)
    try:
        ratings_sheet = investor_rpt.sheets['Ratings Information']
        output_df = ratings_sheet.range('A1:J31').options(pd.DataFrame).value
    finally:
        # Close the workbook even when the sheet or range lookup fails, so a bad
        # file does not leave a workbook open in Excel.
        investor_rpt.close()
    # The report date is the MM-DD-YYYY stamp that precedes the file extension.
    output_df['Date'] = filename[-14:-4]
    output_df['Date'] = pd.to_datetime(output_df['Date'])
    output_df = output_df.set_index('Date', append=True)
    output_df = output_df.reorder_levels(['Date', 'Class'])
    # Columns are renamed purely by position: the range is expected to yield exactly
    # these nine data columns in this order.
    output_df.columns = ['CUSIP', 'Fitch Original', 'Moody\'s Original', 'DBRS Original', 'S&P Original',
                         'Fitch Current', 'Moody\'s Current', 'DBRS Current', 'S&P Current']
    return output_df.drop(columns=['Fitch Original', 'DBRS Original', 'Fitch Current', 'DBRS Current'])

In [3]:
# Extraction function for the January and February 2007 investor reports.
# It reads cells A13:J51 of the 'Ratings Information' sheet in each of the two workbooks.
def jan_feb_2007_ratings(subfolder):
    """Read the January and February 2007 ratings tables.

    Parameters
    ----------
    subfolder : str
        Name of the folder, inside the investor-reports directory, that holds both
        workbooks.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(january, february)``: the first 30 rows and the remaining rows of the
        combined, cleaned frame. Each is indexed by (Date, Class) with the columns
        CUSIP, Moody's Original, S&P Original, Moody's Current and S&P Current.
    """
    report_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + subfolder + '/'

    def _read_month(report_name):
        """Open one report, read its ratings range and stamp it with the report date."""
        path = report_loc + report_name
        investor_rpt = xw.Book(path)
        try:
            # Always look the sheet up on the workbook that was just opened; a sheet
            # handle taken from a different (already closed) workbook must not be reused.
            ratings_sheet = investor_rpt.sheets['Ratings Information']
            month = ratings_sheet.range('A13:J51').options(pd.DataFrame).value
        finally:
            investor_rpt.close()
        # The report date is the MM-DD-YYYY stamp that precedes the file extension.
        month['Date'] = path[-14:-4]
        return month

    jan = _read_month('bear-stearns-2006-he10-investor-report-01-25-2007.xls')
    feb = _read_month('bear-stearns-2006-he10-investor-report-02-26-2007.xls')

    jan_feb = pd.concat([jan, feb], sort=False)
    jan_feb.index.name = 'Class'
    # Drop every row whose index label equals the label found at position 28 or
    # position 0 of the combined frame. Dropping is by label, so matching rows are
    # removed from both months. Presumably these are blank / sub-header rows — confirm.
    unwanted_labels = [jan_feb.iloc[28].name, jan_feb.iloc[0].name]
    jan_feb = jan_feb.drop(labels=unwanted_labels)
    jan_feb['Date'] = pd.to_datetime(jan_feb['Date'])
    jan_feb = jan_feb.set_index('Date', append=True)
    jan_feb = jan_feb.reorder_levels(['Date', 'Class'])
    # Columns are renamed purely by position: the range is expected to yield exactly
    # these nine data columns in this order.
    jan_feb.columns = ['CUSIP', 'Fitch Original', 'Moody\'s Original', 'DBRS Original', 'S&P Original',
                       'Fitch Current', 'Moody\'s Current', 'DBRS Current', 'S&P Current']
    jan_feb = jan_feb.drop(columns=['Fitch Original', 'DBRS Original', 'Fitch Current', 'DBRS Current'])
    # NOTE(review): the split is positional and assumes January contributes exactly
    # 30 rows after the drop above — verify against the workbooks.
    return jan_feb.iloc[0:30], jan_feb.iloc[30:]

In [4]:
# The core data extraction function for January 2007 through May 2013
# This reads the Ratings Information sheet from every investor report in the given subfolder of my Office directory
def extract_old_layout(subfolder):
    """Build one ratings DataFrame from the old-layout investor reports.

    Parameters
    ----------
    subfolder : str
        Name of the folder, inside the investor-reports directory, whose files are
        listed and read.

    Returns
    -------
    pandas.DataFrame
        Concatenation of one frame per report, each indexed by (Date, Class) with the
        columns CUSIP, Moody's Original, S&P Original, Moody's Current and S&P Current.

    Notes
    -----
    January/February 2007 and April 2007 are delegated to ``jan_feb_2007_ratings`` and
    ``april_2007_ratings``. Every other report is read from two cell ranges of its
    'Ratings Information' sheet (A13:P40 and A47:P52).
    """
    reports_loc = '/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/' + subfolder + '/'
    reports_dir = os.listdir(reports_loc)
    # Sort key is the last 8 characters (YYYY plus extension) followed by the two
    # characters at [-14:-12] (MM), i.e. year first, then month.
    reports_dir.sort(key=lambda z: z[-8:]+z[-14:-12])

    final_columns = ['CUSIP', 'Moody\'s Original', 'S&P Original', 'Moody\'s Current', 'S&P Current']

    def _tidy_block(raw_block):
        """Keep the CUSIP/Moody's/S&P columns, relabel them and drop None-labelled rows."""
        # Selecting these three labels is expected to yield five columns (the
        # Moody's and S&P headers each cover an original and a current column),
        # which are then renamed by position.
        block = raw_block[['CUSIP', 'Moody\'s', 'S&P']].copy()
        block.columns = ['CUSIP', 'Moody\'s Original', 'Moody\'s Current', 'S&P Original', 'S&P Current']
        return block[final_columns].drop([None])

    # NOTE(review): the last entry of the sorted listing is never processed.
    # Presumably this skips a trailing non-report file — confirm; if the folder
    # holds only reports, the most recent one is silently left out.
    report_count = len(reports_dir) - 1
    # Integer placeholders, one slot per report. pd.concat below raises if any slot
    # is left unfilled, which surfaces a missing report instead of hiding it.
    data_list = list(range(report_count))
    for i in range(report_count):
        report_name = reports_dir[i]
        filename = reports_loc + report_name
        if report_name == 'bear-stearns-2006-he10-investor-report-01-25-2007.XLS':
            # One call returns both January and February; fill both slots now.
            jan_feb_data = jan_feb_2007_ratings(subfolder)
            data_list[i] = jan_feb_data[0]
            data_list[i+1] = jan_feb_data[1]
            continue
        if report_name == 'bear-stearns-2006-he10-investor-report-02-26-2007.XLS':
            # Slot already filled while handling the January report.
            continue
        if report_name == 'bear-stearns-2006-he10-investor-report-04-25-2007.XLS':
            data_list[i] = april_2007_ratings(subfolder)
            continue

        book = xw.Book(filename)
        try:
            sheet = book.sheets['Ratings Information']
            sheet_top = sheet.range('A13:P40').options(pd.DataFrame).value
            sheet_bottom = sheet.range('A47:P52').options(pd.DataFrame).value
        finally:
            # Close the workbook even when a sheet or range lookup fails.
            book.close()

        one_month = pd.concat([_tidy_block(sheet_top), _tidy_block(sheet_bottom)])
        # The report date is the MM-DD-YYYY stamp that precedes the file extension.
        one_month['Date'] = filename[-14:-4]
        one_month['Date'] = pd.to_datetime(one_month['Date'])
        one_month = one_month.set_index('Date', append=True)
        one_month = one_month.reorder_levels(['Date', 'Class'])
        data_list[i] = one_month
    final = pd.concat(data_list)
    return final

In [5]:
def extract_post_may_2013(df_pre_june_2013):
    """Read the post-May-2013 Moody's ratings and shape them like the earlier data.

    The workbook 'Credit Ratings After May 2013.xlsx' (sheet 'Sheet1') is read as a
    table whose first column becomes the index and whose next 30 columns are taken
    one per security class. Each column is turned into a long frame and the pieces
    are concatenated.

    Parameters
    ----------
    df_pre_june_2013 : pandas.DataFrame
        Earlier ratings data whose second index level holds the class names and which
        has the columns CUSIP, Moody's Original and S&P Original. The first row found
        for each class supplies those three values.

    Returns
    -------
    pandas.DataFrame
        Indexed by (date, Class) and sorted on the index, with the columns CUSIP,
        Moody's Original, S&P Original, Moody's Current and S&P Current.
        S&P Current is always None.
    """
    directory = "/Users/Alex/Library/Group Containers/UBF8T346G9.Office/BSABS_2006-HE10_INVESTOR_REPORTS/"
    filename = "Credit Ratings After May 2013.xlsx"
    path = directory + filename
    class_count = 30
    book = xw.Book(path)
    try:
        sheet = book.sheets['Sheet1']
        # Read the whole table once (index column plus one column per class)
        # instead of re-reading a growing range for every class.
        wide = sheet[:83, :class_count + 1].options(pd.DataFrame).value
    finally:
        # The workbook is only needed for this read; close it rather than leaving it open.
        book.close()

    report_dates = pd.to_datetime(wide.index)
    data_list = []
    for i in range(class_count):
        class_name = wide.columns[i]
        # Copy so the new columns are added to an independent frame, not a slice of `wide`.
        ratings = wide.iloc[:, [i]].copy()
        ratings.columns = ['Moody\'s Current']
        ratings['Class'] = class_name
        # Fill in the CUSIP, Moody's Original, S&P Original and S&P Current columns
        # to prepare for concatenation with the pre-June-2013 data
        class_history = df_pre_june_2013.xs(class_name, level=1)
        ratings['CUSIP'] = class_history['CUSIP'].iloc[0]
        ratings['Moody\'s Original'] = class_history['Moody\'s Original'].iloc[0]
        ratings['S&P Original'] = class_history['S&P Original'].iloc[0]
        ratings['S&P Current'] = None

        ratings = ratings.set_index(report_dates)
        ratings = ratings.set_index('Class', append=True)
        ratings = ratings[['CUSIP', 'Moody\'s Original', 'S&P Original', 'Moody\'s Current', 'S&P Current']]
        data_list.append(ratings)
    all_securities = pd.concat(data_list)
    all_securities = all_securities.sort_index(level=0)
    return all_securities

In [6]:
# Extract the old-layout reports first: the resulting frame is passed to
# extract_post_may_2013, which looks up each class's CUSIP and original ratings in it.
data_old_format = extract_old_layout("2007 Through May 2013")
data_new_format = extract_post_may_2013(data_old_format)

In [7]:
# Concatenate the old-layout and post-May-2013 extracts into a single DataFrame
credit_ratings = pd.concat([data_old_format,data_new_format])
# Clean up the columns so that the Moody's and S&P Current columns contain ALL ratings info
# I haven't yet found a way to get the S&P ratings data for after May 2013

# Where no current Moody's rating is recorded, fall back to the original rating.
moodys_missing = credit_ratings['Moody\'s Current'].isna()
credit_ratings['Moody\'s Current'] = credit_ratings['Moody\'s Current'].mask(
    moodys_missing, credit_ratings['Moody\'s Original'])

# Same fallback for S&P, but only for report dates before 2013-06-25; later rows
# keep their missing S&P value because no S&P data is available for them.
report_dates = credit_ratings.index.get_level_values(0)
sp_missing = credit_ratings['S&P Current'].isna() & (report_dates < pd.to_datetime('2013-06-25'))
credit_ratings['S&P Current'] = credit_ratings['S&P Current'].mask(
    sp_missing, credit_ratings['S&P Original'])

# Only the filled-in "current" columns are kept, under shorter names.
credit_ratings = credit_ratings.drop(columns=['Moody\'s Original','S&P Original'])
credit_ratings = credit_ratings.rename(columns={'Moody\'s Current':'Moody\'s','S&P Current':'S&P'})

In [8]:
# Display the combined frame as the cell output (the rendered table below is a truncated preview).
credit_ratings

Unnamed: 0_level_0,Unnamed: 1_level_0,CUSIP,Moody's,S&P
Date,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-01-25,I-A-1,07389RAA4,Aaa,AAA
2007-01-25,I-A-2,07389RAB2,Aaa,AAA
2007-01-25,I-A-3,07389RAC0,Aaa,AAA
2007-01-25,I-M-1,07389RAD8,Aaa,AA+
2007-01-25,I-M-2,07389RAE6,Aa1,AA
...,...,...,...,...
2020-03-25,II-M-6,07389RAY2,C,
2020-03-25,II-M-7,07389RAZ9,C,
2020-03-25,II-M-8,07389RBA3,C,
2020-03-25,II-M-9,07389RBB1,C,


In [9]:
# Persist the final credit_ratings frame to a pickle file so it can be loaded elsewhere
# without re-reading the Excel reports.
pickle_path = '../../data/mbs_data_pickled/credit_ratings.pkl'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(credit_ratings, pickle_file)