In [209]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import unicodedata
import os
import webbrowser
import html5lib
from openpyxl import workbook
from datetime import datetime
import requests

In [210]:
path = '../Master_tables_MFIC_Investment.xlsx'
# dataframes = pd.read_excel(path, sheet_name=None)

xls = pd.ExcelFile(path)
all_sheets = pd.read_excel(path, keep_default_na=False, na_values=[
                           '_'], sheet_name=None)

dataframes = {}
# Loop through each sheet and create a DataFrame in the dictionary
for sheet_name, sheet_df in all_sheets.items():
    dataframes[sheet_name.replace(' ', '_')] = sheet_df
    print(f"DataFrame name: {sheet_name} : {sheet_df.shape}")


process_tables = {}
process_tables_shape = {}
if not os.path.exists('../PT_csv_file'):
    os.makedirs('../PT_csv_file')

headers = {}


def run_process_function(dataframes, process_tables, process_tables_shape):
    path = '../process_tables_GBDC_Investment.xlsx'
    writer = pd.ExcelWriter(path=path, engine='openpyxl')
    for dataframe in dataframes:
        print(dataframe)
        processed_table = process_table_function(dataframes[dataframe])
        process_tables[dataframe] = processed_table
        process_tables_shape[dataframe] = processed_table.shape
        headers[dataframe] = processed_table.columns.values
        processed_table.to_excel(
            writer, sheet_name=dataframe.replace(',', ''), index=False)
        processed_table.to_csv(
            '../PT_csv_file/'+dataframe.replace(',', '')+'.csv')

        writer.book.save(path)
    writer.close()


def shape(count, df):
    print(f"{count} : shape : {df.shape}")
    count += 1
    return count


def dropna_col_row(df):
    df = df.dropna(how='all', axis=0).reset_index(drop=True)
    df = df.dropna(how='all', axis=1).reset_index(drop=True)
    return df


def drop_if_contain(pattern, df):
    matching_rows = df.apply(
        lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)
    df = df[~matching_rows]
    return df


column_pattern = {
    r'^Industry\s*\((\d+)\)$': 'Industry',
    r'^Industry\s+/\s+Company$': 'Company',
    r'^Industry\s*/\s*Company$': 'Company',
    r'^Investment\s+Type$': 'Investment_Type',
    r'^Interest\s+Rate\s*\((\d+)\)$': 'Interest_Rate',
    r'^Maturity\s+Date$': 'Maturity_Date',
    r'^Par[\s/]+Shares\s*\((\d+)\)$': 'Par_Shares',
    r'^Cost\s*\((\d+)\)$': 'Cost',
    r'^Fair\s+Value\s*\((\d+)\)$': 'Fair_Value',
    r'^Fair\s+Value\s+\((\d+)\)$': 'Fair_Value',
    r'^Fair\s+Value\s*\(\d+\)\(\d+\)$': 'Fair_Value',
    r'^ASC\s+820\s+Level\s*\((\d+)\)$': 'ASC_820_Level',
    r'^Par\s+Amount\*?$': 'Par Amount',
    r'^Par\s+Amount\s*\((\d+)\)$': 'Par Amount',
    r'^Par\s+Amount\*\s*\((\d+)\)$': 'Par Amount',
    r'^Par\s+Amount\(\d+\)$': 'Par Amount',
    r'^Par\s+Amount\s*\(\d+\)$': 'Par Amount',
    r'^INVESTMENTS\s+IN\s+NON-CONTROLLED/NON-AFFILIATED\s+INVESTMENTS\d+(\.\d+)?\(\d+\)$': 'Company',
    r'^Fair\s+Value\s*\(\d+\)\s*\(\d+\)$': 'Fair_Value',
    r'^Fair\s+Value\s+\(\d+\)\s+\(\d+\)$': 'Fair_Value',
    r'^Fair\s+Value\s+\(\d+\)$': 'Fair_Value',
    r'^Fair\s+Value\s*\(\d+\)$': 'Fair_Value',
    r'^Par\s*\(\d+\)$': 'Par',
    r'^Par\s*\(\d+\)\s*$': 'Par'
}


def rename_columns_with_pattern(df):
    df.columns = df.columns.to_series().replace(column_pattern, regex=True)

DataFrame name: September 30 2023 : (983, 45)
DataFrame name: June 30 2023 : (1043, 45)
DataFrame name: March 31 2023 : (986, 45)
DataFrame name: December 31 2022 : (890, 45)
DataFrame name: September 30 2022 : (950, 45)
DataFrame name: June 30 2022 : (961, 50)
DataFrame name: March 31 2022 : (915, 50)
DataFrame name: December 31 2021 : (935, 50)
DataFrame name: September 30 2021 : (982, 50)
DataFrame name: June 30 2021 : (1050, 50)
DataFrame name: March 31 2021 : (1006, 50)
DataFrame name: December 31 2020 : (1023, 50)
DataFrame name: September 30 2020 : (1059, 50)
DataFrame name: June 30 2020 : (1109, 50)
DataFrame name: March 31 2020 : (1114, 50)
DataFrame name: December 31 2019 : (872, 27)
DataFrame name: September 30 2019 : (639, 21)
DataFrame name: June 30 2019 : (582, 21)
DataFrame name: March 31 2019 : (492, 21)
DataFrame name: December 31 2018 : (422, 21)
DataFrame name: September 30 2018 : (403, 26)
DataFrame name: June 30 2018 : (369, 21)
DataFrame name: March 31 2018 : (367

In [215]:

def process_table_function(soi_table_df):
    count = 1
    count = shape(count, soi_table_df)
    soi_table_df = soi_table_df.replace(
        r'^\s*\$\s*$', '', regex=True).replace(r'\n', '', regex=True)
    soi_table_df = soi_table_df.replace('-', '0')
    soi_table_df = soi_table_df.replace('€', np.nan)
    soi_table_df = soi_table_df.replace('C$', np.nan)
    soi_table_df = soi_table_df.replace('£', np.nan)
    soi_table_df = soi_table_df.replace('CAD', np.nan)

    soi_table_df = dropna_col_row(soi_table_df)
    soi_table_df = soi_table_df.apply(
        lambda x: x.strip() if isinstance(x, str) else x)
    count = shape(count, soi_table_df)

    # drops all the extra top row
    pattern = r'Industry'
    matching_rows = soi_table_df.apply(
        lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)
    # Check if the pattern exists in the DataFrame
    if matching_rows.any():
        # Extract rows from the first occurrence onwards
        soi_table_df = soi_table_df.iloc[matching_rows.idxmax():].reset_index(
            drop=True)
    count = shape(count, soi_table_df)

    # drops all the extra bottom row
    pattern = r'Total\s+Investments'
    # Use the apply function to check if the pattern is in any column for each row
    matching_rows = soi_table_df.apply(
        lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)
    # Find the index of the first row that matches the pattern
    # Slice the DataFrame to keep only the rows up to and including the first matching row
    if soi_table_df[matching_rows].index[0] < 20:
        soi_table_df = soi_table_df.loc[:soi_table_df[matching_rows].index[1]].reset_index(
            drop=True)
    else:
        soi_table_df = soi_table_df.loc[:soi_table_df[matching_rows].index[0]].reset_index(
            drop=True)
    count = shape(count, soi_table_df)

    # drop nan col row
    soi_table_df = dropna_col_row(soi_table_df)
    count = shape(count, soi_table_df)

    # drops the sub total
    soi_table_df = soi_table_df.dropna(subset=[soi_table_df.columns[0]])
    count = shape(count, soi_table_df)

    soi_table_df = soi_table_df.replace('', np.nan)
    col_indices = [0]
    soi_table_df.iloc[:, col_indices] = soi_table_df.iloc[:, col_indices].fillna(
        method='ffill')
    count = shape(count, soi_table_df)

    for index, row in soi_table_df.iterrows():
        cleanedList = [x for x in list(row) if str(x) != 'nan']
        row = pd.Series(cleanedList)
        soi_table_df.loc[index] = row

    # Separate header and data
    soi_table_df = soi_table_df.rename(
        columns=soi_table_df.iloc[0]).drop(soi_table_df.index[0])
    # drops all the rows that contains header
    soi_table_df = soi_table_df[soi_table_df[soi_table_df.columns[0]]
                                != soi_table_df.columns[0]]

    soi_table_df = dropna_col_row(soi_table_df)
    count = shape(count, soi_table_df)

    pattern = r'(?:Spread\s*Above|cost|Percentage|Above)'
    soi_table_df = drop_if_contain(pattern, soi_table_df)
    pattern = r'^([Tt]otal)'
    soi_table_df = drop_if_contain(pattern, soi_table_df)
    count = shape(count, soi_table_df)

    if soi_table_df.columns[0].replace(" ", "") == 'Industry/Company':
        try:
            soi_table_df.insert(0, 'Industry', '')

            for index, row in soi_table_df.iterrows():
                if row.nunique() == 2:
                    soi_table_df.at[index, 'Industry'] = row.iloc[1]
            soi_table_df['Industry'] = soi_table_df['Industry'].replace(
                '', np.nan)
            col_indices = [0]
            soi_table_df.iloc[:, col_indices] = soi_table_df.iloc[:, col_indices].fillna(
                method='ffill')

        except Exception as e:
            print(f'Industry/Company can\'t be found: {e}')

    soi_table_df = soi_table_df.dropna(thresh=5)
    count = shape(count, soi_table_df)
    rename_columns_with_pattern(soi_table_df)

    headers = soi_table_df.columns
    print(headers)

    return soi_table_df


run_process_function(dataframes=dataframes, process_tables=process_tables,
                     process_tables_shape=process_tables_shape)

September_30_2023
1 : shape : (983, 45)
2 : shape : (983, 45)
3 : shape : (952, 45)
4 : shape : (633, 45)
5 : shape : (633, 45)
6 : shape : (633, 45)
7 : shape : (633, 45)
8 : shape : (615, 8)
9 : shape : (589, 8)
10 : shape : (308, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (308, 9)
June_30_2023
1 : shape : (1043, 45)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


2 : shape : (1043, 45)
3 : shape : (1011, 45)
4 : shape : (668, 45)
5 : shape : (668, 45)
6 : shape : (668, 45)
7 : shape : (668, 45)
8 : shape : (649, 8)
9 : shape : (624, 8)
10 : shape : (334, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (334, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2023
1 : shape : (986, 45)
2 : shape : (986, 45)
3 : shape : (954, 45)
4 : shape : (618, 45)
5 : shape : (618, 45)
6 : shape : (618, 45)
7 : shape : (618, 45)
8 : shape : (601, 8)
9 : shape : (575, 8)
10 : shape : (307, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (307, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2022
1 : shape : (890, 45)
2 : shape : (890, 45)
3 : shape : (860, 45)
4 : shape : (546, 45)
5 : shape : (546, 45)
6 : shape : (546, 45)
7 : shape : (546, 45)
8 : shape : (532, 8)
9 : shape : (509, 8)
10 : shape : (269, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (269, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2022
1 : shape : (950, 45)
2 : shape : (950, 45)
3 : shape : (918, 45)
4 : shape : (584, 45)
5 : shape : (584, 45)
6 : shape : (584, 45)
7 : shape : (584, 45)
8 : shape : (569, 8)
9 : shape : (542, 8)
10 : shape : (285, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (285, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2022
1 : shape : (961, 50)
2 : shape : (961, 50)
3 : shape : (929, 50)
4 : shape : (592, 50)
5 : shape : (592, 50)
6 : shape : (592, 50)
7 : shape : (592, 50)
8 : shape : (577, 9)
9 : shape : (549, 9)
10 : shape : (287, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (287, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2022
1 : shape : (915, 50)
2 : shape : (915, 50)
3 : shape : (915, 50)
4 : shape : (587, 50)
5 : shape : (587, 50)
6 : shape : (587, 50)
7 : shape : (587, 50)
8 : shape : (572, 9)
9 : shape : (545, 9)
10 : shape : (285, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (285, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2021
1 : shape : (935, 50)
2 : shape : (935, 50)
3 : shape : (935, 50)
4 : shape : (591, 50)
5 : shape : (591, 50)
6 : shape : (591, 50)
7 : shape : (591, 50)
8 : shape : (576, 9)
9 : shape : (549, 9)
10 : shape : (288, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (288, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2021
1 : shape : (982, 50)
2 : shape : (982, 50)
3 : shape : (982, 50)
4 : shape : (639, 50)
5 : shape : (639, 50)
6 : shape : (639, 50)
7 : shape : (639, 50)
8 : shape : (623, 9)
9 : shape : (596, 9)
10 : shape : (322, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (322, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2021
1 : shape : (1050, 50)
2 : shape : (1050, 50)
3 : shape : (1018, 50)
4 : shape : (692, 50)
5 : shape : (692, 50)
6 : shape : (692, 50)
7 : shape : (692, 50)
8 : shape : (673, 9)
9 : shape : (647, 9)
10 : shape : (379, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (379, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2021
1 : shape : (1006, 50)
2 : shape : (1006, 50)
3 : shape : (1006, 50)
4 : shape : (678, 50)
5 : shape : (678, 50)
6 : shape : (678, 50)
7 : shape : (678, 50)
8 : shape : (649, 9)
9 : shape : (623, 9)
10 : shape : (365, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (365, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2020
1 : shape : (1023, 50)
2 : shape : (1023, 50)
3 : shape : (1023, 50)
4 : shape : (690, 50)
5 : shape : (690, 50)
6 : shape : (690, 50)
7 : shape : (690, 50)
8 : shape : (667, 9)
9 : shape : (640, 9)
10 : shape : (370, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (370, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2020
1 : shape : (1059, 50)
2 : shape : (1059, 50)
3 : shape : (1059, 50)
4 : shape : (724, 50)
5 : shape : (724, 50)
6 : shape : (724, 50)
7 : shape : (724, 50)
8 : shape : (698, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


9 : shape : (668, 9)
10 : shape : (391, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (391, 10)
June_30_2020
1 : shape : (1109, 50)
2 : shape : (1109, 50)
3 : shape : (1077, 50)
4 : shape : (745, 50)
5 : shape : (745, 50)
6 : shape : (745, 50)
7 : shape : (745, 50)
8 : shape : (714, 9)
9 : shape : (684, 9)
10 : shape : (406, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (406, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2020
1 : shape : (1114, 50)
2 : shape : (1114, 50)
3 : shape : (1114, 50)
4 : shape : (802, 50)
5 : shape : (802, 50)
6 : shape : (802, 50)
7 : shape : (802, 50)
8 : shape : (774, 9)
9 : shape : (744, 9)
10 : shape : (456, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (456, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2019
1 : shape : (872, 27)
2 : shape : (872, 27)
3 : shape : (839, 27)
4 : shape : (740, 27)
5 : shape : (740, 27)
6 : shape : (740, 27)
7 : shape : (740, 27)
8 : shape : (688, 9)
9 : shape : (658, 9)
10 : shape : (389, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (389, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2019
1 : shape : (639, 21)
2 : shape : (639, 21)
3 : shape : (606, 21)
4 : shape : (528, 21)
5 : shape : (528, 21)
6 : shape : (528, 21)
7 : shape : (528, 21)
8 : shape : (503, 9)
9 : shape : (475, 9)
10 : shape : (345, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (345, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2019
1 : shape : (582, 21)
2 : shape : (582, 21)
3 : shape : (549, 21)
4 : shape : (478, 21)
5 : shape : (478, 21)
6 : shape : (478, 21)
7 : shape : (478, 21)
8 : shape : (449, 9)
9 : shape : (422, 9)
10 : shape : (308, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (308, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2019
1 : shape : (492, 21)
2 : shape : (492, 21)
3 : shape : (458, 21)
4 : shape : (403, 21)
5 : shape : (403, 21)
6 : shape : (403, 21)
7 : shape : (403, 21)
8 : shape : (379, 9)
9 : shape : (353, 9)
10 : shape : (254, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (254, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2018
1 : shape : (422, 21)
2 : shape : (422, 21)
3 : shape : (385, 21)
4 : shape : (349, 21)
5 : shape : (349, 21)
6 : shape : (349, 21)
7 : shape : (349, 21)
8 : shape : (334, 9)
9 : shape : (309, 9)
10 : shape : (221, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (221, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2018
1 : shape : (403, 26)
2 : shape : (403, 26)
3 : shape : (365, 26)
4 : shape : (314, 26)
5 : shape : (314, 26)
6 : shape : (314, 26)
7 : shape : (314, 26)
8 : shape : (299, 9)
9 : shape : (273, 9)
10 : shape : (195, 10)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan,
                     nan],
      dtype='object')
11 : shape : (195, 10)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2018
1 : shape : (369, 21)
2 : shape : (369, 21)
3 : shape : (331, 21)
4 : shape : (294, 21)
5 : shape : (294, 21)
6 : shape : (294, 21)
7 : shape : (294, 21)
8 : shape : (277, 8)
9 : shape : (251, 8)
10 : shape : (177, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (177, 9)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2018
1 : shape : (367, 21)
2 : shape : (367, 21)
3 : shape : (331, 21)
4 : shape : (295, 21)
5 : shape : (295, 21)
6 : shape : (295, 21)
7 : shape : (295, 21)
8 : shape : (282, 8)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


9 : shape : (257, 8)
10 : shape : (184, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (184, 9)
December_31_2017
1 : shape : (337, 21)
2 : shape : (337, 21)
3 : shape : (300, 21)
4 : shape : (264, 21)
5 : shape : (264, 21)
6 : shape : (264, 21)
7 : shape : (264, 21)
8 : shape : (254, 8)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


9 : shape : (229, 8)
10 : shape : (164, 9)
Index([       'Industry',         'Company', 'Investment_Type',
         'Interest_Rate',   'Maturity_Date',      'Par_Shares',
                  'Cost',      'Fair_Value',               nan],
      dtype='object')
11 : shape : (164, 9)
September_30_2017
1 : shape : (367, 21)
2 : shape : (367, 21)
3 : shape : (329, 21)
4 : shape : (296, 21)
5 : shape : (296, 21)
6 : shape : (296, 21)
7 : shape : (296, 21)
8 : shape : (286, 8)
9 : shape : (190, 8)
10 : shape : (160, 8)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value', 'ASC_820_Level'],
      dtype='object')
11 : shape : (160, 8)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2017
1 : shape : (307, 22)
2 : shape : (307, 22)
3 : shape : (271, 22)
4 : shape : (241, 22)
5 : shape : (241, 22)
6 : shape : (241, 22)
7 : shape : (241, 22)
8 : shape : (232, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


9 : shape : (199, 7)
10 : shape : (167, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (167, 7)
March_31_2017
1 : shape : (250, 22)
2 : shape : (250, 22)
3 : shape : (250, 22)
4 : shape : (188, 22)
5 : shape : (188, 22)
6 : shape : (188, 22)
7 : shape : (188, 22)
8 : shape : (180, 7)
9 : shape : (156, 7)
10 : shape : (139, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (139, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2016
1 : shape : (311, 21)
2 : shape : (311, 21)
3 : shape : (272, 21)
4 : shape : (222, 21)
5 : shape : (222, 21)
6 : shape : (222, 21)
7 : shape : (222, 21)
8 : shape : (214, 7)
9 : shape : (181, 7)
10 : shape : (149, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (149, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2016
1 : shape : (301, 19)
2 : shape : (301, 19)
3 : shape : (265, 19)
4 : shape : (219, 19)
5 : shape : (219, 19)
6 : shape : (219, 19)
7 : shape : (219, 19)
8 : shape : (212, 7)
9 : shape : (179, 7)
10 : shape : (147, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (147, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2016
1 : shape : (255, 19)
2 : shape : (255, 19)
3 : shape : (219, 19)
4 : shape : (211, 19)
5 : shape : (211, 19)
6 : shape : (211, 19)
7 : shape : (211, 19)
8 : shape : (204, 7)
9 : shape : (173, 7)
10 : shape : (143, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (143, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2016
1 : shape : (263, 19)
2 : shape : (263, 19)
3 : shape : (227, 19)
4 : shape : (219, 19)
5 : shape : (219, 19)
6 : shape : (219, 19)
7 : shape : (219, 19)
8 : shape : (211, 7)
9 : shape : (180, 7)
10 : shape : (150, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (150, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2015
1 : shape : (276, 19)
2 : shape : (276, 19)
3 : shape : (241, 19)
4 : shape : (206, 19)
5 : shape : (206, 19)
6 : shape : (206, 19)
7 : shape : (206, 19)
8 : shape : (198, 7)
9 : shape : (169, 7)
10 : shape : (141, 7)
Index(['Investment', 'Industry', 'Interest_Rate', 'Maturity_Date', 'Par',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (141, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2015
1 : shape : (246, 19)
2 : shape : (246, 19)
3 : shape : (245, 19)
4 : shape : (146, 19)
5 : shape : (146, 19)
6 : shape : (146, 19)
7 : shape : (146, 19)
8 : shape : (139, 7)
9 : shape : (125, 7)
10 : shape : (106, 7)
Index(['Company', 'Interest_Rate', 'Maturity_Date', 'Industry', 'Par Amount',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (106, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2015
1 : shape : (246, 19)
2 : shape : (246, 19)
3 : shape : (245, 19)
4 : shape : (149, 19)
5 : shape : (149, 19)
6 : shape : (149, 19)
7 : shape : (149, 19)
8 : shape : (142, 7)
9 : shape : (128, 7)
10 : shape : (111, 7)
Index(['Company', 'Interest Rate', 'Maturity_Date', 'Industry', 'Par Amount',
       'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (111, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2015
1 : shape : (256, 19)
2 : shape : (256, 19)
3 : shape : (255, 19)
4 : shape : (212, 19)
5 : shape : (212, 19)
6 : shape : (212, 19)
7 : shape : (212, 19)
8 : shape : (206, 7)
9 : shape : (172, 7)
10 : shape : (137, 7)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS0121.6 (10)',
       'Interest Rate', 'Maturity_Date', 'Industry', 'Par Amount', 'Cost',
       'Fair_Value'],
      dtype='object')
11 : shape : (137, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2014
1 : shape : (257, 19)
2 : shape : (257, 19)
3 : shape : (256, 19)
4 : shape : (150, 19)
5 : shape : (150, 19)
6 : shape : (150, 19)
7 : shape : (150, 19)
8 : shape : (144, 7)
9 : shape : (131, 7)
10 : shape : (115, 7)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS0128.7 (18)',
       'Interest Rate', 'Maturity_Date', 'Industry', 'Par Amount', 'Cost',
       'Fair_Value'],
      dtype='object')
11 : shape : (115, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2014
1 : shape : (260, 19)
2 : shape : (260, 19)
3 : shape : (259, 19)
4 : shape : (167, 19)
5 : shape : (167, 19)
6 : shape : (167, 19)
7 : shape : (167, 19)
8 : shape : (160, 7)
9 : shape : (147, 7)
10 : shape : (129, 7)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS0138.0 (18)',
       'Interest Rate', 'Maturity_Date', 'Industry', 'Par Amount', 'Cost',
       'Fair_Value'],
      dtype='object')
11 : shape : (129, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2014
1 : shape : (280, 19)
2 : shape : (280, 19)
3 : shape : (279, 19)
4 : shape : (169, 19)
5 : shape : (169, 19)
6 : shape : (169, 19)
7 : shape : (169, 19)
8 : shape : (163, 7)
9 : shape : (150, 7)
10 : shape : (134, 7)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS0136.8',
       'Interest Rate', 'Maturity_Date', 'Industry', 'Par Amount', 'Cost',
       'Fair_Value'],
      dtype='object')
11 : shape : (134, 7)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


March_31_2014
1 : shape : (299, 15)
2 : shape : (299, 15)
3 : shape : (265, 15)
4 : shape : (156, 15)
5 : shape : (156, 15)
6 : shape : (156, 15)
7 : shape : (156, 15)
8 : shape : (151, 5)
9 : shape : (139, 5)
10 : shape : (125, 5)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS0134.1',
       'Industry', 'Par Amount', 'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (125, 5)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


December_31_2013
1 : shape : (265, 15)
2 : shape : (265, 15)
3 : shape : (265, 15)
4 : shape : (166, 15)
5 : shape : (166, 15)
6 : shape : (166, 15)
7 : shape : (166, 15)
8 : shape : (161, 5)
9 : shape : (149, 5)
10 : shape : (137, 5)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS 0137.3',
       'Industry', 'Par Amount', 'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (137, 5)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


September_30_2013
1 : shape : (262, 15)
2 : shape : (262, 15)
3 : shape : (262, 15)
4 : shape : (157, 15)
5 : shape : (157, 15)
6 : shape : (157, 15)
7 : shape : (157, 15)
8 : shape : (156, 5)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


9 : shape : (139, 5)
10 : shape : (128, 5)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED INVESTMENTS 0136.1',
       'Industry', 'Par Amount', 'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (128, 5)
March_31_2013
1 : shape : (185, 15)
2 : shape : (185, 15)
3 : shape : (185, 15)
4 : shape : (140, 15)
5 : shape : (140, 15)
6 : shape : (140, 15)
7 : shape : (140, 15)
8 : shape : (135, 5)
9 : shape : (122, 5)
10 : shape : (110, 5)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-  AFFILIATED INVESTMENTS 0 143.9',
       'Industry', 'Par Amount', 'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (110, 5)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


June_30_2013
1 : shape : (229, 23)
2 : shape : (229, 23)
3 : shape : (229, 23)
4 : shape : (151, 23)
5 : shape : (151, 23)
6 : shape : (151, 23)
7 : shape : (151, 23)
8 : shape : (149, 5)
9 : shape : (135, 5)
10 : shape : (123, 5)
Index(['INVESTMENTS IN NON-CONTROLLED/NON-AFFILIATED  INVESTMENTS 0140.3',
       'Industry', 'Par Amount', 'Cost', 'Fair_Value'],
      dtype='object')
11 : shape : (123, 5)


  lambda row: row.str.contains(pattern, flags=re.IGNORECASE, regex=True).any(), axis=1)


In [212]:
headers

{'September_30_2023': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity_Date', 'Par_Shares', 'Cost', 'Fair_Value', nan, nan],
       dtype=object),
 'June_30_2023': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity_Date', 'Par_Shares', 'Cost', 'Fair_Value', nan, nan],
       dtype=object),
 'March_31_2023': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity_Date', 'Par_Shares', 'Cost', 'Fair_Value', nan, nan],
       dtype=object),
 'December_31_2022': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity_Date', 'Par_Shares', 'Cost', 'Fair_Value', nan, nan],
       dtype=object),
 'September_30_2022': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity_Date', 'Par_Shares', 'Cost', 'Fair_Value', nan, nan],
       dtype=object),
 'June_30_2022': array(['Industry', 'Company', 'Investment_Type', 'Interest Rate',
        'Maturity