In [41]:
from bs4 import BeautifulSoup
import re
import unicodedata
import pandas as pd
import numpy as np
import html5lib
import requests
from openpyxl import Workbook
from datetime import datetime

## parses and trims HTML content by removing all attributes from HTML tags and removing line break tags (<br>) from the content

In [42]:
def parse_and_trim(content, content_type):
    if content_type == 'HTML':
        soup = BeautifulSoup(content, 'html.parser')
    else:
        soup = BeautifulSoup(content, 'html.parser')
    for tag in soup.recursiveChildGenerator():
        try:
            tag.attrs = None
        except AttributeError:
            pass
    for linebreak in soup.find_all('br'):
        linebreak.extract()
    return soup

## remove_multiple_spaces to replace multiple spaces with a single space in a string, and find_qrt_date to extract and format a quarterly date from text content.

In [43]:
def remove_multiple_spaces(string):
    pattern = r'\s+'
    replaced_string = re.sub(pattern, ' ', string)
    return replaced_string


def find_qrt_date(content):
    qtr_date = content.find_all(text=re.compile(
        r'for\s+(the\s+)?(fiscal\s+)?year\s+ended\s+|for\s+the\s+quarter\s+ended\s+|for\s+the\s+quarterly\s+period\s+ended\s+', re.IGNORECASE))
    qtr_match = re.search(
        r'([A-Za-z]+)\s+(\d{1,2}),\s+(\d{4})', qtr_date[0].replace('\n', ''))
    if qtr_match is None:
        qtr_match = qtr_match = re.search(
            r'([A-Za-z]+) (\d{1,2}), (\d{4})', qtr_date[1])
    return remove_multiple_spaces(str(qtr_match.group()))

In [45]:
headers = {
    'User-Agent': 'ARES CAPITAL CORP'
}
filing_links = pd.read_excel(
    "../ARCC__sec_filing_links.xlsx")
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
0,10-Q,Quarterly report [Sections 13 or 15(d)],2010-11-04,2010-09-30,https://www.sec.gov/Archives/edgar/data/128775...
1,10-K,"Annual report [Section 13 and 15(d), not S-K I...",2011-03-01,2010-12-31,https://www.sec.gov/Archives/edgar/data/128775...
2,10-Q,Quarterly report [Sections 13 or 15(d)],2011-05-03,2011-03-31,https://www.sec.gov/Archives/edgar/data/128775...
3,10-Q,Quarterly report [Sections 13 or 15(d)],2011-08-04,2011-06-30,https://www.sec.gov/Archives/edgar/data/128775...
4,10-Q,Quarterly report [Sections 13 or 15(d)],2011-11-08,2011-09-30,https://www.sec.gov/Archives/edgar/data/128775...


In [32]:
# drops all the amendment filing
filing_links = filing_links.drop(filing_links[filing_links['Form description'].str.contains(
    'amendment', case=False)].index).reset_index(drop=True)
filing_links['Reporting date'] = pd.to_datetime(filing_links['Reporting date'])
filing_links = filing_links[filing_links['Reporting date'] >= '2013-03-31']
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
10,10-Q,Quarterly report [Sections 13 or 15(d)],2013-05-07,2013-03-31,https://www.sec.gov/Archives/edgar/data/128775...
11,10-Q,Quarterly report [Sections 13 or 15(d)],2013-08-06,2013-06-30,https://www.sec.gov/Archives/edgar/data/128775...
12,10-Q,Quarterly report [Sections 13 or 15(d)],2013-11-05,2013-09-30,https://www.sec.gov/Archives/edgar/data/128775...
13,10-K,"Annual report [Section 13 and 15(d), not S-K I...",2014-02-26,2013-12-31,https://www.sec.gov/Archives/edgar/data/128775...
14,10-Q,Quarterly report [Sections 13 or 15(d)],2014-05-06,2014-03-31,https://www.sec.gov/Archives/edgar/data/128775...


In [33]:
date_columns = ['Filing date', 'Reporting date']
for col in date_columns:
    filing_links[col] = pd.to_datetime(filing_links[col], format='%Y-%m-%d')
for col in date_columns:
    filing_links[col] = filing_links[col].dt.strftime("%B %d, %Y")
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
10,10-Q,Quarterly report [Sections 13 or 15(d)],"May 07, 2013","March 31, 2013",https://www.sec.gov/Archives/edgar/data/128775...
11,10-Q,Quarterly report [Sections 13 or 15(d)],"August 06, 2013","June 30, 2013",https://www.sec.gov/Archives/edgar/data/128775...
12,10-Q,Quarterly report [Sections 13 or 15(d)],"November 05, 2013","September 30, 2013",https://www.sec.gov/Archives/edgar/data/128775...
13,10-K,"Annual report [Section 13 and 15(d), not S-K I...","February 26, 2014","December 31, 2013",https://www.sec.gov/Archives/edgar/data/128775...
14,10-Q,Quarterly report [Sections 13 or 15(d)],"May 06, 2014","March 31, 2014",https://www.sec.gov/Archives/edgar/data/128775...


In [34]:
filing_links.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 10 to 51
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Form type         42 non-null     object
 1   Form description  42 non-null     object
 2   Filing date       42 non-null     object
 3   Reporting date    42 non-null     object
 4   Filings URL       42 non-null     object
dtypes: object(5)
memory usage: 2.0+ KB


In [35]:
def extract_tables_manual(soup_content, qtr_date):
    date_regex_pattern1 = r'([A-Za-z]+\s+\d{1,2},\s+\d{4})'
    date_regex_pattern2 = r'\bAs\s+of\s+([A-Za-z]+\s+\d{1,2},\s+\d{4})\b'
    master_table = None
    for tag in soup_content.find_all(text=re.compile(date_regex_pattern2)):
        date_str = re.search(date_regex_pattern1, tag.text)
        find_next = tag.find_next().text
        next_line = tag.next.text
        if re.search('dollar amounts', find_next) or re.search('dollar amounts', next_line):
            # print(date_str.group(1))
            if date_str is not None:
                date_str = str(date_str.group(1))
                date_str = unicodedata.normalize('NFKD', date_str)
            if qtr_date.replace(',', '').strip().lower() in date_str.replace(',', '').strip().lower():
                html_table = tag.find_next('table')
                while html_table:
                    new_table = pd.read_html(
                        html_table.prettify(), skiprows=0, flavor='bs4')[0]
                    new_table = new_table.applymap(lambda x: unicodedata.normalize(
                        'NFKD', x.strip().strip(u'\u200b').replace('—', '-')) if type(x) == str else x)
                    new_table = new_table.replace(
                        r'^\s*$', np.nan, regex=True).replace(r'^\s*\$\s*$', np.nan, regex=True)
                    new_table = new_table.dropna(how='all', axis=0)

                    if master_table is None:
                        master_table = new_table
                    else:
                        master_table = pd.concat(
                            [master_table, new_table], ignore_index=True)

                    if date_str.replace(',', '').strip().lower() in 'December 31, 2013'.replace(',', '').strip().lower() or date_str.replace(',', '').strip().lower() in 'December 31, 2014'.replace(',', '').strip().lower():
                        if html_table.find(text=re.compile(r'Food and Beverage', re.IGNORECASE)):
                            break
                    if date_str.replace(',', '').strip().lower() in 'December 31, 2015'.replace(',', '').strip().lower() or date_str.replace(',', '').strip().lower() in 'December 31, 2016'.replace(',', '').strip().lower():
                        if html_table.find(text=re.compile(r'Computers and Electronics', re.IGNORECASE)):
                            break
                    html_table = html_table.find_next('table')

    master_table = master_table.applymap(
        lambda x: x.strip().strip(u'\u200b') if type(x) == str else x)
    master_table = master_table.replace(r'^\s*$', np.nan, regex=True).replace(
        r'^\s*\$\s*$', np.nan, regex=True).replace(r'^\s*\)\s*$', np.nan, regex=True)
    print(master_table.shape)
    return master_table

In [36]:
def process_table(soi_table_df, append_str):
    soi_table_df = soi_table_df.replace(r'^\s*\$\s*$', np.nan, regex=True)
    soi_table_df = soi_table_df.dropna(how='all', axis=1)
    soi_table_df = soi_table_df.dropna(
        how='all', axis=0).reset_index(drop=True)
    # print('1: ' + str(soi_table_df.shape))

    # Separate header and data
    soi_table_header = soi_table_df.iloc[0].dropna(how='any')
    soi_table_data_df = soi_table_df.rename(
        columns=soi_table_df.iloc[0]).drop(soi_table_df.index[0])
    # print('2: ' + str(soi_table_data_df.shape))

    # drops all the rows that contains header
    soi_table_data_df = soi_table_data_df[soi_table_data_df[soi_table_data_df.columns[0]]
                                          != soi_table_data_df.columns[0]]

    # print('3: ' + str(soi_table_data_df.shape))

# keeps the Industry row
    soi_table_data_df['get_Industry'] = None

    for index, row in soi_table_data_df.iterrows():
        if row.count() == 1:
            soi_table_data_df.loc[index+1, 'get_Industry'] = row.iloc[0]
            soi_table_data_df = soi_table_data_df.drop(index)

    soi_table_data_df.insert(0, 'Industry', soi_table_data_df['get_Industry'])
    # Drop rows with only two non-null values becuase all the subtotal contain 2 value only
    soi_table_data_df = soi_table_data_df.dropna(thresh=3)
    soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)

    # replace all the - in the data table with 0
    soi_table_data_df = soi_table_data_df.replace('-', 0, regex=False)

    # fix the all the nan value column , Amortized Cost, Fair Value
    columns_to_fill = ['Amortized Cost', 'Fair Value']
    for col in columns_to_fill:
        col_index = soi_table_data_df.columns.str.replace(
            ' ', '').get_loc(col.replace(' ', ''))
        next_col_index = col_index + 1
        for i in range(len(soi_table_data_df)):
            current_value = soi_table_data_df.iat[i, col_index]
            if pd.isna(current_value) and next_col_index < len(soi_table_data_df.columns):
                next_valid_index = next((j for j, v in enumerate(
                    soi_table_data_df.iloc[i, next_col_index:], start=next_col_index) if pd.notna(v)), None)

                if next_valid_index is not None:
                    next_value = soi_table_data_df.iat[i, next_valid_index]
                    soi_table_data_df.iat[i, col_index] = next_value
                    soi_table_data_df.iat[i, next_valid_index] = pd.NA

    # drops everything after FairValue
    if 'FairValue' in soi_table_data_df.columns.str.replace(' ', ''):
        start_index = soi_table_data_df.columns.str.replace(
            ' ', '').get_loc('FairValue')
        soi_table_data_df = soi_table_data_df.iloc[:, :start_index+1]

    # Drop rows with only two non-null values this one recheacks

    # Forward fill the first two columns
    col_indices = [0, 1, 2]
    soi_table_data_df.iloc[:, col_indices] = soi_table_data_df.iloc[:, col_indices].fillna(
        method='ffill')
    # print('7: ' + str(soi_table_data_df.shape))

    # Drop rows with all missing values
    soi_table_df = soi_table_df.dropna(how='all', axis=1)
    # print('5: ' + str(soi_table_data_df.shape))

    # Drop columns with all missing values
    soi_table_data_df = soi_table_data_df.dropna(how='all', axis=1)
    # print('6: ' + str(soi_table_data_df.shape))

    cols_to_convert = ['Shares/Units', 'Principal',
                       'Amortized Cost', 'Fair Value']
    for col in cols_to_convert:
        if col.replace(' ', '') in soi_table_data_df.columns.str.replace(' ', ''):
            col_index = soi_table_data_df.columns.str.replace(
                ' ', '').get_loc(col.replace(' ', ''))
            converted_data = pd.to_numeric(
                soi_table_data_df.iloc[:, col_index], errors='coerce').fillna(0)
            soi_table_data_df[soi_table_data_df.columns[col_index]
                              ] = converted_data

    soi_table_data_df = soi_table_data_df.reset_index(drop=True)

    # print('Final: ' + str(soi_table_data_df.shape))

    return soi_table_data_df

In [37]:
count = 0
manual = ['December 31, 2013', 'December 31, 2014',
          'December 31, 2015', 'December 31, 2016']
path = '../ARCC_Investment.xlsx'
writer = pd.ExcelWriter(path, engine='openpyxl')
for qtr_date, html_link in zip(filing_links['Reporting date'], filing_links['Filings URL']):
    print(qtr_date)
    print('Starting file # ', count)
    response = requests.get(html_link, headers=headers)
    content = parse_and_trim(response.content, 'HTML')
    print('Getting content done for # ', count)
    if qtr_date in manual:
        master_table = extract_tables_manual(content, qtr_date)
    else:
        master_table = extract_tables(content, qtr_date)
    print("Done creating master_table # ", count)
    processed_table_ = process_table(
        master_table, qtr_date.replace(',', ''))
    processed_table_.to_excel(
        writer, sheet_name=qtr_date.replace(',', ''), index=False)
    processed_table_.to_csv(
        '../csv_file/'+qtr_date.replace(',', '')+'.csv')
    print("Done processed_table # ", count)
    count += 1
    writer.save()
writer.close()

March 31, 2013
Starting file #  0
Getting content done for #  0
(506, 18)
Done creating master_table #  0
Done processed_table #  0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


June 30, 2013
Starting file #  1
Getting content done for #  1
(536, 18)
Done creating master_table #  1
Done processed_table #  1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


September 30, 2013
Starting file #  2
Getting content done for #  2
(536, 18)
Done creating master_table #  2
Done processed_table #  2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


December 31, 2013
Starting file #  3
Getting content done for #  3
(571, 20)
Done creating master_table #  3


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  3
March 31, 2014
Starting file #  4
Getting content done for #  4
(596, 18)
Done creating master_table #  4
Done processed_table #  4


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


June 30, 2014
Starting file #  5
Getting content done for #  5
(589, 18)
Done creating master_table #  5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  5
September 30, 2014
Starting file #  6
Getting content done for #  6
(595, 18)
Done creating master_table #  6


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  6
December 31, 2014
Starting file #  7
Getting content done for #  7
(614, 20)
Done creating master_table #  7


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  7
March 31, 2015
Starting file #  8
Getting content done for #  8
(586, 18)
Done creating master_table #  8


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  8
June 30, 2015
Starting file #  9
Getting content done for #  9
(598, 18)
Done creating master_table #  9


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  9
September 30, 2015
Starting file #  10
Getting content done for #  10
(617, 18)
Done creating master_table #  10


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  10
December 31, 2015
Starting file #  11
Getting content done for #  11
(647, 20)
Done creating master_table #  11


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  11
March 31, 2016
Starting file #  12
Getting content done for #  12
(670, 18)
Done creating master_table #  12


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  12
June 30, 2016
Starting file #  13
Getting content done for #  13
(674, 18)
Done creating master_table #  13
Done processed_table #  13


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


September 30, 2016
Starting file #  14
Getting content done for #  14
(671, 18)
Done creating master_table #  14
Done processed_table #  14


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


December 31, 2016
Starting file #  15
Getting content done for #  15
(653, 20)
Done creating master_table #  15


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  15
March 31, 2017
Starting file #  16
Getting content done for #  16
(905, 20)
Done creating master_table #  16


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  16
June 30, 2017
Starting file #  17
Getting content done for #  17
(912, 20)
Done creating master_table #  17


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  17
September 30, 2017
Starting file #  18
Getting content done for #  18
(1007, 21)
Done creating master_table #  18


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  18
December 31, 2017
Starting file #  19
Getting content done for #  19
(1028, 20)
Done creating master_table #  19


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  19
March 31, 2018
Starting file #  20
Getting content done for #  20
(1059, 20)
Done creating master_table #  20


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  20
June 30, 2018
Starting file #  21
Getting content done for #  21
(1029, 20)
Done creating master_table #  21


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  21
September 30, 2018
Starting file #  22
Getting content done for #  22
(1033, 20)
Done creating master_table #  22


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  22
December 31, 2018
Starting file #  23
Getting content done for #  23
(1062, 20)
Done creating master_table #  23


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  23
March 31, 2019
Starting file #  24
Getting content done for #  24
(1125, 22)
Done creating master_table #  24


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  24
June 30, 2019
Starting file #  25
Getting content done for #  25
(1103, 22)
Done creating master_table #  25


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  25
September 30, 2019
Starting file #  26
Getting content done for #  26
(1180, 20)
Done creating master_table #  26


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  26
December 31, 2019
Starting file #  27
Getting content done for #  27
(1009, 48)
Done creating master_table #  27


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  27
March 31, 2020
Starting file #  28
Getting content done for #  28
(1112, 20)
Done creating master_table #  28


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  28
June 30, 2020
Starting file #  29
Getting content done for #  29
(1037, 20)
Done creating master_table #  29


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  29
September 30, 2020
Starting file #  30
Getting content done for #  30
(1029, 21)
Done creating master_table #  30


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  30
December 31, 2020
Starting file #  31
Getting content done for #  31
(1075, 45)
Done creating master_table #  31


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  31
March 31, 2021
Starting file #  32
Getting content done for #  32
(1078, 48)
Done creating master_table #  32


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  32
June 30, 2021
Starting file #  33
Getting content done for #  33
(1072, 48)
Done creating master_table #  33


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  33
September 30, 2021
Starting file #  34
Getting content done for #  34
(1089, 48)
Done creating master_table #  34


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  34
December 31, 2021
Starting file #  35
Getting content done for #  35
(1164, 45)
Done creating master_table #  35


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  35
March 31, 2022
Starting file #  36
Getting content done for #  36
(1219, 45)
Done creating master_table #  36


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  36
June 30, 2022
Starting file #  37
Getting content done for #  37
(1326, 45)
Done creating master_table #  37
Done processed_table #  37


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


September 30, 2022
Starting file #  38
Getting content done for #  38
(1353, 66)
Done creating master_table #  38


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  38
December 31, 2022
Starting file #  39
Getting content done for #  39
(1389, 66)
Done creating master_table #  39


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  39
March 31, 2023
Starting file #  40
Getting content done for #  40
(1405, 66)
Done creating master_table #  40


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  40
June 30, 2023
Starting file #  41
Getting content done for #  41
(1561, 66)
Done creating master_table #  41


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Done processed_table #  41


# NOW RUN THE `ARCC/Code/analyze_excel_file.ipynb`

# Testing
### to test one file at a time
#### you can use the index of the link to run

In [38]:

url = filing_links.iloc[-1]['Filings URL']
date = filing_links.iloc[-1]['Reporting date']
url, date
response = requests.get(url, headers=headers)
content = parse_and_trim(response.content, 'HTML')
master_table = extract_tables(content, date)
process_table_ = process_table(master_table, "")
#process_table_.to_excel("example.xlsx")
#process_table_.to_csv('example.csv')
process_table_

(1561, 66)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  soi_table_data_df['Industry'].fillna(method='ffill', inplace=True)


Unnamed: 0,Industry,Company (1),Business Description,Investment,Coupon (3),Reference (7),Spread (3),Acquisition Date,Maturity Date,Shares/Units,Principal,NaN,Amortized Cost,Fair Value
0,Software and Services,"2U, Inc.",Provider of course design and learning managem...,First lien senior secured loan,11.32 %,SOFR (M),6.50 %,01/2023,12/2026,0.0,0.0,4.7,4.4,4.5
1,Software and Services,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,First lien senior secured loan,10.20 %,SOFR (A),5.50 %,02/2020,06/2028,0.0,63.0,,63.0,61.8
2,Software and Services,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,First lien senior secured loan,10.39 %,SOFR (A),5.50 %,06/2022,06/2028,0.0,120.0,,118.0,117.6
3,Software and Services,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,Senior subordinated loan,15.06 % PIK,SOFR (Q),10.00 %,02/2020,06/2030,0.0,61.0,,61.0,59.8
4,Software and Services,"Anaplan, Inc. (15)",Provider of cloud-based connected planning pla...,First lien senior secured loan,11.60 %,SOFR (M),6.50 %,06/2022,06/2029,0.0,1.8,,1.8,1.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1180,Education,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",First lien senior secured loan,11.00 %,SOFR (Q),5.50 %,07/2017,08/2024,0.0,29.4,,29.4,29.4
1181,Education,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",First lien senior secured loan,11.00 %,SOFR (Q),5.50 %,08/2018,08/2024,0.0,1.1,,1.1,1.1
1182,Education,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",Series A preferred stock,,,,10/2014,,1272.0,0.0,,0.7,1.4
1183,Education,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",,,,,,,0.0,0.0,,50.7,51.4
