In [323]:
from bs4 import BeautifulSoup
import re
import unicodedata
import pandas as pd
import numpy as np
import html5lib
import requests
from openpyxl import Workbook
from datetime import datetime

In [324]:
def parse_and_trim(content, content_type):
    if content_type == 'HTML':
        soup = BeautifulSoup(content, 'html.parser')
    else:
        soup = BeautifulSoup(content, 'html.parser')

    for tag in soup.recursiveChildGenerator():
        try:
            tag.attrs = None
        except AttributeError:
            pass

    for linebreak in soup.find_all('br'):
        linebreak.extract()

    return soup

In [325]:
def remove_multiple_spaces(string):
    pattern = r'\s+'
    replaced_string = re.sub(pattern, ' ', string)
    return replaced_string


def find_qrt_date(content):
    qtr_date = content.find_all(text=re.compile(
        r'for\s+(the\s+)?(fiscal\s+)?year\s+ended\s+|for\s+the\s+quarter\s+ended\s+|for\s+the\s+quarterly\s+period\s+ended\s+', re.IGNORECASE))
    qtr_match = re.search(
        r'([A-Za-z]+)\s+(\d{1,2}),\s+(\d{4})', qtr_date[0].replace('\n', ''))
    if qtr_match is None:
        qtr_match = qtr_match = re.search(
            r'([A-Za-z]+) (\d{1,2}), (\d{4})', qtr_date[1])
    return remove_multiple_spaces(str(qtr_match.group()))

In [326]:
headers = {
    'User-Agent': 'ARES CAPITAL CORP'
}
filing_links = pd.read_excel(
    "/Users/fuadhassan/Desktop/BDC_RA/ARCC/ARCC__sec_filing_links.xlsx")
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
0,10-Q,Quarterly report [Sections 13 or 15(d)],2023-07-25,2023-06-30,https://www.sec.gov/Archives/edgar/data/128775...
1,10-Q,Quarterly report [Sections 13 or 15(d)],2023-04-25,2023-03-31,https://www.sec.gov/Archives/edgar/data/128775...
2,10-K/A,"Annual report [Section 13 and 15(d), not S-K I...",2023-03-31,2022-12-31,https://www.sec.gov/Archives/edgar/data/128775...
3,10-K,"Annual report [Section 13 and 15(d), not S-K I...",2023-02-07,2022-12-31,https://www.sec.gov/Archives/edgar/data/128775...
4,10-Q,Quarterly report [Sections 13 or 15(d)],2022-10-25,2022-09-30,https://www.sec.gov/Archives/edgar/data/128775...


In [327]:
# drops all the amendment filing
filing_links = filing_links.drop(filing_links[filing_links['Form description'].str.contains(
    'amendment', case=False)].index).reset_index(drop=True)
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
0,10-Q,Quarterly report [Sections 13 or 15(d)],2023-07-25,2023-06-30,https://www.sec.gov/Archives/edgar/data/128775...
1,10-Q,Quarterly report [Sections 13 or 15(d)],2023-04-25,2023-03-31,https://www.sec.gov/Archives/edgar/data/128775...
2,10-K,"Annual report [Section 13 and 15(d), not S-K I...",2023-02-07,2022-12-31,https://www.sec.gov/Archives/edgar/data/128775...
3,10-Q,Quarterly report [Sections 13 or 15(d)],2022-10-25,2022-09-30,https://www.sec.gov/Archives/edgar/data/128775...
4,10-Q,Quarterly report [Sections 13 or 15(d)],2022-07-26,2022-06-30,https://www.sec.gov/Archives/edgar/data/128775...


In [328]:
date_columns = ['Filing date', 'Reporting date']
for col in date_columns:
    filing_links[col] = pd.to_datetime(filing_links[col], format='%Y-%m-%d')
for col in date_columns:
    filing_links[col] = filing_links[col].dt.strftime("%B %d, %Y")

In [329]:
filing_links.head()

Unnamed: 0,Form type,Form description,Filing date,Reporting date,Filings URL
0,10-Q,Quarterly report [Sections 13 or 15(d)],"July 25, 2023","June 30, 2023",https://www.sec.gov/Archives/edgar/data/128775...
1,10-Q,Quarterly report [Sections 13 or 15(d)],"April 25, 2023","March 31, 2023",https://www.sec.gov/Archives/edgar/data/128775...
2,10-K,"Annual report [Section 13 and 15(d), not S-K I...","February 07, 2023","December 31, 2022",https://www.sec.gov/Archives/edgar/data/128775...
3,10-Q,Quarterly report [Sections 13 or 15(d)],"October 25, 2022","September 30, 2022",https://www.sec.gov/Archives/edgar/data/128775...
4,10-Q,Quarterly report [Sections 13 or 15(d)],"July 26, 2022","June 30, 2022",https://www.sec.gov/Archives/edgar/data/128775...


In [330]:
filing_links.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Form type         52 non-null     object
 1   Form description  52 non-null     object
 2   Filing date       52 non-null     object
 3   Reporting date    52 non-null     object
 4   Filings URL       52 non-null     object
dtypes: object(5)
memory usage: 2.2+ KB


In [331]:
# url = filing_links.iloc[1]['Filings URL']
# date = filing_links.iloc[1]['Reporting date']
# url, date

In [332]:
# response = requests.get(url, headers=headers)
# content = parse_and_trim(response.content, 'HTML')

In [333]:
# real one on git hub


# def extract_tables(soup_content, qtr_date):
#     master_table = None
#     all_tags = soup_content.find_all(True)
#     print(type(all_tags))
#     count = 0
#     for tag in soup_content.find_all(text=re.compile('^.*CONSOLIDATED\s+SCHEDULE(S|)\s+OF\s+INVESTMENTS.*$')):
#         print('yes')
#         date_str = re.search(r'([A-Za-z]+) (\d{1,2}), (\d{4})', tag)
#         print(date_str)
#         if date_str is None:
#             next_line = tag.find_next(text=re.compile(
#                 r'([A-Za-z]+) (\d{1,2}), (\d{4})')).text
#             date_str = re.search(r'([A-Za-z]+) (\d{1,2}), (\d{4})', next_line)
#         if date_str is None:
#             next_line = tag.next.next.next.next.next.next.text
#             date_str = re.search(r'([A-Za-z]+) (\d{1,2}), (\d{4})', next_line)
#         if date_str is not None:
#             date_str = str(date_str.group())
#             date_str = unicodedata.normalize('NFKD', date_str)
#             print(date_str)
#             if qtr_date.replace(',', '').strip().lower() in date_str.replace(',', '').strip().lower():
#                 count += 1
#                 print('Table found: ')
#                 print('Table #', count)
#                 html_table = tag.find_next('table')
#                 if master_table is None:
#                     master_table = pd.read_html(
#                         html_table.prettify(), skiprows=0, flavor='bs4')[0]
#                     master_table = master_table.applymap(lambda x: unicodedata.normalize(
#                         'NFKD', x.strip().strip(u'\u200b').replace('—', '-')) if type(x) == str else x)
#                     master_table = master_table.replace(r'^\s*$', np.nan, regex=True).replace(r'^\s*\$\s*$', np.nan,
#                                                                                               regex=True)
#                     master_table = master_table.dropna(how='all', axis=0)
#                 else:
#                     new_table = pd.read_html(
#                         html_table.prettify(), skiprows=0, flavor='bs4')[0]
#                     new_table = new_table.applymap(lambda x: unicodedata.normalize(
#                         'NFKD', x.strip().strip(u'\u200b').replace('—', '-')) if type(x) == str else x)
#                     new_table = new_table.replace(r'^\s*$', np.nan, regex=True).replace(r'^\s*\$\s*$', np.nan,
#                                                                                         regex=True)
#                     new_table = new_table.dropna(how='all', axis=0)
#                     # print('head')
#                     # print(new_table.head()) # text
#                     master_table = master_table.append(
#                         new_table.dropna(how='all', axis=0).reset_index(
#                             drop=True).drop(index=0),
#                         ignore_index=True)

#     master_table = master_table.applymap(
#         lambda x: x.strip().strip(u'\u200b') if type(x) == str else x)
#     master_table = master_table.replace(r'^\s*$', np.nan, regex=True).replace(
#         r'^\s*\$\s*$', np.nan, regex=True).replace(r'^\s*\)\s*$', np.nan, regex=True)
#     return master_table

In [334]:
def extract_tables(soup_content, qtr_date):
    master_table = None
    all_tags = soup_content.find_all(True)
    count = 0
    for tag in content.find_all(text=re.compile('^\s*.*\s*CONSOLIDATED\s+SCHEDULE(S|)\s+OF\s+INVESTMENTS\s*.*\s*$')):
        next_line_text = tag.next.text.strip()
        regex_pattern = r'([A-Za-z]+\s+\d{1,2},\s+\d{4})'
        date_str = re.search(regex_pattern, next_line_text)
        if date_str is None:
            next_line_text = tag.find_next(text=re.compile(regex_pattern)).text
            date_str = re.search(regex_pattern, next_line_text)
        if date_str is None:
            next_line = tag.next.text.strip()
            date_str = re.search(
                regex_pattern, next_line)
        if date_str is not None:
            date_str = str(date_str.group(1))
            date_str = unicodedata.normalize('NFKD', date_str)
            if qtr_date.replace(',', '').strip().lower() in date_str.replace(',', '').strip().lower():
                count += 1
                # print('Table #', count)
                html_table = tag.find_next('table')

                new_table = pd.read_html(
                    html_table.prettify(), skiprows=0, flavor='bs4')[0]
                new_table = new_table.applymap(lambda x: unicodedata.normalize(
                    'NFKD', x.strip().strip(u'\u200b').replace('—', '-')) if type(x) == str else x)
                new_table = new_table.replace(
                    r'^\s*$', np.nan, regex=True).replace(r'^\s*\$\s*$', np.nan, regex=True)
                new_table = new_table.dropna(how='all', axis=0)

                if master_table is None:
                    master_table = new_table
                else:
                    master_table = pd.concat(
                        [master_table, new_table], ignore_index=True)

    master_table = master_table.applymap(
        lambda x: x.strip().strip(u'\u200b') if type(x) == str else x)
    master_table = master_table.replace(r'^\s*$', np.nan, regex=True).replace(
        r'^\s*\$\s*$', np.nan, regex=True).replace(r'^\s*\)\s*$', np.nan, regex=True)
    return master_table

In [335]:
def process_table(soi_table_df, append_str):
    soi_table_df = soi_table_df.replace(r'^\s*\$\s*$', np.nan, regex=True)
    soi_table_df = soi_table_df.dropna(how='all', axis=1)
    soi_table_df = soi_table_df.dropna(
        how='all', axis=0).reset_index(drop=True)
    # print('1: ' + str(soi_table_df.shape))

    # Separate header and data
    soi_table_header = soi_table_df.iloc[0].dropna(how='any')
    soi_table_data_df = soi_table_df.rename(
        columns=soi_table_df.iloc[0]).drop(soi_table_df.index[0])
    # print('2: ' + str(soi_table_data_df.shape))

    soi_table_data_df = soi_table_data_df[soi_table_data_df[soi_table_data_df.columns[0]]
                                          != soi_table_data_df.columns[0]]
    # print('2: ' + str(soi_table_data_df.shape))

    # Drop rows with only two non-null values
    soi_table_data_df = soi_table_data_df.dropna(thresh=3)

    # print('4: ' + str(soi_table_data_df.shape))

    soi_table_data_df = soi_table_data_df.replace('-', 0, regex=False)

    # original_column_names = soi_table_data_df.columns.tolist()
    # soi_table_data_df.columns = soi_table_data_df.columns.str.replace(
    #     ' ', '')
    columns_to_fill = ['Amortized Cost', 'Fair Value']
    for col in columns_to_fill:
        col_index = soi_table_data_df.columns.str.replace(
            ' ', '').get_loc(col.replace(' ', ''))
        next_col_index = col_index + 1
        for i in range(len(soi_table_data_df)):
            current_value = soi_table_data_df.iat[i, col_index]
            if pd.isna(current_value) and next_col_index < len(soi_table_data_df.columns):
                next_valid_index = next((j for j, v in enumerate(
                    soi_table_data_df.iloc[i, next_col_index:], start=next_col_index) if pd.notna(v)), None)

                if next_valid_index is not None:
                    next_value = soi_table_data_df.iat[i, next_valid_index]
                    soi_table_data_df.iat[i, col_index] = next_value
                    soi_table_data_df.iat[i, next_valid_index] = pd.NA
    # soi_table_data_df.columns = original_column_names

    # drops everything after % of Net Assets
    # if 'FairValue' in soi_table_data_df.columns.str.replace(' ', ''):
    #     start_index = soi_table_data_df.columns.str.replace(
    #         ' ', '').get_loc('FairValue')
    #     soi_table_data_df = soi_table_data_df.iloc[:, :start_index+1]
    if 'FairValue' in soi_table_data_df.columns.str.replace(' ', ''):
        start_index = soi_table_data_df.columns.str.replace(
            ' ', '').get_loc('FairValue')
        soi_table_data_df = soi_table_data_df.iloc[:, :start_index+1]

    # Drop rows with only two non-null values
    soi_table_data_df = soi_table_data_df.dropna(thresh=3)

    # Drop rows labeled as subtotals
    # subtotal_rows = soi_table_data_df[soi_table_data_df['Company (1)'].str.replace(' ', '').str.contains(
    #     'subtotal', case=False, na=False)]
    # soi_table_data_df = soi_table_data_df[~soi_table_data_df.index.isin(
    #     subtotal_rows.index)]
    # print('3: ' + str(soi_table_data_df.shape))

    # # Drop rows based on regex pattern (e.g., 'subtotal' or 'total')
    # pattern = r'^([Ss]ubtotal)|([Tt]otal)'
    # mask = soi_table_data_df.apply(lambda row: row.astype(
    #     str).str.contains(pattern, case=False, na=False)).any(axis=1)
    # soi_table_data_df = soi_table_data_df[~mask]
    # # print('4: ' + str(soi_table_data_df.shape))

    # Drop rows with all missing values
    soi_table_df = soi_table_df.dropna(how='all')
    # print('5: ' + str(soi_table_data_df.shape))

    # # # Drop columns with all missing values
    soi_table_data_df = soi_table_data_df.dropna(how='all', axis=1)
    # # print('6: ' + str(soi_table_data_df.shape))

    # Forward fill the first two columns
    col_indices = [0, 1]
    soi_table_data_df.iloc[:, col_indices] = soi_table_data_df.iloc[:, col_indices].fillna(
        method='ffill')
    # print('7: ' + str(soi_table_data_df.shape))

    soi_table_data_df = soi_table_data_df.fillna(0)
    # soi_table_data_df = soi_table_data_df.dropna(how='all', axis=1)

    cols_to_convert = ['Shares/Units', 'Principal',
                       'Amortized Cost', 'Fair Value']
    for col in cols_to_convert:
        if col.replace(' ', '') in soi_table_data_df.columns.str.replace(' ', ''):
            col_index = soi_table_data_df.columns.str.replace(
                ' ', '').get_loc(col.replace(' ', ''))
            # soi_table_data_df.iloc[:, col_index] = soi_table_data_df.iloc[:, col_index].str.replace(
            #     '$', '')
            soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
                soi_table_data_df.iloc[:, col_index], errors='coerce').fillna(0)

    # columns_to_drop = []
    # for column in soi_table_data_df.columns:
    #     # Check for NaN values in the column
    #     # Use .item() to get a single boolean value
    #     if soi_table_data_df[column].isna().any().item():
    #         columns_to_drop.append(column)

    # # soi_table_data_df.drop(columns=columns_to_drop, inplace=True)

    # # soi_table_data_df = soi_table_data_df.replace('-', 0, regex=False)
    # soi_table_data_df.to_excel('test.xlsx')

    # print('8: ' + str(soi_table_data_df.shape))

    return soi_table_data_df

In [336]:
url = filing_links.iloc[0]['Filings URL']
date = filing_links.iloc[0]['Reporting date']
url, date
response = requests.get(url, headers=headers)
content = parse_and_trim(response.content, 'HTML')

In [337]:

master_table = extract_tables(content, date)
process_table_ = process_table(master_table, "")
process_table_.to_excel("ex.xlsx")
process_table_.to_csv('ex.csv')
process_table_

  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(


Unnamed: 0,Company (1),Business Description,Investment,Coupon (3),Reference (7),Spread (3),Acquisition Date,Maturity Date,Shares/Units,Principal,NaN,Amortized Cost,Fair Value
2,"2U, Inc.",Provider of course design and learning managem...,First lien senior secured loan,11.32 %,SOFR (M),6.50 %,01/2023,12/2026,0.0,0.0,4.7,4.4,4.5
3,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,First lien senior secured loan,10.20 %,SOFR (A),5.50 %,02/2020,06/2028,0.0,63.0,0.0,63.0,61.8
4,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,First lien senior secured loan,10.39 %,SOFR (A),5.50 %,06/2022,06/2028,0.0,120.0,0.0,118.0,117.6
5,"AffiniPay Midco, LLC and AffiniPay Intermediat...",Payment processing solution provider,Senior subordinated loan,15.06 % PIK,SOFR (Q),10.00 %,02/2020,06/2030,0.0,61.0,0.0,61.0,59.8
7,"Anaplan, Inc. (15)",Provider of cloud-based connected planning pla...,First lien senior secured loan,11.60 %,SOFR (M),6.50 %,06/2022,06/2029,0.0,1.8,0.0,1.8,1.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1554,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",First lien senior secured revolving loan,10.72 %,SOFR (Q),5.50 %,08/2018,08/2024,0.0,3.7,0.0,3.7,3.7
1555,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",First lien senior secured loan,11.00 %,SOFR (Q),5.50 %,07/2017,08/2024,0.0,29.4,0.0,29.4,29.4
1556,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",First lien senior secured loan,11.00 %,SOFR (Q),5.50 %,08/2018,08/2024,0.0,1.1,0.0,1.1,1.1
1557,"Flinn Scientific, Inc. and WCI-Quantum Holding...","Distributor of instructional products, service...",Series A preferred stock,0,0,0,10/2014,0,1272.0,0.0,0.0,0.7,1.4


In [338]:
# filing_links = filing_links.iloc[10]

In [339]:
count = 0
path = '/Users/fuadhassan/Desktop/BDC_RA/ARCC/ARCC_Investment.xlsx'
writer = pd.ExcelWriter(path, engine='openpyxl')

for qtr_date, html_link in zip(filing_links['Reporting date'], filing_links['Filings URL']):
    print('start')
    response = requests.get(html_link, headers=headers)
    content = parse_and_trim(response.content, 'HTML')
    print('content DONE')
    master_table = extract_tables(content, qtr_date)
    print(count, "master_table DONE")
    processed_table_ = process_table(
        master_table, qtr_date.replace(',', ''))
    # print(processed_table_)
    processed_table_.to_excel(
        writer, sheet_name=qtr_date.replace(',', ''), index=False)
    processed_table_.to_csv('csv_file/'+qtr_date.replace(',', '')+'.csv')
    print(count, "processed_table_ DONE")
    count += 1
    writer.save()
# writer.save()
writer.close()

start
content DONE
0 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


0 processed_table_ DONE
start
content DONE
1 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(


1 processed_table_ DONE


  writer.save()


start
content DONE
2 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


2 processed_table_ DONE
start
content DONE
3 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


3 processed_table_ DONE
start
content DONE
4 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


4 processed_table_ DONE
start
content DONE
5 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


5 processed_table_ DONE
start
content DONE
6 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


6 processed_table_ DONE
start
content DONE
7 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


7 processed_table_ DONE
start
content DONE
8 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


8 processed_table_ DONE
start
content DONE
9 master_table DONE
9 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
10 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


10 processed_table_ DONE
start
content DONE
11 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(


11 processed_table_ DONE


  writer.save()


start
content DONE
12 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


12 processed_table_ DONE
start
content DONE
13 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


13 processed_table_ DONE
start
content DONE
14 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


14 processed_table_ DONE
start
content DONE
15 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


15 processed_table_ DONE
start
content DONE
16 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


16 processed_table_ DONE
start
content DONE
17 master_table DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


17 processed_table_ DONE
start
content DONE
18 master_table DONE
18 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
19 master_table DONE
19 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
20 master_table DONE
20 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
21 master_table DONE
21 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
22 master_table DONE
22 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
23 master_table DONE
23 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
24 master_table DONE
24 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
25 master_table DONE
25 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
26 master_table DONE
26 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
27 master_table DONE
27 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
28 master_table DONE
28 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
29 master_table DONE
29 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
30 master_table DONE
30 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
31 master_table DONE
31 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
32 master_table DONE
32 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
33 master_table DONE
33 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
34 master_table DONE
34 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
35 master_table DONE
35 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
36 master_table DONE
36 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
37 master_table DONE
37 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
38 master_table DONE
38 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
39 master_table DONE
39 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
40 master_table DONE
40 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
41 master_table DONE
41 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
42 master_table DONE
42 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
43 master_table DONE
43 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
44 master_table DONE
44 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
45 master_table DONE
45 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
46 master_table DONE
46 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
47 master_table DONE
47 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
48 master_table DONE
48 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
49 master_table DONE
49 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
50 master_table DONE
50 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


start
content DONE
51 master_table DONE
51 processed_table_ DONE


  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  soi_table_data_df.iloc[:, col_index] = pd.to_numeric(
  writer.save()


In [340]:
# # url = filing_links.iloc[0]['Filings URL']
# # date = filing_links.iloc[0]['Reporting date']
# # url, date
# # response = requests.get(url, headers=headers)
# # content = parse_and_trim(response.content, 'HTML')
# master_table = extract_tables(content, date)
# process_table_ = process_table(master_table, "")
# process_table_.to_excel("ex.xlsx")
# process_table_.to_csv('ex.csv')
# process_table_

In [341]:
print('done')

done
