In [1]:
import pandas as pd
import numpy as np
import glob
import re
import traceback
pd.set_option('display.max_columns', 500)

# Summary

<i>The overall objective of this program is to map raw data extracted from PDFs to different financial categories such as Tangible Assets, Intangible Assets, Current Liabilities, etc. In essence, the program reads each raw file in a given directory and produces a mapped file of each company's balance sheet.</i>

<ol>
    <li>Read each file and filter for balance sheet tab</li>
    <li>Filter each file by section and find and map items to their respective lists via regular expression</li>
    <li>Return a mapped DataFrame/object for each section</li>
    <li>Combine each mapped section into a single DataFrame</li>
    <li>Modify master dataframe by only showing columns that have not been restated and removing old columns that have been restated.</li>
    <li>Produce Total Assets and Liabilities rows in the master dataframe</li>
</ol>

In [2]:
def generate_company_list(pattern="C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares/*.xlsx"):
    """Return the unique company names encoded in the workbook file names.

    Parameters
    ----------
    pattern : str, optional
        Glob pattern selecting the workbooks to scan. The default is the
        location that used to be hard-coded; pass another pattern to scan a
        different folder.

    Returns
    -------
    list of str
        The part of each file name before the first underscore, stripped of
        surrounding whitespace, without duplicates, in the order ``glob``
        returns the files.
    """
    # Either "<letters>_<digits>" or a multi-word name such as "Elmica Parent_202012".
    name_re = re.compile(r"[A-Za-z]+\_\d+|[A-Za-z]+\s+[\&A-Za-z\s\_\d]+")
    companies = []
    for path in glob.glob(pattern):
        # Match against the file name only, so a directory component that looks
        # like "<name>_<digits>" cannot be mistaken for a company.
        file_name = re.split(r"[\\/]", path)[-1]
        match_obj = name_re.search(file_name)
        if match_obj is None:
            continue
        company = match_obj.group().split('_')[0].strip()
        # Strip before de-duplicating so "Avetta" and "Avetta " collapse into one entry.
        if company and company not in companies:
            companies.append(company)
    return companies

In [461]:
# Smoke test: list the unique company names parsed from the workbook file names.
generate_company_list()

['Avetta',
 'Elmica Parent',
 'Flinn Scientific',
 'FMSystems Group',
 'Novipax Buyer']

In [3]:
# Picks the tab(s) of a workbook that hold the requested financial statement.
def filter_sheets(sheet_list, statement_type):
    """Select the sheet name(s) belonging to one statement type.

    Sheets are recognised by their tab name: ``"IS - <digit>"``,
    ``"BS - <digit>"`` or ``"CF - <digit>"``.

    Parameters
    ----------
    sheet_list : iterable of str
        Sheet names of one workbook.
    statement_type : str
        ``'Income Statement'``, ``'Balance Sheet'`` or ``'Cash Flow'``.

    Returns
    -------
    str, list of str, or None
        * ``[]`` when no sheet matches.
        * The first matching sheet name for income statement and cash flow.
        * For the balance sheet: the single matching name, or the full list
          of names when more than one tab matches.
        * ``None`` for an unrecognised ``statement_type``.
    """
    tab_prefixes = {
        'Income Statement': 'IS',
        'Balance Sheet': 'BS',
        'Cash Flow': 'CF',
    }
    prefix = tab_prefixes.get(statement_type)
    if prefix is None:
        return None

    # Raw string: "\s", "\-" and "\d" are regex escapes, not Python string escapes.
    tab_re = re.compile(prefix + r'\s+\-\s+\d{1}')
    matches = [sheet for sheet in sheet_list if tab_re.search(sheet)]
    if not matches:
        return []
    # Only the balance sheet may span several tabs; callers then get every tab.
    if statement_type == 'Balance Sheet' and len(matches) > 1:
        return matches
    return matches[0]

In [4]:
# Creates a date array for each file to be used in the mapped file
def create_date_period_array(file, sheet_name):
    """Read the period-end dates and period types recorded for one sheet.

    The workbook's ``'Metadata'`` tab is expected to hold one row per sheet
    (column ``'Sheet name'``) with columns named
    ``'Value column <digit> date'`` and
    ``'Value column <digit> period coverage'``.

    Parameters
    ----------
    file : str
        Path of the workbook.
    sheet_name : str
        Sheet whose metadata row is wanted, e.g. ``"BS - 1"``.

    Returns
    -------
    tuple of (list, list)
        ``(dates, period_types)`` taken from the first metadata row for
        ``sheet_name``; columns containing a missing value are skipped.

    Raises
    ------
    IndexError
        If the metadata tab has no row for ``sheet_name``.
    """
    metadata = pd.read_excel(file, sheet_name='Metadata')
    sheet_rows = metadata.loc[metadata['Sheet name'] == sheet_name]
    if sheet_rows.empty:
        # Same exception type the bare ``.values[0]`` lookup used to raise, with context.
        raise IndexError(
            "No 'Metadata' row found for sheet {!r} in {!r}".format(sheet_name, file)
        )

    period_columns = sheet_rows.filter(
        regex=r"Value\s+column\s+\d{1}\s+period\s+coverage", axis=1
    ).dropna(axis=1)
    date_columns = sheet_rows.filter(
        regex=r"Value\s+column\s+\d{1}\s+date", axis=1
    ).dropna(axis=1)
    return list(date_columns.values[0]), list(period_columns.values[0])

In [354]:
# Build sample date / period-type labels from the metadata of the "BS - 1" tab of one
# workbook; `test_dates` is reused by the test cells further down.
#test_df = pd.read_excel("C:\\Users\\curt.beck\\Downloads\\Cognaize\\Avetta_202012.xlsx", sheet_name="BS - 1")
test_dates, test_period_type = create_date_period_array("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\FMSystems Group_202012.xlsx", "BS - 1")

In [19]:
# Inspect the date labels returned by create_date_period_array.
test_dates

['2018-12-31', '2017-12-31']

Each template section will be generated by a function, which returns a DataFrame.

In [358]:
#Filters raw file for the current asset section of the balance sheet.
def filter_current_asset_section(df):
    """Slice the current-asset line items out of a raw balance-sheet frame.

    The section starts on the row after a "Current assets" header (or, when
    the sheet has no such header, on the first row containing "Cash") and
    ends just before the "Total current assets" row.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet; the first column holds the line-item labels and the
        remaining columns hold the values. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the section rows, indexed by the label column.

    Raises
    ------
    IndexError
        If neither a start marker nor a "Total current assets" row is found.
    """
    labels = df.iloc[:, 0]

    def first_row(pattern):
        # Position of the first label matching `pattern`, or None. Working with
        # positions (rather than looking the captured text up again) also handles
        # labels that merely contain the marker, e.g. "Current assets (Note 2)".
        hits = np.flatnonzero(labels.str.contains(pattern, na=False).to_numpy(dtype=bool))
        return int(hits[0]) if hits.size else None

    header_pos = first_row(r'^[Cc]urrent\s+[Aa]ssets\:?|^[C]URRENT\s+ASSETS\:?')
    if header_pos is not None:
        start_index = header_pos + 1  # skip the header row itself
    else:
        start_index = first_row(r'Cash.*')
        if start_index is None:
            raise IndexError("No 'Current assets' header or 'Cash' row found")

    # The upper-case spelling is accepted here as well, matching the header pattern.
    end_index = first_row(r'[Tt]otal\s+[Cc]urrent\s+[Aa]ssets|TOTAL\s+CURRENT\s+ASSETS')
    if end_index is None:
        raise IndexError("No 'Total current assets' row found")

    # set_index returns a new frame, so the caller's df keeps its label column.
    return df.set_index(df.columns[0]).iloc[start_index:end_index].copy()

In [359]:
# Sample run: load one balance-sheet tab and slice out its current-asset rows.
df = pd.read_excel("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\FMSystems Group_202012.xlsx", sheet_name="BS - 1")
filter_current_asset_section(df)

Unnamed: 0_level_0,2020,2019
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1
Cash and cash equivalents,6257667.0,3280352.0
"Accounts receivable, net",12653695.0,14988141.0
Inventory,3117933.0,4134867.0
Prepaid expenses and other current assets,1969792.0,803194.0


In [266]:
def filter_non_current_asset_section(df):
    """Slice the non-current-asset line items out of a raw balance-sheet frame.

    Two sheet layouts are handled:

    1. Current assets first: the section is everything after
       "Total current assets" up to the first total row ("Total assets",
       "Total long-term ...", "Total other assets").
    2. Non-current assets first: used when layout 1 yields no rows. The
       section runs from the row after a "Non-current assets" header (or
       from the first "Prop..." row when there is no header) up to
       "Total non-current assets".

    A row whose label equals the first "Total prop..." match is removed first.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet; first column = labels, other columns = values. The frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        Section rows indexed by label; rows with a missing value are dropped.

    Raises
    ------
    IndexError
        If a required marker row cannot be found.
    """
    label_col = df.columns[0]

    # Drop the property subtotal so it is not counted next to its components.
    total_prop_hits = df.iloc[:, 0].str.extract(r'([Tt]otal\s+[Pp]rop.*)').dropna()
    if not total_prop_hits.empty:
        df = df[df[label_col] != total_prop_hits.values[0][0]]

    def first_row(labels, pattern, exclude=None):
        # Position of the first label matching `pattern` (and not `exclude`), or None.
        mask = labels.str.contains(pattern, na=False).to_numpy(dtype=bool)
        if exclude is not None:
            mask &= ~labels.str.contains(exclude, na=False).to_numpy(dtype=bool)
        hits = np.flatnonzero(mask)
        return int(hits[0]) if hits.size else None

    def require(position, description):
        # Keep the IndexError the original raised when a marker was missing.
        if position is None:
            raise IndexError("No row matching {} found".format(description))
        return position

    labels = df.iloc[:, 0]
    start_index = require(
        first_row(labels, r'.*?[Tt]otal\s+[Cc]urrent\s+[Aa]ssets|TOTAL\s+CURRENT\s+ASSETS'),
        "'Total current assets'",
    ) + 1
    end_index = require(
        first_row(labels, r'[Tt]otal\s+[Aa]ssets|[Tt]otal\s+[lL]ong\-term.*|TOTAL\s+ASSETS|Total\s+[Oo]ther\s+[Aa]ssets'),
        "'Total assets'",
    )
    filtered_df = df.set_index(label_col).iloc[start_index:end_index].dropna()
    if not filtered_df.empty:
        return filtered_df

    # Layout 2: nothing sits between "Total current assets" and "Total assets".
    fallback_df = df.copy()
    fallback_df[label_col] = fallback_df[label_col].astype(str)
    str_labels = fallback_df[label_col]
    total_pattern = r'[Tt]otal\s+[nN]on\-[cC]urrent\s+[Aa]ssets'

    # A "Total Non-Current Assets" row also contains the header text; never treat it as the header.
    header_pos = first_row(str_labels, r'Non\-[Cc]urrent\s+[Aa]ssets', exclude=total_pattern)
    if header_pos is not None:
        start_index = header_pos + 1
    else:
        start_index = require(first_row(str_labels, r'[Pp]rop.*'), "'Prop...'")
    end_index = require(first_row(str_labels, total_pattern), "'Total non-current assets'")
    return fallback_df.set_index(label_col).iloc[start_index:end_index].dropna()

In [267]:
# Sample run: slice the non-current-asset rows out of one balance-sheet tab.
df = pd.read_excel("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Hurtigruten_202103.xlsx", sheet_name="BS - 2")
#df.head()
filter_non_current_asset_section(df)


Unnamed: 0_level_0,31.3.,31.3..1,31.12.
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Property, plant and equipment including right of use asset",5516,3211,5689
Intangible assets,1657,3520,1998
Loans to group companies,67186,12218,49960
Investment in subsidiaries,556287,552477,556287
Investment in other companies,279,324,279


In [7]:
# When several line items map to one category, add up their values for one period
# so the total can be assigned to that category.
def sum_multiple_rows(rows, df, counter):
    """Sum one value column over the rows whose label is listed in ``rows``.

    Parameters
    ----------
    rows : numpy.ndarray
        Labels to include (any shape; it is flattened).
    df : pandas.DataFrame
        Frame whose first column holds the labels.
    counter : int
        Zero-based period index; the values are read from column
        ``counter + 1`` (column 0 being the labels).

    Returns
    -------
    The sum of the selected values, or ``0`` when no label matches.
    """
    wanted_labels = list(rows.flatten())
    label_column = df[df.columns[0]]
    selected = df[label_column.isin(wanted_labels)]
    total = 0
    for value in selected.iloc[:, counter + 1].values:
        total = total + value
    return total

In [362]:
# Returns the mapped section of the current asset portion of the balance sheet
def current_assets(file, sheet_name, date_array):
    """Map the current-asset rows of one balance-sheet tab to template categories.

    Parameters
    ----------
    file : str
        Path of the workbook.
    sheet_name : str
        Balance-sheet tab to read.
    date_array : list
        One label per value column; used as the output columns.

    Returns
    -------
    pandas.DataFrame
        Rows 'Cash and Cash Equivalents', 'Inventories',
        'Trade and Other Receivables', 'Other Current Assets' and
        'Total Current Assets', one column per entry of ``date_array``.

    Notes
    -----
    * Cash and inventory use only the first matching label.
    * Receivables: several matches are summed; a single match is looked up
      directly.
    * Everything not claimed by a category above is summed into
      'Other Current Assets'.
    * A category without a match is 0, except cash, which is NaN (the value
      the previous implementation produced by leaving its row short).
    """
    cash_pattern = r'(Cash.*|.*?[eE]scrow.*|.*?[lL]iquid.*|.*?[Ss]ecur.*)'
    inventory_pattern = r'(Inven.*)'
    receivable_pattern = r'(Accounts\s+[rR]ec.*|.*[Rr]ece.*)'

    # The sheet is the same for every period, so read and slice it once.
    raw_df = pd.read_excel(file, sheet_name=sheet_name)
    raw_df[raw_df.columns[0]] = raw_df[raw_df.columns[0]].str.strip()
    section = filter_current_asset_section(raw_df).reset_index()
    labels = section.iloc[:, 0]
    by_label = section.set_index(section.columns[0])
    periods = range(len(date_array))

    def captured(pattern):
        # Captured text of every label matching `pattern`, as an (n_hits, 1) array.
        return labels.str.extract(pattern).dropna().values

    mapped_labels = []  # labels already assigned to a category

    cash_hits = captured(cash_pattern)
    if len(cash_hits) > 0:
        cash_label = cash_hits[0][0]
        mapped_labels.append(cash_label)
        cash_row = [by_label.loc[cash_label].values[i] for i in periods]
    else:
        cash_row = [np.nan for _ in periods]

    inventory_hits = captured(inventory_pattern)
    if len(inventory_hits) > 0:
        inventory_label = inventory_hits[0][0]
        mapped_labels.append(inventory_label)
        inventory_row = [by_label.loc[inventory_label].values[i] for i in periods]
    else:
        inventory_row = [0 for _ in periods]

    receivable_hits = captured(receivable_pattern)
    if len(receivable_hits) > 1:
        mapped_labels.extend(receivable_hits.flatten())
        receivable_row = [sum_multiple_rows(receivable_hits, section, i) for i in periods]
    elif len(receivable_hits) == 1:
        receivable_label = receivable_hits[0][0]
        mapped_labels.append(receivable_label)
        receivable_row = [by_label.loc[receivable_label].values[i] for i in periods]
    else:
        receivable_row = [0 for _ in periods]

    # Whatever no category claimed ends up in "other"; column 0 holds the labels.
    other_df = section[~labels.isin(mapped_labels)]
    other_row = [sum(other_df.iloc[:, i + 1].values) for i in periods]

    data = [cash_row, inventory_row, receivable_row, other_row]
    final_df = pd.DataFrame(data=data, columns=date_array, index=['Cash and Cash Equivalents', 'Inventories', 'Trade and Other Receivables', 'Other Current Assets'])
    final_df.loc['Total Current Assets'] = final_df.sum()
    return final_df
        

        
    
        
        
        

In [361]:
# Sample run: map the current-asset section of one workbook, labelling the columns with test_dates.
current_assets("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\FMSystems Group_202012.xlsx", "BS - 1", test_dates)

Unnamed: 0,2020-12-31,2019-12-31
Cash and Cash Equivalents,6257667.0,3280352.0
Inventories,3117933.0,4134867.0
Trade and Other Receivables,12653695.0,14988141.0
Other Current Assets,1969792.0,803194.0
Total Current Assets,23999087.0,23206554.0


In [363]:
# Returns a mapped section of the non current asset portion of the balance sheet
def non_current_assets(file, sheet_name, date_array):
    """Map the non-current-asset rows of one balance-sheet tab to template categories.

    Parameters
    ----------
    file : str
        Path of the workbook.
    sheet_name : str
        Balance-sheet tab to read.
    date_array : list
        One label per value column; used as the output columns.

    Returns
    -------
    pandas.DataFrame
        Rows 'Intangible Assets', 'Tangible Assets', 'Other Long-Term Assets'
        and 'Total Non-Current Assets', one column per entry of ``date_array``.

    Notes
    -----
    * Intangibles: several matches are summed, a single match is looked up
      directly, no match gives 0.
    * Tangibles: all matching rows are summed; no match gives NaN (the value
      the previous implementation produced by leaving the row short). The
      tangible value is always a scalar now; the old single-row branch
      stored a one-element list in the cell.
    * Rows claimed by neither category are summed into 'Other Long-Term Assets'.
    """
    intangible_pattern = r'(Intangible.*|[Oo]perating\s+lease.*|[Ss]oftware.*|.*?[tT]rademark.*|.*[Rr]ight\s+of\s+use.*|Goodwill.*)'
    tangible_pattern = r'(.*?Property.*|Capital\s+[Ll]ease.*|Less\s+[Aa]cc.*|.*?[eE]quipment.*|.*?[pP]lant.*|.*?PPE.*|.*?[Pp]{2}e.*|.*?[Ff]urniture.*|.*?[bB]uilding.*|.*?[lL]and.*)'

    # The sheet is the same for every period, so read and slice it once.
    raw_df = pd.read_excel(file, sheet_name=sheet_name)
    raw_df[raw_df.columns[0]] = raw_df[raw_df.columns[0]].str.strip()
    section = filter_non_current_asset_section(raw_df).reset_index()
    labels = section.iloc[:, 0]
    by_label = section.set_index(section.columns[0])
    periods = range(len(date_array))

    def captured(pattern):
        # Captured text of every label matching `pattern`, as an (n_hits, 1) array.
        return labels.str.extract(pattern).dropna().values

    mapped_labels = []  # labels already assigned to a category

    intangible_hits = captured(intangible_pattern)
    if len(intangible_hits) > 1:
        mapped_labels.extend(intangible_hits.flatten())
        intangible_row = [sum_multiple_rows(intangible_hits, section, i) for i in periods]
    elif len(intangible_hits) == 1:
        intangible_label = intangible_hits[0][0]
        mapped_labels.append(intangible_label)
        intangible_row = [by_label.loc[intangible_label].values[i] for i in periods]
    else:
        intangible_row = [0 for _ in periods]

    # NOTE(review): a label can match both patterns (e.g. "Property, plant and
    # equipment including right of use asset") and is then counted in both
    # categories, as before - confirm which category should own such rows.
    tangible_hits = captured(tangible_pattern)
    if len(tangible_hits) > 0:
        mapped_labels.extend(tangible_hits.flatten())
        tangible_row = [sum_multiple_rows(tangible_hits, section, i) for i in periods]
    else:
        tangible_row = [np.nan for _ in periods]

    # Whatever no category claimed ends up in "other"; column 0 holds the labels.
    other_df = section[~labels.isin(mapped_labels)]
    other_row = [sum(list(other_df.iloc[:, i + 1].values)) for i in periods]

    data = [intangible_row, tangible_row, other_row]
    final_df = pd.DataFrame(data=data, columns=date_array, index=['Intangible Assets', 'Tangible Assets', 'Other Long-Term Assets'])
    final_df.loc['Total Non-Current Assets'] = final_df.sum()
    return final_df
        

In [364]:
# Sample run on a Hurtigruten tab.
# NOTE(review): test_dates is built from the FMSystems workbook in an earlier cell, so the
# column labels shown here may not describe this sheet's periods - confirm.
non_current_assets("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Hurtigruten_201812.xlsx", "BS - 3", test_dates)


Unnamed: 0,2020-12-31,2019-12-31
Intangible Assets,2653228.0,2664560.0
Tangible Assets,4528117.0,4301649.0
Other Long-Term Assets,1383642.0,280131.0
Total Non-Current Assets,8564987.0,7246340.0


In [225]:
# Filters the file for the current liability section of the balance sheet
def filter_current_liabilities_section(df):
    """Slice the current-liability line items out of a raw balance-sheet frame.

    Duplicate rows are dropped first. The section is everything between
    "Total assets" and "Total current liabilities". If that range contains an
    "...equity" label (equity and non-current liabilities listed before the
    current liabilities), the section instead starts after
    "Total non-current liabilities".

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet; first column = labels, other columns = values. The frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        Section rows indexed by label; rows with a missing value are dropped.

    Raises
    ------
    IndexError
        If a required marker row cannot be found.
    """
    deduped = df.drop_duplicates()
    labels = deduped.iloc[:, 0]

    def first_row(pattern, description):
        # Position (within the de-duplicated frame) of the first matching label.
        hits = np.flatnonzero(labels.str.contains(pattern, na=False).to_numpy(dtype=bool))
        if not hits.size:
            raise IndexError("No row matching {} found".format(description))
        return int(hits[0])

    start_index = first_row(r'[Tt]otal\s+[Aa]ssets|TOTAL\s+ASSETS', "'Total assets'")
    end_index = first_row(r'[Tt]otal\s+[Cc]urrent\s+[Ll]iabilities', "'Total current liabilities'")

    indexed = deduped.set_index(deduped.columns[0])
    filtered_df = indexed.iloc[start_index + 1:end_index].dropna()

    section_labels = filtered_df.reset_index().iloc[:, 0]
    if section_labels.str.contains(r'[Ee]quity', na=False).any():
        # Every position above refers to the de-duplicated frame, so the fallback must
        # slice that same frame; slicing the original would be off by the number of
        # duplicate rows removed.
        start_index = first_row(
            r'[Tt]otal\s+[Nn]on\-[Cc]urrent\s+[Ll]iabilities', "'Total non-current liabilities'"
        )
        filtered_df = indexed.iloc[start_index + 1:end_index].dropna()
    return filtered_df

In [291]:
# Sample run: slice the current-liability rows out of one balance-sheet tab.
test_df = pd.read_excel("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Avetta_202112.xlsx", sheet_name='BS - 1')
filter_current_liabilities_section(test_df)

Unnamed: 0_level_0,ASSETS.1,ASSETS.2
ASSETS,Unnamed: 1_level_1,Unnamed: 2_level_1
LIABILITIES AND PARTNERS’ EQUITY,LIABILITIES AND PARTNERS’ EQUITY,LIABILITIES AND PARTNERS’ EQUITY
Accounts payable,7582,2087
Accrued expenses,25374,16610
"Deferred revenue contract liabilities, current",82889,63553
Taxes payable,1032,849
Current portion of long-term debt,3610,2750


In [272]:
# filters the file for non current liability section of the balance sheet
def filter_non_current_liabilities_section(df):
    """Slice the non-current-liability line items out of a raw balance-sheet frame.

    Two sheet layouts are handled:

    1. Current liabilities first: the section is everything after
       "Total current liabilities" up to the first "Total liabilities"-style
       row; rows with a missing value are dropped.
    2. Non-current liabilities first: used when layout 1 yields no rows. The
       section runs from the row after a "Non-current liabilities" header
       (or after "Total equity" when there is no header) up to
       "Total non-current liabilities". Rows with missing values are kept in
       this layout, as before.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet; first column = labels, other columns = values. The frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        Section rows indexed by label.

    Raises
    ------
    IndexError
        If a required marker row cannot be found.
    """
    label_col = df.columns[0]
    labels = df.iloc[:, 0]

    def first_row(pattern):
        # Position of the first label matching `pattern`, or None.
        hits = np.flatnonzero(labels.str.contains(pattern, na=False).to_numpy(dtype=bool))
        return int(hits[0]) if hits.size else None

    def require(position, description):
        # Keep the IndexError the original raised when a marker was missing.
        if position is None:
            raise IndexError("No row matching {} found".format(description))
        return position

    start_index = require(
        first_row(r'[Tt]otal\s+[Cc]urrent\s+[Ll]iabilities'), "'Total current liabilities'"
    ) + 1
    end_index = require(
        first_row(r'[Tt]otal\s+[Ll]iabilities|Total\s+Long\s+Term\s+Liabilities|[Tt]otal\s+[Ll]iab[a-z]+|TOTAL\s+LIAB[A-Z]+|[Tt]otal\s+non\-[Cc]urrent\s+[Ll]iab[a-z]+'),
        "'Total liabilities'",
    )
    filtered_df = df.iloc[start_index:end_index].dropna().set_index(label_col)
    if not filtered_df.empty:
        return filtered_df

    # Layout 2: the non-current block sits above the current liabilities.
    header_pos = first_row(r'^[Nn]on\-[Cc]urrent\s+[Ll]iab[a-z]+|^[Nn]on\s+[Cc]urrent\s+[Ll]iab[a-z]+')
    if header_pos is not None:
        start_index = header_pos + 1
    else:
        start_index = require(first_row(r'[Tt]otal\s+[Ee]quity'), "'Total equity'") + 1
    end_index = require(
        first_row(r'[Tt]otal\s+[Nn]on\-[Cc]urrent\s+[Ll]iab[a-z]+'), "'Total non-current liabilities'"
    )
    # set_index returns a new frame, so the caller's df keeps its label column.
    return df.set_index(label_col).iloc[start_index:end_index]

In [274]:
# Sample run: slice the non-current-liability rows out of one balance-sheet tab.
df = pd.read_excel("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Hurtigruten_201812.xlsx", sheet_name="BS - 3")
filter_non_current_liabilities_section(df)

Unnamed: 0_level_0,31.12. 2018,31.12. 2017
(NOK 1 000),Unnamed: 1_level_1,Unnamed: 2_level_1
Borrowings,6879320.0,486556.0
Prepaid travels with departure dates beyond one year,103555.0,133328.0
Derivative financial instruments,57351.0,0.0
Other non-current liabilities,125095.0,118822.0


In [365]:
# Returns a mapped section of the current liabilities portion of the balance sheet
def current_liabilities(file, sheet_name, date_array):
    """Map the current-liability rows of one balance-sheet tab to template categories.

    Parameters
    ----------
    file : str
        Path of the workbook.
    sheet_name : str
        Balance-sheet tab to read.
    date_array : list
        One label per value column; used as the output columns.

    Returns
    -------
    pandas.DataFrame
        Rows 'Trade and Other Payables', 'Current Tax Liabilities',
        'Short-Term Debt', 'Other Current Liabilities' and
        'Total Current Liabilities', one column per entry of ``date_array``.

    Notes
    -----
    * Payables and short-term debt: several matches are summed, a single
      match is looked up directly, no match gives 0.
    * Tax uses the first label starting with "Tax..."; no match gives NaN
      (the value the previous implementation produced by leaving the row
      short).
    * Rows no category claimed are summed into 'Other Current Liabilities',
      which is forced to 0 when there are none or when the first value
      column of the leftover rows has dtype ``object``.
    """
    payables_pattern = r'([Aa]ccounts\s+[Pp]ay.*|[pP]ayroll.*|[Cc]ommis.*|[Vv]acation.*|[Ff]reight.*|.*[Dd]eferred.*|.*[Cc]apital\s+[Ll]eas.*|Accrued.*|.*[Tt]ax|.*?Trade.*|.*?[Ee]mployee.*)'
    debt_pattern = r'(Current.*[Dd]ebt|.*?[Ll]ine\s+of.*|[Cc]urrent\s+[Mm]aturities.*|Interest.*[dD]ebt|Borrowings.*|.*?[Ii]nterest.*[Bb]earing.*)'
    tax_pattern = r'(^[Tt]axe?s?.*)'

    # The sheet is the same for every period, so read and slice it once.
    raw_df = pd.read_excel(file, sheet_name=sheet_name)
    raw_df[raw_df.columns[0]] = raw_df[raw_df.columns[0]].str.strip()
    section = filter_current_liabilities_section(raw_df).reset_index()
    labels = section.iloc[:, 0]
    by_label = section.set_index(section.columns[0])
    periods = range(len(date_array))

    mapped_labels = []  # captured label text already assigned to a category

    def captured(pattern):
        # Captured text of every label matching `pattern`, as an (n_hits, 1) array.
        return labels.str.extract(pattern).dropna().values

    def map_category(pattern):
        # One value per period for a category that may span several rows.
        hits = captured(pattern)
        if len(hits) > 1:
            mapped_labels.extend(hits.flatten())
            return [sum_multiple_rows(hits, section, i) for i in periods]
        if len(hits) == 1:
            label = hits[0][0]
            mapped_labels.append(label)
            return [by_label.loc[label].values[i] for i in periods]
        return [0 for _ in periods]

    # Order matters only for mapped_labels; the categories are evaluated independently.
    payables_row = map_category(payables_pattern)
    debt_row = map_category(debt_pattern)

    tax_hits = captured(tax_pattern)
    if len(tax_hits) > 0:
        tax_label = tax_hits[0][0]
        mapped_labels.append(tax_label)
        tax_row = [by_label.loc[tax_label].values[i] for i in periods]
    else:
        tax_row = [np.nan for _ in periods]

    # Whatever no category claimed ends up in "other"; column 0 holds the labels.
    other_df = section[~labels.isin(mapped_labels)]
    if other_df.empty or other_df[other_df.columns[1]].dtype == 'object':
        other_row = [0 for _ in periods]
    else:
        other_row = [sum(list(other_df.iloc[:, i + 1].values)) for i in periods]

    data = [payables_row, tax_row, debt_row, other_row]
    final_df = pd.DataFrame(data=data, columns=date_array, index=['Trade and Other Payables', 'Current Tax Liabilities', 'Short-Term Debt', 'Other Current Liabilities'])
    final_df.loc['Total Current Liabilities'] = final_df.sum()
    return final_df
        


In [248]:
# Inspect the date labels that the next cell passes as column headers.
test_dates

['2018-12-31', '2017-12-31']

In [366]:
# Sample run on a Flinn Scientific tab.
# NOTE(review): test_dates is built from the FMSystems workbook in an earlier cell, so the
# column labels shown here may not describe this sheet's periods - confirm.
current_liabilities("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Flinn Scientific_202107.xlsx", "BS - 1", test_dates)

Unnamed: 0,2020-12-31,2019-12-31
Trade and Other Payables,8359.0,8607.0
Current Tax Liabilities,,
Short-Term Debt,7869.0,11302.0
Other Current Liabilities,16.0,-51.0
Total Current Liabilities,16244.0,19858.0


In [338]:
# Returns the mapped non-current liabilities section of the balance sheet
def non_current_liabilities(file, sheet_name, date_array):
    """Map the non-current liabilities section of one balance sheet tab.

    Rows of the section whose label matches the long-term-debt pattern are
    rolled into 'Long-Term Debt'; every remaining row of the section is summed
    into 'Other Long-Term Liabilities'. 'Provisions' is not mapped yet and is
    emitted as a row of NaN.

    Parameters
    ----------
    file : workbook path, passed unchanged to ``pd.read_excel``.
    sheet_name : sheet selector, passed unchanged to ``pd.read_excel``.
    date_array : column labels of the result; entry ``i`` labels the values
        taken from value column ``i`` of the section.

    Returns
    -------
    pd.DataFrame indexed by 'Long-Term Debt', 'Provisions',
    'Other Long-Term Liabilities' and 'Total Non-Current Liabilities', with one
    column per entry of ``date_array``.

    Raises
    ------
    IndexError
        If no label in the section matches the long-term-debt pattern.
    """
    long_debt_pattern = r'([Tt]erm.*|[Ll]ong.*|[Cc]ap.*|[Nn]ote.*|Revolving.*|Delayed.*|Operating\s+[lL]ease.*|Borrowings?.*|Interest.*[Dd]ebt|.*?[Ii]nterest.*[Bb]earing.*)'

    n_periods = len(date_array)
    long_debt_list_final = []
    other_long_debt_final = []
    non_curr_liab_labels = []

    if n_periods:
        # Read and clean the sheet once; it does not depend on the period.
        raw_df = pd.read_excel(file, sheet_name=sheet_name)
        raw_df[raw_df.columns[0]] = raw_df[raw_df.columns[0]].str.strip()

    for i in range(n_periods):
        # Each period works on its own copy, so nothing the helpers below do to
        # the frame can leak into the next period.
        # NOTE(review): filter_non_current_liabilities_section is defined elsewhere
        # in this notebook; the code below assumes that after reset_index() the
        # labels sit in column 0 and period i sits in column i + 1 -- confirm.
        filtered_df = filter_non_current_liabilities_section(raw_df.copy())
        filtered_df.reset_index(inplace=True)

        matches = filtered_df.iloc[:, 0].str.extract(long_debt_pattern).dropna().values

        if len(matches) > 1:
            matched_labels = list(matches.flatten())
            # NOTE(review): sum_multiple_rows is defined elsewhere; presumably it
            # totals the matched rows for period i -- confirm.
            long_debt_val = sum_multiple_rows(matches, filtered_df, i)
            non_curr_liab_labels = non_curr_liab_labels + matched_labels
        elif len(matches) == 1:
            long_debt_label = matches[0][0]
            by_label = filtered_df.set_index(filtered_df.columns[0])
            long_debt_val = by_label.loc[long_debt_label].values[i]
            non_curr_liab_labels.append(long_debt_label)
        else:
            raise IndexError(
                f"No long-term debt line item found in sheet {sheet_name!r} of {file}"
            )
        long_debt_list_final.append(long_debt_val)

        # Everything in the section that was not classified as long-term debt.
        is_other = ~filtered_df[filtered_df.columns[0]].isin(non_curr_liab_labels)
        other_non_curr_liab_df = filtered_df[is_other]
        # Built-in sum (not Series.sum) on purpose: a NaN cell propagates into the total.
        other_long_debt_final.append(sum(list(other_non_curr_liab_df.iloc[:, i + 1].values)))

    # TODO: provisions are not mapped yet; keep the row as an explicit placeholder.
    provisions_list_final = [np.nan] * n_periods

    data = [long_debt_list_final, provisions_list_final, other_long_debt_final]
    final_df = pd.DataFrame(data=data, columns=date_array, index=['Long-Term Debt', 'Provisions', 'Other Long-Term Liabilities'])
    final_df.loc['Total Non-Current Liabilities'] = final_df.sum()
    return final_df

In [339]:
non_current_liabilities("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Hurtigruten_202106.xlsx", "BS - 2", test_dates)

Unnamed: 0,2021-12-31,2020-12-31
Long-Term Debt,1326796.0,1228480.0
Provisions,,
Other Long-Term Liabilities,53434.0,43022.0
Total Non-Current Liabilities,1380230.0,1271502.0


In [261]:
def total_equity_liabilities(file, sheet_name, date_array):
    """Pull the total-equity and total-liabilities-and-equity rows from a sheet.

    The first column of the sheet is treated as the label column. The first
    label matching each regular expression identifies the row, and the values
    of that row (label column excluded) are read positionally: entry ``i`` of
    ``date_array`` receives value column ``i``.

    Parameters
    ----------
    file : workbook path, passed unchanged to ``pd.read_excel``.
    sheet_name : sheet selector, passed unchanged to ``pd.read_excel``.
    date_array : column labels of the result.

    Returns
    -------
    pd.DataFrame with rows 'Equity' and 'Total Liabilities and Equity' and one
    column per entry of ``date_array``.

    Raises
    ------
    IndexError
        If no label matches one of the patterns, or the matched row has fewer
        values than ``date_array`` has entries.
    KeyError
        If the matched text is only part of a longer label, so no row carries
        exactly that label.
    """
    equity_pattern = r'([Tt]otal.*[Ee]quity|TOTAL\s+EQU[A-Z]+)'
    equity_liab_pattern = r'([Tt]otal\s+[Ll]iab.*[Ee]quity|TOTAL\s+LIABILITIES\s+\&\s+EQ[A-Z]+|[Tt]otal\s+[Ee]quity.*[Ll]iab[a-z]+)'

    equity_list = []
    equity_liab_list = []
    n_periods = len(date_array)

    if n_periods:
        # The sheet does not change between periods, so read it exactly once.
        df = pd.read_excel(file, sheet_name=sheet_name)
        label_col = df.columns[0]
        # Strip both sides: the patterns never capture trailing whitespace, so a
        # label such as 'Total Equity ' would otherwise be matched but not found.
        df[label_col] = df[label_col].str.strip()
        indexed = df.set_index(label_col)

        def row_values(pattern, description):
            found = df[label_col].str.extract(pattern).dropna().values
            if len(found) == 0:
                raise IndexError(
                    f"No '{description}' row found in sheet {sheet_name!r} of {file}"
                )
            row = indexed.loc[found[0][0]]
            if isinstance(row, pd.DataFrame):
                # The label occurs more than once; use its first occurrence.
                row = row.iloc[0]
            return row.values

        equity_row = row_values(equity_pattern, 'total equity')
        equity_liab_row = row_values(equity_liab_pattern, 'total liabilities and equity')
        equity_list = [equity_row[i] for i in range(n_periods)]
        equity_liab_list = [equity_liab_row[i] for i in range(n_periods)]

    data = [equity_list, equity_liab_list]
    final_df = pd.DataFrame(data=data, columns=date_array, index=['Equity', 'Total Liabilities and Equity'])
    return final_df

In [262]:
total_equity_liabilities("C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares\\Hurtigruten_201812.xlsx", "BS - 3", test_dates)

Unnamed: 0,2018-12-31,2017-12-31
Equity,770997.0,526567.0
Total Liabilities and Equity,9740304.0,8246848.0


In [15]:
# Removes old columns that have been restated, keeping the restated ones and all of the columns that did not have restatements
def drop_old_data(df):
    """Collapse duplicated column labels to their last occurrence and sort the columns.

    When a column label appears more than once, only the right-most column
    carrying that label is kept; columns with a unique label are kept as they
    are. The surviving columns are returned in ascending label order. The
    input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame whose column labels may repeat.

    Returns
    -------
    pd.DataFrame with unique, sorted column labels.
    """
    # keep='last' flags every occurrence of a label except the right-most one.
    is_superseded = df.columns.duplicated(keep='last')
    deduped_df = df.loc[:, ~is_superseded]
    sorted_cols = np.sort(deduped_df.columns, axis=None)
    return deduped_df[sorted_cols]

In [16]:
# Adds the total asset and total liability rows to the master dataframe and returns the final dataframe to be exported to excel
def create_total_asset_total_liab_rows(df):
    """Append 'Total Assets' and 'Total Liabilities' rows to ``df`` in place.

    Each total is the element-wise sum of its current and non-current subtotal
    rows, which must already be present in the index of ``df``. A NaN in either
    subtotal yields NaN in the total. The same (mutated) frame is returned.
    """
    component_rows = {
        'Total Assets': ('Total Current Assets', 'Total Non-Current Assets'),
        'Total Liabilities': ('Total Current Liabilities', 'Total Non-Current Liabilities'),
    }

    # Work out both totals before writing anything back to the frame.
    totals = {
        total_label: np.add(df.loc[current_label].values, df.loc[non_current_label].values)
        for total_label, (current_label, non_current_label) in component_rows.items()
    }

    for total_label, row_values in totals.items():
        df.loc[total_label] = row_values
    return df

In [467]:
create_total_asset_total_liab_rows(reinstated_df)

Unnamed: 0,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31,2021-08-31,2021-09-30,2021-10-30,2021-11-30,2021-12-31,2022-01-31,2022-02-28
Intangible Assets,175342.0,219708.0,168322.0,168322.0,168423.0,168423.0,166943.0,166753.0,161188.0,160998.0,159518.0,145136.0,155501.0,153974.0
Tangible Assets,,,,,,,,,,,,25900.0,,
Other Long-Term Assets,41441.0,60331.0,42914.0,42936.0,43093.0,43109.0,42419.0,42687.0,42430.0,42145.0,41914.0,171524.0,41695.0,41273.0
Total Non-Current Assets,216783.0,280039.0,211236.0,211258.0,211516.0,211532.0,209362.0,209440.0,203618.0,203143.0,201432.0,342560.0,197196.0,195247.0
Cash and Cash Equivalents,2734.0,3418.0,2617.0,2966.0,1613.0,1878.0,2951.0,1638.0,1813.0,4026.0,4749.0,3043.0,1266.0,4580.0
Inventories,9100.0,8164.0,9312.0,10551.0,11137.0,10585.0,10538.0,9465.0,9970.0,10099.0,9720.0,10649.0,10184.0,10816.0
Trade and Other Receivables,11899.0,14228.0,14138.0,12868.0,12397.0,13619.0,13126.0,13456.0,13883.0,14594.0,12600.0,10902.0,12610.0,13343.0
Other Current Assets,3184.0,1749.0,1659.0,2108.0,2133.0,2243.0,1995.0,1776.0,1643.0,1509.0,1767.0,14947.0,1395.0,948.0
Total Current Assets,26917.0,27559.0,27726.0,28493.0,27280.0,28325.0,28610.0,26335.0,27309.0,30228.0,28836.0,39541.0,25455.0,29687.0
Trade and Other Payables,11518.0,13114.0,13524.0,16316.0,14914.0,14321.0,14850.0,11118.0,10404.0,11154.0,10427.0,8944.0,10672.0,12907.0


# Main Script

<ol>
    <li>Read each file and filter for balance sheet tab</li>
    <li>Filter each file by section and find and map items to their respective lists via regular expression</li>
    <li>Return a mapped DataFrame for each section</li>
    <li>Combine the mapped sections into a single DataFrame</li>
    <li>Modify master dataframe by only showing columns that have not been restated and removing old columns that have been restated.</li>
    <li>Produce Total Assets and Liabilities rows in the master dataframe</li>
</ol>

In [368]:
# Folder holding the raw workbooks and folder receiving the mapped output -- change both for your machine.
INPUT_DIR = "C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares"
OUTPUT_DIR = "C:\\Users\\curt.beck\\OneDrive\\Financial_Mapping\\Ares Output"

statement_type = 'Balance Sheet'

companies = generate_company_list() # generates a list of companies to loop over and produce templates for
for company in companies:
    file_list = glob.glob(f"{INPUT_DIR}/{company}*.xlsx") # Only has been tested for Avetta

    # One running frame per balance sheet section; each processed file adds its columns.
    master_df_non_curr_asset = pd.DataFrame()
    master_df_curr_asset = pd.DataFrame()
    master_df_curr_liab = pd.DataFrame()
    master_df_non_curr_liab = pd.DataFrame()
    master_df_eq_liab_df = pd.DataFrame()
    final_master_df = pd.DataFrame()

    for file in file_list:
        try:
            sheet_list = pd.ExcelFile(file).sheet_names
            sheet_name = filter_sheets(sheet_list, statement_type)
            if len(sheet_name) > 0:
                date_array, period_type_array = create_date_period_array(file, sheet_name)

                # Map every section of this file's balance sheet.
                non_curr_asset_df = non_current_assets(file, sheet_name, date_array)
                curr_asset_df = current_assets(file, sheet_name, date_array)
                curr_liab_df = current_liabilities(file, sheet_name, date_array)
                non_curr_liab_df = non_current_liabilities(file, sheet_name, date_array)
                eq_liab_df = total_equity_liabilities(file, sheet_name, date_array)

                # Add this file's periods as new columns of each section frame.
                master_df_non_curr_asset = pd.concat([master_df_non_curr_asset, non_curr_asset_df], axis=1)
                master_df_curr_asset = pd.concat([master_df_curr_asset, curr_asset_df], axis=1)
                master_df_curr_liab = pd.concat([master_df_curr_liab, curr_liab_df], axis=1)
                master_df_non_curr_liab = pd.concat([master_df_non_curr_liab, non_curr_liab_df], axis=1)
                master_df_eq_liab_df = pd.concat([master_df_eq_liab_df, eq_liab_df], axis=1)

                # Stack the sections into a single balance sheet.
                final_master_df = pd.concat([master_df_non_curr_asset,
                                             master_df_curr_asset,
                                             master_df_curr_liab,
                                             master_df_non_curr_liab,
                                             master_df_eq_liab_df], axis=0)

                np_date_array = final_master_df.columns
                sorted_date_array = np.sort(np_date_array, axis=None)

                final_master_sorted_df = final_master_df[sorted_date_array]
                reinstated_df = drop_old_data(final_master_sorted_df)
                final_reinstated_df = create_total_asset_total_liab_rows(reinstated_df)
                print(f"{company}'s {file} balance sheet mapping complete")
                # The company's output file is rewritten after every input file, so it
                # always holds everything mapped so far.
                final_reinstated_df.to_excel(f"{OUTPUT_DIR}\\{company}_mapped_BS_new.xlsx")
            else:
                print(f"{file} does not have {statement_type}")
        except Exception:
            # Report the failing file and stop processing this company; catching
            # Exception (not a bare except) lets Ctrl-C still interrupt the run.
            print(f"There was an issue with {file}")
            traceback.print_exc()
            break

Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202012.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202102.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202103.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202104.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202105.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202106.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202107.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202108.xlsx balance sheet mapping complete
Avetta's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Avetta_202109.xlsx balance sheet mapping complete
Avetta's C

Hurtigruten's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Hurtigruten_202112.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202102.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202103.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202104.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202105.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202106.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202107.xlsx balance sheet mapping complete
Novipax Buyer's C:/Users/curt.beck/OneDrive/Financial_Mapping/Ares\Novipax Buyer_202108.xlsx balance sheet mapping complete
Novipax Buye