In [3]:
import pandas as pd
import numpy as np
import re as re

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows',100)

In [4]:
# Take in Excel

itemized = pd.read_excel("C:/Users/danie/OneDrive/Projects/fedex_reconciliation/Invoice_data.xlsx", header=2, sheet_name=0)
invoice_data = pd.read_excel("C:/Users/danie/OneDrive/Projects/fedex_reconciliation/Invoice_data.xlsx", sheet_name=1)
qbo = pd.read_excel("C:/Users/danie/OneDrive/Projects/fedex_reconciliation/QBO_customers(1).xlsx")
amt = pd.read_excel("C:/Users/danie/OneDrive/Projects/fedex_reconciliation/Exensiv.xlsx", sheet_name='AMT')
gp_acoustics = pd.read_excel("C:/Users/danie/OneDrive/Projects/fedex_reconciliation/Exensiv.xlsx", sheet_name='GPAcoustics')

# **Invoice Data** <-> **QBO**

## - Compare **Invoice Data** [`Customer PO #`] to **QBO** [`Display_Name`]

In [5]:
# Merge QuickBooks and FedEx to find shared values

#? Check if ['Display name'] is the only key to compare against

qbo_found = pd.merge(qbo, invoice_data, right_on='Customer PO #', left_on= 'Display_Name', how='inner')

In [6]:
# Number of PO's Found in QuickBooks

print(f'Unique Found POs: {len(qbo_found['Display_Name'].drop_duplicates())}',
      f'Total Found POs: {len(qbo_found)}', sep='\n')

Unique Found POs: 6
Total Found POs: 2218


In [7]:
def compare_qbo(qbo: pd.DataFrame, invoice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Function: Compares FedEx invoice with QuickBooks via keys 'Customer PO #' and 'Display_Name'
    Input: Original QuickBooks and FedEx Invoice file
    Output: Pandas DataFrame with values not found in QuickBooks
    """

    # ? is 'Display_Name' the only key to compare against?

    qbo_found = pd.merge(
        qbo, invoice_data, right_on="Customer PO #", left_on="Display_Name", how="inner"
    )

    lst = set()
    for i in invoice_data["Customer PO #"]:
        if i not in list(qbo_found["Display_Name"].unique()):
            lst.add(i)

    not_found = pd.DataFrame()
    not_found["Customer PO #"] = pd.DataFrame(lst)
    not_found = not_found.merge(
        invoice_data[
            [
                "Customer PO #",
                "Reference",
                "Reference 2",
                "Total Charges",
                "Receiver Name",
                "Receiver Company",
                "Receiver Address",
                "Tracking #",
            ]
        ],
        on="Customer PO #",
        how="left",
    )
    return not_found

### - Add not found values to new table: `not_found`

In [8]:
# Add unique Customer PO's not found in quickbooks to a unique list

lst = set()
for i in invoice_data['Customer PO #']:
    if i not in list(qbo_found['Display_Name'].unique()):
        lst.add(i)

In [9]:
len(lst)

84

In [10]:
not_found = pd.DataFrame()
not_found['Customer PO #'] = pd.DataFrame(lst)
not_found = not_found.merge(invoice_data,
                             on='Customer PO #', how='left')
not_found

Unnamed: 0,Customer PO #,Shipper #,Invoice #,Invoice Date,Invoice Amount,Tracking #,Ship Date,Delivery Date,Delivery Time,Service Level,Zone,Reference,Department,Bill Option,Piece Count,Actual Weight,Bill Weight,Cwt Weight,Ship Device,Shipper Name,Shipper Company,Shipper Address,Shipper City,Shipper State,Shipper Postal Code,Shipper Country,Receiver Account,Receiver Name,Receiver Company,Receiver Address,Receiver City,Receiver State,Receiver Postal Code,Receiver Country,Customer Dept. #,Customer Invoice #,Declared Value,Customs Value,GL Code,Shipper Account,Reference 2,Length,Width,Height,Control #,Credit Reason,Published Amount,Discounted Amount,Residential,DAS,Fuel,Saturday Delivery,Add'd Handling,Misc. Charges,Client,Coding,Total Charges,Adjusted Amount,Audited Amount,Service Packaging
0,Nautical/Qoute/BottleNeck,693070511,867618863,2024-11-08,29866.52,418132348736,2024-10-23,2024-10-28,15:26:00,Ground,7,BottleNecker Sample,,Prepaid,,2.2,3,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,Devon Davis,Dome Printing,2031 Dome Lane,MCCLELLAN,CA,95652.0,US,,,0,0,,693070511,,14,10,6,ADVA4456,,16.24,-11.75,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging
1,700670008/SD266463,693070511,866891361,2024-11-01,21136.17,280881789594,2024-10-21,2024-10-24,12:23:00,Ground,6,10569,,Prepaid,,12.6,13,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,MUSICAL FULFILLMENT NV,MUSICAL FULFILLMENT NV,450 MAESTRO DR,RENO,NV,895111294.0,US,,,0,0,,693070511,,14,14,14,ADVA4452,,20.95,-15.48,0.00,0.00,0.44,0,0.0,0.00,Nautical,59062/587120,5.91,0,5.91,Customer Packaging
2,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064408583,2024-10-25,2024-10-28,11:52:00,Ground,3,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,EMMALEE EVANS,U OF OK | KAPPA KAPPA GAMMA,1212 W 4TH AVE,STILLWATER,OK,740743147.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,14.13,-9.64,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging
3,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064415883,2024-10-25,2024-10-30,11:55:00,Ground,4,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,Kaley Sorg,Ball State University (Tutor Ctr),"2000 W. University Ave North Quad,",Muncie,IN,47306.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,15.62,-11.13,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging
4,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064415060,2024-10-25,2024-10-28,12:58:00,Ground,5,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,MS. LIVINGSTON,GWINNETT COLLEGE (ROSWELL/SANDY SPR,1455 OLD ALABAMA RD,ROSWELL,GA,300762167.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,16.86,-12.37,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2750,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398396447,2024-11-05,2024-11-11,11:17:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,19.0,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,ESTEBAN SOSA,BLAIR S KRANSON MD,7320 WOODLAKE AVE,WEST HILLS,CA,913071496.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,0.00,0.00,0.58,0,0.0,0.00,Nautical,59062/587120,7.70,0,7.70,Customer Packaging
2751,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398395999,2024-11-05,2024-11-12,12:17:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,18.6,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,COURTNEY SCOTT,MIDWIFE COURTNEY,1444 MAIN ST,RAMONA,CA,920658106.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,0.00,1.15,0.67,0,0.0,0.00,Nautical,59062/587120,8.94,0,8.94,Customer Packaging
2752,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398394308,2024-11-05,2024-11-15,10:53:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,18.4,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,JENNIFER RECTOR,THE VILLAGE MIDWIFE BIRTH CENTER,321 MAIN ST,NEWPORT NEWS,VA,236013814.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,5.55,0.00,1.05,0,0.0,0.23,Nautical,59062/587120,13.95,0,13.95,Customer Packaging
2753,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398397075,2024-11-05,2024-11-11,12:26:00,Ground,5,Nautical/Arrived/WaterWipes,,Prepaid,,19.1,20,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,LOU,SOUTHWEST MIDWIVES,1 MERCADO ST,DURANGO,CO,813017311.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,24.22,-17.90,0.00,1.15,0.60,0,0.0,0.00,Nautical,59062/587120,8.07,0,8.07,Customer Packaging


In [11]:
def compare_qbo(qbo: pd.DataFrame, invoice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Function: Compares FedEx invoice with QuickBooks via keys 'Customer PO #' and 'Display_Name'
    Input: Original QuickBooks and FedEx Invoice file
    Output: Pandas DataFrame with values not found in QuickBooks
    """

    # ? is 'Display_Name' the only key to compare against?

    qbo_found = pd.merge(
        qbo, invoice_data, right_on="Customer PO #", left_on="Display_Name", how="inner"
    )

    lst = set()
    for i in invoice_data["Customer PO #"]:
        if i not in list(qbo_found["Display_Name"].unique()):
            lst.add(i)

    not_found = pd.DataFrame()
    not_found["Customer PO #"] = pd.DataFrame(lst)
    not_found = not_found.merge(
        invoice_data[
            [
                "Customer PO #",
                "Reference",
                "Reference 2",
                "Total Charges",
                "Receiver Name",
                "Receiver Company",
                "Receiver Address",
                "Tracking #",
            ]
        ],
        on="Customer PO #",
        how="left",
    )
    return not_found

# **Invoice Data** [`Reference`] <-> **Extensiv** [`Reference`]

## - For all **Invoice Data** [`Customer PO #`] not in **QBO** [`Display Name`]

### - Create RegEx token function 

In [12]:
# Create Regex Tokens

def reg_tokenizer(value):

    '''
    Input: Value of each column in Reference and Reference 2
    Output: RegEx object
    '''

    with_letters = re.sub(r'[a-zA-Z]+',r'\\w+' , str(value))

    with_numbers = re.sub(r'\d+', r'\\d+', with_letters)

    with_spaces = re.sub(r'\s+', r'\\s+', with_numbers)

    final = re.compile(with_spaces)
    
    return final

In [13]:
# RegEx Test

for i in range(len(not_found['Customer PO #'])):

    test = reg_tokenizer(not_found['Customer PO #'][i])
    
    match = re.fullmatch(test, str(not_found['Customer PO #'][i]))
    
    # print(f'{test}, original: {not_found['Customer PO #'][i]}, match: {match.group(0)}')

### - Add RegEx column to `not_found` with [Reference] patterns

In [14]:
# Add Pattern column

token_lst = []

for i in not_found['Reference']:
    token_lst.append(reg_tokenizer(i))

not_found['Pattern'] = token_lst


In [15]:
# Fedex columns with added Reference pattern

not_found

Unnamed: 0,Customer PO #,Shipper #,Invoice #,Invoice Date,Invoice Amount,Tracking #,Ship Date,Delivery Date,Delivery Time,Service Level,Zone,Reference,Department,Bill Option,Piece Count,Actual Weight,Bill Weight,Cwt Weight,Ship Device,Shipper Name,Shipper Company,Shipper Address,Shipper City,Shipper State,Shipper Postal Code,Shipper Country,Receiver Account,Receiver Name,Receiver Company,Receiver Address,Receiver City,Receiver State,Receiver Postal Code,Receiver Country,Customer Dept. #,Customer Invoice #,Declared Value,Customs Value,GL Code,Shipper Account,Reference 2,Length,Width,Height,Control #,Credit Reason,Published Amount,Discounted Amount,Residential,DAS,Fuel,Saturday Delivery,Add'd Handling,Misc. Charges,Client,Coding,Total Charges,Adjusted Amount,Audited Amount,Service Packaging,Pattern
0,Nautical/Qoute/BottleNeck,693070511,867618863,2024-11-08,29866.52,418132348736,2024-10-23,2024-10-28,15:26:00,Ground,7,BottleNecker Sample,,Prepaid,,2.2,3,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,Devon Davis,Dome Printing,2031 Dome Lane,MCCLELLAN,CA,95652.0,US,,,0,0,,693070511,,14,10,6,ADVA4456,,16.24,-11.75,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging,re.compile('\\w+\\s+\\w+')
1,700670008/SD266463,693070511,866891361,2024-11-01,21136.17,280881789594,2024-10-21,2024-10-24,12:23:00,Ground,6,10569,,Prepaid,,12.6,13,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,MUSICAL FULFILLMENT NV,MUSICAL FULFILLMENT NV,450 MAESTRO DR,RENO,NV,895111294.0,US,,,0,0,,693070511,,14,14,14,ADVA4452,,20.95,-15.48,0.00,0.00,0.44,0,0.0,0.00,Nautical,59062/587120,5.91,0,5.91,Customer Packaging,re.compile('\\d+')
2,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064408583,2024-10-25,2024-10-28,11:52:00,Ground,3,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,EMMALEE EVANS,U OF OK | KAPPA KAPPA GAMMA,1212 W 4TH AVE,STILLWATER,OK,740743147.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,14.13,-9.64,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
3,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064415883,2024-10-25,2024-10-30,11:55:00,Ground,4,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,Kaley Sorg,Ball State University (Tutor Ctr),"2000 W. University Ave North Quad,",Muncie,IN,47306.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,15.62,-11.13,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
4,ARRCO-20424-00014,693070511,867618863,2024-11-08,29866.52,419064415060,2024-10-25,2024-10-28,12:58:00,Ground,5,Nautical/Arrived/OllyChews,,Prepaid,,5.7,6,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,Lenexa,KS,66219,US,,MS. LIVINGSTON,GWINNETT COLLEGE (ROSWELL/SANDY SPR,1455 OLD ALABAMA RD,ROSWELL,GA,300762167.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,16.86,-12.37,0.00,0.00,0.36,0,0.0,0.00,Nautical,59062/587120,4.85,0,4.85,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2750,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398396447,2024-11-05,2024-11-11,11:17:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,19.0,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,ESTEBAN SOSA,BLAIR S KRANSON MD,7320 WOODLAKE AVE,WEST HILLS,CA,913071496.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,0.00,0.00,0.58,0,0.0,0.00,Nautical,59062/587120,7.70,0,7.70,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
2751,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398395999,2024-11-05,2024-11-12,12:17:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,18.6,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,COURTNEY SCOTT,MIDWIFE COURTNEY,1444 MAIN ST,RAMONA,CA,920658106.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,0.00,1.15,0.67,0,0.0,0.00,Nautical,59062/587120,8.94,0,8.94,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
2752,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398394308,2024-11-05,2024-11-15,10:53:00,Ground,6,Nautical/Arrived/WaterWipes,,Prepaid,,18.4,19,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,JENNIFER RECTOR,THE VILLAGE MIDWIFE BIRTH CENTER,321 MAIN ST,NEWPORT NEWS,VA,236013814.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,27.28,-20.16,5.55,0.00,1.05,0,0.0,0.23,Nautical,59062/587120,13.95,0,13.95,Customer Packaging,re.compile('\\w+/\\w+/\\w+')
2753,ARRCO-20424-00016,693070511,869086366,2024-11-22,39212.48,416398397075,2024-11-05,2024-11-11,12:26:00,Ground,5,Nautical/Arrived/WaterWipes,,Prepaid,,19.1,20,,,Jeff Flassig,Advantage Solutions,16000 W 116th st,lenexa,KS,66219,US,,LOU,SOUTHWEST MIDWIVES,1 MERCADO ST,DURANGO,CO,813017311.0,US,,,0,0,,693070511,,14,11,10,ADVA4472,,24.22,-17.90,0.00,1.15,0.60,0,0.0,0.00,Nautical,59062/587120,8.07,0,8.07,Customer Packaging,re.compile('\\w+/\\w+/\\w+')


In [16]:
def add_pattern_column(invoice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Function: Adds a column to the FedEx Invoice DataFrame with the RegEx pattern symbolizing 'Customer PO #'
    Input: FedEx Invoice DataFrame
    Output: DataFrame with added 'Pattern' column
    """

    def reg_tokenizer(value):

        with_letters = re.sub(r"[a-zA-Z]+", r"\\w+", str(value))
        with_numbers = re.sub(r"\d+", r"\\d+", with_letters)
        with_spaces = re.sub(r"\s+", r"\\s+", with_numbers)

        final = re.compile(with_spaces)

        return final

    token_lst = []

    for i in invoice_data["Reference"]:
        token_lst.append(reg_tokenizer(i))

    invoice_data["Pattern"] = token_lst

    return invoice_data

### - Compare `not found`[Pattern] to values in Extensiv Tables

In [17]:
# Compare Reference Patterns against every value in Extensiv Table

def find_col_match(ext_table, ref_pattern):

    col_lst = set()

    for col in ext_table.columns:

        for value in ext_table[col]:

            if re.fullmatch(ref_pattern,str(value)):

                col_lst.add(col)
                break
            
            else:
                break

    if len(col_lst) != 0:
        return col_lst
    else:
        return None


In [18]:
# Test \d+ to AMT table 

find_col_match(amt, not_found['Pattern'][1])



{'AsnCandidate',
 'Column1',
 'CreatedByIdentifier.Id',
 'Custom.TotalResults',
 'CustomerIdentifier.Id',
 'Index',
 'LastModifiedByIdentifier.Id',
 'LoadedState',
 'OrderId',
 'ParcelLabelType',
 'RouteCandidate',
 'RoutingInfo.BillOfLading',
 'ShipTo.Zip',
 'Status',
 'TransactionEntryType',
 'WarehouseTransactionSourceType'}

In [19]:
# Check AMT table

amt[['AsnCandidate',
 'Column1',
 'CreatedByIdentifier.Id',
 'Custom.TotalResults',
 'CustomerIdentifier.Id',
 'Index',
 'LastModifiedByIdentifier.Id',
 'LoadedState',
 'OrderId',
 'ParcelLabelType',
 'RouteCandidate',
 'RoutingInfo.BillOfLading',
 'ShipTo.Zip',
 'Status',
 'TransactionEntryType',
 'WarehouseTransactionSourceType']].head()

Unnamed: 0,AsnCandidate,Column1,CreatedByIdentifier.Id,Custom.TotalResults,CustomerIdentifier.Id,Index,LastModifiedByIdentifier.Id,LoadedState,OrderId,ParcelLabelType,RouteCandidate,RoutingInfo.BillOfLading,ShipTo.Zip,Status,TransactionEntryType,WarehouseTransactionSourceType
0,0,0,10,1044,11,1,10,0,363,0,0,17,52404,1,1,1
1,0,0,10,1044,11,1,10,0,363,0,0,17,52404,1,1,1
2,0,0,10,1044,11,1,10,0,363,0,0,17,52404,1,1,1
3,0,0,10,1044,11,1,10,0,363,0,0,17,52404,1,1,1
4,0,0,10,1044,11,1,10,0,363,0,0,17,52404,1,1,1


### - Compare all `not_found`[Reference], `not_found`[Pattern] values to Extensiv Table

In [20]:
def find_extensiv_reference_columns(extensiv_table: pd.DataFrame, invoice_data_w_patterns: pd.DataFrame) -> dict:  # fmt: skip
    """
    Function: Finds all of the columns in the Extensiv table that match each 'Reference' in FedEx Invoice not in QBO
    Input: Extensiv DataFrame, FedEx Invoice DataFrame w/ added 'Pattern' column
    Ouput: Dictionary {'match_lst': list of Extensiv columns that match 'Reference' pattern,
                       'Total Charges': Charges associated with that 'Reference' in FedEx Invoice,
                       'Tracking #': Tracking number associated with that 'Reference' in FedEx Invoice}
    Notes: May not need Total Charges and Tracking # in the end
    """

    def find_col_match(extensiv_table: pd.DataFrame, ref_pattern: pd.Series) -> list:
        """
        Function: Subfunction to iterate through each of the patterns in FedEx Invoice
        Input: Extensiv DataFrame, FedEx Reference patterns as a Series in a for loop
        Ouput: List of columns that match given Reference pattern
        """
        col_lst = set()

        for col in extensiv_table.columns:

            for value in extensiv_table[col]:

                if re.fullmatch(ref_pattern, str(value)):

                    col_lst.add(col)
                    break

                else:
                    break

        if len(col_lst) != 0:
            return col_lst
        else:
            return None

    match_dct = dict()
    suffix = 0

    for i, v in enumerate(invoice_data_w_patterns["Reference"]):

        if i != 0 and v == invoice_data_w_patterns["Reference"][i - 1]:

            suffix += 1
            v = f"{v}-s{suffix}"

        elif i != 0 and v != invoice_data_w_patterns["Reference"][i - 1]:
            suffix = 0
        else:
            continue

        match_lst = find_col_match(
            extensiv_table, invoice_data_w_patterns["Pattern"][i]
        )

        if match_lst is not None and not pd.isna(v):

            match_dct[v] = {
                "match_lst": match_lst,
                "Total Charges": invoice_data_w_patterns["Total Charges"][i],
                "Tracking #": invoice_data_w_patterns["Tracking #"][i],
            }

    return match_dct

In [21]:
# Test Find column matches

amt_match_dct = find_extensiv_reference_columns(amt, not_found)
gp_acoustics_dct = find_extensiv_reference_columns(gp_acoustics, not_found)

In [22]:
# Reference numbers in FedEx and matching columns in Extensiv 

gp_acoustics_dct

{10569: {'match_lst': {'AsnCandidate',
   'Column1',
   'CreatedByIdentifier.Id',
   'Custom.TotalResults',
   'CustomerIdentifier.Id',
   'Index',
   'LoadedState',
   'OrderId',
   'ParcelLabelType',
   'ReferenceNum',
   'RouteCandidate',
   'RoutingInfo.BillOfLading',
   'RoutingInfo.Mode',
   'RoutingInfo.TrackingNumber',
   'ShipTo.AddressStatus',
   'ShipTo.ContactId',
   'ShipTo.Zip',
   'Status',
   'TransactionEntryType',
   'WarehouseTransactionSourceType'},
  'Total Charges': np.float64(5.91),
  'Tracking #': np.int64(280881789594)},
 11653: {'match_lst': {'AsnCandidate',
   'Column1',
   'CreatedByIdentifier.Id',
   'Custom.TotalResults',
   'CustomerIdentifier.Id',
   'Index',
   'LoadedState',
   'OrderId',
   'ParcelLabelType',
   'ReferenceNum',
   'RouteCandidate',
   'RoutingInfo.BillOfLading',
   'RoutingInfo.Mode',
   'RoutingInfo.TrackingNumber',
   'ShipTo.AddressStatus',
   'ShipTo.ContactId',
   'ShipTo.Zip',
   'Status',
   'TransactionEntryType',
   'Warehous

### - Compare each `not_found`[Reference] to each value in matched Extensiv columns

In [23]:
def find_value_match(extensiv_table: pd.DataFrame, reference_matches: dict) -> list:

    match_lst = list()

    for reference in reference_matches:

        matches = reference_matches[reference]["match_lst"]
        total_charges = reference_matches[reference]["Total Charges"]
        tracking_number = reference_matches[reference]["Tracking #"]

        for col in extensiv_table[list(matches)]:

            for i, val in enumerate(extensiv_table[col]):

                base_reference = re.sub(r"-s\d+$", "", str(reference))

                if val == reference or val == base_reference:

                    match_entry = {
                        "Reference": base_reference,
                        "Name": extensiv_table["CustomerIdentifier.Name"][i],
                        "Column": col,
                        "Total Charges": total_charges,
                        "Tracking #": tracking_number,
                    }

                    if match_entry not in match_lst:
                        match_lst.append(match_entry)
                else:
                    continue

    if not match_lst:
        print(f"No Matches")

    else:
        return match_lst

In [24]:
# amt_reference_matches = find_value_match(gp_acoustics, gp_acoustics_dct)
gp_reference_matches = find_value_match(gp_acoustics, gp_acoustics_dct)

In [25]:
gp_reference_matches

[{'Reference': '10569',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(5.91),
  'Tracking #': np.int64(280881789594)},
 {'Reference': '11653',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(30.42),
  'Tracking #': np.int64(280887141570)},
 {'Reference': '10566',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.56),
  'Tracking #': np.int64(280881955477)},
 {'Reference': '14371',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.62),
  'Tracking #': np.int64(281397637770)},
 {'Reference': '11391',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.56),
  'Tracking #': np.int64(280766989314)},
 {'Reference': '13730',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(22.44),
  'Tracking #': np.int64(281241623171)},
 {'Reference': '11397',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 

In [26]:
# for i, row in not_found.iterrows():
#     # print(row['Reference'])
#     for dct in gp_found_values:
        
#         try:
#             dct['Reference'] = int(dct['Reference'])  # Convert to integer
#         except ValueError:
#             pass
#         # print(dct['Reference'])
#         if dct['Reference'] == row['Reference']:
#             not_found.loc[i,'Customer PO #'] = dct['Name']
# not_found


In [27]:
not_found[(not_found['Customer PO #'] == 'GP Acoustics')]

Unnamed: 0,Customer PO #,Shipper #,Invoice #,Invoice Date,Invoice Amount,Tracking #,Ship Date,Delivery Date,Delivery Time,Service Level,Zone,Reference,Department,Bill Option,Piece Count,Actual Weight,Bill Weight,Cwt Weight,Ship Device,Shipper Name,Shipper Company,Shipper Address,Shipper City,Shipper State,Shipper Postal Code,Shipper Country,Receiver Account,Receiver Name,Receiver Company,Receiver Address,Receiver City,Receiver State,Receiver Postal Code,Receiver Country,Customer Dept. #,Customer Invoice #,Declared Value,Customs Value,GL Code,Shipper Account,Reference 2,Length,Width,Height,Control #,Credit Reason,Published Amount,Discounted Amount,Residential,DAS,Fuel,Saturday Delivery,Add'd Handling,Misc. Charges,Client,Coding,Total Charges,Adjusted Amount,Audited Amount,Service Packaging,Pattern


# **Invoice Data** [`Receiver Info`] <-> **Extensiv** [`Receiver Info`]

In [28]:
def create_extensiv_receiver_info(extensiv_table: pd.DataFrame) -> dict:

    extensiv_receiver_info = extensiv_table[
        [
            "ShipTo.CompanyName",
            "ShipTo.Name",
            "ShipTo.Address1",
            "CustomerIdentifier.Name",
        ]
    ]

    extensiv_receiver_info_nd = extensiv_receiver_info.drop_duplicates(
        [
            "ShipTo.CompanyName",
            "ShipTo.Name",
            "ShipTo.Address1",
            "CustomerIdentifier.Name",
        ]
    )

    extensiv_receiver_dct = dict()

    for i, row in extensiv_receiver_info_nd.iterrows():

        extensiv_receiver_dct[i] = {
            "Receiver Address": row["ShipTo.Address1"],
            "Receiver Company": row["ShipTo.CompanyName"],
            "Receiver Name": row["ShipTo.Name"],
            "Customer Identifier": row["CustomerIdentifier.Name"],
        }

    return extensiv_receiver_dct

In [29]:
def create_invoice_data_receiver_info(invoice_data: pd.DataFrame, reference_matches: list) -> dict:  # fmt: skip

    found_reference_lst = list()

    for i in reference_matches:
        found_reference_lst.append(i["Reference"])

    #! Taking out this conditional to test
    invoice_data_null = invoice_data[
        (invoice_data["Customer PO #"].isna())
        & ~(invoice_data["Reference"].isin(found_reference_lst))
    ]

    invoice_data_dct = {}

    for i, row in invoice_data.iterrows():

        invoice_data_dct[i] = {
            "Receiver Address": row["Receiver Address"],
            "Receiver Company": row["Receiver Company"],
            "Receiver Name": row["Receiver Name"],
            "Tracking #": row["Tracking #"],
        }

    return invoice_data_dct

In [30]:
gp_receiver_info = create_extensiv_receiver_info(gp_acoustics)
invoice_data_receiver_info = create_invoice_data_receiver_info(invoice_data, gp_reference_matches)

In [31]:
def compare_receiver_info(invoice_data_receiver_info: dict, extensiv_receiver_info: dict) -> list:  # fmt: skip

    match_entry = dict()
    match_lst = list()

    for i in invoice_data_receiver_info:

        for e in extensiv_receiver_info:

            if (
                invoice_data_receiver_info[i]["Receiver Address"]
                == extensiv_receiver_info[e]["Receiver Address"]
                or invoice_data_receiver_info[i]["Receiver Name"]
                == extensiv_receiver_info[e]["Receiver Name"]
                or invoice_data_receiver_info[i]["Receiver Company"]
                == extensiv_receiver_info[e]["Receiver Company"]
            ):

                match_entry = {
                    "Address": invoice_data_receiver_info[i]["Receiver Address"],
                    "Name": invoice_data_receiver_info[i]["Receiver Name"],
                    "Company": invoice_data_receiver_info[i]["Receiver Company"],
                    "Customer": extensiv_receiver_info[e]["Customer Identifier"],
                }

                if match_entry not in match_lst:
                    match_lst.append(match_entry)

    if match_lst:
        return pd.DataFrame(match_lst)
    else:
        print("No Match")

In [32]:
gp_receiver_matches = compare_receiver_info(invoice_data_receiver_info, gp_receiver_info)

In [33]:
len(gp_receiver_matches)

11

In [35]:
final_matches_lst = []
final_matches_lst.extend(gp_reference_matches)
final_matches_lst.extend(gp_receiver_matches)

In [37]:
final_matches_lst

[{'Reference': '10569',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(5.91),
  'Tracking #': np.int64(280881789594)},
 {'Reference': '11653',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(30.42),
  'Tracking #': np.int64(280887141570)},
 {'Reference': '10566',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.56),
  'Tracking #': np.int64(280881955477)},
 {'Reference': '14371',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.62),
  'Tracking #': np.int64(281397637770)},
 {'Reference': '11391',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(9.56),
  'Tracking #': np.int64(280766989314)},
 {'Reference': '13730',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': np.float64(22.44),
  'Tracking #': np.int64(281241623171)},
 {'Reference': '11397',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 

In [221]:
for i, row in not_found.iterrows():
    for dct in final_matches_lst:
        
        if 'Reference' in dct and dct['Reference'] == row['Reference']:
            not_found.loc[i, 'Customer PO #'] = dct['Name']
        elif 'Address' in dct and dct['Address'] == row['Receiver Address']:
            not_found.loc[i, 'Customer PO #'] = dct['Customer']
        elif 'Name' in dct and dct['Name'] == row['Receiver Name']:
            not_found.loc[i, 'Customer PO #'] = dct['Customer']
        elif 'Company' in dct and dct['Company'] == row['Receiver Company']:
            not_found.loc[i, 'Customer PO #'] = dct['Customer']
        elif 'Reference' in dct:
            try:
                dct['Reference'] = int(dct['Reference'])
            except ValueError:
                pass


In [222]:
not_found[not_found['Customer PO #'] == 'GP Acoustics']

Unnamed: 0,Customer PO #,Shipper #,Invoice #,Invoice Date,Invoice Amount,Tracking #,Ship Date,Delivery Date,Delivery Time,Service Level,Zone,Reference,Department,Bill Option,Piece Count,Actual Weight,Bill Weight,Cwt Weight,Ship Device,Shipper Name,Shipper Company,Shipper Address,Shipper City,Shipper State,Shipper Postal Code,Shipper Country,Receiver Account,Receiver Name,Receiver Company,Receiver Address,Receiver City,Receiver State,Receiver Postal Code,Receiver Country,Customer Dept. #,Customer Invoice #,Declared Value,Customs Value,GL Code,Shipper Account,Reference 2,Length,Width,Height,Control #,Credit Reason,Published Amount,Discounted Amount,Residential,DAS,Fuel,Saturday Delivery,Add'd Handling,Misc. Charges,Client,Coding,Total Charges,Adjusted Amount,Audited Amount,Service Packaging,Pattern
0,GP Acoustics,693070511,867618863,2024-11-08,29866.52,418132348770,2024-10-24,2024-10-28,09:38:00,Ground,5,GP Acoustics,,Prepaid,,42.5,43,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,ISP TECHNOLOGIES LLC,ISP TECHNOLOGIES LLC,5479 PERRY DR,WATERFORD,MI,483294828.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,43.86,-32.85,0.0,0.0,0.89,0,0.0,0.0,Nautical,59062/587120,11.9,0,11.9,Customer Packaging,re.compile('\\w+\\s+\\w+')
1,GP Acoustics,693070511,867618863,2024-11-08,29866.52,418132348758,2024-10-24,2024-10-28,09:38:00,Ground,5,GP Acoustics,,Prepaid,,42.5,43,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,ISP TECHNOLOGIES LLC,ISP TECHNOLOGIES LLC,5479 PERRY DR,WATERFORD,MI,483294828.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,43.86,-32.85,0.0,0.0,0.89,0,0.0,0.0,Nautical,59062/587120,11.9,0,11.9,Customer Packaging,re.compile('\\w+\\s+\\w+')
2,GP Acoustics,693070511,867618863,2024-11-08,29866.52,418132348769,2024-10-24,2024-10-28,09:38:00,Ground,5,GP Acoustics,,Prepaid,,42.6,43,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,ISP TECHNOLOGIES LLC,ISP TECHNOLOGIES LLC,5479 PERRY DR,WATERFORD,MI,483294828.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,43.86,-32.85,0.0,0.0,0.89,0,0.0,0.0,Nautical,59062/587120,11.9,0,11.9,Customer Packaging,re.compile('\\w+\\s+\\w+')
3,GP Acoustics,693070511,867618863,2024-11-08,29866.52,418132348747,2024-10-24,2024-10-28,09:38:00,Ground,5,GP Acoustics,,Prepaid,,42.5,43,,,Nautical Fulfillment,Nautical Fulfillment,16100 W 116th St,Lenexa,KS,66219,US,,ISP TECHNOLOGIES LLC,ISP TECHNOLOGIES LLC,5479 PERRY DR,WATERFORD,MI,483294828.0,US,,,0,0,,693070511,,14,13,10,ADVA4456,,43.86,-32.85,0.0,0.0,0.89,0,0.0,0.0,Nautical,59062/587120,11.9,0,11.9,Customer Packaging,re.compile('\\w+\\s+\\w+')
443,GP Acoustics,693070511,868389946,2024-11-15,27353.46,281397637770,2024-11-04,2024-11-06,14:14:00,Ground,5,14371,,Prepaid,,33.6,34,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,DANLEY SOUND LAB,DANLEY SOUND LAB,2160 HILTON DR,GAINESVILLE,GA,305016153.0,US,,,0,0,,693070511,,20,14,14,ADVA4465,,35.46,-26.56,0.0,0.0,0.72,0,0.0,0.0,Nautical,59062/587120,9.62,0,9.62,Customer Packaging,re.compile('\\d+')
444,GP Acoustics,693070511,866891361,2024-11-01,21136.17,280881789594,2024-10-21,2024-10-24,12:23:00,Ground,6,10569,,Prepaid,,12.6,13,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,MUSICAL FULFILLMENT NV,MUSICAL FULFILLMENT NV,450 MAESTRO DR,RENO,NV,895111294.0,US,,,0,0,,693070511,,14,14,14,ADVA4452,,20.95,-15.48,0.0,0.0,0.44,0,0.0,0.0,Nautical,59062/587120,5.91,0,5.91,Customer Packaging,re.compile('\\d+')
1072,GP Acoustics,693070511,866891361,2024-11-01,21136.17,280881537650,2024-10-21,2024-10-23,12:17:00,Ground,4,10556,,Prepaid,,12.5,13,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,MUSICAL FULFILLMENT OH,MUSICAL FULFILLMENT OH,1040 N WYNN RD,OREGON,OH,436161430.0,US,,,0,0,,693070511,,14,14,14,ADVA4452,,17.43,-12.88,0.0,0.0,0.37,0,0.0,0.0,Nautical,59062/587120,4.92,0,4.92,Customer Packaging,re.compile('\\d+')
1140,GP Acoustics,693070511,867618863,2024-11-08,29866.52,280986780422,2024-10-23,2024-10-28,14:01:00,Ground,6,11912,,Prepaid,,21.7,22,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,FUCHS AUDIO TECHNOLOGY,FUCHS AUDIO TECHNOLOGY,407 GETTY AVE,CLIFTON,NJ,70112121.0,US,,,0,0,,693070511,,20,14,14,ADVA4456,,30.27,-22.37,0.0,0.0,0.64,0,0.0,0.0,Nautical,59062/587120,8.54,0,8.54,Customer Packaging,re.compile('\\d+')
1141,GP Acoustics,693070511,867618864,2024-11-08,25739.62,281142406523,2024-10-28,2024-10-30,13:30:00,Ground,4,13390,,Prepaid,,38.2,39,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,ATLAS CUSTOM CABINETS,ATLAS CUSTOM CABINETS,28 SUNDANCE CIR,NEDERLAND,CO,804669526.0,US,,,0,0,,693070511,,24,14,14,ADVA4456,,33.6,-25.16,7.7,0.0,1.77,0,0.0,5.78,Nautical,59062/587120,23.69,0,23.69,Customer Packaging,re.compile('\\d+')
1142,GP Acoustics,693070511,867618864,2024-11-08,25739.62,281142405045,2024-10-28,2024-10-30,13:29:00,Ground,4,13390,,Prepaid,,37.7,38,,,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219,US,,ATLAS CUSTOM CABINETS,ATLAS CUSTOM CABINETS,28 SUNDANCE CIR,NEDERLAND,CO,804669526.0,US,,,0,0,,693070511,,24,14,14,ADVA4456,,32.23,-24.14,7.7,0.0,1.74,0,0.0,5.78,Nautical,59062/587120,23.31,0,23.31,Customer Packaging,re.compile('\\d+')


# MISC