In [46]:
import pandas as pd
import re as re
import numpy as np

from processing import invoice_data, qbo, whill, gp_acoustics, amt

pd.set_option('display.max_columns', 500)

## **Invoice Data** <-> **QBO**

### - Compare **Invoice Data** [`Customer PO #`] to **QBO** [`Display_Name`]

In [47]:
def compare_qbo(qbo: pd.DataFrame, invoice_data: pd.DataFrame) -> pd.DataFrame:
    """
    Function: Compares FedEx invoice with QuickBooks via keys 'Customer PO #' and 'Display_Name'
    Input: Original QuickBooks and FedEx Invoice file
    Output: Pandas DataFrame with values not found in QuickBooks
    """

    # ? is 'Display_Name' the only key to compare against?

    qbo_found = pd.merge(
        qbo, invoice_data, right_on="Customer PO #", left_on="Display_Name", how="inner", suffixes=['_qbo','_invoice_data']
    )

    lst = set()
    for i in invoice_data["Customer PO #"]:
        if i not in list(qbo_found["Display_Name"].unique()):
            lst.add(i)

    qbo_not_found = pd.DataFrame()
    qbo_not_found["Customer PO #"] = pd.DataFrame(lst)
    qbo_not_found = qbo_not_found.merge(
        invoice_data,
        on="Customer PO #",
        how="left",
    )
    return qbo_found, qbo_not_found

In [48]:
# Run 
qbo_found, qbo_not_found = compare_qbo(qbo, invoice_data)

In [49]:
# Test
print(f"PO's in QBO: {len(qbo_found['Customer PO #'].unique())}")

PO's in QBO: 6


In [50]:
# Test
len(qbo_not_found['Customer PO #'].unique())
print(f"PO's in QBO: {len(qbo_not_found['Customer PO #'].unique())}")

PO's in QBO: 84


## **Invoice Data** [`Reference`] <-> **Extensiv** [`Reference`]

### - For all **Invoice Data** [`Customer PO #`] not in **QBO** [`Display Name`]

In [51]:

def reg_tokenizer(value):
    
    # Add pattern tokens to FedEx invoice table in a new column called "Reference"
    with_letters = re.sub(r"[a-zA-Z]+", r"\\w+", str(value))
    with_numbers = re.sub(r"\d+", r"\\d+", with_letters)
    with_spaces = re.sub(r"\s+", r"\\s+", with_numbers)

    final = re.compile(with_spaces)

    return final


In [52]:
# Run it

invoice_data['Pattern'] = invoice_data['Reference'].apply(reg_tokenizer)


In [55]:
def find_extensiv_reference_columns(extensiv_table: pd.DataFrame, invoice_data_w_patterns: pd.DataFrame) -> dict:  # fmt: skip
    """
    Function: Finds all of the columns in the Extensiv table that match each 'Reference' in FedEx Invoice not in QBO
    Input: Extensiv DataFrame, FedEx Invoice DataFrame w/ added 'Pattern' column
    Ouput: Dictionary {'match_lst': list of Extensiv columns that match 'Reference' pattern,
                       'Total Charges': Charges associated with that 'Reference' in FedEx Invoice,
                       'Tracking #': Tracking number associated with that 'Reference' in FedEx Invoice}
    Notes: May not need Total Charges and Tracking # in the end
    """

    def find_col_match(extensiv_table: pd.DataFrame, ref_pattern: pd.Series) -> list:
        """
        Function: Subfunction to iterate through each of the patterns in FedEx Invoice
        Input: Extensiv DataFrame, FedEx Reference patterns as a Series in a for loop
        Ouput: List of columns that match given Reference pattern
        """
        col_lst = set()

        for col in extensiv_table.columns:

            for value in extensiv_table[col][:25]:

                if re.fullmatch(ref_pattern, str(value)):

                    col_lst.add(col.strip())

        if col_lst:
            return col_lst
        
    match_dct = dict()
    suffix = 0

    for i, v in enumerate(invoice_data_w_patterns["Reference"]):

        if i != 0 and v == invoice_data_w_patterns["Reference"][i - 1]:

            suffix += 1
            v = f"{v}-s{suffix}"

        elif i != 0 and v != invoice_data_w_patterns["Reference"][i - 1]:
            suffix = 0
        else:
            continue

        match_lst = find_col_match(
            extensiv_table, invoice_data_w_patterns["Pattern"][i]
        )

        if match_lst is not None and not pd.isna(v):

            match_dct[v] = {
                "match_lst": match_lst,
                "Tracking #": (invoice_data_w_patterns["Tracking #"][i]),
            }

    return match_dct

### - Find Extensiv Reference Columns

In [56]:
gp_reference_columns = find_extensiv_reference_columns(gp_acoustics, qbo_not_found)
amt_reference_columns = find_extensiv_reference_columns(amt, qbo_not_found)
whill_reference_columns = find_extensiv_reference_columns(whill, qbo_not_found)

In [63]:
# VSCode Version of this Code

def find_value_match(extensiv_table: pd.DataFrame, reference_matches: dict) -> pd.DataFrame:

    match_lst = list()

    for reference in reference_matches:
        
        matches = reference_matches[reference]["match_lst"]
        tracking_number = reference_matches[reference]["Tracking #"]

        for col in extensiv_table[list[matches]]:

                for i, val in enumerate(extensiv_table[col]):

                    base_reference = re.sub(r"-s\d+$", "", str(reference))

                    if val == reference or val == base_reference:
    
                            match_entry = {
                                "Reference": base_reference,
                                "Name": extensiv_table["CustomerIdentifier.Name"][i],
                                "Column": col,
                                "Tracking #": tracking_number,
                            }

                            if match_entry not in match_lst:
                                match_lst.append(match_entry)

    
    return match_lst

In [67]:
gp_reference_matches = find_value_match(gp_acoustics,gp_reference_columns)
amt_reference_matches = find_value_match(amt,amt_reference_columns)
whill_reference_matches = find_value_match(whill,gp_reference_columns)

KeyboardInterrupt: 

In [62]:
gp_reference_matches

[{'Reference': '15116',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Tracking #': 281752495934},
 {'Reference': '10558',
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Tracking #': 280881667358},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'CustomerIdentifier.Name',
  'Tracking #': 418132348770},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'ShipTo.CompanyName',
  'Tracking #': 418132348770},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'ShipTo.Name',
  'Tracking #': 418132348770},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'CustomerIdentifier.Name',
  'Tracking #': 418132348758},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'ShipTo.CompanyName',
  'Tracking #': 418132348758},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Column': 'ShipTo.Name',
  'Tracking #': 418132348758},
 {'Reference': 'GP Acoustics',
  'Name': 'GP Acoustics',
  'Colu

In [37]:
def create_extensiv_receiver_info(extensiv_table: pd.DataFrame) -> dict:

    extensiv_receiver_info = extensiv_table[
        [
            "ShipTo.CompanyName",
            "ShipTo.Name",
            "ShipTo.Address1",
            "CustomerIdentifier.Name",
        ]
    ]

    extensiv_receiver_info_nd = extensiv_receiver_info.drop_duplicates(
        [
            "ShipTo.CompanyName",
            "ShipTo.Name",
            "ShipTo.Address1",
            "CustomerIdentifier.Name",
        ]
    )

    extensiv_receiver_dct = dict()

    for i, row in extensiv_receiver_info_nd.iterrows():

        extensiv_receiver_dct[i] = {
            "Receiver Address": row["ShipTo.Address1"],
            "Receiver Company": row["ShipTo.CompanyName"],
            "Receiver Name": row["ShipTo.Name"],
            "Customer Identifier": row["CustomerIdentifier.Name"],
        }

    return extensiv_receiver_dct


def create_invoice_data_receiver_info(invoice_data: pd.DataFrame) -> dict:  # fmt: skip

    invoice_data_dct = {}

    for i, row in invoice_data.iterrows():

        invoice_data_dct[i] = {
            "Receiver Address": row["Receiver Address"],
            "Receiver Company": row["Receiver Company"],
            "Receiver Name": row["Receiver Name"],
            "Tracking #": row["Tracking #"],
        }

    return invoice_data_dct

def compare_receiver_info(invoice_data_receiver_info: dict, extensiv_receiver_info: dict) -> list:  # fmt: skip

    match_entry = dict()
    match_lst = list()

    for i in invoice_data_receiver_info:

        for e in extensiv_receiver_info:

            if (
                invoice_data_receiver_info[i]["Receiver Address"]
                == extensiv_receiver_info[e]["Receiver Address"]
                or invoice_data_receiver_info[i]["Receiver Name"]
                == extensiv_receiver_info[e]["Receiver Name"]
                or invoice_data_receiver_info[i]["Receiver Company"]
                == extensiv_receiver_info[e]["Receiver Company"]
            ):

                match_entry = {
                    "Address": invoice_data_receiver_info[i]["Receiver Address"],
                    "Name": invoice_data_receiver_info[i]["Receiver Name"],
                    "Company": invoice_data_receiver_info[i]["Receiver Company"],
                    "Customer": extensiv_receiver_info[e]["Customer Identifier"],
                }

                if match_entry not in match_lst:
                    match_lst.append(match_entry)
    if match_lst:
        return match_lst



In [38]:
gp_receiver_info = create_extensiv_receiver_info(gp_acoustics)
amt_receiver_info = create_extensiv_receiver_info(amt)
whill_receiver_info = create_extensiv_receiver_info(whill)
invoice_data_receiver_info = create_invoice_data_receiver_info(invoice_data)

In [40]:
def make_final_df(reference_matches, receiver_matches, invoice_data_not_qbo):

    try:

        final_matches_lst = []
        final_matches_lst.extend(reference_matches)
        final_matches_lst.extend(receiver_matches)
        
        for i, row in invoice_data_not_qbo.iterrows():

            for dct in final_matches_lst:
                

                if "Reference" in dct and dct["Reference"] == row["Reference"]:
                    invoice_data_not_qbo.loc[i, "Customer PO #"] = dct["Name"]
                elif "Address" in dct and dct["Address"] == row["Receiver Address"]:
                    invoice_data_not_qbo.loc[i, "Customer PO #"] = dct["Customer"]
                elif "Name" in dct and dct["Name"] == row["Receiver Name"]:
                    invoice_data_not_qbo.loc[i, "Customer PO #"] = dct["Customer"]
                elif "Company" in dct and dct["Company"] == row["Receiver Company"]:
                    invoice_data_not_qbo.loc[i, "Customer PO #"] = dct["Customer"]
                elif "Reference" in dct:
                    try:
                        dct["Reference"] = int(dct["Reference"])
                    except ValueError:
                        pass
    
    except TypeError:
        pass


    return invoice_data_not_qbo

In [41]:
gp_receiver_matches = compare_receiver_info(
    invoice_data_receiver_info, gp_receiver_info
)
amt_receiver_matches = compare_receiver_info(
    invoice_data_receiver_info, amt_receiver_info
)
whill_receiver_matches = compare_receiver_info(
    invoice_data_receiver_info, whill_receiver_info
)
final_df = make_final_df(
    gp_reference_matches, gp_receiver_matches, qbo_not_found
)
final_df = make_final_df(
    amt_reference_matches, amt_receiver_matches, qbo_not_found
)
final_df = make_final_df(
    whill_reference_matches, whill_receiver_matches, qbo_not_found
)

# del final_df["Pattern"]

In [42]:
gp_reference_matches

[{'Reference': 10554,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 6.54,
  'Tracking #': 280881245477},
 {'Reference': 10560,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 8.21,
  'Tracking #': 280881903399},
 {'Reference': 11523,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 19.93,
  'Tracking #': 280791161446},
 {'Reference': 10556,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 4.92,
  'Tracking #': 280881537650},
 {'Reference': 11934,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 10.85,
  'Tracking #': 280986974282},
 {'Reference': 10564,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 5.18,
  'Tracking #': 280882000233},
 {'Reference': 11912,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 8.54,
  'Tracking #': 280986780422},
 {'Reference': 10566,
  'Name': 'GP Acoustics',
  'Column': 'OrderId',
  'Total Charges': 9.56,
  'Tracking #

In [43]:
final_df[final_df['Customer PO #'] == 'GP Acoustics']

Unnamed: 0,Customer PO #,Shipper #,Invoice #,Invoice Date,Invoice Amount,Tracking #,Ship Date,Delivery Date,Delivery Time,Service Level,Zone,Reference,Department,Bill Option,Actual Weight,Bill Weight,Shipper Name,Shipper Company,Shipper Address,Shipper City,Shipper State,Shipper Postal Code,Shipper Country,Receiver Name,Receiver Company,Receiver Address,Receiver City,Receiver State,Receiver Postal Code,Receiver Country,Declared Value,Customs Value,Shipper Account,Reference 2,Length,Width,Height,Control #,Published Amount,Discounted Amount,Residential,DAS,Fuel,Saturday Delivery,Add'd Handling,Misc. Charges,Client,Coding,Total Charges,Adjusted Amount,Audited Amount,Service Packaging,Source,Pattern
67,GP Acoustics,693070511.0,866891360.0,2024-11-01,24182.11,280881200000.0,2024-10-21,2024-10-23,11:33:00,Ground,4.0,10554,,Prepaid,24.1,25.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,MUSICAL FULFILLMENT MS,MUSICAL FULFILLMENT MS,12914 STATELINE RD,OLIVE BRANCH,MS,386544017.0,US,0.0,0.0,693070511.0,,20.0,14.0,14.0,ADVA4452,23.16,-17.11,0.0,0.0,0.49,0.0,0.0,0.0,Nautical,59062/587120,6.54,0.0,6.54,Customer Packaging,Invoice Data,re.compile('\\d+')
694,GP Acoustics,693070511.0,866891360.0,2024-11-01,24182.11,280881900000.0,2024-10-21,2024-10-24,12:23:00,Ground,6.0,10560,,Prepaid,20.4,21.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,MUSICAL FULFILLMENT NV,MUSICAL FULFILLMENT NV,450 MAESTRO DR,RENO,NV,895111294.0,US,0.0,0.0,693070511.0,,14.0,14.0,14.0,ADVA4452,29.09,-21.49,0.0,0.0,0.61,0.0,0.0,0.0,Nautical,59062/587120,8.21,0.0,8.21,Customer Packaging,Invoice Data,re.compile('\\d+')
1202,GP Acoustics,693070511.0,866891360.0,2024-11-01,24182.11,280791200000.0,2024-10-18,2024-10-21,13:10:00,Ground,7.0,11523,,Prepaid,13.3,14.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,ADI TEJADA,ADI TEJADA,403 S H ST,LOMPOC,CA,934367812.0,US,0.0,0.0,693070511.0,,14.0,14.0,14.0,ADVA4452,27.54,-20.35,5.7,5.7,1.49,0.0,0.0,-0.15,Nautical,59062/587120,19.93,0.0,19.93,Customer Packaging,Invoice Data,re.compile('\\d+')
1204,GP Acoustics,693070511.0,866891361.0,2024-11-01,21136.17,280881500000.0,2024-10-21,2024-10-23,12:17:00,Ground,4.0,10556,,Prepaid,12.5,13.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,MUSICAL FULFILLMENT OH,MUSICAL FULFILLMENT OH,1040 N WYNN RD,OREGON,OH,436161430.0,US,0.0,0.0,693070511.0,,14.0,14.0,14.0,ADVA4452,17.43,-12.88,0.0,0.0,0.37,0.0,0.0,0.0,Nautical,59062/587120,4.92,0.0,4.92,Customer Packaging,Invoice Data,re.compile('\\d+')
1240,GP Acoustics,693070511.0,866891361.0,2024-11-01,21136.17,280987000000.0,2024-10-23,2024-10-24,16:20:00,Ground,2.0,11934,,Prepaid,9.0,9.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,ALEX GILPIN,ALEX GILPIN,1809 S VASSAR AVE,INDEPENDENCE,MO,640524053.0,US,0.0,0.0,693070511.0,,14.0,14.0,14.0,ADVA4452,13.97,-9.48,5.55,0.0,0.81,0.0,0.0,0.0,Nautical,59062/587120,10.85,0.0,10.85,Customer Packaging,Invoice Data,re.compile('\\d+')
1241,GP Acoustics,693070511.0,866891361.0,2024-11-01,21136.17,280882000000.0,2024-10-21,2024-10-24,11:21:00,Ground,6.0,10564,,Prepaid,7.0,8.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,MUSICAL FULFILMENT SERVICES,MUSICAL FULFILMENT SERVICES,8 THORNTON RD,OAKLAND,NJ,74363116.0,US,0.0,0.0,693070511.0,,14.0,14.0,14.0,ADVA4452,18.35,-13.56,0.0,0.0,0.39,0.0,0.0,0.0,Nautical,59062/587120,5.18,0.0,5.18,Customer Packaging,Invoice Data,re.compile('\\d+')
1242,GP Acoustics,693070511.0,867618863.0,2024-11-08,29866.52,280986800000.0,2024-10-23,2024-10-28,14:01:00,Ground,6.0,11912,,Prepaid,21.7,22.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,FUCHS AUDIO TECHNOLOGY,FUCHS AUDIO TECHNOLOGY,407 GETTY AVE,CLIFTON,NJ,70112121.0,US,0.0,0.0,693070511.0,,20.0,14.0,14.0,ADVA4456,30.27,-22.37,0.0,0.0,0.64,0.0,0.0,0.0,Nautical,59062/587120,8.54,0.0,8.54,Customer Packaging,Invoice Data,re.compile('\\d+')
1243,GP Acoustics,693070511.0,866891361.0,2024-11-01,21136.17,280882000000.0,2024-10-21,2024-10-24,11:21:00,Ground,6.0,10566,,Prepaid,24.9,25.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,MUSICAL FULFILMENT SERVICES,MUSICAL FULFILMENT SERVICES,8 THORNTON RD,OAKLAND,NJ,74363116.0,US,0.0,0.0,693070511.0,,26.0,14.0,14.0,ADVA4452,33.87,-25.03,0.0,0.0,0.72,0.0,0.0,0.0,Nautical,59062/587120,9.56,0.0,9.56,Customer Packaging,Invoice Data,re.compile('\\d+')
1246,GP Acoustics,693070511.0,866891362.0,2024-11-01,37778.6,280768300000.0,2024-10-17,2024-10-21,13:56:00,Ground,6.0,11397,,Prepaid,24.3,25.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,RELIC MUSIC LLC,RELIC MUSIC LLC,13 MONMOUTH ST,RED BANK,NJ,77011613.0,US,0.0,0.0,693070511.0,,20.0,14.0,14.0,ADVA4452,33.87,-25.03,0.0,0.0,0.72,0.0,0.0,0.0,Nautical,59062/587120,9.56,0.0,9.56,Customer Packaging,Invoice Data,re.compile('\\d+')
1247,GP Acoustics,693070511.0,867618864.0,2024-11-08,25739.62,281058900000.0,2024-10-25,2024-10-29,11:41:00,Ground,5.0,13157,,Prepaid,25.9,26.0,Nautical - Renner,Nautical - Renner,16100 W. 116th St,Lenexa,KS,66219.0,US,WORTH WEAVER,MOJOTONE,137 WORTH BEVERAGE DR,BURGAW,NC,284250000.0,US,0.0,0.0,693070511.0,,24.0,12.0,12.0,ADVA4456,28.87,-21.33,0.0,1.15,0.7,0.0,0.0,0.0,Nautical,59062/587120,9.39,0.0,9.39,Customer Packaging,Invoice Data,re.compile('\\d+')
