In [261]:
import pandas as pd
import matplotlib.pyplot as plt

# === Load invoice lines ===
# The CSV is decoded as Latin-1 and parsed with low_memory=False.
file_path = "../../data/input/Freight_Cost_Analysis_CY2024-03.25.csv"
df = pd.read_csv(file_path, encoding="latin1", low_memory=False)

# === Load commodity groups ===
# Lookup table read from the 'Commodity Groups' sheet of the workbook.
commodity_df = pd.read_excel(
    '../../data/input/IFS Cloud Commodity Groups.xlsx',
    sheet_name='Commodity Groups',
)

# Build a string-typed join key named 'COMM 1' on both sides:
# the lookup derives it from 'Commodity Group', the invoice lines already
# carry a 'COMM 1' column that is cast to str in place.
# NOTE: astype(str) turns missing values into the literal text 'nan'.
commodity_df['COMM 1'] = commodity_df['Commodity Group'].astype(str)
df['COMM 1'] = df['COMM 1'].astype(str)

# Left join: every invoice line is kept, commodity attributes are attached
# where the key matches.
merged_df = pd.merge(df, commodity_df, how='left', on='COMM 1')

# === Normalize units of measure, classify lines, build conversion codes ===
# From here on the merged frame is referred to as `df` (same object, not a copy).
df = merged_df

# Strip surrounding whitespace and upper-case the unit first, THEN expand the
# abbreviations. Normalization must come first so that values such as 'sf'
# or ' SF ' are also mapped to 'SQFT' / 'SQYD'.
df['INV UOM'] = (
    df['INV UOM']
    .str.strip()
    .str.upper()
    .replace({'SF': 'SQFT', 'SY': 'SQYD'})
)

# Lines measured in SQFT or SQYD are 'Classified'; every other unit,
# including a missing one, is 'Unclassified'. Vectorized membership test
# instead of a Python-level function call per row.
df['Classification'] = (
    df['INV UOM']
    .isin(['SQFT', 'SQYD'])
    .map({True: 'Classified', False: 'Unclassified'})
)

# conversion_code = <Description, spaces replaced by '_'>_<Commodity Group>_<INV UOM>
# Missing components are rendered as the literal text 'nan' by astype(str),
# so a line without commodity data gets 'nan_nan_nan'.
df['conversion_code'] = (
    df['Description'].str.replace(' ', '_', regex=False).astype(str)
    + '_' + df['Commodity Group'].astype(str)
    + '_' + df['INV UOM'].astype(str)
)
df.head()

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,INV UOM,INVOICED LINE QTY,INVOICE LINE TOTAL,PO PRICE,Commodity Group,Description,Old/New,Priority,Classification,conversion_code
0,BNB,Beckers New Brighton,102548,Lonseal Flooring,433731,0007795-CM,3-Jan-24,2311121922,REGIONS HOSPITAL 4TH MRI,5400,...,,,-600.0,,,,,,Unclassified,nan_nan_nan
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,EA,4.0,18.4,18.4,1ACC,Accessories,New Commodity,No,Unclassified,Accessories_1ACC_EA
2,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,EA,7.0,45.36,45.36,1TRAN,Transitions,New Commodity,No,Unclassified,Transitions_1TRAN_EA
3,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,5504,...,,,50.0,,,,,,Unclassified,nan_nan_nan
4,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2015,...,,,151.42,,,,,,Unclassified,nan_nan_nan


In [262]:
# Inspect the column labels currently present on df
df.columns

Index(['SITE', 'SITE DESCRIPTION', 'SUPPLIER NO', 'SUPPLIER NAME',
       'INVOICE ID', 'INVOICE NO', 'DATE POSTED', 'PROJECT ID', 'PROJECT NAME',
       'ACCOUNT', 'ACCOUNT DESCRIPTION', 'PLANNED DELIVERY DATE',
       'SHIP TO ZIP', 'PO NO', 'PO LINE NO', 'PO REL NO', 'RECEIPT NO',
       'PART NO', 'PART DESCRIPTION', 'COMM 1', 'COMM 2', 'PO PURCH QTY',
       'PURCH UOM', 'PO INV QTY', 'INV UOM', 'INVOICED LINE QTY',
       'INVOICE LINE TOTAL', 'PO PRICE', 'Commodity Group', 'Description',
       'Old/New', 'Priority', 'Classification', 'conversion_code'],
      dtype='object')

In [263]:
# Lines booked to ACCOUNT 5504
freight_invoices = df.loc[df['ACCOUNT'].eq(5504)]

# Flag every line (whatever its account) whose INVOICE ID also appears
# on at least one ACCOUNT 5504 line
df['Has Matching Account 5504'] = df['INVOICE ID'].isin(freight_invoices['INVOICE ID'])

# Show the invoice id next to its flag
df.loc[:, ['INVOICE ID', 'Has Matching Account 5504']]

Unnamed: 0,INVOICE ID,Has Matching Account 5504
0,433731,False
1,433340,True
2,433340,True
3,433340,True
4,433340,True
...,...,...
441702,1731885,False
441703,1731996,False
441704,1732937,False
441705,1736749,False


In [264]:
# Rebind freight_invoices to ALL lines of the flagged invoices (any account),
# using the boolean flag column directly as the row mask.
freight_invoices = df.loc[df['Has Matching Account 5504']]
freight_invoices

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,INVOICED LINE QTY,INVOICE LINE TOTAL,PO PRICE,Commodity Group,Description,Old/New,Priority,Classification,conversion_code,Has Matching Account 5504
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,4.00,18.40,18.40,1ACC,Accessories,New Commodity,No,Unclassified,Accessories_1ACC_EA,True
2,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,7.00,45.36,45.36,1TRAN,Transitions,New Commodity,No,Unclassified,Transitions_1TRAN_EA,True
3,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,5504,...,,50.00,,,,,,Unclassified,nan_nan_nan,True
4,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2015,...,,151.42,,,,,,Unclassified,nan_nan_nan,True
5,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,240.00,715.20,715.20,1VNL,Vinyl,New Commodity,Yes,Classified,Vinyl_1VNL_SQFT,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441599,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,234.67,2905.21,2905.21,1CPT,Carpet Tile,New Commodity,Yes,Classified,Carpet_Tile_1CPT_SQYD,True
441600,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,234.67,2905.21,2905.21,1CPT,Carpet Tile,New Commodity,Yes,Classified,Carpet_Tile_1CPT_SQYD,True
441601,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,10.00,1320.00,1320.00,1ADH,Adhesive,New Commodity,No,Unclassified,Adhesive_1ADH_EA,True
441602,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,5205,...,,3565.92,,,,,,Unclassified,nan_nan_nan,True


In [265]:
# Within the flagged invoices, keep only the lines booked to ACCOUNT 2008
part_account_2008 = freight_invoices.loc[freight_invoices['ACCOUNT'].eq(2008)]
part_account_2008

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,INVOICED LINE QTY,INVOICE LINE TOTAL,PO PRICE,Commodity Group,Description,Old/New,Priority,Classification,conversion_code,Has Matching Account 5504
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,4.00,18.40,18.40,1ACC,Accessories,New Commodity,No,Unclassified,Accessories_1ACC_EA,True
2,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,7.00,45.36,45.36,1TRAN,Transitions,New Commodity,No,Unclassified,Transitions_1TRAN_EA,True
5,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,240.00,715.20,715.20,1VNL,Vinyl,New Commodity,Yes,Classified,Vinyl_1VNL_SQFT,True
6,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,60.00,128.40,128.40,1BASE,Base,New Commodity,No,Unclassified,Base_1BASE_LF,True
7,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,1.00,32.47,32.47,82,Vinyl Accessories,Old Commodity,No,Unclassified,Vinyl_Accessories_82_EA,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441598,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,234.67,2905.21,2905.21,1CPT,Carpet Tile,New Commodity,Yes,Classified,Carpet_Tile_1CPT_SQYD,True
441599,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,234.67,2905.21,2905.21,1CPT,Carpet Tile,New Commodity,Yes,Classified,Carpet_Tile_1CPT_SQYD,True
441600,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,234.67,2905.21,2905.21,1CPT,Carpet Tile,New Commodity,Yes,Classified,Carpet_Tile_1CPT_SQYD,True
441601,SPWV,Spectra W Valley Cty,121550,Mohawk Industries,1749432,9500882374,25-Mar-25,2412198745,South Town Mall - Forever Young Sto,2008,...,10.00,1320.00,1320.00,1ADH,Adhesive,New Commodity,No,Unclassified,Adhesive_1ADH_EA,True


In [266]:
# Columns to export, in output order (source column names).
template_columns = [
    # purchase order / account
    'PO NO',
    'ACCOUNT',
    'ACCOUNT DESCRIPTION',
    # site
    'SITE',
    'SITE DESCRIPTION',
    # supplier
    'SUPPLIER NO',
    'SUPPLIER NAME',
    # part
    'PART NO',
    'PART DESCRIPTION',
    # invoice line
    'INVOICED LINE QTY',
    'INVOICE ID',
    'INVOICE NO',
    'INV UOM',
    # commodity codes and attributes joined from the commodity lookup
    'COMM 1',
    'COMM 2',
    'Commodity Group',
    'Description',
    'Old/New',
    'Priority',
    # derived columns
    'Classification',
    'conversion_code',
]

In [267]:
# Source column name -> output column name.
# Note: the site code ('SITE') is exported as `siteid`, while the site
# description ('SITE DESCRIPTION') takes the shorter name `site`.
column_mapping = {
    # purchase order / invoice identifiers
    "PO NO": "po_no",
    "INVOICE ID": "invoice_id",
    "INVOICE NO": "invoice_no",
    # account
    "ACCOUNT": "account",
    "ACCOUNT DESCRIPTION": "account_description",
    # site
    "SITE": "siteid",
    "SITE DESCRIPTION": "site",
    # supplier
    "SUPPLIER NO": "supplierid",
    "SUPPLIER NAME": "suppliername",
    # quantity and part
    "INVOICED LINE QTY": "quantity",
    "PART NO": "partnumber",
    "PART DESCRIPTION": "partdescription",
    # commodity
    "COMM 1": "comm1",
    "COMM 2": "comm2",
    "Commodity Group": "commodity_group",
    "Description": "commoditydescription",
    # unit of measure and derived attributes
    "INV UOM": "uom",
    "Priority": "priority",
    "Classification": "classification",
    "conversion_code": "conversion_code",
    "Old/New": "old_new",
}

# Select the template columns (in template order) from the ACCOUNT 2008 lines,
# then apply the output column names.
mapped_df = (
    part_account_2008
    .loc[:, template_columns]
    .rename(column_mapping, axis='columns')
)

# Preview the result
mapped_df.head()

Unnamed: 0,po_no,account,account_description,siteid,site,supplierid,suppliername,partnumber,partdescription,quantity,...,invoice_no,uom,comm1,comm2,commodity_group,commoditydescription,old_new,priority,classification,conversion_code
1,48180,2008,Received Not Yet Invoiced,BNB,Beckers New Brighton,104716,Hank's Specialties,1000007968,PROTECT ALL PRE-NOTCHED Z-BAR INSIDE CORNER AL...,4.0,...,173373,EA,1ACC,1ACC,1ACC,Accessories,New Commodity,No,Unclassified,Accessories_1ACC_EA
2,48180,2008,Received Not Yet Invoiced,BNB,Beckers New Brighton,104716,Hank's Specialties,1000008162,PROTECT ALL Z BAR DARK GRAY VINYL COVE CAP 8' ...,7.0,...,173373,EA,1TRAN,2ALL,1TRAN,Transitions,New Commodity,No,Unclassified,Transitions_1TRAN_EA
5,48180,2008,Received Not Yet Invoiced,BNB,Beckers New Brighton,104716,Hank's Specialties,1000008241,Protect All Classic 5ft x 8ft Matte - Dark Gra...,240.0,...,173373,SQFT,1VNL,2ROLL,1VNL,Vinyl,New Commodity,Yes,Classified,Vinyl_1VNL_SQFT
6,48180,2008,Received Not Yet Invoiced,BNB,Beckers New Brighton,104716,Hank's Specialties,200204-001,Protect All Specialty Flooring Vinyl Cove Base...,60.0,...,173373,LF,1BASE,2VNL,1BASE,Base,New Commodity,No,Unclassified,Base_1BASE_LF
7,48180,2008,Received Not Yet Invoiced,BNB,Beckers New Brighton,104716,Hank's Specialties,1000007963,PROTECT ALL NON RAPID WELD APPLICATIONS SEAM S...,1.0,...,173373,EA,82,,82,Vinyl Accessories,Old Commodity,No,Unclassified,Vinyl_Accessories_82_EA


In [268]:
# Write the renamed ACCOUNT 2008 lines to CSV; index=False omits the DataFrame index column.
# NOTE(review): the file name ends in '20083' — confirm this is intended and not a typo for '2008'.
mapped_df.to_csv('../../data/output/part_account_20083.csv', index=False)