In [33]:
import os
input_path = "../data/input"

# Check if the directory exists
if os.path.exists(input_path):
    # List all files
    files_in_input = os.listdir(input_path)
    
    print("📁 Files in 'data/input':")
    for file in files_in_input:
        print("•", file)
else:
    print(f"❌ Directory '{input_path}' does not exist.")

📁 Files in 'data/input':
• accounts_to_exclude.xlsx
• Freight_Cost_Analysis_CY2024-03.25.csv
• Freight_Data_Example.csv
• freight_results_20250407_014948.csv
• IFS Cloud Commodity Groups.xlsx
• Manual Lines.xlsx
• Southeast Freight Audit 3-4-25 - V2.xlsx
• XGS rates .xlsx


In [34]:
import pandas as pd
import matplotlib.pyplot as plt

# Load your dataset
file_path = "../data/input/Freight_Cost_Analysis_CY2024-03.25.csv"
df = pd.read_csv(file_path, encoding="latin1", low_memory=False)

# === Flag if a PO has any 'PROJECT Freight' using ACCOUNT code 5504 ===
df['IS_PROJECT_FREIGHT_LINE'] = df['ACCOUNT'] == 5504
project_freight_flag = df[df['IS_PROJECT_FREIGHT_LINE']].groupby('PO NO').size().reset_index(name='PROJECT_FREIGHT_COUNT')
project_freight_flag['PO_HAS_PROJECT_FREIGHT'] = True
df = df.merge(project_freight_flag[['PO NO', 'PO_HAS_PROJECT_FREIGHT']], on='PO NO', how='left')
df['PO_HAS_PROJECT_FREIGHT'] = df['PO_HAS_PROJECT_FREIGHT'].fillna(False).astype(bool)

# === Step 1: Freight Spend Profiling ===
freight_lines = df[df['IS_PROJECT_FREIGHT_LINE']]

total_po_value = df.groupby('PO NO', as_index=False)['INVOICE LINE TOTAL'].sum().rename(
    columns={'INVOICE LINE TOTAL': 'TOTAL_PO_VALUE'}
)
freight_cost = freight_lines.groupby('PO NO', as_index=False)['INVOICE LINE TOTAL'].sum().rename(
    columns={'INVOICE LINE TOTAL': 'PROJECT_FREIGHT_COST'}
)

df = df.merge(total_po_value, on='PO NO', how='left')
df = df.merge(freight_cost, on='PO NO', how='left')

# Convert to numeric types
df['TOTAL_PO_VALUE'] = pd.to_numeric(df['TOTAL_PO_VALUE'], errors='coerce')
df['PROJECT_FREIGHT_COST'] = pd.to_numeric(df['PROJECT_FREIGHT_COST'], errors='coerce').fillna(0)

# Calculations with enforced numeric types
df['PRODUCT_ONLY_PO_VALUE'] = df['TOTAL_PO_VALUE'] - df['PROJECT_FREIGHT_COST']
df['PRODUCT_ONLY_PO_VALUE'] = pd.to_numeric(df['PRODUCT_ONLY_PO_VALUE'], errors='coerce')

df['PROJECT_FREIGHT_PERCENT'] = df['PROJECT_FREIGHT_COST'] / df['TOTAL_PO_VALUE']
df['PROJECT_FREIGHT_PERCENT'] = pd.to_numeric(df['PROJECT_FREIGHT_PERCENT'], errors='coerce')

df['FREIGHT_PERCENT_EXCL_PRODUCT'] = df['PROJECT_FREIGHT_COST'] / df['PRODUCT_ONLY_PO_VALUE']
df['FREIGHT_PERCENT_EXCL_PRODUCT'] = pd.to_numeric(df['FREIGHT_PERCENT_EXCL_PRODUCT'], errors='coerce')

# Flags
df['FREIGHT_≥90%_OF_PO'] = (df['PROJECT_FREIGHT_PERCENT'] >= 0.9).astype(bool)
df['FREIGHT_GT_PRODUCT'] = (df['PROJECT_FREIGHT_COST'] > df['PRODUCT_ONLY_PO_VALUE']).astype(bool)
df['NEGATIVE_FREIGHT_PERCENT'] = (df['PROJECT_FREIGHT_PERCENT'] < 0).astype(bool)

# Count number of ACCOUNT 5504 lines per PO
freight_line_counts = df[df['ACCOUNT'] == 5504].groupby('PO NO').agg(
    PROJECT_FREIGHT_LINE_COUNT=('ACCOUNT', 'count')
).reset_index()

df = df.merge(freight_line_counts, on='PO NO', how='left')
df['PROJECT_FREIGHT_LINE_COUNT'] = df['PROJECT_FREIGHT_LINE_COUNT'].fillna(0).astype(int)
df['PO_HAS_MULTIPLE_PROJECT_FREIGHT_LINES'] = df['PROJECT_FREIGHT_LINE_COUNT'] > 1

# === Step 2: ZIP & Supplier Analysis ===
df['SHIP TO ZIP'] = df['SHIP TO ZIP'].astype(str).str.extract(r'(\d{5})')
freight_df = df[df['PROJECT_FREIGHT_COST'] > 0]
zip_supplier_summary = freight_df.groupby(['SHIP TO ZIP', 'SUPPLIER NO']).agg(
    AVG_FREIGHT_PERCENT=('PROJECT_FREIGHT_PERCENT', 'mean'),
    PO_COUNT=('PO NO', 'nunique')
).reset_index()
zip_supplier_summary['HIGH_FREIGHT_FLAG'] = zip_supplier_summary['AVG_FREIGHT_PERCENT'] > 0.5
df = df.merge(zip_supplier_summary[['SHIP TO ZIP', 'SUPPLIER NO', 'HIGH_FREIGHT_FLAG']],
              on=['SHIP TO ZIP', 'SUPPLIER NO'], how='left')

# === Step 3: Product Analysis ===
labor_keywords = ['LABOR', 'INSTALL', 'SERVICE', 'WAGE', 'CONTRACT', 'EMPLOYEE']
df['PART DESCRIPTION CLEAN'] = df['PART DESCRIPTION'].astype(str).str.upper()
df['IS_LABOR'] = df['PART DESCRIPTION CLEAN'].apply(lambda desc: any(k in desc for k in labor_keywords))
product_counts = df[~df['IS_LABOR'] & df['PART DESCRIPTION'].notna()].groupby(
    'PART DESCRIPTION'
).size().reset_index(name='PRODUCT_ORDER_COUNT')
product_counts['PRODUCT_ORDER_RANK'] = product_counts['PRODUCT_ORDER_COUNT'].rank(method='dense', ascending=False).astype(int)
df = df.merge(product_counts, on='PART DESCRIPTION', how='left')

# === Step 4: UOM Consistency ===
def compare_uom(row):
    if pd.isna(row['INV UOM']):
        return 'Missing INV UOM'
    elif row['PURCH UOM'] == row['INV UOM']:
        return 'Match'
    else:
        return 'Mismatch'
df['UOM_COMPARISON_STATUS'] = df.apply(compare_uom, axis=1)

# === Step 5: Key Metrics Summary ===
po_summary = df[['PO NO', 'PROJECT_FREIGHT_COST', 'PRODUCT_ONLY_PO_VALUE']].drop_duplicates().dropna()
po_summary['FREIGHT_PERCENT_EXCL_PRODUCT'] = po_summary['PROJECT_FREIGHT_COST'] / po_summary['PRODUCT_ONLY_PO_VALUE']
pct_le_10 = (po_summary['FREIGHT_PERCENT_EXCL_PRODUCT'] <= 0.10).mean() * 100
pct_gt_50 = (po_summary['FREIGHT_PERCENT_EXCL_PRODUCT'] > 0.50).mean() * 100
print(f"% of POs with freight ≤ 10% of product spend: {pct_le_10:.2f}%")
print(f"% of POs with freight > 50% of product spend: {pct_gt_50:.2f}%")

# === Step 6: Composite Key Consistency ===
df['PO_INVOICE_COMPOSITE_KEY'] = df['PO NO'].astype(str) + '|' + df['INVOICE ID'].astype(str) + '|' + df['INVOICE NO'].astype(str)
composite_counts = df.groupby('PO NO')['PO_INVOICE_COMPOSITE_KEY'].nunique().reset_index()
composite_counts['PO_COMPOSITE_KEY_CONSISTENCY'] = composite_counts['PO_INVOICE_COMPOSITE_KEY'].apply(
    lambda x: 'Consistent' if x == 1 else 'Inconsistent'
)
df = df.merge(composite_counts[['PO NO', 'PO_COMPOSITE_KEY_CONSISTENCY']], on='PO NO', how='left')

# === Step 7: Quantity Consistency Check ===
# Ensure numeric comparisons
df['INVOICED LINE QTY'] = pd.to_numeric(df['INVOICED LINE QTY'], errors='coerce')
df['PO PURCH QTY'] = pd.to_numeric(df['PO PURCH QTY'], errors='coerce')
df['PO INV QTY'] = pd.to_numeric(df['PO INV QTY'], errors='coerce')

# Compare all three quantities
df['QTY_CONSISTENCY_FLAG'] = (
    (df['INVOICED LINE QTY'] == df['PO PURCH QTY']) &
    (df['PO PURCH QTY'] == df['PO INV QTY'])
)

# === Final Type Cleanup Before Export ===
numeric_cols = [
    'TOTAL_PO_VALUE', 'PROJECT_FREIGHT_COST', 'PROJECT_FREIGHT_PERCENT',
    'PRODUCT_ONLY_PO_VALUE', 'FREIGHT_PERCENT_EXCL_PRODUCT'
]

for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce').round(6)

# === Export enriched dataset to data/output/ ===
output_path = "../data/output/Freight_Analysis_Enriched_Output.csv"
df.to_csv(output_path, index=False, float_format='%.6f')
print(f"\n✅ Enriched dataset exported to: {output_path}")


  df['PO_HAS_PROJECT_FREIGHT'] = df['PO_HAS_PROJECT_FREIGHT'].fillna(False).astype(bool)


% of POs with freight ≤ 10% of product spend: 87.67%
% of POs with freight > 50% of product spend: 2.61%

✅ Enriched dataset exported to: ../data/output/Freight_Analysis_Enriched_Output.csv


In [35]:
df.columns


Index(['SITE', 'SITE DESCRIPTION', 'SUPPLIER NO', 'SUPPLIER NAME',
       'INVOICE ID', 'INVOICE NO', 'DATE POSTED', 'PROJECT ID', 'PROJECT NAME',
       'ACCOUNT', 'ACCOUNT DESCRIPTION', 'PLANNED DELIVERY DATE',
       'SHIP TO ZIP', 'PO NO', 'PO LINE NO', 'PO REL NO', 'RECEIPT NO',
       'PART NO', 'PART DESCRIPTION', 'COMM 1', 'COMM 2', 'PO PURCH QTY',
       'PURCH UOM', 'PO INV QTY', 'INV UOM', 'INVOICED LINE QTY',
       'INVOICE LINE TOTAL', 'PO PRICE', 'IS_PROJECT_FREIGHT_LINE',
       'PO_HAS_PROJECT_FREIGHT', 'TOTAL_PO_VALUE', 'PROJECT_FREIGHT_COST',
       'PRODUCT_ONLY_PO_VALUE', 'PROJECT_FREIGHT_PERCENT',
       'FREIGHT_PERCENT_EXCL_PRODUCT', 'FREIGHT_≥90%_OF_PO',
       'FREIGHT_GT_PRODUCT', 'NEGATIVE_FREIGHT_PERCENT',
       'PROJECT_FREIGHT_LINE_COUNT', 'PO_HAS_MULTIPLE_PROJECT_FREIGHT_LINES',
       'HIGH_FREIGHT_FLAG', 'PART DESCRIPTION CLEAN', 'IS_LABOR',
       'PRODUCT_ORDER_COUNT', 'PRODUCT_ORDER_RANK', 'UOM_COMPARISON_STATUS',
       'PO_INVOICE_COMPOSITE_KEY',

In [36]:
#  === Load Commodity Groups ===
# Load the commodity groups from the Excel file
commodity_df = pd.read_excel('../data/input/IFS Cloud Commodity Groups.xlsx', sheet_name='Commodity Groups')
commodity_df.head()

Unnamed: 0,Commodity Group,Description,Old/New,Priority
0,0,Zero Cost,Old Commodity,No
1,10,Carpet,Old Commodity,Yes
2,20,Sheet Vinyl,Old Commodity,Yes
3,30,Product Care Supplies,Old Commodity,No
4,40,Vinyl Tile,Old Commodity,Yes


In [37]:
# Convert 'Commodity Group' to string and create a new column 'COMM 1'
commodity_df['COMM 1'] = commodity_df['Commodity Group'].astype(str)

In [38]:
# Convert 'Commodity Group' to string in the main DataFrame
df['COMM 1'] = df['COMM 1'].astype(str)

In [39]:
# Perform the join on the 'COMM 1' column
merged_df = df.merge(commodity_df, on='COMM 1', how='left')

# Display the first few rows of the merged DataFrame
merged_df.head()

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,PRODUCT_ORDER_COUNT,PRODUCT_ORDER_RANK,UOM_COMPARISON_STATUS,PO_INVOICE_COMPOSITE_KEY,PO_COMPOSITE_KEY_CONSISTENCY,QTY_CONSISTENCY_FLAG,Commodity Group,Description,Old/New,Priority
0,BNB,Beckers New Brighton,102548,Lonseal Flooring,433731,0007795-CM,03-Jan-24,2311121922,REGIONS HOSPITAL 4TH MRI,5400,...,,,Missing INV UOM,nan|433731|0007795-CM,,False,,,,
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,1.0,129.0,Match,48180|433340|173373,Inconsistent,True,1ACC,Accessories,New Commodity,No
2,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,1.0,129.0,Match,48180|433340|173373,Inconsistent,True,1TRAN,Transitions,New Commodity,No
3,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,5504,...,,,Missing INV UOM,48180|433340|173373,Inconsistent,False,,,,
4,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2015,...,,,Missing INV UOM,48180|433340|173373,Inconsistent,False,,,,


In [40]:
# === Load Manual Lines ===
# Load the manual lines from the Excel file
manual_lines_df = pd.read_excel('../data/input/Manual Lines.xlsx',sheet_name='Sheet0')
manual_lines_df.head()

Unnamed: 0,Supplier Id,Supplier Name,Po Ref Number,Status,Invoice Series,Invoice No,Invoice Date,Voucher Type,Voucher No,Voucher Date,...,Project Desc,Site Code,Site Code Desc,Fixed Asset,Fixed Asset Desc,Brand Part,Brand Part Desc,Project Activity,Line Ref,Posting Line Amount
0,104323,Nydree Flooring,168676,Paid Posted,SX,0125342-IN,2024-12-17,I,2025001129,2025-01-02,...,Amli Atlantic Station Public Area,SPN,Spectra Norcross,,,SPC,Spectra,100044728.0,,81.31
1,107786,Dal Tile Corporation,130449,Paid Posted,SX,0143516889,2024-08-15,I,2025009505,2025-01-23,...,North Chase 300,FSC,Floor Sol Charleston,,,FLS,Flooring Solutions,100155932.0,,10.45
2,107786,Dal Tile Corporation,130449,Paid Posted,SX,0143516889,2024-08-15,I,2025009505,2025-01-23,...,North Chase 300,FSC,Floor Sol Charleston,,,FLS,Flooring Solutions,100155932.0,,25.98
3,107786,Dal Tile Corporation,130449,Paid Posted,SX,0143516889,2024-08-15,I,2025009505,2025-01-23,...,North Chase 300,FSC,Floor Sol Charleston,,,FLS,Flooring Solutions,100155932.0,,24.19
4,126695,Tarkett USA Inc,107284,Paid Posted,SI,8201991030,2024-07-17,I,2025003166,2025-01-08,...,LCPS Loudoun County High School,CCSG,Contract Carpet Sol Government,,,CCSG,Contract Carpet Systems Government,100147054.0,,6.31


In [41]:
# Convert 'Po Ref Number' to string and create a new column 'PO NO'
manual_lines_df['PO NO'] = manual_lines_df['Po Ref Number'].astype(str)

In [42]:
# Convert 'PO NO' to string in the main DataFrame
merged_df['PO NO'] = merged_df['PO NO'].astype(str)

In [43]:
# Create a new column to indicate if the PO NO is manual
merged_df['IS_MANUAL'] = merged_df['PO NO'].isin(manual_lines_df['PO NO'])

In [44]:
# Group by 'PO NO' and check if all rows in each group have 'Priority' == 'Yes'
merged_df['ALL_PRIORITY'] = merged_df.groupby('PO NO')['Priority'].transform(lambda x: (x == 'Yes').any())

# Flag the PO NOs where all rows have 'Priority' == 'Yes'
merged_df['PRIORITY_PO_FLAG'] = merged_df['ALL_PRIORITY']

In [45]:
# Filter the DataFrame for rows where PO_HAS_PROJECT_FREIGHT is True
filtered_df = merged_df[merged_df['PRIORITY_PO_FLAG'] == True]

# Write the filtered DataFrame to a CSV file
output_path_filtered = "../data/output/Filtered_PO_Flag_True.csv"
filtered_df.to_csv(output_path_filtered, index=False)
print(f"\n✅ Filtered dataset exported to: {output_path_filtered}")


✅ Filtered dataset exported to: ../data/output/Filtered_PO_Flag_True.csv


In [46]:
important_columns = [
'SITE',
'SITE DESCRIPTION',
'SUPPLIER NO',
'SUPPLIER NAME',
'PO NO',
'ACCOUNT',
'ACCOUNT DESCRIPTION',
'SHIP TO ZIP',
'PART NO', 
'PART DESCRIPTION',
'PO PURCH QTY',
'PO INV QTY', 
'INVOICED LINE QTY',
'PURCH UOM', 
'INV UOM', 
'INVOICE LINE TOTAL', 
'PO PRICE', 
'TOTAL_PO_VALUE',
'PO_HAS_PROJECT_FREIGHT',
'PROJECT_FREIGHT_COST', 
'PROJECT_FREIGHT_PERCENT',
'PRODUCT_ONLY_PO_VALUE', 
'FREIGHT_≥90%_OF_PO',
'FREIGHT_GT_PRODUCT', 
'NEGATIVE_FREIGHT_PERCENT',
'PO_HAS_MULTIPLE_PROJECT_FREIGHT_LINES',
'COMM 1',
'Commodity Group', 
'Description', 
'Old/New', 
'Priority',
'IS_MANUAL',
'PRIORITY_PO_FLAG',
'QTY_CONSISTENCY_FLAG',
'UOM_COMPARISON_STATUS'

]

In [47]:
# Filter the DataFrame to include only the important columns
df_filtered = merged_df[important_columns]
df_filtered.head(2)

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,PO NO,ACCOUNT,ACCOUNT DESCRIPTION,SHIP TO ZIP,PART NO,PART DESCRIPTION,...,PO_HAS_MULTIPLE_PROJECT_FREIGHT_LINES,COMM 1,Commodity Group,Description,Old/New,Priority,IS_MANUAL,PRIORITY_PO_FLAG,QTY_CONSISTENCY_FLAG,UOM_COMPARISON_STATUS
0,BNB,Beckers New Brighton,102548,Lonseal Flooring,,5400,PROJECT Sub-Contract Labor,,,,...,False,,,,,,True,False,False,Missing INV UOM
1,BNB,Beckers New Brighton,104716,Hank's Specialties,48180.0,2008,Received Not Yet Invoiced,55430.0,1000007968.0,PROTECT ALL PRE-NOTCHED Z-BAR INSIDE CORNER AL...,...,False,1ACC,1ACC,Accessories,New Commodity,No,False,True,True,Match


In [48]:
# Filter for rows where ACCOUNT is 2008
account_2008_df = merged_df[merged_df['ACCOUNT'] == 2008]

# Group by 'PO NO' and check if all rows in each group have 'Priority' == 'Yes'
priority_2008_flag = df_filtered[df_filtered['ACCOUNT'] == 2008].groupby('PO NO')['Priority'].apply(lambda x: (x == 'Yes').all()).reset_index(name='ALL_PRIORITY_2008')

# Merge the result back to the filtered DataFrame
df_filtered = df_filtered.merge(priority_2008_flag, on='PO NO', how='left')

# Fill NaN values with False for the new column
df_filtered['ALL_PRIORITY_2008'] = df_filtered['ALL_PRIORITY_2008'].fillna(False)

# Flag the PO NOs where all rows with ACCOUNT = 2008 have 'Priority' == 'Yes'
df_filtered['PRIORITY_PO_FLAG_2008'] = df_filtered['ALL_PRIORITY_2008']

  df_filtered['ALL_PRIORITY_2008'] = df_filtered['ALL_PRIORITY_2008'].fillna(False)


In [None]:
# === Filter for specific sites ===
# Filter the DataFrame for specific sites
df_sites = df_filtered[df_filtered['SITE'].isin(['SPJ','SPW','SPT'])]
df_sites.shape
df.head(2)

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,PO_HAS_MULTIPLE_PROJECT_FREIGHT_LINES,HIGH_FREIGHT_FLAG,PART DESCRIPTION CLEAN,IS_LABOR,PRODUCT_ORDER_COUNT,PRODUCT_ORDER_RANK,UOM_COMPARISON_STATUS,PO_INVOICE_COMPOSITE_KEY,PO_COMPOSITE_KEY_CONSISTENCY,QTY_CONSISTENCY_FLAG
0,BNB,Beckers New Brighton,102548,Lonseal Flooring,433731,0007795-CM,03-Jan-24,2311121922,REGIONS HOSPITAL 4TH MRI,5400,...,False,,NAN,False,,,Missing INV UOM,nan|433731|0007795-CM,,False
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,False,False,PROTECT ALL PRE-NOTCHED Z-BAR INSIDE CORNER AL...,False,1.0,129.0,Match,48180|433340|173373,Inconsistent,True
2,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,False,False,PROTECT ALL Z BAR DARK GRAY VINYL COVE CAP 8' ...,False,1.0,129.0,Match,48180|433340|173373,Inconsistent,True
3,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,5504,...,False,,NAN,False,,,Missing INV UOM,48180|433340|173373,Inconsistent,False
4,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,03-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2015,...,False,,NAN,False,,,Missing INV UOM,48180|433340|173373,Inconsistent,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441702,WFS,,107162,Cintas Corporation No.2 dba Cintas First Aid &...,1731885,2000136371,13-Mar-25,,,7913,...,False,,NAN,False,,,Missing INV UOM,nan|1731885|2000136371,,False
441703,WFS,,107162,Cintas Corporation No.2 dba Cintas First Aid &...,1731996,9287014199,13-Mar-25,,,7913,...,False,,NAN,False,,,Missing INV UOM,nan|1731996|9287014199,,False
441704,WFS,,107162,Cintas Corporation No.2 dba Cintas First Aid &...,1732937,9306917035,13-Mar-25,,,7913,...,False,,NAN,False,,,Missing INV UOM,nan|1732937|9306917035,,False
441705,WFS,,107162,Cintas Corporation No.2 dba Cintas First Aid &...,1736749,9306915283,17-Mar-25,,,7913,...,False,,NAN,False,,,Missing INV UOM,nan|1736749|9306915283,,False


In [50]:
# Export enriched dataset
output_path = "../data/output/Freight_Analysis_Enriched_all_sites_Any_v4.csv"
df_filtered.to_csv(output_path, index=False)
print(f"\n✅ Enriched dataset exported to: {output_path}")


✅ Enriched dataset exported to: ../data/output/Freight_Analysis_Enriched_all_sites_Any_v4.csv


In [None]:
# Export enriched dataset
output_path = "../data/output/Freight_Analysis_Enriched_Sample_Sites_Any_v4.csv"
df_sites.to_csv(output_path, index=False)
print(f"\n✅ Enriched dataset exported to: {output_path}")


✅ Enriched dataset exported to: ../data/output/Freight_Analysis_Enriched_Sample_Sites_Any_v4.csv
