In [8]:
# Step 1: Filter and Clean Invoice Data
import pandas as pd

# Load the invoice input data
invoice_path = "invoice_input_data.xlsx"  # Update path if needed
invoice_df = pd.read_excel(invoice_path)

# Keep only rows with valid freight class and historical rate
invoice_filtered = invoice_df[
    invoice_df["freight_class"].notna() &
    invoice_df["historical_rate"].notna()
][[
    "site", 
    "rate_unit", 
    "new_commodity_group", 
    "invoice_commodity_description",
    "freight_class", 
    "historical_rate"
]].copy()

# Preview the result
print("Filtered Invoice Data (Step 1):")
invoice_filtered.head()


# Also include xgs_rate
invoice_filtered = invoice_df[
    invoice_df["freight_class"].notna() &
    invoice_df["historical_rate"].notna() &
    invoice_df["xgs_rate"].notna()
][[
    "site", 
    "rate_unit", 
    "new_commodity_group", 
    "invoice_commodity_description",
    "freight_class", 
    "historical_rate",
    "xgs_rate"
]].copy()


Filtered Invoice Data (Step 1):


Unnamed: 0,site,rate_unit,new_commodity_group,invoice_commodity_description,freight_class,historical_rate
0,DIT,CWT,1VNL,LVT,1M,0.044483
1,DIT,SQYD,1CPT,Carpet Tiles,L5C,1.027585
2,DIT,CWT,1VNL,LVT,L5C,0.227596
3,DIT,SQYD,1CPT,Carpet Tiles,L5C,0.24335
4,DIT,CWT,1VNL,LVP,3M,0.043875


In [None]:
# Step 2: Pivot Invoice Data to Freight Class Columns

# Define standard freight classes in reporting template
freight_class_columns = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Pivot: average historical_rate by site/unit/commodity and freight_class
pivot_df = invoice_filtered.pivot_table(
    index=["site", "rate_unit", "new_commodity_group", "invoice_commodity_description"],
    columns="freight_class",
    values="historical_rate",
    aggfunc="mean"
).reset_index()

# Ensure all required freight class columns are present
for col in freight_class_columns:
    if col not in pivot_df.columns:
        pivot_df[col] = None  # Add missing class columns as blank

# Reorder columns
pivot_df = pivot_df[["site", "rate_unit", "new_commodity_group", "invoice_commodity_description"] + freight_class_columns]

# Preview the result
print("Pivoted Invoice Summary (Step 2):")
pivot_df.head()


In [None]:
# Step 3: Add and Rename Columns to Match Vendor Template

# Rename columns to match template
pivot_df.rename(columns={
    "rate_unit": "unit",
    "new_commodity_group": "commodity_group",
    "invoice_commodity_description": "commodity_description"
}, inplace=True)

# Add missing template columns
pivot_df["site_description"] = "Itasca"  # or dynamically map if needed
pivot_df["unitclass"] = pivot_df["unit"].apply(lambda x: "Weight" if x == "CWT" else "Area")

# Reorder to match vendor column layout
ordered_cols = [
    "site_description", "site", "unit", "unitclass", "commodity_group", "commodity_description"
] + ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

pivot_df = pivot_df[ordered_cols]

# Preview the result
print("Formatted Invoice Summary (Step 3):")
pivot_df.head()


Formatted Invoice Summary (Step 3):


freight_class,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M
0,Itasca,DIT,CWT,Weight,1VNL,LVP,0.554113,0.261966,0.24507,0.112344,0.171638,0.110898,,,,
1,Itasca,DIT,CWT,Weight,1VNL,LVT,0.758267,0.186397,0.275416,0.102616,0.174844,0.08442,,0.0653,0.047238,0.061794
2,Itasca,DIT,CWT,Weight,1VNL,VCT,0.734376,0.299858,,0.023383,0.274947,,0.190925,,,
3,Itasca,DIT,SQYD,Area,1CBL,Carpet Roll,2.510639,1.021696,0.830599,2.816536,0.482738,0.165966,0.159801,,,
4,Itasca,DIT,SQYD,Area,1CPT,Carpet Tiles,2.431266,8.161047,1.483412,4.140805,,0.379897,,,,


In [None]:

# Step 2.5: Pivot XGS Rate Data

# Pivot: average xgs_rate by site/unit/commodity and freight_class
pivot_xgs = invoice_filtered.pivot_table(
    index=["site", "rate_unit", "new_commodity_group", "invoice_commodity_description"],
    columns="freight_class",
    values="xgs_rate",
    aggfunc="mean"
).reset_index()

# Ensure all required freight class columns are present
for col in freight_class_columns:
    if col not in pivot_xgs.columns:
        pivot_xgs[col] = None  # Add missing class columns as blank

# Rename columns to distinguish from historical
pivot_xgs = pivot_xgs.rename(columns={col: f"xgs_{col}" for col in freight_class_columns})


In [None]:

# Step 3: Add and Rename Columns to Match Vendor Template

# Merge xgs pivot into historical pivot
pivot_df = pd.merge(
    pivot_df,
    pivot_xgs,
    on=["site", "rate_unit", "new_commodity_group", "invoice_commodity_description"],
    how="left"
)

# Rename columns to match template
pivot_df.rename(columns={
    "rate_unit": "unit",
    "new_commodity_group": "commodity_group",
    "invoice_commodity_description": "commodity_description"
}, inplace=True)

# Add missing template columns
pivot_df["site_description"] = "Itasca"
pivot_df["unitclass"] = pivot_df["unit"].apply(lambda x: "Weight" if x == "CWT" else "Area")

# Reorder to match vendor column layout for historical
ordered_cols = [
    "site_description", "site", "unit", "unitclass", "commodity_group", "commodity_description"
] + freight_class_columns + [f"xgs_{col}" for col in freight_class_columns]

pivot_df = pivot_df[ordered_cols]


In [None]:
# Step 4: Add Source Column and Append to Vendor Data

# Add source column to invoice data
pivot_df["source"] = "invoice"

# Load vendor data
vendor_path = "freight_rates_operating_multi_reporting.csv"  # Update if needed
vendor_df = pd.read_csv(vendor_path)

# Add source column to vendor rows
vendor_df["source"] = "vendor"

# Ensure both DataFrames have the same column order
final_cols = vendor_df.columns.tolist()
pivot_df = pivot_df[final_cols]  # reorder invoice data columns to match

# Combine the tables
combined_df = pd.concat([vendor_df, pivot_df], ignore_index=True)

# Preview the result
print("Appended Final Table (Step 4):")
print(combined_df.tail())


# Also normalize xgs_ freight class rates for CWT
xgs_cols = [f"xgs_{col}" for col in freight_class_cols]
combined_df.loc[vendor_cwt_mask, xgs_cols] = combined_df.loc[vendor_cwt_mask, xgs_cols] / 100


Appended Final Table (Step 4):
   site_description site  unit unitclass commodity_group  \
6            Itasca  DIT   CWT    Weight            1VNL   
7            Itasca  DIT   CWT    Weight            1VNL   
8            Itasca  DIT  SQYD      Area            1CBL   
9            Itasca  DIT  SQYD      Area            1CPT   
10           Itasca  DIT  SQYD      Area            1CPT   

   commodity_description       L5C        5C        1M        2M        3M  \
6                    LVT  0.758267  0.186397  0.275416  0.102616  0.174844   
7                    VCT  0.734376  0.299858       NaN  0.023383  0.274947   
8            Carpet Roll  2.510639  1.021696  0.830599  2.816536  0.482738   
9           Carpet Tiles  2.431266  8.161047  1.483412  4.140805       NaN   
10          carpet tiles  3.008201  1.211902       NaN       NaN       NaN   

          5M       10M     20M       30M       40M   source  
6   0.084420       NaN  0.0653  0.047238  0.061794  invoice  
7        NaN  0

In [None]:
# Step 6: Normalize Vendor Rates from $/CWT to $/LBS

# Identify rows where unit is CWT (used for 1VNL)
vendor_cwt_mask = (combined_df["source"] == "vendor") & (combined_df["unit"] == "CWT")

# List of freight class columns to scale
freight_class_cols = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Convert vendor rates from $/CWT to $/LBS
combined_df.loc[vendor_cwt_mask, freight_class_cols] = combined_df.loc[vendor_cwt_mask, freight_class_cols] / 100

print("✅ Converted vendor CWT rates to $/LBS for comparability.")


In [None]:
# Step 5: Export Final Combined Table to Excel or CSV

# Define export paths
excel_output_path = "combined_freight_rate_report.xlsx"
csv_output_path = "combined_freight_rate_report.csv"

# Save to Excel
combined_df.to_excel(excel_output_path, index=False)

# Optionally save to CSV
combined_df.to_csv(csv_output_path, index=False)

print(f"✅ Exported successfully to:\n- {excel_output_path}\n- {csv_output_path}")


✅ Exported successfully to:
- combined_freight_rate_report.xlsx
- combined_freight_rate_report.csv
