In [3]:
# !pip install XlsxWriter
import xlsxwriter

In [2]:
# Step 1: Filter and Clean Invoice Data
import pandas as pd


# 🔧 Configure which sites to process
selected_sites = ["DIT", "SPN", "SPCP","SPW","SPT","SPHU","SPTM","PVF","SPJ","CCS","SPB","SPL","SPLV","CCSG","SPCB","SPWV","FSU","SPK","SPLA","SPD","SPTG","KFC"]  # Example: update these as needed

# Load the invoice input data
invoice_path = "invoice_input_data_all.xlsx"  # Update path if needed
invoice_df = pd.read_excel(invoice_path)
invoice_df = invoice_df[invoice_df['rate_ratio_normal_outlier'] == 'OK' ]
print(invoice_df.shape)
invoice_GA_df = invoice_df[invoice_df['model'] == True]
# invoice_df[invoice_df["rate_ratio_normal_outlier"]!= 'Missing']
print(invoice_GA_df.shape)

invoice_GA_df = invoice_GA_df[[
    "invoice_id", "site",'invoice_commodity_quantity', "invoice_commodity_group", "invoice_commodity_description",
    "location", "model", "unit", "rate_unit", "freight_class", "applied_rate",
    "shipment_type", "realistic_optimal_method", "xgs_rate", "historical_rate"
]]

invoice_GA_df["invoice_commodity_description"] = invoice_GA_df["invoice_commodity_description"].apply(
    lambda x: x.title() if str(x).strip().lower() == "carpet tiles" else x
)

# Filter input invoices to selected sites
invoice_GA_df = invoice_GA_df[invoice_GA_df["site"].isin(selected_sites)]


(11999, 61)
(9311, 61)


In [3]:
# Load the invoice input data
invoice_path = "invoice_input_data_all.xlsx"  # Update path if needed
invoice_df = pd.read_excel(invoice_path)
print(invoice_df.shape)
# invoice_df = invoice_df[invoice_df['model'] == True]
invoice_all_df = invoice_df[invoice_df["rate_ratio_normal_outlier"]!= 'MISSING']
print(invoice_all_df.shape)

invoice_all_df = invoice_all_df[[
    "invoice_id", "site",'invoice_commodity_quantity', "invoice_commodity_group", "invoice_commodity_description",
    "location", "model", "unit", "rate_unit", "freight_class", "applied_rate",
    "shipment_type", "realistic_optimal_method", "xgs_rate", "historical_rate"
]]

invoice_all_df["invoice_commodity_description"] = invoice_all_df["invoice_commodity_description"].apply(
    lambda x: x.title() if str(x).strip().lower() == "carpet tiles" else x
)

# Filter input invoices to selected sites
invoice_all_df = invoice_all_df[invoice_all_df["site"].isin(selected_sites)]

(17846, 61)
(13813, 61)


In [4]:
# Compute weighted average, 50th percentile (median), and 75th percentile of historical_rate by invoice_commodity_group for all sites using invoice_all_df

def weighted_avg(series, weights):
    return (series * weights).sum() / weights.sum()

wavg_hist_all_sites = (
    invoice_all_df
    .groupby(["site", "invoice_commodity_group"])
    .apply(lambda df: pd.Series({
        "hist_weighted_avg_all": weighted_avg(df["historical_rate"], df["invoice_commodity_quantity"]),
        "hist_median_all": df["historical_rate"].median(),
        "hist_75th_all": df["historical_rate"].quantile(0.75),
    }))
    .reset_index()
)

wavg_hist_all_sites
# Convert agg_rates (which is a DataFrame already) to a standard DataFrame if needed
wavg_hist_all_sites = pd.DataFrame(wavg_hist_all_sites)
wavg_hist_all_sites.reset_index(drop=True, inplace=True)
wavg_hist_all_sites.head()

  .apply(lambda df: pd.Series({


Unnamed: 0,site,invoice_commodity_group,hist_weighted_avg_all,hist_median_all,hist_75th_all
0,CCS,1CBL,0.996795,1.052129,1.332963
1,CCS,1CPT,1.350164,1.420594,1.845
2,CCS,1VNL,0.133302,0.163569,0.292137
3,CCSG,1CBL,1.258827,1.208769,1.645385
4,CCSG,1CPT,1.353927,1.447031,2.785434


In [5]:
# Compute weighted average, 50th percentile (median), and 75th percentile of historical_rate and xgs_rate by invoice_commodity_group for all sites

def weighted_avg(series, weights):
    return (series * weights).sum() / weights.sum()

agg_rates = (
    invoice_GA_df
    .groupby(["site", "invoice_commodity_group"])
    .apply(lambda df: pd.Series({
        "GA_weighted_avg": weighted_avg(df["historical_rate"], df["invoice_commodity_quantity"]),
        "GA_median": df["historical_rate"].median(),
        "GA_75th": df["historical_rate"].quantile(0.75),
        "xgs_weighted_avg": weighted_avg(df["xgs_rate"], df["invoice_commodity_quantity"]),
        "xgs_median": df["xgs_rate"].median(),
        "xgs_75th": df["xgs_rate"].quantile(0.75),
    }))
    .reset_index()
)
agg_rates

# Convert agg_rates (which is a DataFrame already) to a standard DataFrame if needed
agg_rates = pd.DataFrame(agg_rates)
agg_rates.reset_index(drop=True, inplace=True)
agg_rates.head()

  .apply(lambda df: pd.Series({


Unnamed: 0,site,invoice_commodity_group,GA_weighted_avg,GA_median,GA_75th,xgs_weighted_avg,xgs_median,xgs_75th
0,CCS,1CBL,1.051899,1.06116,1.318981,0.612178,0.561604,1.355208
1,CCS,1CPT,1.297253,1.416836,1.573031,0.895125,0.936674,1.254483
2,CCS,1VNL,0.155539,0.166764,0.264433,0.130259,0.192452,0.266866
3,CCSG,1CBL,1.179662,1.114443,1.359055,0.777104,1.110266,1.280691
4,CCSG,1CPT,1.346094,1.447031,2.740836,0.702924,0.936669,1.280691


In [6]:
# Merge agg_rates and wavg_hist_all_sites on 'site' and 'invoice_commodity_group'
merged_rates = pd.merge(
    agg_rates,
    wavg_hist_all_sites,
    on=["site", "invoice_commodity_group"],
    how="left"
)

merged_rates.head()

Unnamed: 0,site,invoice_commodity_group,GA_weighted_avg,GA_median,GA_75th,xgs_weighted_avg,xgs_median,xgs_75th,hist_weighted_avg_all,hist_median_all,hist_75th_all
0,CCS,1CBL,1.051899,1.06116,1.318981,0.612178,0.561604,1.355208,0.996795,1.052129,1.332963
1,CCS,1CPT,1.297253,1.416836,1.573031,0.895125,0.936674,1.254483,1.350164,1.420594,1.845
2,CCS,1VNL,0.155539,0.166764,0.264433,0.130259,0.192452,0.266866,0.133302,0.163569,0.292137
3,CCSG,1CBL,1.179662,1.114443,1.359055,0.777104,1.110266,1.280691,1.258827,1.208769,1.645385
4,CCSG,1CPT,1.346094,1.447031,2.740836,0.702924,0.936669,1.280691,1.353927,1.447031,2.785434


In [7]:
# Create recommended columns by taking the max between GA and historical, then multiplying by 1.06
merged_rates["Recommended_weighted_avg"] = merged_rates[["GA_weighted_avg", "hist_weighted_avg_all"]].max(axis=1) * 1.06
merged_rates["Recommended_median"] = merged_rates[["GA_median", "hist_median_all"]].max(axis=1) * 1.06
merged_rates["Recommended_75th"] = merged_rates[["GA_75th", "hist_75th_all"]].max(axis=1) * 1.06

merged_rates[["site", "invoice_commodity_group", "Recommended_weighted_avg", "Recommended_median", "Recommended_75th"]].head()

Unnamed: 0,site,invoice_commodity_group,Recommended_weighted_avg,Recommended_median,Recommended_75th
0,CCS,1CBL,1.115013,1.124829,1.412941
1,CCS,1CPT,1.431174,1.505829,1.9557
2,CCS,1VNL,0.164871,0.17677,0.309666
3,CCSG,1CBL,1.334357,1.281295,1.744108
4,CCSG,1CPT,1.435162,1.533853,2.95256


In [8]:
merged_rates.head()


Unnamed: 0,site,invoice_commodity_group,GA_weighted_avg,GA_median,GA_75th,xgs_weighted_avg,xgs_median,xgs_75th,hist_weighted_avg_all,hist_median_all,hist_75th_all,Recommended_weighted_avg,Recommended_median,Recommended_75th
0,CCS,1CBL,1.051899,1.06116,1.318981,0.612178,0.561604,1.355208,0.996795,1.052129,1.332963,1.115013,1.124829,1.412941
1,CCS,1CPT,1.297253,1.416836,1.573031,0.895125,0.936674,1.254483,1.350164,1.420594,1.845,1.431174,1.505829,1.9557
2,CCS,1VNL,0.155539,0.166764,0.264433,0.130259,0.192452,0.266866,0.133302,0.163569,0.292137,0.164871,0.17677,0.309666
3,CCSG,1CBL,1.179662,1.114443,1.359055,0.777104,1.110266,1.280691,1.258827,1.208769,1.645385,1.334357,1.281295,1.744108
4,CCSG,1CPT,1.346094,1.447031,2.740836,0.702924,0.936669,1.280691,1.353927,1.447031,2.785434,1.435162,1.533853,2.95256


In [9]:
# Export merged_rates to CSV
merged_rates.to_csv("merged_rates_output_V2.csv", index=False)
print("✅ merged_rates exported to merged_rates_output.csv")

✅ merged_rates exported to merged_rates_output.csv


In [12]:
invoice_df['invoice_commodity_description'].unique()

array(['VCT', 'Carpet Tiles', 'LVT', 'LVP', 'carpet tiles', 'Carpet Roll'],
      dtype=object)

In [13]:
# Step 2: Pivot Both Arithmetic and Weighted Averages

from numpy import average

freight_class_columns = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# First, filter again just to be safe
invoice_filtered = invoice_df[
    invoice_df["freight_class"].notna() &
    invoice_df["historical_rate"].notna() &
    invoice_df["xgs_rate"].notna() &
    invoice_df["invoice_commodity_quantity"].notna()
][[
    "site", 
    "rate_unit", 
    "invoice_commodity_group", 
    "invoice_commodity_description",
    "freight_class", 
    "historical_rate",
    "xgs_rate",
    "invoice_commodity_quantity"
]].copy()

# Step 2: Compute arithmetic and weighted averages

group_cols = ["site", "rate_unit", "invoice_commodity_group", "invoice_commodity_description", "freight_class"]

# Group invoice data
grouped = invoice_filtered.groupby(group_cols)

# Step 2A: Arithmetic averages using .agg()
summary_avg = grouped.agg(
    hist_avg=("historical_rate", "mean"),
    xgs_avg=("xgs_rate", "mean"),
    xgs_q2 = ("xgs_rate", lambda x: x.quantile(0.2)),
    xgs_q3 = ("xgs_rate", lambda x: x.quantile(0.75)),
)

# Step 2B: Weighted averages using .apply()
def compute_wavg(grp):
    return pd.Series({
        "hist_wavg": average(grp["historical_rate"], weights=grp["invoice_commodity_quantity"]),
        "xgs_wavg": average(grp["xgs_rate"], weights=grp["invoice_commodity_quantity"])
    })

summary_wavg = grouped.apply(compute_wavg)

# Step 2C: Combine both summaries
summary = pd.concat([summary_avg, summary_wavg], axis=1).reset_index()

# Preview
print("✅ Summary with arithmetic and weighted averages:")
summary.head()

✅ Summary with arithmetic and weighted averages:


  summary_wavg = grouped.apply(compute_wavg)


Unnamed: 0,site,rate_unit,invoice_commodity_group,invoice_commodity_description,freight_class,hist_avg,xgs_avg,xgs_q2,xgs_q3,hist_wavg,xgs_wavg
0,BSC,CWT,1VNL,LVP,L5C,0.316676,0.283196,0.283196,0.283196,0.316676,0.283196
1,BSC,CWT,1VNL,LVT,2M,0.036611,0.16696,0.166959,0.16696,0.03614,0.16696
2,BSC,CWT,1VNL,LVT,40M,0.001607,0.079963,0.079963,0.079963,0.001607,0.079963
3,BSC,CWT,1VNL,LVT,5C,0.175277,0.250845,0.250845,0.250845,0.175277,0.250845
4,BSC,CWT,1VNL,LVT,L5C,0.3756,0.453594,0.408236,0.491392,0.365167,0.440995


In [14]:
summary['invoice_commodity_description'].unique()

array(['LVP', 'LVT', 'VCT', 'Carpet Roll', 'Carpet Tiles', 'carpet tiles'],
      dtype=object)

In [15]:
# Step 4: Create block tables for each metric (no column prefixes)

index_cols = ["site", "rate_unit", "invoice_commodity_group", "invoice_commodity_description"]
freight_classes = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

def safe_pivot(metric_col, source_name):
    pivoted = summary.pivot(index=index_cols, columns="freight_class", values=metric_col).reset_index()
    
    # Ensure all freight class columns are present
    for fc in freight_classes:
        if fc not in pivoted.columns:
            pivoted[fc] = None

    # Add required template columns
    pivoted.rename(columns={
        "rate_unit": "unit",
        "invoice_commodity_group": "commodity_group",
        "invoice_commodity_description": "commodity_description"
    }, inplace=True)
    pivoted["site_description"] = "Itasca"
    pivoted["unitclass"] = pivoted["unit"].apply(lambda x: "Weight" if x == "CWT" else "Area")
    pivoted["source"] = source_name

    # Reorder
    ordered_cols = ["site_description", "site", "unit", "unitclass", "commodity_group", "commodity_description"] + freight_classes + ["source"]
    return pivoted[ordered_cols]

# Generate four blocks
hist_avg_block = safe_pivot("hist_avg", "hist_avg")
xgs_avg_block = safe_pivot("xgs_avg", "xgs_avg")
hist_wavg_block = safe_pivot("hist_wavg", "hist_wavg")
xgs_wavg_block = safe_pivot("xgs_wavg", "xgs_wavg")

# Optionally combine all
combined_output = pd.concat([hist_avg_block, xgs_avg_block, hist_wavg_block, xgs_wavg_block], ignore_index=True)

# Sort for visual clarity
combined_output = combined_output.sort_values(by=["commodity_group", "commodity_description", "site", "unit", "source"]).reset_index(drop=True)

# Preview
print("✅ Combined pivot output (clean format):")
combined_output.head()


✅ Combined pivot output (clean format):


freight_class,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M,source
0,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,1.026,,,,,,,,,,hist_avg
1,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,1.026,,,,,,,,,,hist_wavg
2,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,0.808556,,,,,,,,,,xgs_avg
3,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,0.808556,,,,,,,,,,xgs_wavg
4,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,1.423281,0.857188,,1.052129,,,,,,,hist_avg


In [16]:
# Step 3: Ensure All Required Columns in Combined Output

freight_classes = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Ensure all freight class columns exist in the output
for col in freight_classes:
    if col not in combined_output.columns:
        combined_output[col] = None

# Ensure proper column order
ordered_cols = [
    "site_description", "site", "unit", "unitclass", "commodity_group", "commodity_description"
] + freight_classes + ["source"]

combined_output = combined_output[ordered_cols]

# Preview the cleaned, structured result
print("✅ Final Structured Invoice Summary (Step 3):")
combined_output.head()


✅ Final Structured Invoice Summary (Step 3):


freight_class,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M,source
0,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,1.026,,,,,,,,,,hist_avg
1,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,1.026,,,,,,,,,,hist_wavg
2,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,0.808556,,,,,,,,,,xgs_avg
3,Itasca,BSC,SQYD,Area,1CBL,Carpet Roll,0.808556,,,,,,,,,,xgs_wavg
4,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,1.423281,0.857188,,1.052129,,,,,,,hist_avg


In [None]:
# Step 4: Add Source Column and Append to Vendor Data

# Load vendor data
vendor_path = "freight_rates_operating_multi_reporting_all.csv"  # Update path if needed
vendor_df = pd.read_csv(vendor_path)

# Filter vendor freight rates to selected sites
vendor_df = vendor_df[vendor_df["site"].isin(selected_sites)]


# Add source tag
vendor_df["source"] = "vendor"

# Ensure all required freight class columns exist in vendor_df
freight_classes = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']
for col in freight_classes:
    if col not in vendor_df.columns:
        vendor_df[col] = None

# Ensure consistent column ordering
final_cols = [
    "site_description", "site", "unit", "unitclass", "commodity_group", "commodity_description"
] + freight_classes + ["source"]

vendor_df = vendor_df[final_cols]
combined_output = combined_output[final_cols]  # Already structured in prior step

# Append invoice summary blocks to vendor table
combined_df = pd.concat([vendor_df, combined_output], ignore_index=True)

# Preview the result
print("✅ Appended Final Table (Step 4):")
combined_df.tail()


FileNotFoundError: [Errno 2] No such file or directory: 'freight_rates_operating_multi_reporting_all.csv'

: 

In [None]:
# Step 6: Normalize Vendor Rates from $/CWT to $/LBS

# Identify rows where unit is CWT (used for 1VNL)
vendor_cwt_mask = (combined_df["source"] == "vendor") & (combined_df["unit"] == "CWT")

# List of freight class columns to scale
freight_class_cols = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Convert vendor rates from $/CWT to $/LBS
combined_df.loc[vendor_cwt_mask, freight_class_cols] = combined_df.loc[vendor_cwt_mask, freight_class_cols] / 100

print("✅ Converted vendor CWT rates to $/LBS for comparability.")


✅ Converted vendor CWT rates to $/LBS for comparability.


In [None]:
combined_df['source'].unique()

array(['vendor', 'hist_avg', 'hist_wavg', 'xgs_avg', 'xgs_wavg'],
      dtype=object)

In [None]:
# Step X: Append Variance Rows Between hist_invoice and xgs_invoice

# Columns used to match rows
index_cols = [
    "site_description", "site", "unit", "unitclass",
    "commodity_group", "commodity_description"
]

# Freight class columns to compute variance on
freight_class_cols = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Separate historical and xgs rows
hist_df = combined_df[combined_df["source"] == "hist_wavg"]
xgs_df = combined_df[combined_df["source"] == "xgs_wavg"]

# Merge them on the index columns
variance_df = pd.merge(hist_df, xgs_df, on=index_cols, suffixes=("_hist", "_xgs"))

# Compute variance
variance_data = variance_df[index_cols].copy()
for col in freight_class_cols:
    variance_data[col] = variance_df[f"{col}_hist"] - variance_df[f"{col}_xgs"]

# Add source column
variance_data["source"] = "variance"

# Append to combined table
combined_df = pd.concat([combined_df, variance_data], ignore_index=True)

# Optional: sort for clarity
combined_df.sort_values(by=index_cols + ["source"], inplace=True)

# Preview result
print("✅ Variance rows added.")
combined_df.tail()


✅ Variance rows added.


Unnamed: 0,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M,source
16,Tampa,SPT,CWT,Weight,1VNL,LVP,0.2537,0.2313,0.1794,0.1441,0.1441,0.113,0.0942,0.0942,0.0942,0.0942,vendor
15,Tampa,SPT,CWT,Weight,1VNL,LVT,0.2537,0.2313,0.1794,0.1441,0.1441,0.113,0.0942,0.0942,0.0942,0.0942,vendor
17,Tampa,SPT,CWT,Weight,1VNL,VCT,0.2537,0.2313,0.1794,0.1441,0.1441,0.113,0.0942,0.0942,0.0942,0.0942,vendor
18,Tampa,SPT,SQYD,Area,1CBL,Carpet Roll,0.4608,0.4497,0.4424,0.435,0.4166,0.4166,0.4166,0.4166,0.4166,0.4166,vendor
19,Tampa,SPT,SQYD,Area,1CPT,Carpet Tiles,0.7834,0.7646,0.752,0.7394,0.7082,0.7082,0.7082,0.7082,0.7082,0.7082,vendor


In [None]:
combined_df['commodity_description'].unique()

array(['LVP', 'LVT', 'VCT', 'Carpet Roll', 'Carpet Tiles'], dtype=object)

In [None]:
# Define freight classes in correct order
freight_classes = ['L5C', '5C', '1M', '2M', '3M', '5M', '10M', '20M', '30M', '40M']

# Group by site, commodity, and freight class → count unique invoice_ids
invoice_counts = invoice_df.groupby(
    ["site", "invoice_commodity_description", "freight_class"]
)["invoice_id"].nunique().reset_index(name="invoice_count")

# List to collect all site-level matrices
invoice_matrix_list = []

# Loop over sites
for site in invoice_counts["site"].unique():
    site_df = invoice_counts[invoice_counts["site"] == site]

    # Pivot per site
    matrix = site_df.pivot_table(
        index="invoice_commodity_description",
        columns="freight_class",
        values="invoice_count",
        fill_value=0
    )

    # Reindex to ensure all freight classes are present
    matrix = matrix.reindex(columns=freight_classes, fill_value=0)
    matrix = matrix.reset_index()

    # Add required columns
    matrix["site_description"] = "Itasca"  # Adjust dynamically if needed
    matrix["site"] = site
    matrix["unit"] = None
    matrix["unitclass"] = None
    matrix["commodity_group"] = None
    matrix["source"] = "invoice_counts"

    # Reorder to match combined_df structure
    final = matrix[[
        "site_description", "site", "unit", "unitclass",
        "commodity_group", "invoice_commodity_description"
    ] + freight_classes + ["source"]]

    final.rename(columns={"invoice_commodity_description": "commodity_description"}, inplace=True)
    invoice_matrix_list.append(final)

# Concatenate all site-level matrices
invoice_matrix_final = pd.concat(invoice_matrix_list, ignore_index=True)

# ✅ Now append to combined_df
combined_df = pd.concat([combined_df, invoice_matrix_final], ignore_index=True)

combined_df


Unnamed: 0,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M,source
0,Continental Floors,SPCB,CWT,Weight,1VNL,LVP,0.2282,0.1877,0.1411,0.1121,0.1121,0.0795,0.0578,0.0578,0.0578,0.0578,vendor
1,Continental Floors,SPCB,CWT,Weight,1VNL,LVT,0.2282,0.1877,0.1411,0.1121,0.1121,0.0795,0.0578,0.0578,0.0578,0.0578,vendor
2,Continental Floors,SPCB,CWT,Weight,1VNL,VCT,0.2282,0.1877,0.1411,0.1121,0.1121,0.0795,0.0578,0.0578,0.0578,0.0578,vendor
3,Continental Floors,SPCB,SQYD,Area,1CBL,Carpet Roll,0.4305,0.4225,0.4104,0.4024,0.3934,0.3934,0.3934,0.3934,0.3934,0.3934,vendor
4,Continental Floors,SPCB,SQYD,Area,1CPT,Carpet Tiles,0.7319,0.7182,0.6977,0.6840,0.6703,0.6703,0.6703,0.6703,0.6703,0.6703,vendor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520,Itasca,SPW,,,,Carpet Roll,70.0000,2.0000,1.0000,2.0000,0.0000,0.0000,2.0000,0.0000,0.0000,0.0000,invoice_counts
521,Itasca,SPW,,,,Carpet Tiles,484.0000,38.0000,20.0000,8.0000,15.0000,3.0000,0.0000,0.0000,0.0000,0.0000,invoice_counts
522,Itasca,SPW,,,,LVP,37.0000,15.0000,5.0000,11.0000,8.0000,7.0000,4.0000,6.0000,1.0000,1.0000,invoice_counts
523,Itasca,SPW,,,,LVT,67.0000,24.0000,19.0000,16.0000,17.0000,5.0000,2.0000,0.0000,0.0000,0.0000,invoice_counts


In [None]:
# Final Sort: Enforce output row order for readability

# Define source display order
source_order = {
       "invoice_counts": 0,  # Always shown last
    "vendor": 1,
    "hist_avg": 2,
    "hist_wavg": 3,
    "xgs_avg": 4,
    "xgs_wavg": 5,
    "variance": 6 , # Always shown last
}

# Add sorting key column
combined_df["source_sort"] = combined_df["source"].map(source_order)

# Sort rows to follow commodity hierarchy and defined source order
combined_df = combined_df.sort_values(
    by=["commodity_group", "commodity_description", "site", "unit", "source_sort"]
).drop(columns="source_sort")

# Reset index for cleanliness
combined_df.reset_index(drop=True, inplace=True)

print("✅ Rows sorted for visual clarity.")
display(combined_df.head(20))  # Display first 20 rows for quick check


✅ Rows sorted for visual clarity.


Unnamed: 0,site_description,site,unit,unitclass,commodity_group,commodity_description,L5C,5C,1M,2M,3M,5M,10M,20M,30M,40M,source
0,Contract Carpet Solutions,CCS,SQYD,Area,1CBL,Carpet Roll,0.496,0.4795,0.4671,0.4599,0.4506,0.4506,0.4506,0.4506,0.4506,0.4506,vendor
1,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,1.38459,0.857188,,1.052129,,,,,,,hist_avg
2,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,1.131966,0.832737,,1.052129,,,,,,,hist_wavg
3,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,1.706036,0.532654,,0.510878,,,,,,,xgs_avg
4,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,0.737659,0.532654,,0.510878,,,,,,,xgs_wavg
5,Itasca,CCS,SQYD,Area,1CBL,Carpet Roll,0.394307,0.300083,,0.541252,,,,,,,variance
6,Contract Carpet Solutions (Gov),CCSG,SQYD,Area,1CBL,Carpet Roll,0.496,0.4795,0.4671,0.4589,0.4506,0.4506,0.4506,0.4506,0.4506,0.4506,vendor
7,Itasca,CCSG,SQYD,Area,1CBL,Carpet Roll,1.798415,,,,,,,,,,hist_avg
8,Itasca,CCSG,SQYD,Area,1CBL,Carpet Roll,1.179662,,,,,,,,,,hist_wavg
9,Itasca,CCSG,SQYD,Area,1CBL,Carpet Roll,1.343611,,,,,,,,,,xgs_avg


In [None]:
# # Use vendor_df to create a mapping from site to site_description
# site_desc_map = vendor_df.set_index("site")["site_description"].to_dict()

# # Update site_description in all relevant DataFrames by matching on 'site'
# for df_name in ["hist_wavg_block", "xgs_avg_block", "xgs_wavg_block", "variance_data", "combined_df", "invoice_matrix_final"]:
#     df = globals()[df_name]
#     df["site_description"] = df["site"].map(site_desc_map).fillna(df["site_description"])
# display(combined_df.tail(20))

In [None]:
# 🔄 Save combined_df with each site as a separate Excel sheet

import pandas as pd

# Set export path
output_path = "freight_rates_by_site.xlsx"  # Change path if needed

# Get unique sites
sites = combined_df["site"].dropna().unique()

# Export to Excel with one sheet per site
with pd.ExcelWriter(output_path, engine="xlsxwriter") as writer:
    for site in sites:
        sheet_name = str(site)[:31]  # Excel sheet names must be ≤ 31 characters
        site_df = combined_df[combined_df["site"] == site]
        site_df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"✅ Exported to {output_path} with one sheet per site.")


✅ Exported to freight_rates_by_site.xlsx with one sheet per site.
