In [3]:

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# ✅ Load the freight-model export and keep only the first row seen per invoice_id
df = (
    pd.read_csv('../../data/downloads/freight_model_20250423_224657.csv')
    .drop_duplicates(subset='invoice_id', keep='first')
)
# Show the available columns as the cell output
df.columns

Index(['site', 'site_description', 'supplier_no', 'supplier_name',
       'invoice_id', 'account', 'account_description', 'ship_to_zip', 'po_no',
       'part_no', 'part_description', 'inv_uom', 'invoiced_line_qty',
       'est_commodity_group', 'est_method_used', 'est_standard_quantity',
       'est_standard_uom', 'est_lbs_per_uom', 'est_market_freight_costs',
       'est_total_quantity', 'est_market_rate', 'est_freight_class',
       'est_xgs_rate', 'est_rate_unit', 'est_shipment_type',
       'est_xgs_total_raw_cost', 'est_xgs_total_normalised_cost',
       'est_normalised_xgs_rate', 'est_xgs_min_applied',
       'market_cost_outlier'],
      dtype='object')

In [5]:
# One frame per estimated commodity group used by the simulations below
vnl = df.loc[df['est_commodity_group'].eq('1VNL')]
cbl = df.loc[df['est_commodity_group'].eq('1CBL')]

In [13]:
# Updated LBS-based simulation (e.g. 1VNL)
def simulate_ftl_thresholds_verbose(data, freight_price_col='est_market_freight_costs', quantity_col='est_total_quantity', thresholds=(5000, 10000, 15000, 20000, 25000)):
    """Compare average unit freight cost of simulated FTL vs LTL shipments.

    For every threshold, each row is labelled 'FTL' when its quantity is
    greater than or equal to the threshold and 'LTL' otherwise; the mean unit
    cost (freight price / quantity) and the number of rows are then reported
    per label.

    The input frame is NOT modified: all intermediate values live in local
    Series, so filtered slices can be passed in safely.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per shipment. Rows should already be restricted to positive
        quantities; a zero quantity yields an infinite unit cost.
    freight_price_col : str
        Column holding the freight cost of the row.
    quantity_col : str
        Column holding the shipped quantity the thresholds are compared to.
    thresholds : iterable of numbers
        Quantity cut-offs to simulate.

    Returns
    -------
    pandas.DataFrame
        One row per threshold with the columns ``threshold``,
        ``avg_cost_per_lb_FTL``, ``avg_cost_per_lb_LTL``,
        ``shipment_count_FTL`` and ``shipment_count_LTL`` (values rounded to
        3 decimals). A label that has no rows at a given threshold shows NaN;
        a label that never occurs at any threshold has no columns at all.

    Raises
    ------
    ValueError
        If ``thresholds`` is empty (raised by ``pd.concat``).
    """
    # Non-numeric values become NaN; NaN quantities compare False and so land in 'LTL',
    # but their NaN unit cost is ignored by both mean and count.
    quantity = pd.to_numeric(data[quantity_col], errors='coerce')
    freight_price = pd.to_numeric(data[freight_price_col], errors='coerce')

    # Unit cost does not depend on the threshold, so compute it once.
    # It is rounded to 2 decimals per row BEFORE averaging, as in the original analysis.
    unit_cost = (freight_price / quantity).round(2)

    results = []
    for threshold in thresholds:
        simulated_type = (quantity >= threshold).map({True: 'FTL', False: 'LTL'})

        grouped = (
            pd.DataFrame({'simulated_type': simulated_type, 'unit_cost': unit_cost})
            .groupby('simulated_type')
            .agg(
                avg_cost_per_lb=('unit_cost', 'mean'),
                shipment_count=('unit_cost', 'count'),
            )
            .reset_index()
        )
        grouped['threshold'] = threshold
        results.append(grouped)

    combined = pd.concat(results)
    pivot = combined.pivot(index='threshold', columns='simulated_type', values=['avg_cost_per_lb', 'shipment_count']).round(3)
    # Flatten the (metric, type) column MultiIndex into names such as 'avg_cost_per_lb_FTL'
    pivot.columns = [f"{metric}_{stype}" for metric, stype in pivot.columns]
    return pivot.reset_index()


In [15]:
# Run simulation on 1VNL
# The mask is built from `vnl` itself: a mask taken from the full `df` carries a
# different index, which forces pandas to reindex it and emit a UserWarning.
# The explicit copy gives an independent frame, so later column assignments can
# neither touch nor warn about the parent frame.
valid_rows = vnl[(vnl['est_market_freight_costs'] > 0) & (vnl['est_total_quantity'] > 0)].copy()
summary_df = simulate_ftl_thresholds_verbose(valid_rows)
display(summary_df)


  valid_rows = vnl[(df['est_market_freight_costs'] > 0) & (df['est_total_quantity'] > 0)]


Unnamed: 0,threshold,avg_cost_per_lb_FTL,avg_cost_per_lb_LTL,shipment_count_FTL,shipment_count_LTL
0,5000,0.072,1.18,284.0,2509.0
1,10000,0.06,1.123,145.0,2648.0
2,15000,0.057,1.106,102.0,2691.0
3,20000,0.054,1.096,77.0,2716.0
4,25000,0.058,1.089,59.0,2734.0


In [16]:
# Updated SQYD-based simulation (e.g. 1CBL)
def simulate_ftl_thresholds_cbl(data, freight_price_col='est_market_freight_costs', quantity_col='est_total_quantity', thresholds_in_rolls=(20, 25, 30, 45, 50, 60), sqyd_per_roll=100):
    """Compare average unit freight cost of simulated FTL vs LTL shipments, with thresholds given in rolls.

    Each roll threshold is converted to a quantity threshold by multiplying
    with ``sqyd_per_roll``. A row is labelled 'FTL' when its quantity is
    greater than or equal to that converted threshold and 'LTL' otherwise; the
    mean unit cost (freight price / quantity) and the number of rows are then
    reported per label.

    The input frame is NOT modified: all intermediate values live in local
    Series, so filtered slices can be passed in safely.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per shipment. Rows should already be restricted to positive
        quantities; a zero quantity yields an infinite unit cost.
    freight_price_col : str
        Column holding the freight cost of the row.
    quantity_col : str
        Column holding the shipped quantity the thresholds are compared to.
    thresholds_in_rolls : iterable of numbers
        Cut-offs to simulate, expressed in rolls.
    sqyd_per_roll : number
        Quantity units per roll. Defaults to 100, the factor that used to be
        hard-coded. NOTE(review): confirm 100 matches the real roll size.

    Returns
    -------
    pandas.DataFrame
        One row per roll threshold with the columns ``threshold_rolls``,
        ``avg_cost_per_sqyd_FTL``, ``avg_cost_per_sqyd_LTL``,
        ``shipment_count_FTL`` and ``shipment_count_LTL`` (values rounded to
        3 decimals). A label that has no rows at a given threshold shows NaN;
        a label that never occurs at any threshold has no columns at all.

    Raises
    ------
    ValueError
        If ``thresholds_in_rolls`` is empty (raised by ``pd.concat``).
    """
    # Non-numeric values become NaN; NaN quantities compare False and so land in 'LTL',
    # but their NaN unit cost is ignored by both mean and count.
    quantity = pd.to_numeric(data[quantity_col], errors='coerce')
    freight_price = pd.to_numeric(data[freight_price_col], errors='coerce')

    # Unit cost does not depend on the threshold, so compute it once.
    # It is rounded to 2 decimals per row BEFORE averaging, as in the original analysis.
    unit_cost = (freight_price / quantity).round(2)

    results = []
    for rolls in thresholds_in_rolls:
        threshold = rolls * sqyd_per_roll
        simulated_type = (quantity >= threshold).map({True: 'FTL', False: 'LTL'})

        grouped = (
            pd.DataFrame({'simulated_type': simulated_type, 'unit_cost': unit_cost})
            .groupby('simulated_type')
            .agg(
                avg_cost_per_sqyd=('unit_cost', 'mean'),
                shipment_count=('unit_cost', 'count'),
            )
            .reset_index()
        )
        grouped['threshold_rolls'] = rolls
        results.append(grouped)

    combined = pd.concat(results)
    pivot = combined.pivot(index='threshold_rolls', columns='simulated_type', values=['avg_cost_per_sqyd', 'shipment_count']).round(3)
    # Flatten the (metric, type) column MultiIndex into names such as 'avg_cost_per_sqyd_FTL'
    pivot.columns = [f"{metric}_{stype}" for metric, stype in pivot.columns]
    return pivot.reset_index()


In [17]:
# Run simulation on 1CBL
# Keep only rows with a positive freight cost and a positive quantity. The explicit
# copy gives an independent frame, so column assignments made further down cannot
# touch, or raise SettingWithCopyWarning about, the parent `cbl` frame.
valid_rows = cbl[(cbl['est_market_freight_costs'] > 0) & (cbl['est_total_quantity'] > 0)].copy()
summary_df = simulate_ftl_thresholds_cbl(valid_rows)
display(summary_df)


Unnamed: 0,threshold_rolls,avg_cost_per_sqyd_FTL,avg_cost_per_sqyd_LTL,shipment_count_FTL,shipment_count_LTL
0,20,0.613,1.484,22.0,482.0
1,25,0.501,1.477,16.0,488.0
2,30,0.461,1.474,14.0,490.0
3,45,0.207,1.468,9.0,495.0
4,50,0.125,1.467,8.0,496.0
5,60,0.114,1.465,7.0,497.0
