In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

In [3]:
# Relative path of the downloads directory.
# NOTE(review): the read_csv cell further down uses '../../app/downloads/'
# rather than this value — confirm which relative root is intended.
input_file = '../app/downloads/'

In [8]:
# Load the freight dual-results CSV export into a DataFrame.
df = pd.read_csv(
    '../../app/downloads/freight_dual_results_20250413_113136.csv'
)

# Show the first two rows as a quick sanity check of the load.
df.head(2)

Unnamed: 0,project_id,project_name,po_no,account,account_description,siteid,site,supplierid,suppliername,partnumber,...,match_supplier,est_estimated_area_cost,est_estimated_cwt_cost,est_freight_class_area,est_freight_class_lbs,est_lbs,est_rate_area,est_rate_cwt,est_sqyd,est_uom
0,2311123624,Rick Sidor Residence,47568,2008,Received Not Yet Invoiced,DIT,Diverzify Itasca,890,All Surfaces,1000008205,...,No supplier found,Not applicable,204.88,,1M,1172.06,Not applicable,17.48,112.65,SQFT
1,230485284,Epic Delphi Conference Room Refresh,40635,2008,Received Not Yet Invoiced,DIT,Diverzify Itasca,108164,"Mannington Mills, Inc.",1000006036,...,Supplier registered,Not applicable,58.82,,L5C,208.08,Not applicable,28.27,20.0,SQFT


In [18]:

def summarize_freight_comparison(df: pd.DataFrame):
    """
    Summarize freight cost comparisons (area vs CWT) at three levels.

    The two cost columns are coerced to numeric on an internal copy, so the
    caller's DataFrame is left untouched (values that are not numeric, such
    as the text 'Not applicable', stay as they are in ``df``). Inside the
    copy, non-numeric values become NaN and are skipped by the sums.

    Parameters:
    - df (pd.DataFrame): Must contain columns:
        'est_commodity_group', 'new_commodity_description', 'siteid', 'site',
        'est_estimated_area_cost', 'est_estimated_cwt_cost'

    Returns:
    - Tuple of 3 DataFrames: (by_group, by_description, by_site)
        by_group       -- grouped by 'est_commodity_group'
        by_description -- grouped by 'est_commodity_group' and
                          'new_commodity_description'
        by_site        -- grouped by 'siteid' and 'site'
      Each frame holds its grouping columns followed by
      'total_area_cost' and 'total_cwt_cost'.

    Raises:
    - TypeError: if ``df`` is not a pandas DataFrame.
    - ValueError: if any required column is missing.
    """

    # Validate input type
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a pandas DataFrame.")

    # Required columns
    required = [
        'est_commodity_group', 'new_commodity_description', 'siteid', 'site',
        'est_estimated_area_cost', 'est_estimated_cwt_cost'
    ]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    cost_cols = ['est_estimated_area_cost', 'est_estimated_cwt_cost']

    # Work on a private copy of just the needed columns: assigning the
    # coerced values back onto `df` would silently rewrite the caller's
    # frame (and destroy the original text values) as a side effect.
    work = df[required].copy()
    for col in cost_cols:
        work[col] = pd.to_numeric(work[col], errors='coerce')

    # Group and summarize
    def summarize(group_cols):
        grouped = work.groupby(group_cols)[cost_cols].sum().reset_index()
        grouped = grouped.rename(columns={
            'est_estimated_area_cost': 'total_area_cost',
            'est_estimated_cwt_cost': 'total_cwt_cost'
        })
        return grouped

    return (
        summarize(['est_commodity_group']),
        summarize(['est_commodity_group', 'new_commodity_description']),
        summarize(['siteid', 'site'])
    )


In [19]:
# Aggregate freight costs by commodity group, by group + description,
# and by site, keeping each summary in its own frame.
(
    group_df,
    description_df,
    site_df,
) = summarize_freight_comparison(df)


In [22]:
# Preview the first two rows of the per-site summary (siteid, site, totals).
site_df.head(2)

Unnamed: 0,siteid,site,total_area_cost,total_cwt_cost
0,DIT,Diverzify Itasca,6153.43,54628.8
1,SPJ,Spectra Jacksonville,5337.24,114609.14
