In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
### Import Libraries

# Azure Data Lake libraries
import common.utils.azure_data_lake_interface as adl

# config libraries
from importlib.resources import files
from common.utils.configuration_management import load_config

# Data analysis libraries
import pandas as pd

In [3]:
def clean_and_reorder(df: pd.DataFrame) -> pd.DataFrame:
    """
    Cleans and reorders a pandas DataFrame for subsequent data analysis or processing. This function removes
    unnecessary columns, renames specific columns to align with standardized naming conventions,
    and reorganizes the DataFrame columns into a defined order.

    Args:
        df (pd.DataFrame): A pandas DataFrame containing the data to clean and reorder.

    Returns:
        pd.DataFrame: The cleaned and reordered pandas DataFrame.
    """
    # define columns to drop
    drop_cols = ['amount', 'assembly_component', 'created_from', 'custcol_ava_taxamount', 'custcol_sa_quote_po_rate',
                 'display_name', 'est_extended_cost', 'est_gross_profit', 'est_gross_profit_percent', 'handling_cost', 'id',
                 'item_base_price', 'labor_hours', 'last_purchase_price', 'line_number', 'mainline', 'nx_customer_id', 'purchased_price',
                 'special_order', 'tax_line', 'transaction_table_id', 'valve_spec_size', 'vendor_commission_percent',
                 'highest_recent_cost', 'highest_quoted_cost', 'custom_manufacturer', 'vsi_mfr', 'commission_or_mfr_direct', ]

    # define new column order
    new_order = ['tranid', 'subsidiary_name', 'location', 'sales_rep', 'entered_by', 'created_date', 'sku', 'manufacturer', 'item_name',
                 'description', 'item_type', 'vsi_item_category', 'level_1_category', 'level_2_category', 'level_3_category',
                 'level_4_category', 'level_5_category', 'level_6_category', 'ai_order_type', 'cost_estimate_type', 'customer_id',
                 'company_name', 'end_market', 'quantity', 'unit_cost', 'unit_price', 'total_cost', 'total_sales', 'gross_profit',
                 'gross_profit_percent']

    # define new column names
    column_map = {'quote_po_rate':'unit_cost','total_amount':'total_sales'}

    # return updated database
    return (
        df
        .drop(drop_cols, axis=1)
        .rename(columns=column_map)
        .reindex(columns=new_order)
    )

In [4]:
def find_declining_margins(
    df: pd.DataFrame,
    days: int,
    tolerance: float = 0.0,
    date_col: str = "created_date",
    sku_col: str = "sku",
    gpp_col: str = "gross_profit_percent",
    trans_id_col: str = "tranid",
) -> pd.DataFrame:

    """
    Identifies products with declining gross profit margins within a specified time window.

    This function takes transaction data, computes the decline in gross profit percent for
    each SKU over a specified period, and filters for transactions where this decline
    exceeds a provided tolerance level. The final result is returned as a subset DataFrame
    meeting these criteria.

    Args:
        df (pd.DataFrame): Input DataFrame containing transactional data with gross profit
            percent and dates of transactions.
        days (int): Number of days to consider for the cutoff window. Only transactions
            within this number of days from the latest transaction date are considered.
        tolerance (float, optional): Minimum difference between the previous gross profit
            percentage and the current one to qualify as a "decline". Default is 0.0.
        date_col (str, optional): Name of the column in the DataFrame representing the
            transaction dates. Default is "created_date".
        sku_col (str, optional): Name of the column in the DataFrame identifying the
            stock keeping unit (SKU). Default is "sku".
        gpp_col (str, optional): Name of the column in the DataFrame representing the
            gross profit percentage (GPP). Default is "gross_profit_percent".
        trans_id_col (str, optional): Name of the column in the DataFrame representing the
            transaction identifier. Default is "tranid".

    Returns:
        pd.DataFrame: A subset of the input DataFrame containing rows where the gross profit
        percentage for a product has declined by more than the specified tolerance within
        the defined time window. The returned DataFrame includes columns for the current
        and previous gross profit percentages, transaction dates, and transaction IDs.
    """

    df = df.copy()

    # 1. Parse and sort
    df[date_col] = pd.to_datetime(df[date_col], errors='raise')
    df = df.sort_values([sku_col, date_col])

    # 2. Compute the prior-value column
    df['prev_gpp'] = df.groupby(sku_col)[gpp_col].shift(1)
    df['prev_trans_date'] = df.groupby(sku_col)[date_col].shift(1)
    if df[trans_id_col].dtype != 'object':
        df['prev_trans_id'] = df.groupby(sku_col)[trans_id_col].shift(1).astype('Int64')

    # 3. Compute cutoff
    cutoff = df['created_date'].max() - pd.Timedelta(days=days)

    # 4. Filter: within window AND declined
    mask_window   = df[date_col] >= cutoff
    mask_declined = (df["prev_gpp"] - df[gpp_col]) > tolerance
    result = df[mask_window & mask_declined]

    return result

In [5]:
# attach to the data lake
file_path = files("common.config") / "datalake_config.json"
config = load_config(str(file_path))
service_client = adl.get_azure_service_client(config["blob_url"])
file_system_client = adl.get_azure_file_system_client(service_client, "consolidated")

# get data
data_state = "enhanced"
trans_type = "Estimate"
filename = f"transaction/{trans_type}ItemLineItems_{data_state}.parquet"
est_lines_df = adl.get_parquet_file_from_data_lake(file_system_client, f"{data_state}/netsuite", filename)
saved_est_lines_df = est_lines_df.copy()

trans_type = "CustInvc"
filename = f"transaction/{trans_type}ItemLineItems_{data_state}.parquet"
inv_lines_df = adl.get_parquet_file_from_data_lake(file_system_client, f"{data_state}/netsuite", filename)
saved_inv_lines_df = inv_lines_df.copy()

In [6]:
inv_lines_df

Unnamed: 0,amount,assembly_component,cost_estimate_type,created_from,custcol_ava_taxamount,custcol_sa_quote_po_rate,custom_manufacturer,customer_id,description,display_name,...,level_3_category,level_4_category,level_5_category,level_6_category,total_amount,total_cost,gross_profit,gross_profit_percent,highest_recent_cost,highest_quoted_cost
0,3804.00,False,LASTPURCHPRICE,Sales Order #285216,0.0,0.0,,73445,,"2"" 150# WCB YD valve",...,,,,,3804.00,0.0,3804.00,1.000000,,0.00
1,3187.00,False,LASTPURCHPRICE,Sales Order #285216,0.0,0.0,,73445,,667-40 DVC6200 w/Position Feedback,...,,,,,3187.00,0.0,3187.00,1.000000,,0.00
2,5118.85,False,PURCHPRICE,Sales Order #287196,0.0,0.0,,165737,"Fisher® 2"" 300# WCB EZ/667-40/DVC6200 \r\nE Se...","Fisher® 2"" 300# WCB EZ/667-40/DVC6200 E Serie...",...,Not Specified,Not Specified,Not Specified,Not Specified,5118.85,4351.0,767.85,0.150004,4351.0,4351.00
3,2005.00,False,PURCHPRICE,Sales Order #287196,0.0,0.0,,165737,Fisher® DVC6200 101/G60P/D/HC/HW1/FR 2 FIELDV...,DVC6200-101/G60P/D/HC/HW1/FR (Reman AS),...,Not Specified,Not Specified,Not Specified,Not Specified,2005.00,1704.0,301.00,0.150125,1704.0,1704.00
4,377.00,False,AVGCOST,Sales Order #287196,0.0,0.0,,165737,Fisher Controls® Part # GG03708X042\r\nMountin...,Fisher Controls® Part # GG03708X042 Mounting Kit,...,Not Specified,Not Specified,Not Specified,Not Specified,377.00,320.0,57.00,0.151194,320.0,320.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396685,152.00,False,CUSTOM,Sales Order #436798,0.0,0.0,Kunkle,13069,15715001107 KUNKLE STEM 6252M,,...,PRV,Not Specified,Not Specified,Part,152.00,99.0,53.00,0.348684,99.0,99.00
396686,35.00,False,ITEMDEFINED,Sales Order #436798,0.0,0.0,,13069,,,...,,,,,35.00,0.0,35.00,1.000000,,26762.50
396687,1747.00,False,CUSTOM,Sales Order #436828,0.0,0.0,,13069,"3"" Safety Valve Repair",,...,,,,,1747.00,263.0,1484.00,0.849456,,1246.99
396688,1000.00,False,PURCHPRICE,Sales Order #436828,0.0,0.0,Farris,13069,254433-649\r\nSeat O-RIng,Seat O-Ring,...,,,,,1000.00,650.0,350.00,0.350000,650.0,650.00


In [56]:
est_lines_df = saved_est_lines_df.copy()
inv_lines_df = saved_inv_lines_df.copy()

In [57]:
est_lines_df = clean_and_reorder(est_lines_df)
inv_lines_df = clean_and_reorder(inv_lines_df)

In [75]:
lookback = 10
source = "invoice"
df = est_lines_df if source == "estimate" else inv_lines_df
margin_declines = find_declining_margins(df, days=lookback)
margin_declines

Unnamed: 0,tranid,subsidiary_name,location,sales_rep,entered_by,created_date,sku,manufacturer,item_name,description,...,end_market,quantity,unit_cost,unit_price,total_cost,total_sales,gross_profit,gross_profit_percent,prev_gpp,prev_trans_date
396680,513431,Allied Valve,Hastings,Multiple,Ashley Richert,2025-04-29,59,Kunkle,912BHGM01 1.5 KUNKLE SV,"912BHGM01 \r\n1.5"" KUNKLE SV",...,Food & Beverage,2.0,453.25,741.00,906.50,1482.00,575.50,38.83,39.27,2025-04-14
394604,512428,Allied Valve,Hastings,Josh Dahl,Joshua A Marklevits,2025-04-26,94,Kunkle,912BFEM01 1 KUNKLE SV,"912BFEM01 \r\n1"" KUNKLE SV",...,Biofuels,1.0,149.09,344.00,149.09,344.00,194.91,56.66,62.38,2025-03-14
394070,512192,Allied Valve,Hastings,Multiple,Ashley Richert,2025-04-24,116,,DRIP PAN ELBOW 2 (Cast Iron FNPT),"2"" DRIP PAN ELBOW\r\n(Cast Iron - female NPT)",...,Reseller/Distributor,1.0,51.00,110.00,51.00,110.00,59.00,53.64,56.78,2025-04-08
394069,512189,Allied Valve,Hastings,Multiple,Ashley Richert,2025-04-24,120,,DRIP PAN ELBOW 1.25 (Cast Iron FNPT),"1.25"" DRIP PAN ELBOW\r\n(Cast Iron - female NPT)",...,Reseller/Distributor,1.0,54.96,100.00,54.96,100.00,45.04,45.04,48.64,2025-04-08
392885,511590,Allied Valve,Joliet-AV Shop,Multiple,Michele York,2025-04-22,126,Consolidated,GK38 GASKET KIT 1905/06/10/12H,GK38 Gasket Kit,...,Downstream O&G,1.0,205.70,360.00,205.70,360.00,154.30,42.86,43.56,2025-04-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395924,512958,Allied Valve,Joliet-AV Shop,Chad Kalecki,Michele York,2025-04-29,2921269,,04.4805.032,04.4805.032 AG Soft Goods Kit\r\n.,...,Chemical/Petrochem,1.0,217.00,389.00,217.00,389.00,172.00,44.22,47.81,2025-04-10
394699,512450,Allied Valve,Cannon Falls,Multiple,Joshua A Marklevits,2025-04-26,2924041,,75x1-19110MCF-2-CC-MS-31-MT-FT-LA-W4080NC,New Consolidated\r\n19110MCF-2-CC-MS-31-MT-FT-...,...,Downstream O&G,1.0,598.64,1226.00,598.64,1226.00,627.36,51.17,53.02,2025-04-08
394850,512459,Allied Valve,Riverdale Shop,Alan R Birkhofer,Laura A Diaz,2025-04-27,2932087,,138121,138121 Guide,...,Power/Utilities,1.0,8997.00,14995.00,8997.00,14995.00,5998.00,40.00,53.62,2025-04-27
395894,512893,Allied Valve,Cannon Falls,Distribution,David Johnson,2025-04-29,2933509,,0332303,0332303\r\nGUIDE 1914/16-30DEF,...,Reseller/Distributor,1.0,1310.98,1542.33,1310.98,1542.33,231.35,15.00,51.50,2025-04-26


In [76]:
margin_declines.to_excel(f'../excel_outputs/{source}_line_item_margin_declines_in_past_{lookback}_days.xlsx', index=False)