In [1]:
# Load IPython's autoreload extension so that edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
### Import Libraries

# Azure Data Lake libraries
import common.utils.azure_data_lake_interface as adl

# config libraries
import common.config
from common.utils.configuration_management import load_config

# Data analysis libraries
import pandas as pd

In [20]:
def find_declining_margins(
    df: pd.DataFrame,
    days: int,
    tolerance: float = 0.0,
    date_col: str = "created_date",
    sku_col: str = "sku",
    gpp_col: str = "gross_profit_percent",
    trans_id_col: str = "tranid",
) -> pd.DataFrame:
    """
    Identify line items whose gross profit percent dropped versus the SKU's prior row.

    Rows are ordered by SKU and transaction date. Each row is compared with the
    immediately preceding row of the same SKU, and rows are kept when the drop in
    gross profit percent is strictly greater than ``tolerance`` AND the row's date
    falls inside the look-back window. The input DataFrame is not modified.

    Args:
        df (pd.DataFrame): Transaction line items. Must contain the columns named
            by ``date_col``, ``sku_col``, ``gpp_col`` and ``trans_id_col``.
        days (int): Size of the look-back window in days. The window ends at the
            latest date found in ``date_col`` across the whole frame (all SKUs),
            and the cutoff date itself is included.
        tolerance (float, optional): A row qualifies only when
            ``prev_gpp - current_gpp > tolerance``. Default is 0.0.
        date_col (str, optional): Name of the transaction date column; values are
            parsed with ``pd.to_datetime``. Default is "created_date".
        sku_col (str, optional): Name of the SKU column used for grouping.
            Default is "sku".
        gpp_col (str, optional): Name of the gross profit percent column.
            Default is "gross_profit_percent".
        trans_id_col (str, optional): Name of the transaction identifier column.
            Values must be convertible to ``int``. Default is "tranid".

    Returns:
        pd.DataFrame: The qualifying rows (sorted by SKU, then date) with all
        original columns plus ``prev_gpp``, ``prev_trans_date`` and
        ``prev_trans_id`` describing the same SKU's preceding row.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
        ValueError: If ``date_col`` holds values that cannot be parsed as dates
            (raised by ``pd.to_datetime``).
    """
    required_cols = (date_col, sku_col, gpp_col, trans_id_col)
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        raise KeyError(f"Input DataFrame is missing required column(s): {missing}")

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # 1. Parse and sort so that shift(1) within a SKU yields the chronologically prior row
    df[date_col] = pd.to_datetime(df[date_col], errors='raise')
    df = df.sort_values([sku_col, date_col])

    # 2. Compute the prior-row columns per SKU (all shifts are taken before any
    #    new column is attached to the frame)
    by_sku = df.groupby(sku_col)
    prev_gpp = by_sku[gpp_col].shift(1)
    prev_trans_date = by_sku[date_col].shift(1)
    # The first row of each SKU has no predecessor; its id is reported as 0.
    prev_trans_id = by_sku[trans_id_col].shift(1).fillna(0).astype(int)

    df['prev_gpp'] = prev_gpp
    df['prev_trans_date'] = prev_trans_date
    df['prev_trans_id'] = prev_trans_id

    # 3. Compute cutoff from the configured date column (previously hard-coded to
    #    "created_date", which broke any call with a custom date_col)
    cutoff = df[date_col].max() - pd.Timedelta(days=days)

    # 4. Filter: within window AND declined. Rows without a predecessor have a
    #    NaN prev_gpp, so the comparison is False and they are never flagged.
    mask_window = df[date_col] >= cutoff
    mask_declined = (df["prev_gpp"] - df[gpp_col]) > tolerance
    result = df[mask_window & mask_declined]

    return result

In [18]:
# Connect to the "consolidated" file system of the data lake
config = load_config(common.config, "datalake_config.json")
service_client = adl.get_azure_service_client(config["blob_url"])
file_system_client = adl.get_azure_file_system_client(service_client, "consolidated")

# Fetch the line-item parquet file for each transaction type, keeping a pristine
# copy of every frame right after it is loaded
data_state = "enhanced"
line_item_frames = {}
pristine_frames = {}
for trans_type in ("Estimate", "CustInvc"):
    filename = f"transaction/{trans_type}ItemLineItems_{data_state}.parquet"
    line_item_frames[trans_type] = adl.get_parquet_file_from_data_lake(
        file_system_client, f"{data_state}/netsuite", filename
    )
    pristine_frames[trans_type] = line_item_frames[trans_type].copy()

est_lines_df = line_item_frames["Estimate"]
saved_est_lines_df = pristine_frames["Estimate"]

inv_lines_df = line_item_frames["CustInvc"]
saved_inv_lines_df = pristine_frames["CustInvc"]

In [56]:
# Restore the working frames from the saved copies taken at load time
est_lines_df, inv_lines_df = saved_est_lines_df.copy(), saved_inv_lines_df.copy()

In [21]:
# Analysis parameters: look-back window in days and which line-item set to scan
lookback = 10
source = "estimate"

# NOTE: any value other than "estimate" selects the invoice lines
if source == "estimate":
    df = est_lines_df
else:
    df = inv_lines_df

margin_declines = find_declining_margins(df, days=lookback)
margin_declines

Unnamed: 0,created_date,created_from,entered_by,ai_order_type,commission_or_mfr_direct,id,tranid,sku,item_type,vsi_item_category,...,handling_cost,labor_hours,unit_price,total_cost,total_amount,gross_profit,gross_profit_percent,prev_gpp,prev_trans_date,prev_trans_id
343313,2025-04-22,Not Specified,Antonio Penaherrera,Not Specified,False,45195049,302413,114,Inventory Item,Not Specified,...,2.82,0.00,111.00,61.00,111.00,50.00,45.05,48.88,2025-01-22,287460
346149,2025-04-29,Not Specified,Jeffrey A Diercks,Not Specified,False,45287993,303586,127,Inventory Item,Not Specified,...,142.44,0.00,6463.00,3877.56,6463.00,2585.44,40.00,40.01,2025-02-04,289934
345427,2025-04-25,Not Specified,Stacie Comia,Not Specified,False,45260521,303294,176,Inventory Item,Not Specified,...,24.81,0.42,275.71,202.62,275.71,73.09,26.51,41.63,2025-01-15,286298
345424,2025-04-25,Not Specified,Stacie Comia,Not Specified,False,45260508,303292,239,Inventory Item,Not Specified,...,4.83,0.00,127.05,79.56,127.05,47.49,37.38,46.80,2025-03-12,296039
344469,2025-04-24,Not Specified,Frank Comia,Not Specified,False,45236876,302913,272,Inventory Item,Not Specified,...,17.34,0.00,648.00,365.86,648.00,282.14,43.54,46.15,2025-04-07,300108
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344281,2025-04-24,Not Specified,Sean DeGeest,Manufacturer Direct,True,45232348,302845,2938866,Non-inventory Item,Not Specified,...,132.66,0.00,4422.00,8844.00,8844.00,0.00,0.00,100.00,2025-03-21,297829
346206,2025-04-29,Not Specified,Sean DeGeest,Manufacturer Direct,True,45288168,303590,2940494,Non-inventory Item,Not Specified,...,66.00,0.00,2200.00,2200.00,2200.00,0.00,0.00,100.00,2025-04-17,302005
346456,2025-04-29,Not Specified,John Witek,Not Specified,False,45300614,303725,2945208,Non-inventory Item,Not Specified,...,34.72,0.00,1601.50,2314.98,3203.00,888.02,27.72,30.69,2025-04-22,302539
343832,2025-04-23,Not Specified,Cathryn L Hindman,Not Specified,False,45251796,302643,2949309,Assembly/Bill of Materials,Not Specified,...,77.28,0.00,1872.00,1300.08,1872.00,571.92,30.55,32.92,2025-04-23,302643


In [11]:
# Write the flagged rows to an Excel workbook named after the source and look-back window
output_path = f'../excel_outputs/{source}_line_item_margin_declines_in_past_{lookback}_days.xlsx'
margin_declines.to_excel(output_path, index=False)