In [1]:
### Import Libraries
# Azure Data Lake libraries
import common.utils.azure_data_lake_interface as adl

# data cleansing libraries
from common.utils.data_cleansing import clean_illegal_chars_in_column

# config libraries
import common.config
from common.utils.configuration_management import load_config

# Data analysis libraries
from pandas import DataFrame, Timedelta, to_datetime

In [2]:
def find_declining_margins(
    df: DataFrame,
    days: int,
    tolerance: float = 0.0,
    date_col: str = "created_date",
    sku_col: str = "sku",
    gpp_col: str = "gross_profit_percent",
    trans_id_col: str = "tranid",
) -> DataFrame:
    """
    Find recent transactions whose gross profit percent fell versus the SKU's prior transaction.

    Each row is compared with the immediately preceding row (by date) for the same SKU.
    The preceding row is taken from the full history in ``df``, so a transaction inside
    the look-back window may be compared with one that happened before the window.
    Rows that have no preceding transaction for their SKU are never reported, because
    their ``prev_gpp`` is missing and the decline comparison evaluates to False.

    The input DataFrame is not modified; all work is done on a copy.

    Args:
        df (DataFrame): Transaction-level data containing the columns named by
            ``date_col``, ``sku_col``, ``gpp_col`` and ``trans_id_col``.
        days (int): Size of the look-back window in days. The window ends at the latest
            value of ``date_col`` found anywhere in ``df`` (not at today's date).
        tolerance (float, optional): A row qualifies only when
            ``prev_gpp - current_gpp`` is strictly greater than this value.
            Default is 0.0.
        date_col (str, optional): Column holding the transaction date. Values are
            parsed with ``pandas.to_datetime``. Default is "created_date".
        sku_col (str, optional): Column identifying the SKU. Default is "sku".
        gpp_col (str, optional): Column holding the gross profit percent.
            Default is "gross_profit_percent".
        trans_id_col (str, optional): Column holding the transaction identifier.
            Default is "tranid".

    Returns:
        DataFrame: The qualifying rows, sorted by ``sku_col`` then ``date_col``, with all
        original columns plus three added ones:

        * ``prev_gpp`` - gross profit percent of the SKU's previous transaction.
        * ``prev_trans_date`` - date of the SKU's previous transaction.
        * ``prev_trans_id`` - identifier of the SKU's previous transaction.

    Raises:
        KeyError: If any of the four named columns is missing from ``df``.
        ValueError: Propagated from ``pandas.to_datetime`` when a value in ``date_col``
            cannot be parsed as a date.
    """

    # Fail early with a message naming every missing column, instead of a bare
    # KeyError from whichever pandas call happens to touch the column first.
    required_cols = (date_col, sku_col, gpp_col, trans_id_col)
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Input DataFrame is missing required column(s): {missing_cols}")

    # Work on a copy so the caller's frame keeps its original dtypes and columns.
    df = df.copy()

    # 1. Parse and sort so that shift(1) within a SKU means "previous transaction in time".
    df[date_col] = to_datetime(df[date_col], errors='raise')
    df = df.sort_values([sku_col, date_col])

    # 2. Compute the prior-value columns, per SKU.
    by_sku = df.groupby(sku_col)
    df['prev_gpp'] = by_sku[gpp_col].shift(1)
    df['prev_trans_date'] = by_sku[date_col].shift(1)
    prev_trans_id = by_sku[trans_id_col].shift(1)
    if df[trans_id_col].dtype == 'string':
        # String identifiers: label the "no previous transaction" case explicitly.
        df['prev_trans_id'] = prev_trans_id.fillna('Not Specified').astype('string')
    else:
        # Non-string identifiers are cast to int; 0 marks "no previous transaction".
        df['prev_trans_id'] = prev_trans_id.fillna(0).astype(int)

    # 3. Compute cutoff relative to the newest transaction in the caller-selected
    #    date column (previously hard-coded to 'created_date', ignoring date_col).
    cutoff = df[date_col].max() - Timedelta(days=days)

    # 4. Filter: within window AND declined by more than the tolerance.
    mask_window = df[date_col] >= cutoff
    mask_declined = (df['prev_gpp'] - df[gpp_col]) > tolerance
    result = df[mask_window & mask_declined]

    return result

In [5]:
# Describe which dataset to pull: the curated Estimate line items.
data_state, trans_type = "curated", "Estimate"
filename = f"transaction/{trans_type}ItemLineItems_{data_state}.parquet"

# Open a client on the "consolidated" file system, using the blob URL from the
# data lake configuration file.
config = load_config(common.config, "datalake_config.json")
service_client = adl.get_azure_service_client(config["blob_url"])
file_system_client = adl.get_azure_file_system_client(
    service_client,
    "consolidated",
)

# Read the parquet file into `df`.
df = adl.get_parquet_file_from_data_lake(
    file_system_client,
    f"{data_state}/netsuite",
    filename,
)

In [6]:
# Look-back window in days, passed to find_declining_margins as `days`.
lookback = 10
margin_declines = find_declining_margins(df, days=lookback)
print(f"{margin_declines.shape[0]} declining margins found.")

# Clean the frame that is actually written to Excel. Previously the source `df` was
# cleaned here instead, which never reached `margin_declines` (it is built from a
# copy of `df`) and also mutated `df` across cells.
# NOTE(review): presumably this strips characters the Excel writer rejects - confirm
# against common.utils.data_cleansing.
margin_declines = clean_illegal_chars_in_column(margin_declines, "description")
margin_declines.to_excel(f'../excel_outputs/{trans_type}ItemLineItems_margin_declines_in_past_{lookback}_days.xlsx', index=False)

742 declining margins found.
