In [13]:
def preprocess_transaction(conf: dict, trx):
    """
    Restrict transactions to the configured outlets and tag each row with its week.

    Rows are kept only when their 'stg_outlet_cd' value appears in
    conf["outlet_name"]. A 'week' column is then added by passing the
    'DATE_ID' column through `week_start`, and the result is sorted by 'week'.

    Parameters:
    conf (dict): Configuration dictionary; its "outlet_name" entry holds the
        outlet codes to keep.
    trx (DataFrame): The input DataFrame containing transaction data. It must
        expose the 'stg_outlet_cd' and 'DATE_ID' columns.

    Returns:
    DataFrame: The filtered transactions with the extra 'week' column,
        ordered by 'week'.
    """
    # Keep only the rows that belong to one of the configured outlets.
    outlet_match = trx["stg_outlet_cd"].isin(conf["outlet_name"])
    selected_trx = trx.filter(outlet_match)

    # Derive the week bucket for every transaction date.
    trx_with_week = selected_trx.withColumn("week", week_start(f.col("DATE_ID")))

    return trx_with_week.orderBy("week")

In [15]:
# Function written to get the most recent Friday on or before the given date (the start of the Friday-to-Thursday week).
def week_start(date):
    """
    Return the Friday that opens the Friday-to-Thursday week containing a date.

    Args:
    - date: Column, the date column for which to calculate the start of the week

    Returns:
    - Column: The week's starting Friday; a date that is itself a Friday
      maps to itself.
    """
    # next_day yields the first Friday strictly after `date`, so stepping
    # back seven days lands on the Friday on or before `date`.
    upcoming_friday = f.next_day(date, "friday")
    return f.date_sub(upcoming_friday, 7)


In [17]:
def preprocess_item_info(conf, item_info_df):
    """
    Keep only the item rows that belong to the configured departments.

    A row is retained when its 'stg_item_dept_desc_txt' value appears in
    conf["department_name"].

    Parameters:
    conf (dict): Configuration dictionary; its "department_name" entry holds
        the department names to keep.
    item_info_df (DataFrame): The input DataFrame containing item information.

    Returns:
    DataFrame: The rows of item_info_df whose department is in the configured list.
    """
    # Build the membership test once, then apply it as the row filter.
    department_match = f.col("stg_item_dept_desc_txt").isin(conf["department_name"])
    return item_info_df.filter(department_match)
    

In [19]:
def preprocess_store_info(store_info, conf=None):
    """
    Filter store information down to the outlets listed in the configuration.

    Parameters:
    store_info (DataFrame): Input DataFrame containing store information. It
        must expose the 'stg_outlet_cd' column.
    conf (dict, optional): Configuration dictionary; its "outlet_name" entry
        holds the outlet codes to keep. When omitted, the module/notebook-level
        `conf` variable is used, which is what this function relied on before
        the parameter existed.

    Returns:
    DataFrame: The rows of store_info whose 'stg_outlet_cd' is in
        conf["outlet_name"].

    Raises:
    NameError: If `conf` is not passed and no global `conf` is defined.
    """
    if conf is None:
        # Backward compatibility: older callers pass only `store_info` and
        # expect the global configuration to be picked up implicitly.
        try:
            conf = globals()["conf"]
        except KeyError:
            raise NameError("name 'conf' is not defined") from None

    # Keep only rows where 'stg_outlet_cd' is one of the configured outlets.
    outlet_match = f.col("stg_outlet_cd").isin(conf["outlet_name"])
    return store_info.filter(outlet_match)