In [1]:
def get_week_df(transaction):
    """
    Build a single-column DataFrame of the unique weeks found in the transactions.

    Args:
    - transaction: DataFrame, transaction data that must expose a 'week' column

    Returns:
    - DataFrame: one row per distinct 'week' value, sorted ascending by 'week'
    """
    # Keep only the 'week' column and drop duplicate values.
    unique_weeks = transaction.select("week").distinct()

    # Explicit ascending sort expression on the 'week' column.
    ascending_week = f.col("week").asc()

    return unique_weeks.orderBy(ascending_week)

In [3]:

def outlets_df(preprocess_store_info):
    """
    Build a single-column DataFrame of the unique outlet codes in the store data.

    Args:
    - preprocess_store_info: DataFrame, store information data that must expose
      an 'stg_outlet_cd' column

    Returns:
    - DataFrame: one row per distinct 'stg_outlet_cd' value (no ordering applied)
    """
    # Project down to the outlet code column before de-duplicating.
    outlet_codes = preprocess_store_info.select("stg_outlet_cd")
    return outlet_codes.distinct()

In [5]:
def create_primary_key_table(weeks_df, final_categories, outlet):
    """
    Cross join the three input DataFrames into one primary key table.

    Args:
    - weeks_df: DataFrame, first operand of the cross join
      (presumably the distinct weeks; confirm against caller)
    - final_categories: DataFrame, cross joined onto weeks_df
    - outlet: DataFrame, cross joined onto the weeks/categories result
      (presumably the distinct outlet codes; confirm against caller)

    Returns:
    - DataFrame: result of weeks_df cross joined with final_categories and then
      with outlet, i.e. every combination of rows from the three inputs
    """
    # First pair every week row with every category row...
    week_category_pairs = weeks_df.crossJoin(final_categories)

    # ...then pair each of those combinations with every outlet row.
    return week_category_pairs.crossJoin(outlet)