# MAPE For Store | Item Category | Week

# MAPE For Store | Department | Week

# In [11]:
def process_predictions_with_department_info(
    train_model_and_predict,
    preprocess_item_info_df
):
    """
    Roll predictions up to department level by joining with department information
    and aggregating results per week, outlet, and department.

    Args:
    - train_model_and_predict: DataFrame, the DataFrame containing predictions. The code
      reads the columns "stg_item_category_desc_txt", "week", "stg_outlet_cd",
      "next_week_qty" and "prediction" from it.
    - preprocess_item_info_df: DataFrame, the DataFrame containing item and department
      information. The code reads "stg_item_category_desc_txt" and
      "stg_item_dept_desc_txt" from it.

    Returns:
    - DataFrame: One row per ("week", "stg_outlet_cd", "stg_item_dept_desc_txt") with the
      summed "next_week_qty" and "prediction". Because the join is a left join,
      prediction rows whose category has no match are kept and grouped under a null
      department.
    """
    # Build a category -> department lookup. The select alone keeps one output row per
    # input row, so if the item info table repeats a category (e.g. one row per item --
    # TODO confirm against the upstream table), the join below would duplicate
    # prediction rows and inflate the sums. De-duplicating the pairs prevents that.
    department_info = preprocess_item_info_df.select(
        "stg_item_category_desc_txt", "stg_item_dept_desc_txt"
    ).dropDuplicates()

    # Join predictions with department info; "left" keeps every prediction row
    prediction_with_dept = train_model_and_predict.join(
        department_info,
        on="stg_item_category_desc_txt",
        how="left"
    )

    # Aggregate results by week, outlet, and department
    prediction_with_dept_final = prediction_with_dept.groupBy(
        "week", "stg_outlet_cd", "stg_item_dept_desc_txt"
    ).agg(
        f.sum("next_week_qty").alias("next_week_qty"),
        f.sum("prediction").alias("prediction")
    )

    return prediction_with_dept_final

# In [26]:
def add_absolute_error_and_calculate_mape(predictions):
    """
    Add an 'absolute_error' column to the DataFrame which calculates the absolute error percentage,
    and calculate the mean absolute percentage error (MAPE).

    Parameters:
    predictions (DataFrame): The input DataFrame containing 'next_week_qty' and 'prediction' columns.

    Returns:
    tuple: (predictions_with_error, mape) where
        predictions_with_error (DataFrame) is the input with the extra 'absolute_error'
        column (in percent), and
        mape is the mean of that column, or None when no row has a defined error.

    Raises:
    Exception: If any step fails; the message wraps the original error, which is also
        attached as the exception's cause.
    """
    # Timestamp used only for the duration reported in the log line below
    start_time = time.time()

    try:
        actual = f.col("next_week_qty")

        # Percentage error is undefined when the actual quantity is 0. Leave those rows
        # as NULL explicitly (f.mean skips NULLs) instead of relying on Spark's
        # non-ANSI behaviour of returning NULL for division by zero.
        predictions_with_error = predictions.withColumn(
            "absolute_error",
            f.when(
                actual != 0,
                f.abs((actual - f.col("prediction")) / actual) * 100
            )
        )

        # Calculate MAPE
        mape = predictions_with_error.select(f.mean("absolute_error")).collect()[0][0]
        logger.info("MAPE calculation completed in {:.2f} seconds".format(time.time() - start_time))

        return predictions_with_error, mape

    except Exception as e:
        error_message = "An error occurred while calculating the total MAPE: {}".format(e)
        logger.error(error_message)
        # Keep the same exception type for callers, but preserve the original traceback
        raise Exception(error_message) from e