In [None]:
def assemble_features(conf, df, output_col="features"):
    """
    Append a single vector column built from the configured feature columns.

    Parameters:
    conf (dict): Configuration dictionary; its "feature_columns" entry lists the
        names of the input columns to merge.
    df (DataFrame): The input DataFrame holding those columns.
    output_col (str): The name of the output vector column. Default is "features".

    Returns:
    DataFrame: The result of running a VectorAssembler over `df`, i.e. `df` with
        the assembled vector column named `output_col` added.

    Raises:
    KeyError: If `conf` is a plain dict without a "feature_columns" key.
    """
    # The column list comes from the config, not from a function argument.
    input_columns = conf["feature_columns"]

    # Build the assembler and apply it in a single step.
    return VectorAssembler(
        inputCols=input_columns,
        outputCol=output_col,
    ).transform(df)

In [None]:
def train_and_predict(label_col, train_df, test_df, output_cols=None):
    """
    Trains a linear regression model and makes predictions on the test set.

    Parameters:
    label_col (str): Name of the label (target) column used to fit the model.
    train_df (DataFrame): Training data. The model reads its inputs from a
        column named "features", so that column must be present.
    test_df (DataFrame): Data to score. It must also carry the "features"
        column, plus every column listed in `output_cols` other than
        "prediction" (which the model adds).
    output_cols (list[str] | None): Columns to keep in the returned DataFrame.
        When None (the default), the original fixed projection is used:
        "week", "stg_item_category_desc_txt", "stg_outlet_cd",
        "next_week_qty", "prediction".

    Returns:
    DataFrame: The DataFrame with predictions on the test set, restricted to
        the selected output columns.
    """
    # Fall back to the historical projection so existing callers see the
    # exact same output schema as before this parameter existed.
    if output_cols is None:
        output_cols = [
            "week",
            "stg_item_category_desc_txt",
            "stg_outlet_cd",
            "next_week_qty",
            "prediction",
        ]

    # Define the linear regression model
    lr = LinearRegression(featuresCol="features", labelCol=label_col)

    # Train the model
    lr_model = lr.fit(train_df)

    # Make predictions on test data; transform() appends the "prediction" column
    predictions = lr_model.transform(test_df)

    # Keep only the requested columns
    return predictions.select(*output_cols)