In [None]:
# Function to create target aggregation by district

def create_district_target_agg(feature_dataframe, client):
    """Attach the per-district mean of ``target`` to the feature frame.

    Parameters
    ----------
    feature_dataframe : pandas.DataFrame
        Frame to enrich; must contain a ``'disrict'`` column (the key is
        spelled exactly like that in both frames).
    client : pandas.DataFrame
        Frame providing the ``'disrict'`` and ``'target'`` columns. It is
        only read, never modified.

    Returns
    -------
    pandas.DataFrame
        ``feature_dataframe`` left-merged with a new column
        ``'f_t_district_target_mean'``. Rows whose district does not occur
        in ``client`` get NaN in that column.
    """
    # Mean target per district, with the key restored as a regular column
    # and the value column renamed to its feature name.
    district_means = (
        client
        .groupby('disrict')[['target']]
        .mean()
        .reset_index()
        .rename(columns={'target': 'f_t_district_target_mean'})
    )

    # Left merge keeps every row of the feature frame.
    return feature_dataframe.merge(district_means, on='disrict', how='left')


In [None]:
# Function to create target aggregation by client_catg

def create_client_catg_target_agg(feature_dataframe, client):
    """Attach the per-``client_catg`` mean of ``target`` to the feature frame.

    Parameters
    ----------
    feature_dataframe : pandas.DataFrame
        Frame to enrich; must contain a ``'client_catg'`` column.
    client : pandas.DataFrame
        Frame providing the ``'client_catg'`` and ``'target'`` columns. It is
        only read, never modified.

    Returns
    -------
    pandas.DataFrame
        ``feature_dataframe`` left-merged with a new column
        ``'f_t_client_catg_target_mean'``. Rows whose category does not occur
        in ``client`` get NaN in that column.
    """
    key = 'client_catg'
    feature_name = 'f_t_client_catg_target_mean'

    # One row per category holding the mean of the target.
    mean_by_category = client.groupby(key)[['target']].mean()

    # Turn the group key back into a column and give the value its feature name.
    lookup = mean_by_category.reset_index().rename(columns={'target': feature_name})

    # Left merge keeps every row of the feature frame.
    return feature_dataframe.merge(lookup, on=key, how='left')


In [None]:
# Function to create index_cons_error aggregated by client_id

def create_index_cons_error_agg(feature_dataframe, invoice):
    """Attach the per-client sum of the index/consumption discrepancy.

    For every invoice row the discrepancy is::

        (new_index - old_index)
            - (consommation_level_1 + consommation_level_2
               + consommation_level_3 + consommation_level_4)

    Parameters
    ----------
    feature_dataframe : pandas.DataFrame
        Frame to enrich; must contain a ``'client_id'`` column.
    invoice : pandas.DataFrame
        Frame providing ``'client_id'``, ``'new_index'``, ``'old_index'`` and
        the four ``'consommation_level_*'`` columns. It is only read, never
        modified.

    Returns
    -------
    pandas.DataFrame
        ``feature_dataframe`` left-merged with a new column
        ``'f_index_cons_error_sum'``. Clients absent from ``invoice`` get NaN.
    """
    # Accumulate the four consumption levels left to right with `+`, so a
    # missing level makes that row's total missing as well.
    total_consumption = invoice['consommation_level_1']
    for level in ('consommation_level_2', 'consommation_level_3', 'consommation_level_4'):
        total_consumption = total_consumption + invoice[level]

    index_delta = invoice['new_index'] - invoice['old_index']

    # `assign` works on a copy, so the caller's invoice frame is untouched.
    with_error = invoice.assign(index_cons_error=index_delta - total_consumption)

    error_per_client = (
        with_error
        .groupby('client_id')[['index_cons_error']]
        .sum()
        .reset_index()
        .rename(columns={'index_cons_error': 'f_index_cons_error_sum'})
    )

    # Left merge keeps every row of the feature frame.
    return feature_dataframe.merge(error_per_client, on='client_id', how='left')


In [None]:
# Function to clean and create counter_statue aggregated by client_id

def create_counter_statue_agg(feature_dataframe, invoice):
    """Clean ``counter_statue`` and attach its per-client mean to the feature frame.

    Cleaning is done on a private copy of ``invoice``:

    1. The strings ``'0'`` .. ``'5'`` are replaced by the integers 0 .. 5.
    2. Any value that is not one of 0 .. 5 afterwards is treated as missing.
    3. Missing values are filled with the most frequent valid code (the
       smallest one on ties). If the column holds no valid code at all, the
       values stay missing and the resulting means are NaN.

    Parameters
    ----------
    feature_dataframe : pandas.DataFrame
        Frame to enrich; must contain a ``'client_id'`` column.
    invoice : pandas.DataFrame
        Frame providing the ``'client_id'`` and ``'counter_statue'`` columns.
        It is only read, never modified.

    Returns
    -------
    pandas.DataFrame
        ``feature_dataframe`` left-merged with a new column
        ``'f_counter_statue_mean'``. Clients absent from ``invoice`` get NaN.
    """
    valid_codes = [0, 1, 2, 3, 4, 5]

    # All cleaning happens on this local copy, never on a notebook-level frame.
    df_copy = invoice.copy()

    # Map the string spellings of the valid codes to their integer values.
    status = df_copy['counter_statue'].replace({str(code): code for code in valid_codes})

    # Blank out everything outside the valid code set, then force a numeric
    # dtype so that mode / mean operate on floats.
    status = status.where(status.isin(valid_codes)).astype(float)

    # Series.mode() ignores NaN and returns the modes sorted ascending, so
    # the first entry is the smallest most-frequent code.
    most_frequent = status.mode()
    if not most_frequent.empty:
        status = status.fillna(most_frequent.iloc[0])

    df_copy['counter_statue'] = status

    # Aggregate by client_id and compute the mean cleaned status.
    aggregated_df = (
        df_copy
        .groupby('client_id')[['counter_statue']]
        .mean()
        .reset_index()
        .rename(columns={'counter_statue': 'f_counter_statue_mean'})
    )

    # Left merge keeps every row of the feature frame.
    return feature_dataframe.merge(aggregated_df, on='client_id', how='left')
