In [None]:
def clean_remaining_NaNs(data_df, feature_df):
    """
    Address any remaining NaNs in data_df.

    Each column that still contains NaNs is looked up in feature_df (matched
    on its 'attribute' column) and filled according to its 'type':
        'mixed' / 'ordinal' -> column median
        'numeric'           -> column mean
    Columns of any other type are left as they are.

    INPUT:  Data DataFrame, Feature Information DataFrame
    OUTPUT: Data DataFrame modified by replacement of any remaining NaNs
            (a copy; data_df itself is not modified)

    A column with NaNs whose name is not listed in feature_df['attribute']
    raises KeyError from the type lookup.
    """

    data = data_df.copy()

    # index the feature information by attribute name so each type is one lookup
    feature_view = feature_df.set_index('attribute')

    # only the columns that still hold at least one NaN need attention
    nan_columns = data.columns[data.isna().any()]

    for feature in nan_columns:
        feature_type = feature_view.loc[feature, 'type']

        if feature_type in ('mixed', 'ordinal'):
            fill_value = data[feature].median()
        elif feature_type == 'numeric':
            fill_value = data[feature].mean()
        else:
            # no replacement rule for this type; leave the NaNs in place
            continue

        # Assign the filled column back instead of calling
        # data[feature].fillna(..., inplace=True): the in-place call on a
        # column selection does not update `data` under pandas Copy-on-Write.
        data[feature] = data[feature].fillna(fill_value)

    return data

In [None]:
def transform_data(data_df):
    """
    Standardize every column (feature) of data_df with a StandardScaler.

    INPUT:  Data DataFrame
    OUTPUT: New DataFrame holding the scaled values under the same column
            labels. The frame is rebuilt from the transformer output without
            an index argument, so the input's index labels are not passed on.
    """

    frame = data_df.copy()
    column_labels = frame.columns

    # All columns are routed through the scaler, so the 'passthrough'
    # remainder has no leftover columns to forward.
    scaling_step = ('data_transformed', StandardScaler(), column_labels)
    column_scaler = ColumnTransformer([scaling_step], remainder='passthrough')
    scaled_values = column_scaler.fit_transform(frame)

    # wrap the transformer output back into a DataFrame with the original labels
    return pd.DataFrame(scaled_values, columns=column_labels)