In [6]:
import re
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class Preprocessor(BaseEstimator, TransformerMixin):
    """Stateless cleaning step that turns a raw DataFrame into model features.

    ``transform`` strips newline and star characters from every column,
    fills missing values in the columns listed in ``fill_values``, converts
    the ``Wage`` column from currency strings to floats and returns only
    the columns listed in ``features``.
    """

    def __init__(self):
        print("init was called!")
        # Column name -> replacement used for missing values in that column.
        self.fill_values = {
            'Loan Date End': 'Not on Loan',
            'Hits': 'Unknown'
        }
        # Columns returned by ``transform``, in this order.
        self.features = ["Age","Potential", "Finishing", "Dribbling", "ShortPassing", "LongPassing", "Acceleration",
                         "SprintSpeed", "Jumping", "Strength", "Wage", "Stamina", "Positioning", "Aggression"]

    def fit(self, X, y=None):
        """Do nothing and return ``self``; nothing is learned from the data."""
        print("fit was called!")
        return self

    def transform(self, X, y=None):
        """Run every preprocessing step and return the feature columns.

        Args:
            X: DataFrame containing at least the columns in ``self.features``.
            y: Ignored; accepted so the method fits the transformer API.

        Returns:
            A new DataFrame restricted to ``self.features``. ``X`` itself is
            not modified because the first ``apply`` already produces a copy.

        Raises:
            KeyError: If ``X`` lacks ``Wage`` or any column in
                ``self.features``.
        """
        print("transform was called!")
        print(X.columns)

        X = X.apply(self.remove_char, character='\n')
        X = X.apply(self.remove_char, character='★')
        X = self.fill_na(X, self.fill_values)
        X["Wage"] = X["Wage"].apply(self.convert_unit)

        return X[self.features]

    def remove_char(self, column, character):
        """Remove every occurrence of ``character`` from a column's strings.

        Args:
            column: pandas Series (one DataFrame column).
            character: Literal text to delete; it is escaped, so regex
                metacharacters are matched verbatim.

        Returns:
            A new Series with ``character`` removed from string values.
        """
        # Without regex=True, Series.replace only swaps cells whose *whole*
        # value equals ``character``, so text embedded in a longer string
        # (e.g. "4 ★") would be left untouched.
        return column.replace(re.escape(character), '', regex=True)

    def fill_na(self, df, fill_values):
        """Fill missing values per column.

        Args:
            df: DataFrame to fill.
            fill_values: Mapping of column name -> replacement value.

        Returns:
            A new DataFrame with the missing values replaced.
        """
        return df.fillna(value=fill_values)

    def convert_unit(self, column):
        """Convert one currency-formatted value (e.g. ``"€5.5K"``) to a float.

        Args:
            column: A single cell value: a string with an optional leading
                currency symbol and an optional ``K``/``M`` suffix, a number,
                or a missing value.

        Returns:
            The amount as a float, or NaN for missing or empty values.

        Raises:
            ValueError: If the remaining text is not a valid number.
        """
        multiplier = {'K': 1000.0, 'M': 1000000.0}

        # Already numeric (e.g. transform applied twice): slicing off the
        # first character would corrupt the value, so pass it through.
        if pd.api.types.is_number(column):
            return float(column)
        if pd.isna(column):
            return float('nan')

        text = str(column).strip()
        # A single leading non-numeric character is taken to be the currency
        # symbol; a leading digit or '.' is part of the amount and is kept.
        if text and not (text[0].isdigit() or text[0] == '.'):
            text = text[1:]
        if not text:
            return float('nan')

        unit = text[-1]
        if unit in multiplier:
            return float(text[:-1]) * multiplier[unit]
        return float(text)
