In [1]:
import numpy as np
import pandas as pd

from sklearn.base import TransformerMixin

In [2]:
class DataFrameImputer(TransformerMixin):
    """Impute missing values in a pandas DataFrame, one column at a time.

    - Columns of dtype object (and pandas string / categorical dtypes) are
      imputed with the most frequent value in the column.
    - Columns of any other dtype are imputed with the column mean, or with
      the column median when ``numeric_strategy="median"``.

    A column that has no observed values offers nothing to learn from; its
    fill value is NaN, so ``transform`` leaves that column unchanged.

    Parameters
    ----------
    numeric_strategy : {"mean", "median"}, default "mean"
        Statistic used to fill columns that are not imputed with their most
        frequent value.

    Attributes
    ----------
    fill : pandas.Series
        Fill value per column, indexed by column label. Set by ``fit``.
    """

    _NUMERIC_STRATEGIES = ("mean", "median")

    def __init__(self, numeric_strategy="mean"):
        # Stored as-is and validated in fit().
        self.numeric_strategy = numeric_strategy

    @staticmethod
    def _uses_most_frequent(dtype):
        """Return True when a column of ``dtype`` is filled with its most frequent value."""
        return (
            dtype == np.dtype("O")
            # String and categorical extension dtypes have no mean/median.
            or isinstance(dtype, pd.CategoricalDtype)
            or pd.api.types.is_string_dtype(dtype)
        )

    def _fill_value(self, column):
        """Return the value used to fill the missing entries of one column."""
        if not column.notna().any():
            # value_counts() drops NaN, so it is empty here and ``.index[0]``
            # would raise IndexError. Filling with NaN is a no-op.
            return np.nan
        if self._uses_most_frequent(column.dtype):
            # value_counts() sorts by descending count: first label is the mode.
            return column.value_counts().index[0]
        if self.numeric_strategy == "median":
            return column.median()
        return column.mean()

    def fit(self, X, y=None):
        """Learn one fill value per column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame to learn the fill values from.
        y : object, optional
            Unused.

        Returns
        -------
        DataFrameImputer
            ``self``, with ``self.fill`` set.

        Raises
        ------
        ValueError
            If ``numeric_strategy`` is not "mean" or "median".
        """
        if self.numeric_strategy not in self._NUMERIC_STRATEGIES:
            raise ValueError(
                "numeric_strategy must be one of %r, got %r"
                % (self._NUMERIC_STRATEGIES, self.numeric_strategy)
            )
        self.fill = pd.Series(
            [self._fill_value(X[c]) for c in X],
            index=X.columns,
        )
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values replaced by the learned fill values.

        ``fit`` must have been called first; otherwise ``self.fill`` does not
        exist and an AttributeError is raised.
        """
        return X.fillna(self.fill)

In [3]:
# Toy sample: one numeric and one string column, each with a single missing entry.
x1 = [1.0, 1.0, 2.0, 4.0, np.nan]
x2 = ["a", "a", "b", np.nan, "a"]
# Pair the two columns up row by row.
x = [row for row in zip(x1, x2)]

In [4]:
# Display the (x1, x2) row tuples that will become the DataFrame rows.
x

[(1.0, 'a'), (1.0, 'a'), (2.0, 'b'), (4.0, nan), (nan, 'a')]

In [5]:
# Build the demo frame from the row tuples: one column per element of each tuple.
df = pd.DataFrame(data=x, columns=["x1", "x2"])
df

Unnamed: 0,x1,x2
0,1.0,a
1,1.0,a
2,2.0,b
3,4.0,
4,,a


In [6]:
# Fit the imputer on df and fill its missing values in one step.
# Note: this rebinds `df`, so the un-imputed frame is no longer reachable
# under that name; re-run the cell that builds `df` to get it back.
df = DataFrameImputer().fit_transform(df)
df

Unnamed: 0,x1,x2
0,1.0,a
1,1.0,a
2,2.0,b
3,4.0,a
4,2.0,a
