In [3]:
import pandas as pd
import numpy as np

from sklearn.base import TransformerMixin

class DataFrameImputer(TransformerMixin):
    """Impute missing values in a pandas DataFrame, column by column.

    Columns of dtype object are imputed with the most frequent value
    in the column.

    Columns of other types are imputed with the mean of the column. If the
    dtype does not support ``mean`` (for example a categorical column), the
    most frequent value is used instead.

    A column that has no non-missing values gets NaN as its fill value and
    is therefore left unchanged by ``transform``.

    Attributes
    ----------
    fill : pandas.Series
        Set by ``fit``. Holds one fill value per column, indexed by the
        column labels of the frame passed to ``fit``.
    """

    @staticmethod
    def _most_frequent(column):
        """Return the most frequent non-missing value of ``column`` (NaN if none)."""
        counts = column.value_counts()
        # value_counts() drops NaN by default, so an all-missing column gives
        # an empty result; indexing it with [0] would raise IndexError.
        return counts.index[0] if len(counts) else np.nan

    @classmethod
    def _fill_value(cls, column):
        """Return the value used to replace missing entries of ``column``."""
        if column.dtype == np.dtype('O'):
            return cls._most_frequent(column)
        try:
            return column.mean()
        except TypeError:
            # Dtypes such as ``category`` cannot be averaged; fall back to
            # the most frequent value instead of failing the whole fit.
            return cls._most_frequent(column)

    def fit(self, X, y=None):
        """Compute the per-column fill values from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame to learn the fill values from.
        y : ignored
            Accepted so the signature matches what ``fit_transform`` passes.

        Returns
        -------
        DataFrameImputer
            ``self``, so calls can be chained.
        """
        self.fill = pd.Series([self._fill_value(X[c]) for c in X],
                              index=X.columns)

        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing values replaced.

        Each column is filled with the value stored under its label in
        ``self.fill``; ``fit`` must have been called first.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame whose missing values should be filled.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            The filled frame; ``X`` itself is not modified.
        """
        return X.fillna(self.fill)


# Read the semicolon-delimited CSV file into the working DataFrame `df`.
df = pd.read_csv(filepath_or_buffer='sessionleveldata.csv', delimiter=';')


In [4]:
# Preview only the first rows instead of rendering the whole frame as cell output.
df.head(10)

Unnamed: 0,session,date,datetime,views,carts,abandon,sale,remove,startyear,birthyear,...,livedate,articletype,minprice,maxprice,avgprice,numberitems,minsales,maxsales,avgsales,totalsales
0,000074577A9B10B5990A7429408F26C5,21-2-2017,21-2-2017 14:05,3,0,0,0,0,1996.0,1977.0,...,21-12-2015,Bikinibroekje,17.95,29.95,25.95,0,0.00,0.00,0.00,0.00
1,00008248450C8A56D51604D6B1488269,25-5-2017,25-5-2017 15:58,0,1,1,0,1,2016.0,1976.0,...,25-1-2017,Bikinitopje met cupmaat,34.99,44.99,38.32,0,0.00,0.00,0.00,0.00
2,00009B888226B6CA62844CB717553A72,30-5-2017,30-5-2017 11:52,1,1,1,0,0,2017.0,1995.0,...,21-12-2016,Badpak (Corrigerend),59.95,59.95,59.95,0,0.00,0.00,0.00,0.00
3,0000FDB0FF89F99CADBCFE845D0B0C9A,15-6-2017,15-6-2017 07:17,7,1,0,0,1,,,...,22-4-2017,Bikinibroekje,23.99,39.99,28.20,0,0.00,0.00,0.00,0.00
4,0001888D34D30114762B3B30368CF2BB,26-6-2017,26-6-2017 11:34,1,0,0,0,0,,,...,13-4-2017,Jurk,55.00,55.00,55.00,0,0.00,0.00,0.00,0.00
5,0001AFCA408798E1FC63E5F56D5B8BC5,3-6-2017,3-6-2017 11:19,2,0,0,0,0,2010.0,1983.0,...,3-3-2017,Short (<14 cm),29.95,29.95,29.95,0,0.00,0.00,0.00,0.00
6,00025E5E1298F083DD39385803133B80,16-7-2017,16-7-2017 16:23,7,2,0,1,1,2014.0,1985.0,...,21-12-2016,Bikini,34.95,44.95,42.22,1,35.96,35.96,35.96,35.96
7,00033EBBE8319DCD5A3928E7CFBD3EDA,12-4-2017,12-4-2017 16:58,1,0,0,0,0,,,...,24-2-2017,Jurk,49.95,49.95,49.95,0,0.00,0.00,0.00,0.00
8,0004931D8C12BD1EB467CC99E2931BA6,4-6-2017,4-6-2017 14:09,0,0,0,1,0,1989.0,1966.0,...,16-5-2017,Badpak (Corrigerend),59.95,59.95,59.95,1,59.95,59.95,59.95,59.95
9,0005025EE90D5C1F221F89C7DD235AA5,25-1-2017,25-1-2017 11:37,2,0,0,0,0,,,...,4-11-2016,Bikinibroekje,24.99,34.99,29.99,0,0.00,0.00,0.00,0.00


In [5]:
# Learn the per-column fill values from `df`, then apply them to the same
# frame; `xt` is a new, imputed DataFrame and `df` is left untouched.
xt = DataFrameImputer().fit(df).transform(df)


In [6]:
# Preview only the first rows of the imputed frame instead of rendering all of it.
xt.head(10)

Unnamed: 0,session,date,datetime,views,carts,abandon,sale,remove,startyear,birthyear,...,livedate,articletype,minprice,maxprice,avgprice,numberitems,minsales,maxsales,avgsales,totalsales
0,000074577A9B10B5990A7429408F26C5,21-2-2017,21-2-2017 14:05,3,0,0,0,0,1996.000000,1977.000000,...,21-12-2015,Bikinibroekje,17.95,29.95,25.95,0,0.00,0.00,0.00,0.00
1,00008248450C8A56D51604D6B1488269,25-5-2017,25-5-2017 15:58,0,1,1,0,1,2016.000000,1976.000000,...,25-1-2017,Bikinitopje met cupmaat,34.99,44.99,38.32,0,0.00,0.00,0.00,0.00
2,00009B888226B6CA62844CB717553A72,30-5-2017,30-5-2017 11:52,1,1,1,0,0,2017.000000,1995.000000,...,21-12-2016,Badpak (Corrigerend),59.95,59.95,59.95,0,0.00,0.00,0.00,0.00
3,0000FDB0FF89F99CADBCFE845D0B0C9A,15-6-2017,15-6-2017 07:17,7,1,0,0,1,2007.330134,1974.505053,...,22-4-2017,Bikinibroekje,23.99,39.99,28.20,0,0.00,0.00,0.00,0.00
4,0001888D34D30114762B3B30368CF2BB,26-6-2017,26-6-2017 11:34,1,0,0,0,0,2007.330134,1974.505053,...,13-4-2017,Jurk,55.00,55.00,55.00,0,0.00,0.00,0.00,0.00
5,0001AFCA408798E1FC63E5F56D5B8BC5,3-6-2017,3-6-2017 11:19,2,0,0,0,0,2010.000000,1983.000000,...,3-3-2017,Short (<14 cm),29.95,29.95,29.95,0,0.00,0.00,0.00,0.00
6,00025E5E1298F083DD39385803133B80,16-7-2017,16-7-2017 16:23,7,2,0,1,1,2014.000000,1985.000000,...,21-12-2016,Bikini,34.95,44.95,42.22,1,35.96,35.96,35.96,35.96
7,00033EBBE8319DCD5A3928E7CFBD3EDA,12-4-2017,12-4-2017 16:58,1,0,0,0,0,2007.330134,1974.505053,...,24-2-2017,Jurk,49.95,49.95,49.95,0,0.00,0.00,0.00,0.00
8,0004931D8C12BD1EB467CC99E2931BA6,4-6-2017,4-6-2017 14:09,0,0,0,1,0,1989.000000,1966.000000,...,16-5-2017,Badpak (Corrigerend),59.95,59.95,59.95,1,59.95,59.95,59.95,59.95
9,0005025EE90D5C1F221F89C7DD235AA5,25-1-2017,25-1-2017 11:37,2,0,0,0,0,2007.330134,1974.505053,...,4-11-2016,Bikinibroekje,24.99,34.99,29.99,0,0.00,0.00,0.00,0.00


In [8]:
# Write the imputed frame to disk, semicolon-delimited. With pandas' default
# index=True the row index is written as the first column of the file.
xt.to_csv(path_or_buf="afterimputted.csv", sep=';')