In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

%matplotlib inline

In [2]:
path = r'C:\Users\Jerome Pintucan\OneDrive - TRANSNATIONAL E-BUSINESS SOLUTIONS INC\LEARN\PYTHON\DATASETS\sales_data_sample.xlsx'

df = pd.read_excel(path)

df = df[['ORDERNUMBER', 'QUANTITYORDERED', 'PRICEEACH', 'ORDERLINENUMBER',
       'SALES', 'ORDERDATE', 'MONTH_ID', 'YEAR_ID', 'PRODUCTLINE', 'MSRP',
       'PRODUCTCODE', 'CUSTOMERNAME', 'ADDRESSLINE1',
       'CITY', 'STATE','COUNTRY', 'TERRITORY',
       'CONTACTLASTNAME', 'CONTACTFIRSTNAME', 'DEALSIZE']].copy()

df2 = df.copy()

In [3]:
class NameDropper(BaseEstimator, TransformerMixin):
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        return X.drop(['CUSTOMERNAME'], axis=1)
    

class AgeImputer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        imputer = SimpleImputer(strategy='mean')
        X['SALES IMPUTED'] = imputer.fit_transform(X[['SALES']])
        return X
    

class FeatureEncoder(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        encoder = OneHotEncoder()
        matrix = encoder.fit_transform(X[['TERRITORY']]).toarray()

        column_names = ['nan','EMEA', 'APAC', 'Japan']

        for i in range(len(matrix.T)):
            X[column_names[i]] = matrix.T[i]

        return X.drop(['TERRITORY'], axis=1)


In [4]:
pipe = Pipeline([
    ('droppper', NameDropper()),
    ('imputer', AgeImputer()),
    ('encoder', FeatureEncoder())
    ])

pipe.fit_transform(df2)

Unnamed: 0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,MONTH_ID,YEAR_ID,PRODUCTLINE,MSRP,...,STATE,COUNTRY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE,SALES IMPUTED,nan,EMEA,APAC,Japan
0,10107,30,95.70,2,,2/24/2003,2,2003,Motorcycles,95,...,NY,USA,Yu,Kwai,Small,3554.293479,0.0,0.0,0.0,1.0
1,10121,34,81.35,5,,2003-07-05 00:00:00,5,2003,Motorcycles,95,...,,France,Henriot,Paul,Small,3554.293479,0.0,1.0,0.0,0.0
2,10134,41,94.74,2,,2003-01-07 00:00:00,7,2003,Motorcycles,95,...,,France,Da Cunha,Daniel,Medium,3554.293479,0.0,1.0,0.0,0.0
3,10145,45,83.26,6,3746.70,8/25/2003,8,2003,Motorcycles,95,...,CA,USA,Young,Julie,Medium,3746.700000,0.0,0.0,0.0,1.0
4,10159,49,100.00,14,5205.27,2003-10-10 00:00:00,10,2003,Motorcycles,95,...,CA,USA,Brown,Julie,Medium,5205.270000,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2818,10350,20,100.00,15,2244.40,2004-02-12 00:00:00,12,2004,Ships,54,...,,Spain,Freyre,Diego,Small,2244.400000,0.0,1.0,0.0,0.0
2819,10373,29,100.00,1,3978.51,1/31/2005,1,2005,Ships,54,...,,Finland,Koskitalo,Pirkko,Medium,3978.510000,0.0,1.0,0.0,0.0
2820,10386,43,100.00,4,5417.57,2005-01-03 00:00:00,3,2005,Ships,54,...,,Spain,Freyre,Diego,Medium,5417.570000,0.0,1.0,0.0,0.0
2821,10397,34,62.24,1,2116.16,3/28/2005,3,2005,Ships,54,...,,France,Roulet,Annette,Small,2116.160000,0.0,1.0,0.0,0.0
