In [7]:
import numpy as np 
import pandas as pd 
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer

In [8]:
# Load the toy COVID dataset from the working directory.
df = pd.read_csv(filepath_or_buffer='covid_toy.csv')
# Preview the first five rows to see the columns and their raw values.
df.head(5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [9]:
# Count the missing values in each column (isnull is an alias of isna).
df.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [12]:
# Each ColumnTransformer step is a 3-tuple:
#   (step name, transformer instance with its arguments, columns it operates on)

# Fill the missing 'fever' values using SimpleImputer's default settings.
tnsf1 = (
    'tsf1',
    SimpleImputer(),
    ['fever'],
)

# Encode 'cough' as ordered integers, following the listed category order
# ('Mild' first, then 'Strong').
tnsf2 = (
    'tsf2',
    OrdinalEncoder(categories=[['Mild', 'Strong']]),
    ['cough'],
)

# One-hot encode 'gender' and 'city', dropping the first category of each.
tnsf3 = (
    'tsf3',
    OneHotEncoder(drop='first'),
    ['gender', 'city'],
)

In [13]:
# Bundle the three steps into a single ColumnTransformer; each tuple in
# `transformers` describes one encoding/imputation step.
# remainder='passthrough' keeps the columns that no step touches unchanged,
# rather than dropping them.
transformer = ColumnTransformer(
    remainder='passthrough',
    transformers=[tnsf1, tnsf2, tnsf3],
)

In [20]:
# Fit every step on df and transform it in one pass; wrapping the result in a
# DataFrame gives default integer column labels.
newDf = pd.DataFrame(data=transformer.fit_transform(X=df))
# Check whether the transformed frame kept the same shape as the input.
print(df.shape == newDf.shape)
newDf

False


Unnamed: 0,0,1,2,3,4,5,6,7
0,103.0,0.0,1.0,0.0,1.0,0.0,60,No
1,100.0,0.0,1.0,1.0,0.0,0.0,27,Yes
2,101.0,0.0,1.0,1.0,0.0,0.0,42,No
3,98.0,0.0,0.0,0.0,1.0,0.0,31,No
4,101.0,0.0,0.0,0.0,0.0,1.0,65,No
...,...,...,...,...,...,...,...,...
95,104.0,0.0,0.0,0.0,0.0,0.0,12,No
96,101.0,1.0,0.0,0.0,1.0,0.0,51,Yes
97,101.0,0.0,0.0,0.0,0.0,0.0,20,No
98,98.0,1.0,0.0,0.0,0.0,1.0,5,No


In [21]:
# Display the original frame (presumably for comparison with the transformed newDf — confirm intent).
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No
...,...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore,No
96,51,Female,101.0,Strong,Kolkata,Yes
97,20,Female,101.0,Mild,Bangalore,No
98,5,Female,98.0,Strong,Mumbai,No
