In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Read covid_toy.csv into a DataFrame; the path is absolute (/content/...),
# so the file must exist at that location for this cell to run.
df = pd.read_csv('/content/covid_toy.csv')

In [4]:
# Preview the first rows to see the column names and value types.
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The target column `has_covid` is
# separated from the features before splitting.
# random_state is fixed so the same rows land in train/test on every
# Restart & Run All; without it each run produces a different split and
# the outputs below are not reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['has_covid']),
    df['has_covid'],
    test_size=0.2,
    random_state=42,
)

In [10]:
# Check the size of the training feature matrix (rows, columns).
X_train.shape

(80, 5)

We will transform the fever column with SimpleImputer, the cough column with OrdinalEncoder, and the gender and city columns with OneHotEncoder.

In [11]:
#SimpleImputer

In [25]:
# Fill missing values in `fever` using SimpleImputer's default strategy.
Si = SimpleImputer()

# Learn the imputation statistic from the training data only...
X_train_fever = Si.fit_transform(X_train[['fever']])
# ...and reuse that fitted imputer on the test data. Calling fit_transform
# on X_train again (as before) just produced a second copy of the training
# column and never touched X_test.
X_test_fever = Si.transform(X_test[['fever']])

X_train_fever.shape

(80, 1)

In [17]:
#Ordinal Encoder

In [26]:
# Encode `cough` as an ordered integer: 'Mild' -> 0, 'Strong' -> 1
# (the order is given explicitly through `categories`).
Oe = OrdinalEncoder(categories=[['Mild', 'Strong']])

# Fit on the training data only, then apply the same fitted encoder to the
# test data. The previous code called fit_transform on X_train twice, so
# X_test_cough actually held training rows.
X_train_cough = Oe.fit_transform(X_train[['cough']])
X_test_cough = Oe.transform(X_test[['cough']])

X_train_cough.shape

(80, 1)

In [22]:
#OneHotEncoding

In [27]:
# One-hot encode `gender` and `city`, dropping the first category of each
# feature and returning a dense array.
# NOTE: newer scikit-learn releases rename the `sparse` keyword to
# `sparse_output`; switch the keyword if this cell raises a TypeError.
Ohe = OneHotEncoder(drop='first', sparse=False)

# Learn the categories from the training data only, then encode the test
# data with the same fitted encoder so both arrays share one column layout.
# The previous code called fit_transform on X_train twice, so
# X_test_gender_city actually held training rows.
X_train_gender_city = Ohe.fit_transform(X_train[['gender', 'city']])
X_test_gender_city = Ohe.transform(X_test[['gender', 'city']])

X_train_gender_city.shape



(80, 4)

In [28]:
#Extracting Age column

In [30]:
# Remove the four columns that were transformed separately above and keep
# whatever is left as a NumPy array, for both the train and test features.
X_train_age = X_train.drop(['gender', 'fever', 'cough', 'city'], axis=1).values
X_test_age = X_test.drop(['gender', 'fever', 'cough', 'city'], axis=1).values

X_train_age.shape

(80, 1)

In [31]:
#Concatenating all the columns

In [32]:
# Stitch the separately transformed training pieces back together
# column-wise, in the order: age, fever, gender/city one-hot, cough.
X_train_transformed = np.concatenate(
    [X_train_age, X_train_fever, X_train_gender_city, X_train_cough],
    axis=1,
)

Now we will apply the same transformations to all the columns in a single step using ColumnTransformer.

In [44]:
from sklearn.compose import ColumnTransformer

# A single ColumnTransformer that applies each preprocessing step to its
# own column(s): imputation for fever, ordinal encoding for cough, and
# one-hot encoding for gender and city. Columns not named in any step are
# kept as they are because of remainder='passthrough'.
transformers = ColumnTransformer(
    [
        ('tnf1', SimpleImputer(), ['fever']),
        ('tnf2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        ('tnf3', OneHotEncoder(sparse=False, drop='first'), ['gender', 'city']),
    ],
    remainder='passthrough',
)


In [46]:
# Fit every step on the training features, transform them, and check the
# shape of the resulting array.
transformers.fit_transform(X_train).shape



(80, 7)

In [47]:
# Apply the transformer that was already fitted on X_train to the test
# features. Use transform, not fit_transform: refitting here would learn
# the imputation value and the one-hot categories from the test set, which
# leaks test information and can change the column layout relative to train.
transformers.transform(X_test).shape



(20, 7)