In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
from sklearn.impute import SimpleImputer 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder

In [3]:
df= pd.read_csv('covid_toy.csv')

In [4]:
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [5]:
df.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [6]:
X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=['has_covid']),df['has_covid'],
                                                test_size=0.2)

In [7]:
X_train

Unnamed: 0,age,gender,fever,cough,city
17,40,Female,98.0,Strong,Delhi
32,34,Female,101.0,Strong,Delhi
50,19,Male,101.0,Mild,Delhi
38,49,Female,101.0,Mild,Delhi
29,34,Female,,Strong,Mumbai
...,...,...,...,...,...
24,13,Female,100.0,Strong,Kolkata
80,14,Female,99.0,Mild,Mumbai
67,65,Male,99.0,Mild,Bangalore
5,84,Female,,Mild,Bangalore


# Aam Zindagi

In [13]:
# adding simple imputer to fever col
si=SimpleImputer()
X_train_fever=si.fit_transform(X_train[['fever']])

#Also the test data
X_test_fever=si.fit_transform(X_test[['fever']])

X_train_fever.shape

(80, 1)

In [15]:
# Ordinalencoding -> cough
oe=OrdinalEncoder(categories=[['Mild','Strong']])
X_train_cough=oe.fit_transform(X_train[['cough']])

X_test_cough=oe.fit_transform(X_test[['cough']])
X_train_cough.shape

(80, 1)

In [17]:
# OneHotEncoding -> gender,city
ohe=OneHotEncoder(drop='first',sparse=False)
X_train_gender_city=ohe.fit_transform(X_train[['gender','city']])

#also the test data
X_test_gender_city=ohe.fit_transform(X_test[['gender','city']])
X_train_gender_city.shape



(80, 4)

In [18]:
#Extracting age 
X_train_age=X_train.drop(columns=['gender','fever','cough','city']).values
X_test_age=X_test.drop(columns=['gender','fever','cough','city']).values

X_train_age.shape

(80, 1)

In [19]:
X_train_transformed = np.concatenate((X_train_age,X_train_fever,X_train_gender_city,X_train_cough),axis=1)
# also the test data
X_test_transformed = np.concatenate((X_test_age,X_test_fever,X_test_gender_city,X_test_cough),axis=1)

X_train_transformed.shape

(80, 7)

# Mentos Zindagi

In [20]:
from sklearn.compose import ColumnTransformer

In [22]:
transformer=ColumnTransformer(transformers=[
    ('tnf1',SimpleImputer(),['fever']),
    ('tnf2',OrdinalEncoder(categories=[['Mild','Strong']]),['cough']),
    ('tnf3',OneHotEncoder(sparse=False,drop='first'),['gender','city'])                                    
],remainder='passthrough')

In [23]:
transformer.fit_transform(X_train).shape



(80, 7)

In [24]:
transformer.transform(X_test).shape

(20, 7)