In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Load covid_toy.csv (path is relative to the working directory) into a DataFrame.
df = pd.read_csv('covid_toy.csv')

In [4]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [17]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.drop(columns=['has_covid']),  # features: every column except the label
    df['has_covid'],                 # label column
    test_size=0.2,
    random_state=0,
)

# Aam zindagi ("ordinary life"): transforming each column by hand, without ColumnTransformer

In [38]:
# Fill missing values in the fever column. SimpleImputer() with no arguments
# uses its default strategy (mean of the fitted column).
si = SimpleImputer()
X_train_fever = si.fit_transform(X_train[['fever']])

# Reuse the imputer fitted on the training split for the test split.
# Calling fit_transform here would re-learn the fill value from the test data,
# so train and test would be imputed with different statistics (data leakage).
X_test_fever = si.transform(X_test[['fever']])
X_train_fever.shape

(80, 1)

In [39]:
# Ordinal-encode cough with an explicit order: 'Mild' -> 0, 'Strong' -> 1.
oe = OrdinalEncoder(categories = [['Mild', 'Strong']])
X_train_cough = oe.fit_transform(X_train[['cough']])

# Encode the test split with the encoder fitted on the training split;
# an encoder should be fitted once, on training data only.
X_test_cough = oe.transform(X_test[['cough']])

X_train_cough.shape

(80, 1)

In [29]:
# One-hot encode gender and city. drop='first' removes one indicator per
# feature; sparse_output=False returns a dense NumPy array.
ohe = OneHotEncoder(drop = 'first', sparse_output = False)
X_train_gender_city = ohe.fit_transform(X_train[['gender', 'city']])

# Encode the test split with the categories learned from the training split.
# Re-fitting on the (small) test split could learn a different category set,
# producing a different number or order of columns than the training matrix.
X_test_gender_city = ohe.transform(X_test[['gender', 'city']])
X_train_gender_city.shape

(80, 4)

In [30]:
# Display the DataFrame again for reference; the rendered view elides the middle rows.
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No
...,...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore,No
96,51,Female,101.0,Strong,Kolkata,Yes
97,20,Female,101.0,Mild,Bangalore,No
98,5,Female,98.0,Strong,Mumbai,No


In [41]:
# Age needs no preprocessing: pull it out as a single-column 2-D array so it
# can be stacked next to the other transformed pieces.
X_train_age = X_train[['age']].values

# Same selection for the test split.
X_test_age = X_test[['age']].values
X_test_age.shape

(20, 1)

In [45]:
# in X_train data
X_train_transformed = np.concatenate((X_train_age, X_train_fever, X_train_gender_city, X_train_cough), axis = 1)

# in X_test data
X_test_transformed = np.concatenate((X_test_age, X_test_fever, X_test_gender_city, X_test_cough), axis = 1)

X_train_transformed

array([[ 22.        ,  99.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ],
       [ 56.        , 104.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   1.        ],
       [ 31.        ,  98.        ,   0.        ,   0.        ,
          1.        ,   0.        ,   0.        ],
       [ 75.        , 104.        ,   0.        ,   1.        ,
          0.        ,   0.        ,   1.        ],
       [ 72.        ,  99.        ,   1.        ,   0.        ,
          0.        ,   0.        ,   0.        ],
       [ 66.        ,  99.        ,   1.        ,   0.        ,
          0.        ,   0.        ,   1.        ],
       [ 14.        , 101.        ,   1.        ,   0.        ,
          0.        ,   0.        ,   1.        ],
       [ 10.        ,  98.        ,   0.        ,   0.        ,
          1.        ,   0.        ,   1.        ],
       [ 24.        ,  98.        ,   1.        ,   0.        ,
          1.    

In [54]:
# Display the original DataFrame once more before the ColumnTransformer section.
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No
...,...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore,No
96,51,Female,101.0,Strong,Kolkata,Yes
97,20,Female,101.0,Mild,Bangalore,No
98,5,Female,98.0,Strong,Mumbai,No


# Mentos Zindagi ("Mentos life", i.e. the smart way): the same preprocessing with ColumnTransformer

In [46]:
from sklearn.compose import ColumnTransformer

In [60]:
# A single ColumnTransformer bundles the per-column steps. Each entry is
# (name, estimator, columns); remainder='passthrough' keeps the columns that
# are not listed instead of dropping them.
transformer = ColumnTransformer(
    [
        ('trfr1', SimpleImputer(), ['fever']),
        ('trfr2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        ('trfr3', OneHotEncoder(drop='first', sparse_output=False), ['gender', 'city']),
    ],
    remainder='passthrough',
)

In [61]:
# Echo the ColumnTransformer so the notebook renders its configuration.
transformer

In [63]:
# Fit all sub-transformers on the training split and transform it in one call;
# only the shape of the result is displayed.
transformer.fit_transform(X_train).shape

(80, 7)

In [64]:
# Transform the test split without re-fitting (relies on the transformer having
# been fitted on the training split in an earlier cell), then show the shape.
transformer.transform(X_test).shape

(20, 7)

In [66]:
# Load train.csv (path is relative to the working directory) into a second DataFrame.
df1 = pd.read_csv('train.csv')

In [69]:
# Display the loaded frame to inspect its columns.
df1

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [68]:
from sklearn.model_selection import train_test_split
# Split df1 into features and label, then into train/test parts.
# NOTE(review): 'Survived' is assumed to be the intended target column — confirm.
# drop(['']) looked for a label '' that does not exist (KeyError), and passing
# a single array to train_test_split yields two outputs, not the four unpacked here.
# test_size and random_state mirror the split used for the first dataset.
X_train1, X_test2, Y_train1, Y_test2 = train_test_split(
    df1.drop(columns=['Survived']),
    df1['Survived'],
    test_size=0.2,
    random_state=0,
)