# Importing Libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA

In [3]:
dataset1=pd.read_csv("Cleaned_Data.csv")
dataset1.head()

Unnamed: 0,Fever,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,Diarrhea,...,Gender_Female,Gender_Male,Gender_Transgender,Severity_Mild,Severity_Moderate,Severity_None,Severity_Severe,Contact_Dont-Know,Contact_No,Contact_Yes
0,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,0,0,1
1,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,0,1,0
2,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,1,0,0
3,1,1,1,1,1,0,1,1,1,1,...,0,1,0,0,1,0,0,0,0,1
4,1,1,1,1,1,0,1,1,1,1,...,0,1,0,0,1,0,0,0,1,0


In [3]:
dataset1.shape

(316800, 26)

In [4]:
dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 316800 entries, 0 to 316799
Data columns (total 26 columns):
 #   Column                   Non-Null Count   Dtype
---  ------                   --------------   -----
 0   Fever                    316800 non-null  int64
 1   Tiredness                316800 non-null  int64
 2   Dry-Cough                316800 non-null  int64
 3   Difficulty-in-Breathing  316800 non-null  int64
 4   Sore-Throat              316800 non-null  int64
 5   None_Sympton             316800 non-null  int64
 6   Pains                    316800 non-null  int64
 7   Nasal-Congestion         316800 non-null  int64
 8   Runny-Nose               316800 non-null  int64
 9   Diarrhea                 316800 non-null  int64
 10  None_Experiencing        316800 non-null  int64
 11  Age_0-9                  316800 non-null  int64
 12  Age_10-19                316800 non-null  int64
 13  Age_20-24                316800 non-null  int64
 14  Age_25-59                316800 non-

In [5]:
dataset1.isnull().sum()

Fever                      0
Tiredness                  0
Dry-Cough                  0
Difficulty-in-Breathing    0
Sore-Throat                0
None_Sympton               0
Pains                      0
Nasal-Congestion           0
Runny-Nose                 0
Diarrhea                   0
None_Experiencing          0
Age_0-9                    0
Age_10-19                  0
Age_20-24                  0
Age_25-59                  0
Age_60+                    0
Gender_Female              0
Gender_Male                0
Gender_Transgender         0
Severity_Mild              0
Severity_Moderate          0
Severity_None              0
Severity_Severe            0
Contact_Dont-Know          0
Contact_No                 0
Contact_Yes                0
dtype: int64

# For Mild Severity

In [6]:
smote=SMOTE()
balanced_msx,balanced_msy=smote.fit_resample(dataset1.iloc[:,0:19],dataset1.iloc[:,19])
print(balanced_msx.shape)
print(balanced_msy.shape)

(475200, 19)
(475200,)


In [7]:
scaler=StandardScaler()
scale_x=scaler.fit_transform(balanced_msx)
scale_x

array([[ 1.48206373,  0.99965494,  0.88216977, ..., -0.70702978,
         1.41481642, -0.70748514],
       [ 1.48206373,  0.99965494,  0.88216977, ..., -0.70702978,
         1.41481642, -0.70748514],
       [ 1.48206373,  0.99965494,  0.88216977, ..., -0.70702978,
         1.41481642, -0.70748514],
       ...,
       [-0.67473482, -1.00034518, -1.13356866, ..., -0.70702978,
         1.41481642, -0.70748514],
       [ 1.48206373,  0.99965494,  0.88216977, ..., -0.70702978,
         1.41481642, -0.70748514],
       [-0.67473482, -1.00034518,  0.88216977, ...,  1.41436759,
        -0.70680548, -0.70748514]])

In [8]:
print("Values count in Balanced dependent dataset for mild severity",balanced_msy.value_counts())

Values count in Balanced dependent dataset for mild severity 0    237600
1    237600
Name: Severity_Mild, dtype: int64


In [9]:
pca=PCA(0.95)
x_red=pca.fit_transform(scale_x)
x_red.shape

(475200, 15)

## Splitting the Dataset for Training 

In [10]:
train_x,test_x,train_y,test_y=train_test_split(scale_x,balanced_msy,test_size=0.30,random_state=123)
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(332640, 19)
(332640,)
(142560, 19)
(142560,)


## Training Models

In [11]:
lr=LogisticRegression()
lr.fit(train_x,train_y)
pred_lr=lr.predict(test_x)
print("The confussion metrix for Logistic Regression is:\n",metrics.confusion_matrix(test_y,pred_lr))
print("The accuracy of Logistic Regression is:\t\t",metrics.accuracy_score(test_y,pred_lr))
print("The recal score of Logistic Regression is is:\t\t",metrics.recall_score(test_y,pred_lr))
print("The precision of Logistic Regression is:\t\t",metrics.precision_score(test_y,pred_lr))
print("The f1 score of Logistic Regression is:\t\t",metrics.f1_score(test_y,pred_lr))

The confussion metrix for Logistic Regression is:
 [[28853 42547]
 [28935 42225]]
The accuracy of Logistic Regression is:		 0.4985830527497194
The recal score of Logistic Regression is is:		 0.593381112984823
The precision of Logistic Regression is:		 0.4981007879960364
The f1 score of Logistic Regression is:		 0.5415822281507324


In [12]:
rf=RandomForestClassifier()
rf.fit(train_x,train_y)
pred=rf.predict(test_x)
print("The confussion metrix for Random forest is:\n",metrics.confusion_matrix(test_y,pred))
print("The accuracy of Random Forest Classifier is:\t\t",metrics.accuracy_score(test_y,pred))
print("The recal score of Random Forest Classifier is:\t\t",metrics.recall_score(test_y,pred))
print("The precision of Random Forest Classifier is:\t\t",metrics.precision_score(test_y,pred))
print("The f1 score of Random Forest Classifier is:\t\t",metrics.f1_score(test_y,pred))

The confussion metrix for Random forest is:
 [[33348 38052]
 [36256 34904]]
The accuracy of Random Forest Classifier is:		 0.47875982042648707
The recal score of Random Forest Classifier is:		 0.49050028105677346
The precision of Random Forest Classifier is:		 0.4784253522671199
The f1 score of Random Forest Classifier is:		 0.4843875766743457


In [13]:
ada=AdaBoostClassifier()
ada.fit(train_x,train_y)
pred_ada=ada.predict(test_x)
print("The confussion metrix for AdaBoost is:\n",metrics.confusion_matrix(test_y,pred_ada))
print("The accuracy of AdaBoost Classifier is:\t\t",metrics.accuracy_score(test_y,pred_ada))
print("The recal score of AdaBoost Classifier is is:\t\t",metrics.recall_score(test_y,pred_ada))
print("The precision of AdaBoost Classifier is:\t\t",metrics.precision_score(test_y,pred_ada))
print("The f1 score of AdaBoost Classifier is:\t\t",metrics.f1_score(test_y,pred_ada))

The confussion metrix for AdaBoost is:
 [[28907 42493]
 [29014 42146]]
The accuracy of AdaBoost Classifier is:		 0.4984076879910213
The recal score of AdaBoost Classifier is is:		 0.5922709387296233
The precision of AdaBoost Classifier is:		 0.4979501175580997
The f1 score of AdaBoost Classifier is:		 0.5410304302338268


In [14]:
dsc=DecisionTreeClassifier()
dsc.fit(train_x,train_y)
pred_dsc=dsc.predict(test_x)
print("The confussion metrix for Decision Tree is:\n",metrics.confusion_matrix(test_y,pred_dsc))
print("The accuracy of Decision Tree Classifier is:\t\t",metrics.accuracy_score(test_y,pred_dsc))
print("The recal score of Decision Tree Classifier is is:\t\t",metrics.recall_score(test_y,pred_dsc))
print("The precision of Decision Tree Classifier is:\t\t",metrics.precision_score(test_y,pred_dsc))
print("The f1 score of Decision Tree Classifier is:\t\t",metrics.f1_score(test_y,pred_dsc))

The confussion metrix for Decision Tree is:
 [[35206 36194]
 [38088 33072]]
The accuracy of Decision Tree Classifier is:		 0.4789421997755331
The recal score of Decision Tree Classifier is is:		 0.4647554806070826
The precision of Decision Tree Classifier is:		 0.47746369069962175
The f1 score of Decision Tree Classifier is:		 0.4710238844658397


# For Moderate Severity

In [15]:
balanced_mosx,balanced_mosy=smote.fit_resample(dataset1.iloc[:,0:19],dataset1.iloc[:,20])
print(balanced_msx.shape)
print(balanced_msy.shape)

(475200, 19)
(475200,)


## Training Models

In [16]:
train_mox,test_mox,train_moy,test_moy=train_test_split(balanced_mosx,balanced_mosy,test_size=0.30,random_state=123)
print(train_mox.shape)
print(train_moy.shape)
print(test_mox.shape)
print(test_moy.shape)

(332640, 19)
(332640,)
(142560, 19)
(142560,)


### Using AdaBoost Classifier 

In [17]:
ada_mo=AdaBoostClassifier()
ada_mo.fit(train_mox,train_moy)
pred_ada_mo=ada_mo.predict(test_mox)
print("The confussion metrix for AdaBoost is:\n",metrics.confusion_matrix(test_y,pred_ada_mo))
print("The accuracy of AdaBoost Classifier is:\t\t",metrics.accuracy_score(test_y,pred_ada_mo))
print("The recal score of AdaBoost Classifier is is:\t\t",metrics.recall_score(test_y,pred_ada_mo))
print("The precision of AdaBoost Classifier is:\t\t",metrics.precision_score(test_y,pred_ada_mo))
print("The f1 score of AdaBoost Classifier is:\t\t",metrics.f1_score(test_y,pred_ada_mo))

The confussion metrix for AdaBoost is:
 [[29850 41550]
 [29916 41244]]
The accuracy of AdaBoost Classifier is:		 0.4986952861952862
The recal score of AdaBoost Classifier is is:		 0.5795952782462057
The precision of AdaBoost Classifier is:		 0.49815204000289875
The f1 score of AdaBoost Classifier is:		 0.5357964067188902


### Using Random Forest Classifier

In [18]:
rf_mo=RandomForestClassifier()
rf_mo.fit(train_mox,train_moy)
pred_rf_mo=rf_mo.predict(test_mox)
print("The confussion metrix for Decision Tree Classifier is:\n",metrics.confusion_matrix(test_y,pred_rf_mo))
print("The accuracy of Decision Tree Classifier is:\t\t",metrics.accuracy_score(test_y,pred_rf_mo))
print("The recal score of Decision Tree Classifier is is:\t\t",metrics.recall_score(test_y,pred_rf_mo))
print("The precision of Decision Tree Classifier is:\t\t",metrics.precision_score(test_y,pred_rf_mo))
print("The f1 score of Decision Tree Classifier is:\t\t",metrics.f1_score(test_y,pred_rf_mo))

The confussion metrix for Decision Tree Classifier is:
 [[34691 36709]
 [34812 36348]]
The accuracy of Decision Tree Classifier is:		 0.4983094837261504
The recal score of Decision Tree Classifier is is:		 0.5107925801011804
The precision of Decision Tree Classifier is:		 0.49752932641635983
The f1 score of Decision Tree Classifier is:		 0.5040737222380164


### Using Decision Tree Classifier

In [19]:
dsc_mo=DecisionTreeClassifier()
dsc_mo.fit(train_mox,train_moy)
pred_dsc_mo=dsc.predict(test_mox)
print("The confussion metrix for Decision Tree is:\n",metrics.confusion_matrix(test_y,pred_dsc_mo))
print("The accuracy of Decision Tree Classifier is:\t\t",metrics.accuracy_score(test_y,pred_dsc_mo))
print("The recal score of Decision Tree Classifier is is:\t\t",metrics.recall_score(test_y,pred_dsc_mo))
print("The precision of Decision Tree Classifier is:\t\t",metrics.precision_score(test_y,pred_dsc_mo))
print("The f1 score of Decision Tree Classifier is:\t\t",metrics.f1_score(test_y,pred_dsc_mo))

The confussion metrix for Decision Tree is:
 [[37869 33531]
 [38106 33054]]
The accuracy of Decision Tree Classifier is:		 0.49749579124579124
The recal score of Decision Tree Classifier is is:		 0.4645025295109612
The precision of Decision Tree Classifier is:		 0.4964181121874296
The f1 score of Decision Tree Classifier is:		 0.47993030600021774


### Using Logistic Regression 

In [20]:
lr_mo=LogisticRegression()
lr_mo.fit(train_mox,train_moy)
pred_lr_mo=lr_mo.predict(test_mox)
print("The confussion metrix for Logistic Regression is:\n",metrics.confusion_matrix(test_y,pred_lr_mo))
print("The accuracy of Logistic Regression is:\t\t",metrics.accuracy_score(test_y,pred_lr_mo))
print("The recal score of Logistic Regression is is:\t\t",metrics.recall_score(test_y,pred_lr_mo))
print("The precision of Logistic Regression is:\t\t",metrics.precision_score(test_y,pred_lr_mo))
print("The f1 score of Logistic Regression is:\t\t",metrics.f1_score(test_y,pred_lr_mo))

The confussion metrix for Logistic Regression is:
 [[29823 41577]
 [29879 41281]]
The accuracy of Logistic Regression is:		 0.49876543209876545
The recal score of Logistic Regression is is:		 0.580115233277122
The precision of Logistic Regression is:		 0.4982138115812595
The f1 score of Logistic Regression is:		 0.5360542274279629


# For Severe Severity

In [21]:
balanced_sesx,balanced_sesy=smote.fit_resample(dataset1.iloc[:,0:19],dataset1.iloc[:,22])
print(balanced_sesx.shape)
print(balanced_sesy.shape)

(475200, 19)
(475200,)


## Splitting the Dateset for Training 

In [22]:
train_sx,test_sx,train_sy,test_sy=train_test_split(balanced_sesx,balanced_sesy,test_size=0.30,random_state=123)
print(train_sx.shape)
print(train_sy.shape)
print(test_sx.shape)
print(test_sy.shape)

(332640, 19)
(332640,)
(142560, 19)
(142560,)


# Training Models

### Using AdaBoost Classifier

In [23]:
ada_se=AdaBoostClassifier()
ada_se.fit(train_sx,train_sy)
pred_ada_se=ada_se.predict(test_sx)
print("The confussion metrix for AdaBoost is:\n",metrics.confusion_matrix(test_y,pred_ada_se))
print("The accuracy of AdaBoost Classifier is:\t\t",metrics.accuracy_score(test_y,pred_ada_se))
print("The recal score of AdaBoost Classifier is is:\t\t",metrics.recall_score(test_y,pred_ada_se))
print("The precision of AdaBoost Classifier is:\t\t",metrics.precision_score(test_y,pred_ada_se))
print("The f1 score of AdaBoost Classifier is:\t\t",metrics.f1_score(test_y,pred_ada_se))

The confussion metrix for AdaBoost is:
 [[38534 32866]
 [38107 33053]]
The accuracy of AdaBoost Classifier is:		 0.5021534792368125
The recal score of AdaBoost Classifier is is:		 0.4644884766722878
The precision of AdaBoost Classifier is:		 0.5014184074394332
The f1 score of AdaBoost Classifier is:		 0.482247463141692


### Using Random Forest Classifier


In [24]:
rf_se=RandomForestClassifier()
rf_se.fit(train_sx,train_sy)
pred_rf_se=rf_mo.predict(test_sx)
print("The confussion metrix for Decision Tree Classifier is:\n",metrics.confusion_matrix(test_y,pred_rf_se))
print("The accuracy of Decision Tree Classifier is:\t\t",metrics.accuracy_score(test_y,pred_rf_se))
print("The recal score of Decision Tree Classifier is is:\t\t",metrics.recall_score(test_y,pred_rf_se))
print("The precision of Decision Tree Classifier is:\t\t",metrics.precision_score(test_y,pred_rf_se))
print("The f1 score of Decision Tree Classifier is:\t\t",metrics.f1_score(test_y,pred_rf_se))

The confussion metrix for Decision Tree Classifier is:
 [[34691 36709]
 [34689 36471]]
The accuracy of Decision Tree Classifier is:		 0.499172278338945
The recal score of Decision Tree Classifier is is:		 0.5125210792580102
The precision of Decision Tree Classifier is:		 0.4983738726427986
The f1 score of Decision Tree Classifier is:		 0.5053484827490647


### Using Logistic Regression 

In [25]:
lr_se=LogisticRegression()
lr_se.fit(train_sx,train_sy)
pred_lr_se=lr_se.predict(test_sx)
print("The confussion metrix for Logistic Regression is:\n",metrics.confusion_matrix(test_y,pred_lr_se))
print("The accuracy of Logistic Regression is:\t\t",metrics.accuracy_score(test_y,pred_lr_se))
print("The recal score of Logistic Regression is is:\t\t",metrics.recall_score(test_y,pred_lr_se))
print("The precision of Logistic Regression is:\t\t",metrics.precision_score(test_y,pred_lr_se))
print("The f1 score of Logistic Regression is:\t\t",metrics.f1_score(test_y,pred_lr_se))

The confussion metrix for Logistic Regression is:
 [[38584 32816]
 [38168 32992]]
The accuracy of Logistic Regression is:		 0.5020763187429854
The recal score of Logistic Regression is is:		 0.46363125351320966
The precision of Logistic Regression is:		 0.5013372234378799
The f1 score of Logistic Regression is:		 0.48174756147421294


### Using Decision Tree Classifier

In [26]:
dsc_se=DecisionTreeClassifier()
dsc_se.fit(train_sx,train_sy)
pred_dsc_se=dsc_se.predict(test_sx)
print("The confussion metrix for Decision Tree is:\n",metrics.confusion_matrix(test_y,pred_dsc_se))
print("The accuracy of Decision Tree Classifier is:\t\t",metrics.accuracy_score(test_y,pred_dsc_se))
print("The recal score of Decision Tree Classifier is is:\t\t",metrics.recall_score(test_y,pred_dsc_se))
print("The precision of Decision Tree Classifier is:\t\t",metrics.precision_score(test_y,pred_dsc_se))
print("The f1 score of Decision Tree Classifier is:\t\t",metrics.f1_score(test_y,pred_dsc_se))

The confussion metrix for Decision Tree is:
 [[37715 33685]
 [37646 33514]]
The accuracy of Decision Tree Classifier is:		 0.4996422558922559
The recal score of Decision Tree Classifier is is:		 0.47096683530073075
The precision of Decision Tree Classifier is:		 0.49872765963779225
The f1 score of Decision Tree Classifier is:		 0.484449873156065


In [4]:
severe=dataset1.iloc[:,19]|dataset1.iloc[:20]|dataset1.iloc[:,22]

In [None]:
print(severe.value_counts())