# Importing Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Importing Dataset

In [2]:
dataset1=pd.read_csv("Cleaned_Data.csv")
dataset1.head()

Unnamed: 0,Fever,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,Diarrhea,...,Gender_Female,Gender_Male,Gender_Transgender,Severity_Mild,Severity_Moderate,Severity_None,Severity_Severe,Contact_Dont-Know,Contact_No,Contact_Yes
0,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,0,0,1
1,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,0,1,0
2,1,1,1,1,1,0,1,1,1,1,...,0,1,0,1,0,0,0,1,0,0
3,1,1,1,1,1,0,1,1,1,1,...,0,1,0,0,1,0,0,0,0,1
4,1,1,1,1,1,0,1,1,1,1,...,0,1,0,0,1,0,0,0,1,0


In [4]:
dataset1.shape

(316800, 26)

In [5]:
dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 316800 entries, 0 to 316799
Data columns (total 26 columns):
 #   Column                   Non-Null Count   Dtype
---  ------                   --------------   -----
 0   Fever                    316800 non-null  int64
 1   Tiredness                316800 non-null  int64
 2   Dry-Cough                316800 non-null  int64
 3   Difficulty-in-Breathing  316800 non-null  int64
 4   Sore-Throat              316800 non-null  int64
 5   None_Sympton             316800 non-null  int64
 6   Pains                    316800 non-null  int64
 7   Nasal-Congestion         316800 non-null  int64
 8   Runny-Nose               316800 non-null  int64
 9   Diarrhea                 316800 non-null  int64
 10  None_Experiencing        316800 non-null  int64
 11  Age_0-9                  316800 non-null  int64
 12  Age_10-19                316800 non-null  int64
 13  Age_20-24                316800 non-null  int64
 14  Age_25-59                316800 non-

In [6]:
dataset1.isnull().sum()

Fever                      0
Tiredness                  0
Dry-Cough                  0
Difficulty-in-Breathing    0
Sore-Throat                0
None_Sympton               0
Pains                      0
Nasal-Congestion           0
Runny-Nose                 0
Diarrhea                   0
None_Experiencing          0
Age_0-9                    0
Age_10-19                  0
Age_20-24                  0
Age_25-59                  0
Age_60+                    0
Gender_Female              0
Gender_Male                0
Gender_Transgender         0
Severity_Mild              0
Severity_Moderate          0
Severity_None              0
Severity_Severe            0
Contact_Dont-Know          0
Contact_No                 0
Contact_Yes                0
dtype: int64

In [3]:
smote=SMOTE()
balanced_msx,balanced_msy=smote.fit_resample(dataset1.iloc[:,0:10],dataset1.iloc[:,19])
print(balanced_msx.shape)
print(balanced_msy.shape)

(475200, 10)
(475200,)


In [7]:
severity_mild=dataset1.iloc[:,19]
severity_moderate=dataset1.iloc[:,20]
severity_none=dataset1.iloc[:,21]
severity_severe=dataset1.iloc[:,22]

In [13]:
mild_x,mild_y=smote.fit_resample(dataset1.iloc[:,0:10],dataset1.iloc[:,19])


In [14]:
moderate_x,moderate_y=smote.fit_resample(dataset1.iloc[:,0:10],dataset1.iloc[:,20])
severe_x,severe_y=smote.fit_resample(dataset1.iloc[:,0:19],dataset1.iloc[:,22])

In [20]:
severe= mild_y*moderate_y*severe_y
print(severe)

0         0
1         0
2         0
3         0
4         0
         ..
475195    1
475196    1
475197    1
475198    1
475199    1
Length: 475200, dtype: int64


In [21]:
print(severe.value_counts())

0    316800
1    158400
dtype: int64


In [58]:
final_x,final_y=smote.fit_resample(real_x,severe)

In [5]:
print(severity_mild.value_counts())
print(severity_none.value_counts())
print(severity_moderate.value_counts())
print(severity_severe.value_counts())

0    237600
1     79200
Name: Severity_Mild, dtype: int64
0    237600
1     79200
Name: Severity_None, dtype: int64
0    237600
1     79200
Name: Severity_Moderate, dtype: int64
0    237600
1     79200
Name: Severity_Severe, dtype: int64


In [63]:
print(final_x.shape)
print(final_y.shape)

(475200, 19)
(475200,)


# For Mild Severity

In [9]:
real_x=dataset1.iloc[:,0:10]
print(real_x.shape)

(316800, 10)


In [22]:
train_x,test_x,train_y,test_y=train_test_split(mild_x,severe,test_size=0.3,random_state=55)
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

(332640, 10)
(332640,)
(142560, 10)
(142560,)


In [23]:
rf=RandomForestClassifier()
rf.fit(train_x,train_y)
pred=rf.predict(test_x)
print("The confussion metrix for Random forest is:\n",metrics.confusion_matrix(test_y,pred))
print("The accuracy of Random Forest Classifier is:\t\t",metrics.accuracy_score(test_y,pred))
print("The recal score of Random Forest Classifier is:\t\t",metrics.recall_score(test_y,pred))
print("The precision of Random Forest Classifier is:\t\t",metrics.precision_score(test_y,pred))
print("The f1 score of Random Forest Classifier is:\t\t",metrics.f1_score(test_y,pred))


The confussion metrix for Random forest is:
 [[94918     0]
 [47642     0]]
The accuracy of Random Forest Classifier is:		 0.66581088664422
The recal score of Random Forest Classifier is:		 0.0
The precision of Random Forest Classifier is:		 0.0
The f1 score of Random Forest Classifier is:		 0.0


  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
lr=LogisticRegression(max_iter=100000)
lr.fit(train_x,train_y)
pred=lr.predict(test_x)
print("The confussion metrix for Random forest is:\n",metrics.confusion_matrix(test_y,pred))
print("The accuracy of Random Forest Classifier is:\t\t",metrics.accuracy_score(test_y,pred))
print("The recal score of Random Forest Classifier is:\t\t",metrics.recall_score(test_y,pred))
print("The precision of Random Forest Classifier is:\t\t",metrics.precision_score(test_y,pred))
print("The f1 score of Random Forest Classifier is:\t\t",metrics.f1_score(test_y,pred))

The confussion metrix for Random forest is:
 [[45401 25743]
 [46046 25370]]
The accuracy of Random Forest Classifier is:		 0.49642957351290684
The recal score of Random Forest Classifier is:		 0.35524252268399237
The precision of Random Forest Classifier is:		 0.4963512218026725
The f1 score of Random Forest Classifier is:		 0.41410604836406073


In [7]:
dsc=DecisionTreeClassifier()
dsc.fit(train_x,train_y)
pred=dsc.predict(test_x)
print("The confussion metrix for Random forest is:\n",metrics.confusion_matrix(test_y,pred))
print("The accuracy of Random Forest Classifier is:\t\t",metrics.accuracy_score(test_y,pred))
print("The recal score of Random Forest Classifier is:\t\t",metrics.recall_score(test_y,pred))
print("The precision of Random Forest Classifier is:\t\t",metrics.precision_score(test_y,pred))
print("The f1 score of Random Forest Classifier is:\t\t",metrics.f1_score(test_y,pred))

The confussion metrix for Random forest is:
 [[36654 34490]
 [37526 33890]]
The accuracy of Random Forest Classifier is:		 0.49483726150392815
The recal score of Random Forest Classifier is:		 0.47454351965946007
The precision of Random Forest Classifier is:		 0.49561275226674467
The f1 score of Random Forest Classifier is:		 0.4848493519127873
