# COVID-19 Severity Model with Logistic Regression & Random Forest Classifier

## Importing all the required libraries 

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the dataset from a path relative to the notebook's working directory
# and preview it as the cell output.
# NOTE(review): the saved output of this cell is a FileNotFoundError, so the
# CSV has to be placed next to the notebook (or the path adjusted) before any
# later cell can run.
csv_path = "Covid Dataset.csv"
df = pd.read_csv(csv_path)
df.head()

FileNotFoundError: [Errno 2] File Covid Dataset.csv does not exist: 'Covid Dataset.csv'

In [None]:
# Count missing values per column. The original note states the dataset has
# no nulls; that cannot be confirmed from this saved run because the load
# cell failed, so check that every count shown here is zero.
null_counts = df.isnull().sum()
null_counts

In [None]:
# Summary statistics for the columns of df, shown as the cell output.
df.describe()

In [None]:
# Drop the "Wearing Masks" and "Sanitization from Market" columns: per the
# original note, each of them takes only one value, so they carry no signal.
# (Not re-verified here -- TODO confirm with df.nunique().)
#
# The frame is reassigned instead of mutated with inplace=True, and
# errors="ignore" is passed, so the cell is idempotent: running it a second
# time no longer raises KeyError because the columns are already gone.
# NOTE(review): errors="ignore" also silences a misspelled column name, so
# keep the two labels in sync with the CSV header.
df = df.drop(columns=["Wearing Masks", "Sanitization from Market"], errors="ignore")

## Feature Transformation

In [None]:
# Categorical Features -> Numerical Data
#
# Each listed column of df is replaced by its label-encoded integer codes.
# The original cell repeated the same statement 19 times and refit one shared
# encoder each time, so the category -> code mapping of every column except
# the last was thrown away. Here each column gets its own fitted encoder,
# kept in `label_encoders`, so the mapping can be inspected
# (label_encoders[col].classes_) or reversed (inverse_transform) later.
#
# Column labels are copied verbatim from the original cell; 'Fatigue ' and
# 'Gastrointestinal ' really do end with a trailing space there.
CATEGORICAL_COLUMNS = [
    'Breathing Problem',
    'Fever',
    'Dry Cough',
    'Sore throat',
    'Running Nose',
    'Asthma',
    'Chronic Lung Disease',
    'Headache',
    'Heart Disease',
    'Diabetes',
    'Hyper Tension',
    'Fatigue ',
    'Gastrointestinal ',
    'Abroad travel',
    'Contact with COVID Patient',
    'Attended Large Gathering',
    'Visited Public Exposed Places',
    'Family working in Public Exposed Places',
    'COVID-19',
]

label_encoders = {}
for column in CATEGORICAL_COLUMNS:
    # The name `l` is kept for backward compatibility: after the loop it is
    # the encoder fitted on 'COVID-19', exactly as in the original cell.
    l = LabelEncoder()
    df[column] = l.fit_transform(df[column])
    label_encoders[column] = l

In [None]:
# New table: preview the first rows of df as it stands after the preceding
# transformation cells have run.
df.head()

## Plotting the feature correlations using a heat map

In [None]:
# Annotated heat map of the pairwise correlation matrix of df's columns.
# The 25x25 figure size is taken from the original cell.
plt.figure(figsize=(25, 25))
corr_matrix = df.corr()
sns.heatmap(data=corr_matrix, annot=True)
plt.show()

## The ML Modelling:

### Defining x (features) and y (target) from the dataset and splitting into train/test sets:

In [None]:
# Target is the 'COVID-19' column; the features are every other column of df.
y = df['COVID-19']
x = df.drop(columns='COVID-19')

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

### Logistic Regression :

In [None]:
# Create a LogisticRegression with its default settings and train it on the
# training split. The fitted estimator is the last expression so the cell
# still displays it.
LRmodel = LogisticRegression().fit(x_train, y_train)
LRmodel

In [None]:
# Making predictions on the test split with the logistic-regression model,
# then reporting accuracy, confusion matrix and classification report.
y_pred = LRmodel.predict(x_test)

lr_test_accuracy = accuracy_score(y_test, y_pred)
lr_confusion = confusion_matrix(y_test, y_pred)
lr_report = classification_report(y_test, y_pred)

print("Accuracy Score using LR model is: ", lr_test_accuracy, "\n\n")
print("Confusion matrix using LR model is :\n\n ", lr_confusion, "\n\n")
print("Classification report using LR model is :\n\n", lr_report)

In [None]:
# Test-set score of the logistic-regression model, scaled to a percentage
# and shown as the cell output.
LR_accuracy = 100 * LRmodel.score(x_test, y_test)
LR_accuracy

### Random Forest Classifier

In [None]:
# Create a RandomForestClassifier (random_state fixed at 0 so the forest is
# repeatable) and train it on the training split. The fitted estimator is
# the last expression so the cell still displays it.
RFmodel = RandomForestClassifier(random_state=0).fit(x_train, y_train)
RFmodel

In [None]:
# Making predictions on the test split with the random-forest model, then
# reporting accuracy, confusion matrix and classification report.
y_pred = RFmodel.predict(x_test)

rf_test_accuracy = accuracy_score(y_test, y_pred)
rf_confusion = confusion_matrix(y_test, y_pred)
rf_report = classification_report(y_test, y_pred)

print("Accuracy Score using RF model is: ", rf_test_accuracy, "\n\n")
print("Confusion matrix using RF model is :\n\n ", rf_confusion, "\n\n")
print("Classification report using RF model is :\n\n", rf_report)

In [None]:
# Test-set score of the random-forest model, scaled to a percentage and
# shown as the cell output.
RF_accuracy = 100 * RFmodel.score(x_test, y_test)
RF_accuracy