## Loading Required Libraries

In [6]:
pip install imbalanced-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.0-py3-none-win_amd64.whl (99.7 MB)
Installing collected packages: xgboost
Successfully installed xgboost-2.0.0


In [2]:
# Data Preprocessing

import numpy as np
import pandas as pd

# Data Analysis

import plotly.express as px
import missingno as msno

# Feature Selection

import scipy.stats as stats
from scipy.stats import chi2_contingency

# Data Modeling

from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler 
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Model Evaluation & saving the model

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, recall_score, accuracy_score, precision_score, f1_score
import pickle

In [3]:
# Load the raw test records from a CSV file expected in the notebook's working directory.
# NOTE(review): the symptom columns come back with mixed types (the unique-value listing
# further down shows both the strings 'TRUE'/'FALSE'/'None' and real booleans). Consider
# an explicit `dtype=` or `low_memory=False` here — confirm downstream cleaning still works.
# NOTE(review): missing values appear as the literal string 'None'; newer pandas releases
# presumably treat 'None' as NA by default, so results may depend on the pandas version — verify.
data = pd.read_csv("corona_tested_006.csv")

  data = pd.read_csv("corona_tested_006.csv")


In [4]:
# Preview the first rows of the raw frame to sanity-check the columns that were loaded.
data.head()

Unnamed: 0,Ind_ID,Test_date,Cough_symptoms,Fever,Sore_throat,Shortness_of_breath,Headache,Corona,Age_60_above,Sex,Known_contact
0,1,11-03-2020,True,False,True,False,False,negative,,,Abroad
1,2,11-03-2020,False,True,False,False,False,positive,,,Abroad
2,3,11-03-2020,False,True,False,False,False,positive,,,Abroad
3,4,11-03-2020,True,False,False,False,False,negative,,,Abroad
4,5,11-03-2020,True,False,False,False,False,negative,,,Contact with confirmed


In [6]:
# Count missing (NaN) entries per column. Placeholder strings such as 'None' are
# ordinary values to pandas, so they do not show up in these counts.
data.isna().sum()

Ind_ID                 0
Test_date              0
Cough_symptoms         0
Fever                  0
Sore_throat            0
Shortness_of_breath    0
Headache               0
Corona                 0
Age_60_above           0
Sex                    0
Known_contact          0
dtype: int64

In [7]:
# Summarise the frame: row count, per-column non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 278848 entries, 0 to 278847
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   Ind_ID               278848 non-null  int64 
 1   Test_date            278848 non-null  object
 2   Cough_symptoms       278848 non-null  object
 3   Fever                278848 non-null  object
 4   Sore_throat          278848 non-null  object
 5   Shortness_of_breath  278848 non-null  object
 6   Headache             278848 non-null  object
 7   Corona               278848 non-null  object
 8   Age_60_above         278848 non-null  object
 9   Sex                  278848 non-null  object
 10  Known_contact        278848 non-null  object
dtypes: int64(1), object(10)
memory usage: 23.4+ MB


In [18]:
# Checking the levels for categorical features

def show(data, columns=None):
    """Print the distinct values (levels) found in each selected column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.
    columns : iterable of column labels, optional
        Columns to report. Defaults to every column from the third one
        onwards (``data.columns[2:]``), i.e. the first two columns are skipped.

    Returns
    -------
    None
        One line per column is written to stdout.
    """
    if columns is None:
        # Original behaviour: skip the two leading columns.
        columns = data.columns[2:]
    for col in columns:
        print(f"Feature: {col} with {data[col].unique()} Levels")

# List the distinct values of every column after the first two (Ind_ID and Test_date).
show(data)

Feature: Cough_symptoms with ['TRUE' 'FALSE' 'None' False True] Levels
Feature: Fever with ['FALSE' 'TRUE' 'None' False True] Levels
Feature: Sore_throat with ['TRUE' 'FALSE' 'None' False True] Levels
Feature: Shortness_of_breath with ['FALSE' 'TRUE' 'None' False True] Levels
Feature: Headache with ['FALSE' 'TRUE' 'None' False True] Levels
Feature: Corona with ['negative' 'positive' 'other'] Levels
Feature: Age_60_above with ['None' 'No' 'Yes'] Levels
Feature: Sex with ['None' 'male' 'female'] Levels
Feature: Known_contact with ['Abroad' 'Contact with confirmed' 'Other'] Levels


In [21]:
def show1(data, columns=None):
    """Print how often each value occurs in each selected column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.
    columns : iterable of column labels, optional
        Columns to report. Defaults to every column except the first
        (``data.columns[1:]``).

    Returns
    -------
    None
        For each column a header line, the ``value_counts()`` table and a
        blank separator line are written to stdout.
    """
    if columns is None:
        # Original behaviour: skip only the leading column.
        columns = data.columns[1:]
    for col in columns:
        # Header goes on its own line; embedding the Series repr in the same
        # f-string pushed its first row onto the header line and misaligned it.
        print(f"{col}:")
        print(data[col].value_counts())
        print()

# Print the value frequencies of every column except the first one (Ind_ID).
show1(data)

 Test_date  20-04-2020    10921
19-04-2020    10199
22-04-2020     9646
21-04-2020     9624
16-04-2020     9138
23-04-2020     8744
01-04-2020     8654
13-04-2020     8425
02-04-2020     8188
03-04-2020     8079
17-04-2020     7645
05-04-2020     7509
30-04-2020     7313
27-04-2020     7304
15-04-2020     7149
31-03-2020     7134
24-04-2020     7028
26-03-2020     6663
14-04-2020     6571
28-04-2020     6334
18-04-2020     6321
26-04-2020     6131
12-04-2020     5984
27-03-2020     5963
07-04-2020     5931
30-03-2020     5915
10-04-2020     5678
28-03-2020     5602
25-03-2020     5495
06-04-2020     5368
29-03-2020     5277
04-04-2020     5145
25-04-2020     5052
24-03-2020     4735
09-04-2020     4539
11-04-2020     4341
29-04-2020     4259
08-04-2020     4058
22-03-2020     3565
23-03-2020     3494
19-03-2020     2243
18-03-2020     1991
20-03-2020     1870
21-03-2020     1648
17-03-2020     1463
16-03-2020     1304
15-03-2020      985
13-03-2020      686
12-03-2020      634
14-03-20