### Import dependencies

In [79]:
from sklearnex import patch_sklearn
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [80]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

### Data Loading

In [81]:
data_df = pd.read_csv('/content/data.csv')

In [82]:
data_df.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,None_Experiencing,Age_0-9,Age_10-19,Age_20-24,Age_25-59,Age_60+,Gender_Female,Gender_Male,Severity_Mild,Severity_Moderate,Severity_None
0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0
1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0
2,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,1,0,0
3,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0
4,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0,1,0


In [83]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 316800 entries, 0 to 316799
Data columns (total 19 columns):
 #   Column                   Non-Null Count   Dtype
---  ------                   --------------   -----
 0   Tiredness                316800 non-null  int64
 1   Dry-Cough                316800 non-null  int64
 2   Difficulty-in-Breathing  316800 non-null  int64
 3   Sore-Throat              316800 non-null  int64
 4   None_Sympton             316800 non-null  int64
 5   Pains                    316800 non-null  int64
 6   Nasal-Congestion         316800 non-null  int64
 7   Runny-Nose               316800 non-null  int64
 8   None_Experiencing        316800 non-null  int64
 9   Age_0-9                  316800 non-null  int64
 10  Age_10-19                316800 non-null  int64
 11  Age_20-24                316800 non-null  int64
 12  Age_25-59                316800 non-null  int64
 13  Age_60+                  316800 non-null  int64
 14  Gender_Female            316800 non-

In [84]:
data_df.describe()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,None_Experiencing,Age_0-9,Age_10-19,Age_20-24,Age_25-59,Age_60+,Gender_Female,Gender_Male,Severity_Mild,Severity_Moderate,Severity_None
count,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0,316800.0
mean,0.5,0.5625,0.5,0.3125,0.0625,0.363636,0.545455,0.545455,0.090909,0.2,0.2,0.2,0.2,0.2,0.333333,0.333333,0.25,0.25,0.25
std,0.500001,0.496079,0.500001,0.463513,0.242062,0.481046,0.49793,0.49793,0.28748,0.400001,0.400001,0.400001,0.400001,0.400001,0.471405,0.471405,0.433013,0.433013,0.433013
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.5,1.0,0.5,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.25,0.25,0.25
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Data Transformation

In [85]:
# rename Severity_None to target
data_df.rename(columns={'Severity_None': 'target'}, inplace=True)
# drop Severity_Mild and Severity_Moderate
data_df.drop(columns=['Severity_Mild', 'Severity_Moderate'], inplace=True)
# data_df
data_df.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,None_Experiencing,Age_0-9,Age_10-19,Age_20-24,Age_25-59,Age_60+,Gender_Female,Gender_Male,target
0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0
1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0
2,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0
3,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0
4,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,1,0


In [86]:
# rename Gender_Female to gender
data_df.rename(columns={'Gender_Female': 'gender'}, inplace=True)
# drop Gender_Male
data_df.drop(columns=['Gender_Male'], inplace=True)
# data_df
data_df.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,None_Sympton,Pains,Nasal-Congestion,Runny-Nose,None_Experiencing,Age_0-9,Age_10-19,Age_20-24,Age_25-59,Age_60+,gender,target
0,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0
1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0
2,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0
3,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0
4,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0,0


In [87]:
data_df.drop(columns=['None_Sympton', 'None_Experiencing'], inplace=True)
data_df.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,Pains,Nasal-Congestion,Runny-Nose,Age_0-9,Age_10-19,Age_20-24,Age_25-59,Age_60+,gender,target
0,1,1,1,1,1,1,1,1,0,0,0,0,0,0
1,1,1,1,1,1,1,1,1,0,0,0,0,0,0
2,1,1,1,1,1,1,1,1,0,0,0,0,0,0
3,1,1,1,1,1,1,1,1,0,0,0,0,0,0
4,1,1,1,1,1,1,1,1,0,0,0,0,0,0


In [88]:
data_df['age'] = data_df['Age_0-9'].apply(lambda x: 9 if x == 1 else 0)
data_df['age'] += data_df['Age_10-19'].apply(lambda x: 19 if x == 1 else 0)
data_df['age'] += data_df['Age_20-24'].apply(lambda x: 24 if x == 1 else 0)
data_df['age'] += data_df['Age_25-59'].apply(lambda x: 59 if x == 1 else 0)
data_df['age'] += data_df['Age_60+'].apply(lambda x: 60 if x == 1 else 0)
data_df.drop(columns=['Age_0-9', 'Age_10-19', 'Age_20-24',
             'Age_25-59', 'Age_60+'], inplace=True)
data_df.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,Pains,Nasal-Congestion,Runny-Nose,gender,target,age
0,1,1,1,1,1,1,1,0,0,9
1,1,1,1,1,1,1,1,0,0,9
2,1,1,1,1,1,1,1,0,0,9
3,1,1,1,1,1,1,1,0,0,9
4,1,1,1,1,1,1,1,0,0,9


In [89]:
# Take only random 300 male and 300 female
data_df = data_df.groupby('gender').apply(lambda x: x.sample(300)).reset_index(drop=True)
data_df

  data_df = data_df.groupby('gender').apply(lambda x: x.sample(300)).reset_index(drop=True)


Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,Pains,Nasal-Congestion,Runny-Nose,gender,target,age
0,0,1,1,0,0,0,1,0,1,24
1,0,1,1,0,1,1,0,0,0,19
2,0,1,0,0,0,1,1,0,0,9
3,0,0,1,1,0,1,1,0,1,9
4,1,1,0,0,0,1,0,0,1,59
...,...,...,...,...,...,...,...,...,...,...
595,1,1,0,0,0,1,1,1,0,9
596,0,0,0,0,0,0,1,1,0,19
597,0,0,0,0,1,0,0,1,1,9
598,1,1,1,0,0,0,0,1,0,9


### Data Modelling

In [90]:
x = data_df.drop(columns=['target'])
y = data_df['target']

In [91]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [92]:
xgb_classifier= XGBClassifier()
xgb_classifier.fit(x_train, y_train)
y_pred = xgb_classifier.predict(x_test)

### Model Evaluation

In [93]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.6166666666666667


In [94]:
# classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.78      0.74        85
           1       0.30      0.23      0.26        35

    accuracy                           0.62       120
   macro avg       0.50      0.50      0.50       120
weighted avg       0.59      0.62      0.60       120



In [95]:
data_df.columns

Index(['Tiredness', 'Dry-Cough', 'Difficulty-in-Breathing', 'Sore-Throat',
       'Pains', 'Nasal-Congestion', 'Runny-Nose', 'gender', 'target', 'age'],
      dtype='object')

In [96]:
# make new csv file with the new data
data_df.to_csv('new_data.csv', index = False)

In [97]:

data= pd.read_csv('new_data.csv')
data.head()

Unnamed: 0,Tiredness,Dry-Cough,Difficulty-in-Breathing,Sore-Throat,Pains,Nasal-Congestion,Runny-Nose,gender,target,age
0,0,1,1,0,0,0,1,0,1,24
1,0,1,1,0,1,1,0,0,0,19
2,0,1,0,0,0,1,1,0,0,9
3,0,0,1,1,0,1,1,0,1,9
4,1,1,0,0,0,1,0,0,1,59


In [98]:

# showing number of unique values in each column with values what it is
for column in data.columns:
    print(column, data[column].unique())

Tiredness [0 1]
Dry-Cough [1 0]
Difficulty-in-Breathing [1 0]
Sore-Throat [0 1]
Pains [0 1]
Nasal-Congestion [0 1]
Runny-Nose [1 0]
gender [0 1]
target [1 0]
age [24 19  9 59 60]


In [101]:
x = data.drop(columns=['target'])
y = data['target']

In [103]:
model = XGBClassifier()
pred = model.fit(x, y)

In [104]:
def predict_new_data(data_of_new_patient):
    data_of_new_patient = np.array(data_of_new_patient).reshape(1, -1)
    prediction = pred.predict(data_of_new_patient)
    return prediction[0]

In [105]:

# Example dummy data
data_of_new_patient = [0, 1, 1, 0, 1, 1, 1, 0, 21]

# Call the predict_new_data function
prediction = predict_new_data(data_of_new_patient)

# Print the prediction
print("Prediction:", prediction)

Prediction: 0
