<a href="https://colab.research.google.com/github/brainmentorspvtltd/ML_OnlineFDP/blob/master/ML_COVID_19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [0]:
# Location of the raw CSV; kept in a named constant so the source is easy to spot and swap.
DATA_URL = 'https://raw.githubusercontent.com/brainmentorspvtltd/ML_OnlineFDP/master/covid_19_dataset.csv'
dataset = pd.read_csv(DATA_URL)

In [3]:
dataset.head()

Unnamed: 0,Country,Age,Gender,fever,Bodypain,Runny_nose,Difficulty_in_breathing,Nasal_congestion,Sore_throat,Severity,Contact_with_covid_patient,Infected
0,China,10,Male,102,1,0,0,0,1,Mild,No,0
1,Italy,20,Male,103,1,1,0,0,0,Moderate,Not known,1
2,Iran,55,Transgender,99,0,0,0,1,1,Severe,No,0
3,Republic of Korean,37,Female,100,0,1,1,0,0,Mild,Yes,1
4,France,45,Male,101,1,1,1,1,0,Moderate,Yes,1


In [4]:
pd.unique(dataset['Country'])

array(['China', 'Italy', 'Iran', 'Republic of Korean', 'France', 'Spain',
       'Germany', 'UAE', 'Russia', 'Canada', 'Netherlands', 'Mexico',
       'Pakistan', 'Chile', 'Sweden', 'Peru', 'Ireland', 'Singapore',
       'Turkey', 'India', 'Australia', 'Malaysia', 'Argentina', 'Kuwait',
       'Morocco', 'Afghanistan', 'Finland', 'Norway', 'Bangladesh',
       'South Korea', 'Indonesia', 'Denmark', 'Romania', 'Ukraine',
       'Serbia', 'USA', 'Egypt', 'Thailand', 'Iraq', 'New Zealand',
       'Japan', 'South Africa', 'Slovakia', 'Somalia', 'Sri Lanka',
       'Oman', 'Belgium', 'Kazakhstan', 'Bahrain', 'Nigeria', 'Hungary',
       'Philippines', 'Armenia', 'Bulgaria', 'Cuba', 'Sudan', 'Estoria',
       'CostaRica', 'Gabon', 'Maldives', 'Albania', 'Cyprus', 'Kenya',
       'Georgia', 'Jordan', 'SanMario', 'Malta', 'Taiwan', 'Mauritius',
       'Congo', 'Vietnam', 'Zambia', 'Haiti', 'Liberia', 'Nepal', 'Togo',
       'Myanmar', 'Cambodia', 'Uganda', 'Bermuda', 'Monaco', 'Barbados',
    

In [5]:
len(pd.unique(dataset['Country']))

97

In [6]:
pd.unique(dataset['Gender'])

array(['Male', 'Transgender', 'Female'], dtype=object)

In [7]:
pd.unique(dataset['Severity'])

array(['Mild', 'Moderate', 'Severe'], dtype=object)

In [8]:
pd.unique(dataset['Contact_with_covid_patient'])

array(['No', 'Not known', 'Yes', 'yes'], dtype=object)

In [0]:
# The column holds both 'Yes' and 'yes' (see the unique values above);
# lower-casing every entry merges them into a single category.
contact_col = 'Contact_with_covid_patient'
dataset[contact_col] = dataset[contact_col].str.lower()

In [10]:
pd.unique(dataset['Contact_with_covid_patient'])

array(['no', 'not known', 'yes'], dtype=object)

In [11]:
dataset.groupby('Country')['fever'].mean()

Country
Afghanistan    101.000000
Albania        100.642857
Argentina      101.000000
Armenia         99.750000
Australia      102.583333
                  ...    
Ukraine         99.833333
Vietnam        100.304348
Yemen           99.750000
Zambia         101.500000
Zimbabwe       100.555556
Name: fever, Length: 97, dtype: float64

In [0]:
# Keep only the rows labelled as infected (Infected == 1).
is_infected = dataset['Infected'] == 1
infected = dataset[is_infected]

In [13]:
infected.groupby('Country')['fever'].mean()

Country
Afghanistan    100.888889
Albania        100.555556
Argentina      101.666667
Armenia        100.235294
Australia      102.666667
                  ...    
Ukraine        100.727273
Vietnam        101.093750
Yemen          100.000000
Zambia         101.571429
Zimbabwe       102.181818
Name: fever, Length: 94, dtype: float64

In [0]:
gender_label = LabelEncoder()

In [23]:
gender_label.fit_transform(dataset['Gender'])

array([1, 1, 2, ..., 0, 0, 0])

In [0]:
# Integer-encode the Gender column and keep the result for the one-hot step below.
# This refits the same encoder that the preview cell above already fitted.
gender = gender_label.fit_transform(dataset['Gender'])

In [0]:
# Learn the Severity categories first, then map every row to its integer code.
severity_label = LabelEncoder().fit(dataset['Severity'])
severity = severity_label.transform(dataset['Severity'])

In [0]:
# Learn the (already lower-cased) contact categories, then map every row to its integer code.
contact_label = LabelEncoder().fit(dataset['Contact_with_covid_patient'])
contact = contact_label.transform(dataset['Contact_with_covid_patient'])

In [27]:
severity

array([0, 1, 2, ..., 0, 0, 2])

In [28]:
contact

array([0, 1, 0, ..., 2, 1, 2])

In [0]:
# One-hot encode the integer gender labels.
# The labels are re-derived from the fitted gender_label encoder instead of being
# read back from `gender`: this cell rebinds `gender` to the encoded matrix, so
# reading it here would make the cell fail when it is run a second time.
gender_onehot = OneHotEncoder()
gender_codes = gender_label.transform(dataset['Gender']).reshape(-1, 1)
gender = gender_onehot.fit_transform(gender_codes)

In [0]:
# Convert the encoder's sparse result into a dense ndarray so it can be
# column-stacked with the other feature arrays further down.
gender = gender.toarray()

In [0]:
# One-hot encode the integer severity labels and densify the result.
# The labels are re-derived from the fitted severity_label encoder instead of being
# read back from `severity`: this cell rebinds `severity` to the (n, 3) one-hot
# array, so reshaping it to (n, 1) on a second run would raise.
severity_onehot = OneHotEncoder()
severity_codes = severity_label.transform(dataset['Severity']).reshape(-1, 1)
severity = severity_onehot.fit_transform(severity_codes).toarray()

In [0]:
# One-hot encode the integer contact labels and densify the result.
# The labels are re-derived from the fitted contact_label encoder instead of being
# read back from `contact`: this cell rebinds `contact` to the (n, 3) one-hot
# array, so reshaping it to (n, 1) on a second run would raise.
contact_onehot = OneHotEncoder()
contact_codes = contact_label.transform(dataset['Contact_with_covid_patient']).reshape(-1, 1)
contact = contact_onehot.fit_transform(contact_codes).toarray()

In [36]:
contact

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [0]:
# Gender is carried separately as a one-hot array, so remove the raw text column.
updated_dataset = dataset.drop(columns=['Gender'])

In [0]:
# Remove every raw categorical column that is carried separately as a one-hot array.
# Dropping from `dataset` in a single call (rather than from `updated_dataset`
# itself) gives the same frame and lets this cell be re-run without a KeyError
# on columns that were already removed.
updated_dataset = dataset.drop(
    columns=['Gender', 'Severity', 'Contact_with_covid_patient']
)

In [39]:
updated_dataset.head()

Unnamed: 0,Country,Age,fever,Bodypain,Runny_nose,Difficulty_in_breathing,Nasal_congestion,Sore_throat,Infected
0,China,10,102,1,0,0,0,1,0
1,Italy,20,103,1,1,0,0,0,1
2,Iran,55,99,0,0,0,1,1,0
3,Republic of Korean,37,100,0,1,1,0,0,1
4,France,45,101,1,1,1,1,0,1


In [0]:
# Features: every column except the first and the last one
# (per the preview above these are Country and Infected respectively).
feature_frame = updated_dataset.iloc[:, 1:-1]
X = feature_frame.values

# Target: the Infected flag.
target_series = updated_dataset['Infected']
y = target_series.values

In [41]:
X[0]

array([ 10, 102,   1,   0,   0,   0,   1])

In [42]:
y[0]

0

In [44]:
contact.shape

(2499, 3)

In [45]:
severity.shape

(2499, 3)

In [46]:
gender.shape

(2499, 3)

In [47]:
X.shape

(2499, 7)

In [0]:
# Assemble the full design matrix: numeric columns followed by the three
# one-hot blocks (gender, severity, contact).
# The numeric part is taken from updated_dataset again instead of from `X`,
# because appending to `X` in place would add nine more columns each time
# this cell is re-run.
base_features = updated_dataset.iloc[:, 1:-1].values
X = np.c_[base_features, gender, severity, contact]

In [51]:
X.shape

(2499, 16)

In [52]:
X[0]

array([ 10., 102.,   1.,   0.,   0.,   0.,   1.,   0.,   1.,   0.,   1.,
         0.,   0.,   1.,   0.,   0.])

In [0]:
# Standardise every feature column (zero mean, unit variance).
# NOTE(review): the scaler is fitted on all rows before train_test_split runs in a
# later cell, so test-set statistics influence the scaling — consider fitting on
# x_train only.
# NOTE(review): X is overwritten with its scaled version, so running this cell a
# second time refits the scaler on already-scaled data.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [54]:
X[0]

array([-1.93214208,  0.82660093,  0.73015672, -0.64611936, -0.9688716 ,
       -0.94205214,  0.98058068, -0.9673204 ,  0.99401551, -0.11744404,
        0.75545367, -0.51571062, -0.42544317,  1.03793388, -0.58243308,
       -0.60092521])

In [0]:
# Hold out 25% of the rows for evaluation.
# A fixed random_state makes the split — and therefore every metric computed
# below — reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [56]:
x_train.shape

(1874, 16)

In [57]:
x_test.shape

(625, 16)

In [58]:
y_train.shape

(1874,)

In [59]:
# Train a logistic-regression classifier with scikit-learn's default settings.
# fit() stays as the cell's last expression so the notebook echoes the estimator.
logistic = LogisticRegression()
logistic.fit(X=x_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [0]:
y_pred = logistic.predict(x_test)

In [61]:
y_pred[0]

1

In [0]:
from sklearn.metrics import confusion_matrix

In [63]:
confusion_matrix(y_test,y_pred)

array([[296,  21],
       [ 16, 292]])

In [64]:
accuracy_score(y_test, y_pred)

0.9408

In [0]:
# Example patient used to demonstrate a single prediction.

# Demographics
user_age, user_gender = 23, 'Male'

# Body temperature (same column as `fever` in the dataset)
user_fever = 98

# Binary symptom flags (1 = present, 0 = absent)
user_bodypain, user_runny_nose = 1, 1
user_breath, user_nasal, user_throat = 0, 0, 0

# Categorical answers; they must match categories seen during training
user_severity, user_contact = 'Moderate', 'yes'

In [0]:
# Encode the user's categorical answers with the encoders fitted on the training data:
# label-encode to an integer code, reshape to a single-row column, then one-hot encode.
gen = gender_onehot.transform(gender_label.transform([user_gender]).reshape(-1, 1))
sev = severity_onehot.transform(severity_label.transform([user_severity]).reshape(-1, 1))
# The training column was lower-cased before contact_label was fitted, so apply
# the same normalisation here; otherwise an answer such as 'Yes' is an unseen label.
con = contact_onehot.transform(
    contact_label.transform([user_contact.lower()]).reshape(-1, 1)
)

In [0]:
# Densify the three one-hot rows so they can be column-stacked with the numeric inputs.
# NOTE(review): each name is overwritten with its dense form, so this cell cannot
# be re-run without first re-running the encoding cell above.
gen = gen.toarray()
sev = sev.toarray()
con = con.toarray()

In [0]:
# Numeric inputs in the same order as the numeric training columns:
# Age, fever, Bodypain, Runny_nose, Difficulty_in_breathing, Nasal_congestion, Sore_throat.
numeric_inputs = [
    user_age,
    user_fever,
    user_bodypain,
    user_runny_nose,
    user_breath,
    user_nasal,
    user_throat,
]
# Wrap in an outer list to get a single-row 2-D array.
test_data = np.array([numeric_inputs])

In [0]:
# Append the one-hot blocks to the numeric inputs, in the same order used for X
# (gender, severity, contact).
row_parts = [test_data, gen, sev, con]
test_data = np.hstack(row_parts)

In [84]:
test_data

array([[23., 98.,  1.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,
         0.,  0.,  1.]])

In [0]:
# Scale the user row with the scaler fitted earlier on X.
# NOTE(review): test_data is overwritten with its scaled version, so running this
# cell twice scales the row twice — rebuild test_data from the cells above
# before re-running.
test_data = scaler.transform(test_data)

In [86]:
test_data

array([[-1.18399548, -1.51145874,  0.73015672,  1.54770166, -0.9688716 ,
        -0.94205214, -1.0198039 , -0.9673204 ,  0.99401551, -0.11744404,
        -1.32370791,  1.93907194, -0.42544317, -0.96345251, -0.58243308,
         1.66410059]])

In [87]:
logistic.predict(test_data)

array([1])

In [78]:
# Per-class probabilities for the user row (column order follows logistic.classes_).
# NOTE(review): the saved output of this cell has execution count 78, earlier than
# the predict() cell above (87), and it favours class 0 while predict() printed 1.
# The two outputs come from different kernel states; restart and run all cells
# top to bottom to get a consistent pair.
logistic.predict_proba(test_data)

array([[9.99588363e-01, 4.11637355e-04]])