In [18]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier 
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import gradio as gr

import joblib

In [19]:

def cough_converter(x):
    try:
        return Int32(x)
    except:
        return None

def convertor(x):
    try:
       retval = np.int32(x)
    except :
       retval = None
    return retval

def genderconvertor(g):
    if g == 'female':
        return np.int32(0)
    elif g == 'male':
        return np.int32(1)
    else:
        return None
   

def corona_result_convertor(r):
    if r == 'negative':
        return np.int32(0)
    elif r == 'positive':
        return np.int32(1)
    else:
        return None 


def age_convertor(c):
    if c == 'Yes':
        return np.int32(1)
    elif c == 'No':
        return np.int32(0)
    else:
        return None

def contact_convertor(c):
    if c == 'Other':
        return np.int32(0)
    elif c == 'Abroad':
        return np.int32(1)
    elif c == 'Contact with confirmed':
        return np.int32(2)

parse_dates = ['test_date']
converters = {'cough': convertor,   
              'fever': convertor, 'sore_throat': convertor,
              'shortness_of_breath': convertor, 'head_ache': convertor,'age_60_and_above': age_convertor, 
              'gender': genderconvertor,
              'corona_result' : corona_result_convertor,  
              'test_indication' : contact_convertor}

orig_df = pd.read_csv("corona_tested_individuals_ver_006.english.csv", 
                 parse_dates=parse_dates, converters=converters, low_memory=False)


print(orig_df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 278848 entries, 0 to 278847
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   test_date            278848 non-null  datetime64[ns]
 1   cough                278596 non-null  float64       
 2   fever                278596 non-null  float64       
 3   sore_throat          278847 non-null  float64       
 4   shortness_of_breath  278847 non-null  float64       
 5   head_ache            278847 non-null  float64       
 6   corona_result        274956 non-null  float64       
 7   age_60_and_above     151528 non-null  float64       
 8   gender               259285 non-null  float64       
 9   test_indication      278848 non-null  int32         
dtypes: datetime64[ns](1), float64(8), int32(1)
memory usage: 20.2 MB
None


In [20]:
# Cleanup the data 

df = orig_df.copy()
df.dropna(inplace=True)


In [21]:
Input = df.drop(['test_date', 'corona_result'], axis=1)
output = df['corona_result']

X_train, X_test, y_train, y_test = train_test_split(Input, output, test_size=0.2)


model = DecisionTreeClassifier()
model.fit(X_train.values, y_train)





DecisionTreeClassifier()

In [22]:
predictions = model.predict(X_test)

score = accuracy_score(y_test, predictions) 
print("score = %f" % score) 
joblib.dump(model, "saved-model.joblib") 


score = 0.956748




['saved-model.joblib']

In [23]:
model.predict([[1.,1.,1.,1.,0.,1.,1.,0]])

array([1.])

In [24]:
for col in ['cough', 'fever', 'sore_throat', 'shortness_of_breath',
       'head_ache', 'corona_result', 'age_60_and_above', 'gender',
       'test_indication']:
    print(col, df[col].unique())

cough [1. 0.]
fever [0. 1.]
sore_throat [0. 1.]
shortness_of_breath [0. 1.]
head_ache [0. 1.]
corona_result [0. 1.]
age_60_and_above [1. 0.]
gender [1. 0.]
test_indication [0 1 2]


In [25]:
test_input = {'cough' : 1, 'fever' :0, 'sore_throat' :1, 'shortness_of_breath':1,
       'head_ache' :0,  'age_60_and_above' :1, 'gender' :1,
       'test_indication':1}


In [26]:
t = tree.export_graphviz(model, out_file='covid.dot',
                          feature_names=['cough', 'fever', 'sore_throat', 'shortness_of_breath', 'head_ache', 
                                         'age_60_and_above', 'gender', 'test_indication'], 
                          class_names=[str(x) for x in list(sorted(y_train.unique()))], label='all', rounded=True, filled=True)


In [27]:
reloaded = joblib.load('saved-model.joblib')

In [28]:
reloaded.predict([[1.,1.,1.,1.,0.,1.,1.,0]])

array([1.])

In [29]:
for col in Input.columns:
    print(col, Input[col].unique())

cough [1. 0.]
fever [0. 1.]
sore_throat [0. 1.]
shortness_of_breath [0. 1.]
head_ache [0. 1.]
age_60_and_above [1. 0.]
gender [1. 0.]
test_indication [0 1 2]


In [30]:
Input.shape

(136294, 8)

In [31]:
yesno_dict = {'Yes': 1.0, 'No':0.0}
gender_dict = {'Male':1.0, 'Female':0.0}
contact_dict = {'Abroad': np.int32(2), 'Contact with confirmed':np.int32(1),'Other':np.int32(0)}
def covid_predicter(cough, fever, sore_throat, shortness_of_breath, head_ache,
       age_60_and_above, gender, test_indication):
    n_cough = yesno_dict[cough]
    n_fever = yesno_dict[fever]
    n_sore_throat = yesno_dict[sore_throat]
    n_shortness_of_breath = yesno_dict[shortness_of_breath]
    n_headache = yesno_dict[head_ache]
    n_age_60_and_above = yesno_dict[age_60_and_above]
    n_gender = gender_dict[gender]
    n_contact = contact_dict[test_indication]
    prediction = model.predict([[n_cough, n_fever, n_sore_throat, n_shortness_of_breath,
                                 n_headache,n_age_60_and_above, n_gender, n_contact]])
    if prediction == 1.0:
        return 'Positive'
    else:
        return 'Negative'
covid_predicter('Yes', 'No', 'No', 'No', 'No', 'No', 'Female', 'Abroad')

'Negative'

In [32]:
demo = gr.Interface(fn=covid_predicter, inputs=[gr.Dropdown(choices=["Yes", "No"], label="Cough"),
                                                gr.Dropdown(choices=["Yes", "No"], label="Fever"),
                                                gr.Dropdown(choices=["Yes", "No"], label="Sore Throat"),
                                                gr.Dropdown(choices=["Yes", "No"], label="Shortness of Breath"),
                                                gr.Dropdown(choices=["Yes", "No"], label="Headache"),
                                                gr.Dropdown(choices=["Yes", "No"], label="Age 60 and Above"),
                                                gr.Dropdown(choices=["Female", "Male"], label="Gender"),
                                                gr.Dropdown(choices=["Abroad",
                                                                     "Contact with confirmed",
                                                                     "Other"],  label="Contact")],
                                                outputs = "text", title="Covid-19 Prediction")
demo.launch()

Running on local URL:  http://127.0.0.1:7868/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x1b59b8f4700>, 'http://127.0.0.1:7868/', None)

Traceback (most recent call last):
  File "C:\Users\Chetna\AppData\Roaming\Python\Python39\site-packages\gradio\routes.py", line 259, in run_predict
    output = await app.blocks.process_api(
  File "C:\Users\Chetna\AppData\Roaming\Python\Python39\site-packages\gradio\blocks.py", line 687, in process_api
    predictions, duration = await self.call_function(fn_index, inputs)
  File "C:\Users\Chetna\AppData\Roaming\Python\Python39\site-packages\gradio\blocks.py", line 604, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "C:\ProgramData\Anaconda3\lib\site-packages\anyio\to_thread.py", line 28, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(func, *args, cancellable=cancellable,
  File "C:\ProgramData\Anaconda3\lib\site-packages\anyio\_backends\_asyncio.py", line 818, in run_sync_in_worker_thread
    return await future
  File "C:\ProgramData\Anaconda3\lib\site-packages\anyio\_backends\_asyncio.py", line 754, in run
    result = context.run(fu