In [1]:
from selenium.webdriver import Chrome
from sklearn import tree
import graphviz
import pandas as pd

from datetime import datetime, timedelta
from random import choice, randint

In [2]:
# Click the submit button
def submit(driver, n=1):
    """Press the page's 'next' button ``n`` times.

    The button is looked up again before every click rather than cached,
    so each click acts on whatever the driver currently returns for the
    ``button.button--next`` selector.

    Args:
        driver: Object exposing ``find_element_by_css_selector``.
        n: Number of clicks to perform (default 1).
    """
    for _ in range(n):
        next_button = driver.find_element_by_css_selector('button.button--next')
        next_button.click()

In [3]:
# Respond to a question page
def respond(driver, data):
    """Answer survey pages at random until no 'Next' button remains.

    For each page the ``h1`` text is used as the key under which the
    response is stored in ``data``:

    * On the 'When did your symptoms start?' page a random date between
      today and 60 days ago is typed into the day/month/year fields and the
      ``datetime`` is stored.
    * On any other page one of the ``SelectedAnswer`` inputs is picked at
      random, its ``id`` is stored, and it is clicked.
    * If a page offers no ``SelectedAnswer`` inputs, its header is stored
      under ``'Outcome'``.

    After handling a page, the first ``button.button--next`` element is
    clicked if it exists and its text starts with 'Next'; otherwise the
    function returns.

    Args:
        driver: Selenium-style driver positioned on a survey page.
        data: Dict that is updated in place with the responses.

    Returns:
        The same ``data`` dict that was passed in.

    Raises:
        Whatever the driver raises while reading or clicking an element.
        Such errors are no longer swallowed and recorded as an 'Outcome'.
    """
    # Iterate instead of recursing once per page, so the call stack does not
    # grow with the number of pages.
    while True:
        text = driver.find_element_by_tag_name('h1').text
        if text == 'When did your symptoms start?':
            # Enter date for when symptoms started
            date = datetime.now() - timedelta(days=randint(0, 60))
            data[text] = date
            driver.find_element_by_id('SymptomsStart_Day').send_keys(str(date.day))
            driver.find_element_by_id('SymptomsStart_Month').send_keys(str(date.month))
            driver.find_element_by_id('SymptomsStart_Year').send_keys(str(date.year))
        else:
            choices = driver.find_elements_by_css_selector('input[name="SelectedAnswer"]')
            if choices:
                # Click on a choice
                selected = choice(choices)
                data[text] = selected.get_attribute('id')
                selected.click()
            else:
                # No answers to pick from; record header as outcome (advice).
                # Checked explicitly so a failing click is not mistaken for
                # the end of the survey.
                data['Outcome'] = text

        # find_elements_* returns an empty list when nothing matches, so no
        # exception handling is needed to detect a missing button.
        next_btns = driver.find_elements_by_css_selector('button.button--next')
        if not next_btns or not next_btns[0].text.startswith('Next'):
            return data
        # Continue to next page; click the button already located instead of
        # looking it up a second time.
        next_btns[0].click()

In [4]:
def take_survey(driver):
    """Run one simulated participant through the survey and return the answers.

    Loads the survey start page, clicks through the preliminary pages
    (entering a fixed postcode), fills in a random age between 5 and 100
    inclusive and a random sex, then hands over to ``respond`` for the
    question pages.

    Args:
        driver: Selenium-style driver used to load and operate the pages.

    Returns:
        The dict returned by ``respond``, seeded with the ``'age'`` (as a
        string) and ``'sex'`` chosen here.
    """
    # Click through preliminary pages and start survey
    driver.get('https://111.nhs.uk/service/COVID-19/')
    submit(driver)

    postcode_field = driver.find_element_by_id('CurrentPostcode')
    postcode_field.send_keys('cb2 1rf')
    submit(driver, 2)

    # Demographics: the age is typed in, the sex is picked by clicking the
    # element whose id is the chosen value.
    participant_age = str(randint(5, 100))
    age_field = driver.find_element_by_id('UserInfo_Demography_Age')
    age_field.send_keys(participant_age)

    participant_sex = choice(['Male', 'Female'])
    sex_option = driver.find_element_by_id(participant_sex)
    sex_option.click()
    submit(driver, 2)

    answers = {'age': participant_age, 'sex': participant_sex}
    return respond(driver, answers)

In [9]:
# Run 100 participants through the survey
driver = Chrome()
data = []
try:
    for i in range(100):
        print(i)
        try:
            data.append(take_survey(driver))
        except Exception as exc:
            # Best effort: a failed run is skipped so the remaining
            # participants still get processed, but the cause is reported
            # rather than hidden. Catching Exception (not a bare except)
            # lets KeyboardInterrupt stop the loop.
            print('An error occurred:', repr(exc))
finally:
    # Always shut the browser down, even if the loop is interrupted.
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window.
    driver.quit()
# One row per successfully completed survey run
df = pd.DataFrame(data)

In [10]:
# Preview the first rows of the collected responses (one row per completed survey run)
df.head()

Unnamed: 0,age,sex,Do you have a high temperature (fever)?,Do you have a new continuous cough?,Before you continue,Outcome,When did your symptoms start?,Are you so breathless that you are unable to speak more than a few words?,Are you breathing harder or faster than usual when doing nothing at all?,Are you so ill that you've stopped doing all of your usual daily activities?,"Have you suddenly become confused, or much more confused than normal?",How has your breathing been in the last hour?,Has a doctor told you that getting an infection might be very serious?
0,57,Male,Yes,Yes,Next,Call 111 and speak to a nurse now,2020-02-25 08:27:46.325584,I'mnotsure,No,Ifeelillbutcandosomeofmyusualactivities,Yes,,
1,53,Male,Yes,No,Next,Phone 999 now for an ambulance,2020-02-19 08:27:57.539771,Yes,,,,,
2,89,Male,Yes,Yes,Next,Call 111 and speak to a nurse now,2020-02-17 08:28:05.836143,No,No,No-Ifeelwellenoughtodomostofmyusualdailyactivi...,Yes,,
3,74,Male,No,Yes,Next,Phone 999 now for an ambulance,2020-02-13 08:28:15.365690,Yes,,,,,
4,69,Male,Yes,No,Next,Phone 999 now for an ambulance,2020-03-01 08:28:22.839485,Yes,,,,,


In [11]:
# These cells should all output 1.0 to indicate the decision tree has not changed
# Check: participants reporting neither fever nor cough. The printed value is
# the fraction of them whose outcome is 'Find the right topic'.
no_fever = df['Do you have a high temperature (fever)?'].eq('No')
no_cough = df['Do you have a new continuous cough?'].eq('No')
tmp = df.loc[no_fever & no_cough]
sent_to_topics = tmp['Outcome'].eq('Find the right topic')
print(sent_to_topics.astype(int).mean())

1.0


In [12]:
# Check: participants too breathless to speak more than a few words. The
# printed value is the fraction of them whose outcome is the 999 advice.
too_breathless = df['Are you so breathless that you are unable to speak more than a few words?'].eq('Yes')
tmp = df.loc[too_breathless]
told_to_call_999 = tmp['Outcome'].eq('Phone 999 now for an ambulance')
print(told_to_call_999.astype(int).mean())

1.0


In [13]:
# Check: participants whose breathing got worse in the last hour. The printed
# value is the fraction of them whose outcome is the 999 advice.
breathing_worse = df['How has your breathing been in the last hour?'].eq("It'sworse")
tmp = df.loc[breathing_worse]
told_to_call_999 = tmp['Outcome'].eq('Phone 999 now for an ambulance')
print(told_to_call_999.astype(int).mean())

1.0


In [14]:
# Check: participants who have stopped all their usual activities. The printed
# value is the fraction of them whose outcome is the 111 nurse advice.
stopped_everything = df["Are you so ill that you've stopped doing all of your usual daily activities?"].eq("Yes-I'vestoppeddoingeverythingIusuallydo")
tmp = df.loc[stopped_everything]
told_to_call_111 = tmp['Outcome'].eq('Call 111 and speak to a nurse now')
print(told_to_call_111.astype(int).mean())

1.0


In [15]:
# Check: participants who have become (more) confused. The printed value is
# the fraction of them whose outcome is the 111 nurse advice.
newly_confused = df['Have you suddenly become confused, or much more confused than normal?'].eq('Yes')
tmp = df.loc[newly_confused]
told_to_call_111 = tmp['Outcome'].eq('Call 111 and speak to a nurse now')
print(told_to_call_111.astype(int).mean())

1.0


In [16]:
# Check: participants told by a doctor that an infection might be very
# serious. The printed value is the fraction of them whose outcome is the
# 111 nurse advice.
infection_risk = df['Has a doctor told you that getting an infection might be very serious?'].eq('Yes')
tmp = df.loc[infection_risk]
told_to_call_111 = tmp['Outcome'].eq('Call 111 and speak to a nurse now')
print(told_to_call_111.astype(int).mean())

1.0


In [17]:
# Check: participants who answered 'No' to the doctor/infection question. The
# printed value is the fraction of them whose outcome is the stay-at-home
# advice.
no_infection_risk = df['Has a doctor told you that getting an infection might be very serious?'].eq('No')
tmp = df.loc[no_infection_risk]
told_to_stay_home = tmp['Outcome'].eq('You and your family need to stay at home')
print(told_to_stay_home.astype(int).mean())

1.0
