# Challenge 2: Dangerous Situtations


In [1]:
## Import python packages
import pandas as pd
import numpy as np
import math
from collections import defaultdict

from sklearn.feature_extraction.text import  TfidfVectorizer
from nltk.corpus import stopwords
from nltk import word_tokenize, pos_tag
from gensim import corpora
from gensim import models
from gensim import similarities

In [2]:
# get a list of english stop words
en_stops = set(stopwords.words('english'))

## Setup: initialize some constants

In [3]:

GENERAL_RULES = ['Wear a mask',
    'Stay 6 feet from others',
    'Avoid crowds',
    'Avoid poorly ventilated spaces',
    'Wash your hands often',
    'Cover coughs and sneezes',
    'Clean and disinfect frequently touched surfaces daily',
    'Monitor your health daily',
    'Get vaccinated']

rule_shortNames = ['wear_mask',
    'social_distance',
    'avoid_crowds',
    'poor_ventilation',
    'wash_hands',
    'cover_coughs',
    'disinfect_surfaces',
    'monitor_health',
    'vaccine']

PROMPTS = ["If you are sick with COVID-19",
           "If you are an older adult",
           "If you have asthma",
           "If you are at home caring for a newborn and are diagnosed with or test positive for COVID-19"]

PROMPT_KEYS = ['sick', 'older_adult', 'asthma', 'covid_with_newborn']

## Parse the data
Parse the CDC guidelines text file into a dataframe with two columns.  One collumn is the title headers (start with ***) and second column are the sentences that fall under that header
(header index helps id the same header)

In [4]:

def textToDataFrame(text, delimiter):
    """ 
    Takes in input text, splits by delimiter, returns as pandas dataframe with headings as keys e.g. [index, heade, text]
    """
    textArray = text.split(delimiter)
    df = pd.DataFrame(columns=["headerIndex","header", "text"])
    headerIndex = 0
    for line in textArray:
        if len(line) > 0:
            # print(headerIndex)
            # finds the first line in the section and uses that as the heading
            firstNewlineIndex = line.find("\n")
            header = line[0:firstNewlineIndex]
            # print(header)
            # puts the remaining text into dataframe
            df2 = pd.DataFrame({'headerIndex':headerIndex, 'header': header, 'text':(line[firstNewlineIndex + 1:]).replace("\xa0", " ").split("\n")})
            # combines new dataframe with the return dataframe
            df = df.append(df2, ignore_index=True)
            headerIndex += 1
    return df

filename = './data/CDCGuidelines.txt'

with open(filename, encoding="utf8") as myFile:
    data = myFile.read()

df = textToDataFrame(data, "***")
df.sample(10)


Unnamed: 0,headerIndex,header,text
213,6,Contact Tracing,You can take everyday preventive actions to sl...
1608,32,Older Adults,"Before you go out, consider the following:"
3104,51,Small Gatherings,"If soap and water are not readily available, u..."
4260,64,Testing and International Air Travel,Do not travel if your test result is positive;...
2524,45,Deciding to Go Out,How many people will you interact with?
2219,39,"Pregnancy, Breastfeeding, and Caring for Newborns",Caregivers should wash their hands for at leas...
5,0,Things to know about the COVID-19 Pandemic,How to Protect Yourself When Going Out
4311,64,Testing and International Air Travel,
4665,68,COVID-19 and Cruise Ship Travel,"At this time, CDC still recommends avoiding an..."
4273,64,Testing and International Air Travel,"Also, take these actions after you return from..."


In [5]:
headers = df['header'].unique()
#just for nice output in notebook
pd.DataFrame(headers)

Unnamed: 0,0
0,Things to know about the COVID-19 Pandemic
1,Symptoms of Coronavirus
2,COVID-19 Testing Overview
3,Test for Current Infection (Viral Test)
4,Test for Past Infection
...,...
76,COVID-19 And Animals
77,If You Have Pets
78,What to Do if Your Pet Tests Positive for the ...
79,Guidance to Reduce the Risk of SARS-CoV-2 Spre...


## Find best matching titles to the prompts
Next step is to see if we can automatically sort through all the titles 
and find ones that are closely related to one of our categories
to do this, we need to do some preprocessing on the titles to:
 - remove stop words, such as: the, of, to, etc.
 - remove punctuation (i.e., commas, periods, colons, parens)
 - lower case everything
 - remove 's' at end of words to reduce plurals
 

In [6]:
def removeStopWords(text):
    returnText = ''
    for w in text.split(' '):
        if w not in en_stops and len(w)>0:
            returnText += ' ' + w
    returnText = returnText.strip()
    # print(returnText)
    return returnText

# remove words that appear only once
frequency = defaultdict(int)
combined = list(df['text']) + list(df['header']) + GENERAL_RULES

for text in combined:
    for token in text.split(' '):
        frequency[token] += 1
        
def removeSingleOccurances(text):
    returnText = ''
    for w in text.split(' '):
        if frequency[token] > 1:
            returnText += ' ' + w
    returnText = returnText.strip()
    # print(returnText)
    return returnText

def removePlurals(text):
    returnText = ''
    for w in text.split(' '):
        if w.endswith('s'):
            w = w[:-1]
        returnText += ' ' + w
    returnText = returnText.strip()
    return returnText
    
def nlpCleanup(df, columnName):
    df[columnName] = df[columnName].str.replace('\d+', '',regex=True) # for digits
    df[columnName] = df[columnName].str.replace(r'(\b\w{1,2}\b)', '',regex=True) # for word length lt 2
    df[columnName] = df[columnName].str.replace('[^\w\s]', '',regex=True) # for punctuation 
    df[columnName] = df[columnName].apply(removeStopWords)
    df[columnName] = df[columnName].apply(removeSingleOccurances)
    df[columnName] = df[columnName].apply(removePlurals)
    df[columnName] = df[columnName].str.lower()
    df[columnName] = df[columnName].str.strip()
    return df

# first make copy of unaltered text, this will be needed later
df['header_orig'] = df['header']
df['text_orig'] = df['text']

df = nlpCleanup(df, columnName='header')
df = nlpCleanup(df, columnName='text')

df.sample(10)


Unnamed: 0,headerIndex,header,text,header_orig,text_orig
4877,72,covid children teen,child symptom may exposed viru cause covid are...,COVID-19 in Children and Teens,If your child has symptoms and may have been e...
967,21,social distancing,limit contact when running errand only visit s...,Social Distancing,Limit Contact When Running Errands: Only visit...
1181,25,what you are sick,sick,What to Do If You Are Sick,@If you are sick
4651,67,after you travel,medical appointment cannot postponed call doct...,After You Travel,If you have a medical appointment that cannot ...
5258,78,what your pet test positive viru cause covid,,What to Do if Your Pet Tests Positive for the ...,
2649,47,returning work,addition measure busines may implemented reduc...,Returning to Work,In addition to any measures your business may ...
4361,65,know when not travel avoid spreading covid,test negative covid still finish quarantine pe...,Know When Not to Travel to Avoid Spreading COV...,"If you test negative for COVID-19, you should ..."
4533,66,know your travel risk,,Know Your Travel Risk,
572,14,improve how your mask protect you,,Improve How Your Mask Protects You,
424,10,how protect yourself other,before eating preparing food,How to Protect Yourself & Others,Before eating or preparing food


#### Similarly process the general rules and prompts

In [7]:
genRulesDf = pd.DataFrame(GENERAL_RULES, columns=['rule'])

genRulesDf = nlpCleanup(genRulesDf, columnName='rule')

genRulesDf.head()

Unnamed: 0,rule
0,wear mask
1,stay feet other
2,avoid crowd
3,avoid poorly ventilated space
4,wash hand often


In [8]:
promptDf = pd.DataFrame(PROMPTS, columns=['prompt'])
promptDf = nlpCleanup(promptDf, columnName='prompt')

promptDf.head()

Unnamed: 0,prompt
0,sick covid
1,older adult
2,asthma
3,home caring newborn diagnosed test positive covid


## Now vectorize the headers and prompts using TFIDF

learn the bag of words using the titles, and apply the same vectorizer to prompts

In [9]:
headers = list(pd.Series(df['header'].unique()))

vectorizer = TfidfVectorizer(
    analyzer = 'word',
    # ngram_range=(2,2),
    lowercase = True,
    strip_accents='unicode',
    stop_words='english'
)

headerVects = vectorizer.fit_transform(headers).toarray()
vectorizer.get_feature_names()

['activitie',
 'additional',
 'adult',
 'air',
 'animal',
 'answer',
 'associated',
 'asthma',
 'attending',
 'avoid',
 'beache',
 'behavioral',
 'breastfeeding',
 'care',
 'caregiver',
 'caring',
 'cause',
 'cdc',
 'certain',
 'checklist',
 'children',
 'cleaning',
 'close',
 'communitie',
 'community',
 'condition',
 'contact',
 'coronaviru',
 'covid',
 'crew',
 'cruise',
 'current',
 'deciding',
 'dementia',
 'developmental',
 'disabilitie',
 'disease',
 'disinfecting',
 'disorder',
 'distancing',
 'doctor',
 'drug',
 'effect',
 'errand',
 'essential',
 'event',
 'experiencing',
 'facilitie',
 'facility',
 'food',
 'gathering',
 'getting',
 'glove',
 'guidance',
 'guide',
 'hand',
 'handler',
 'healthy',
 'help',
 'helping',
 'hiring',
 'holiday',
 'home',
 'homelessnes',
 'household',
 'housing',
 'improve',
 'improving',
 'indoor',
 'infection',
 'inflammatory',
 'international',
 'isolate',
 'know',
 'large',
 'learn',
 'likely',
 'live',
 'living',
 'longterm',
 'mask',
 'medica

In [10]:
promptList = list(promptDf['prompt'].values)

promptVects = vectorizer.transform(promptList).toarray()

## Now measure similarity
Using cosine similarities between the tfidf vectors
Return the best matching header for each prompt

In [11]:
def cosineSimilarity(vector1, vector2):
    dot_product = sum(p*q for p,q in zip(vector1, vector2))
    magnitude = math.sqrt(sum([val**2 for val in vector1])) * math.sqrt(sum([val**2 for val in vector2]))
    if not magnitude:
        return 0
    return dot_product/magnitude
    

sims = []
for j, h in enumerate(headers):
    d = []
    for i, p in enumerate(promptList):
        a = headerVects[j,:]
        b = promptVects[i,:]
        # print(a)
        # print(b)
        cs = cosineSimilarity(a, b)
        d.append(cs)
        # print(cs)
        # break
    sims.append(d)
        
sims = np.array(sims)

bestMatches = np.argmax(sims, axis=0)

relevantHeaders = []
for m in bestMatches:
    relevantHeaders.append(headers[m])

In [12]:
# set the category for each row in df that matches the relevant 
# header found above
df['category'] = ''

for i, h in enumerate(relevantHeaders):
    df['category'][df['header'] == h] = PROMPT_KEYS[i]


### Next step is to limit to rows that match one of the 4 categories

In [13]:
df = df[np.logical_not(df['category']=='')]

headerDf = df[['headerIndex', 'header', 'category']]
headerDf = headerDf.drop_duplicates()

# print out those rows
headerDf

Unnamed: 0,headerIndex,header,category
1127,25,what you are sick,sick
1533,32,older adult,older_adult
2138,39,pregnancy breastfeeding caring newborn,covid_with_newborn
2396,42,people moderate severe asthma,asthma


## Find actionable instructions
Next we want to limit these texts to just ones that start with a verb
Using the part-of-speech tagging capability of nltk

In [14]:
def startsWithVerb(s):
    # returns if the first word in the sentence is a verb = this is a 
    # shortcut way to check for actionable instructions
    ret = False
    if len(s)>0:
        s=s.lower()
        tag_pos_string = pos_tag(word_tokenize(s))
        firstWordPartOfSpeech = tag_pos_string[0][1]
        ret = firstWordPartOfSpeech in ('VB', 'VBP')
    
    return ret

df['text_orig'] = df['text_orig'].str.replace('@', '', regex=False)
df['text_orig'] = df['text_orig'].str.replace('*', '', regex=False)

df['actionable'] = df['text_orig'].apply(startsWithVerb)

df.sample(25)

Unnamed: 0,headerIndex,header,text,header_orig,text_orig,category,actionable
1558,32,older adult,change treatment plan without talking healthca...,Older Adults,Do not change your treatment plan without talk...,older_adult,True
2241,39,pregnancy breastfeeding caring newborn,you along family healthcare provider decide wh...,"Pregnancy, Breastfeeding, and Caring for Newborns","You, along with your family and healthcare pro...",covid_with_newborn,False
1660,32,older adult,schedule visitation advance enable continued s...,Older Adults,Schedule visitation in advance to enable conti...,older_adult,False
1689,32,older adult,medical condition,Older Adults,"medical conditions,",older_adult,False
2396,42,people moderate severe asthma,,People with Moderate to Severe Asthma,,asthma,False
2211,39,pregnancy breastfeeding caring newborn,isolation covid take following precaution isol...,"Pregnancy, Breastfeeding, and Caring for Newborns","If you are in isolation for COVID-19, take the...",covid_with_newborn,False
2230,39,pregnancy breastfeeding caring newborn,symptom isolation period end,"Pregnancy, Breastfeeding, and Caring for Newborns","If you had symptoms, your isolation period end...",covid_with_newborn,False
2216,39,pregnancy breastfeeding caring newborn,,"Pregnancy, Breastfeeding, and Caring for Newborns",,covid_with_newborn,False
1685,32,older adult,,Older Adults,,older_adult,False
1219,25,what you are sick,clean area item soap water another detergent d...,What to Do If You Are Sick,Clean the area or item with soap and water or ...,sick,True


In [15]:
# limit to those that start with verb to give actionable instruction
df = df[df['actionable']==True]
df.sample(10)

Unnamed: 0,headerIndex,header,text,header_orig,text_orig,category,actionable
1197,25,what you are sick,wash hand,What to Do If You Are Sick,Wash your hands.,sick,True
1686,32,older adult,develop care plan,Older Adults,Develop a Care Plan,older_adult,True
2306,39,pregnancy breastfeeding caring newborn,take step reduce risk sudden infant death synd...,"Pregnancy, Breastfeeding, and Caring for Newborns",Take steps to reduce the risk of sudden infant...,covid_with_newborn,True
1195,25,what you are sick,clean hand often,What to Do If You Are Sick,Clean your hands often,sick,True
1666,32,older adult,contact your healthcare provider seek care,Older Adults,Contact Your Healthcare Provider & Seek Care,older_adult,True
1584,32,older adult,keep feet distance,Older Adults,Keep 6 feet of distance.,older_adult,True
1703,32,older adult,remember importance staying physically active ...,Older Adults,Remember the importance of staying physically ...,older_adult,True
2217,39,pregnancy breastfeeding caring newborn,have healthy caregiver increased risk severe i...,"Pregnancy, Breastfeeding, and Caring for Newborns",Have a healthy caregiver who is not at increas...,covid_with_newborn,True
1142,25,what you are sick,get rest stay hydrated take overthecounter med...,What to Do If You Are Sick,Get rest and stay hydrated. Take over-the-coun...,sick,True
1220,25,what you are sick,sure follow instruction label ensure safe effe...,What to Do If You Are Sick,Be sure to follow the instructions on the labe...,sick,True


In [16]:
# this shows the number of instructions by category
# note that we cannot have more than 20 instructions per category
# TODO: we have too many instructions for each category right now...
# this is probably because there are some duplicated instructions within each category
# need to do a self-similarity comparison to remove dups
df.groupby('category').count()

Unnamed: 0_level_0,headerIndex,header,text,header_orig,text_orig,actionable
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
asthma,3,3,3,3,3,3
covid_with_newborn,38,38,38,38,38,38
older_adult,22,22,22,22,22,22
sick,16,16,16,16,16,16


## Now compare general to specific rules
Next we want to compare the general rules against the rules that passed all 
previous processing steps and find the union.  Using the LSI model to find phrase 
similarity.  When a phrase is similar enough drop that general rule, 
else add that general rule for that category.
We'll also use phrase similarity to remove duplicate instructions within each category

Experimented with num_topics hyper parameter a bit.  Tried 2, 5, and 10.  10 provided reasonable results based on human interpretation of similarity.


In [17]:
# this creates a dictionary and bag of words encoding based on all text
headers = list(pd.Series(df['header'].unique()).str.split())
texts = list(df['text'].str.split()) 
generalRules = list(genRulesDf['rule'].str.split())
allWords = texts + headers + generalRules
dictionary = corpora.Dictionary(allWords)
# create corpus based on text (not headers)
corpus = [dictionary.doc2bow(text) for text in texts]
# create lsi model using gensim
lsi = models.LsiModel(corpus, id2word=dictionary, num_topics=10)
index = similarities.MatrixSimilarity(lsi[corpus])

### Add a column to show similarity of each row to the general rules

In [18]:
for i, gen in enumerate(genRulesDf.values):
    gen = gen[0].split()
    vec_bow = dictionary.doc2bow(gen)
    vec_lsi = lsi[vec_bow]
    sims = index[vec_lsi]
    df[rule_shortNames[i]] = sims

df

Unnamed: 0,headerIndex,header,text,header_orig,text_orig,category,actionable,wear_mask,social_distance,avoid_crowds,poor_ventilation,wash_hands,cover_coughs,disinfect_surfaces,monitor_health,vaccine
1131,25,what you are sick,keep track symptom,What to Do If You Are Sick,Keep track of your symptoms.,sick,True,-0.186307,0.362461,0.0,0.0,0.010062,0.326505,-0.014259,0.244942,0.183463
1141,25,what you are sick,take care,What to Do If You Are Sick,Take care of yourself.,sick,True,-0.010576,-0.031643,0.0,0.0,-0.007789,-0.329172,-0.083289,0.287246,-0.069353
1142,25,what you are sick,get rest stay hydrated take overthecounter med...,What to Do If You Are Sick,Get rest and stay hydrated. Take over-the-coun...,sick,True,-0.156679,0.792615,0.0,0.0,-0.020874,0.457620,-0.187422,-0.080773,0.790896
1154,25,what you are sick,tell close contact may exposed covid infected ...,What to Do If You Are Sick,Tell your close contacts that they may have be...,sick,True,0.053896,0.104484,0.0,0.0,0.005471,0.177780,-0.083671,-0.014206,0.069781
1158,25,what you are sick,see covid animal question pet,What to Do If You Are Sick,See COVID-19 and Animals if you have questions...,sick,True,0.205234,0.215658,0.0,0.0,0.011471,0.141091,-0.046450,-0.106139,0.036375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2312,39,pregnancy breastfeeding caring newborn,cover baby head allow baby get hot sign baby m...,"Pregnancy, Breastfeeding, and Caring for Newborns",Do not cover your baby’s head or allow your ba...,covid_with_newborn,True,0.065271,0.419098,0.0,0.0,-0.010373,0.815870,0.301226,-0.068340,0.640503
2313,39,pregnancy breastfeeding caring newborn,smoke allow anyone smoke around baby,"Pregnancy, Breastfeeding, and Caring for Newborns",Do not smoke or allow anyone to smoke around y...,covid_with_newborn,True,0.232250,0.224551,0.0,0.0,-0.016073,0.635955,0.348191,-0.064881,0.331330
2403,42,people moderate severe asthma,make sure least day supply medicine,People with Moderate to Severe Asthma,Make sure that you have at least a 30-day supp...,asthma,True,0.003816,0.298352,0.0,0.0,0.172642,-0.163970,-0.152979,-0.084192,-0.105751
2404,42,people moderate severe asthma,take everyday precaution like washing hand avo...,People with Moderate to Severe Asthma,Take everyday precautions like washing your ha...,asthma,True,0.052426,0.492487,0.0,0.0,0.706026,0.098933,0.422527,-0.150464,0.165037


## Find overlapping/very similar phrases
Using a threshold to find where the general rules are duplicated in the specific rules.  This threshold was derived using some trial and error.

In [19]:
THRESHOLD = 0.99

dfs = []

for cat in PROMPT_KEYS:
    print('--------')
    print(cat)
    tempDf = df[df['category'] == cat]
    text = tempDf['text'].values
    
    for i, rule in enumerate(rule_shortNames):
        dupRule = np.any(tempDf[rule] > THRESHOLD)

        if not dupRule:
            newRule = pd.DataFrame([[cat, GENERAL_RULES[i]]], columns=['category', 'text_orig'])
            tempDf = pd.concat([tempDf,newRule])
#             print('    ***')
#             print('    No dup found:', rule)
#             print('       ', list(tempDf['text'].values))
        else:
            print('    ***')
            print('    found dup:', rule)
            print('       ', list(tempDf['text'][tempDf[rule] > THRESHOLD].values))
            
    dfs.append(tempDf[['category','text_orig']])

finalDf = pd.concat(dfs)

--------
sick
    ***
    found dup: wash_hands
        ['clean hand often', 'wash hand']
    ***
    found dup: cover_coughs
        ['cover cough sneeze', 'cover mouth nose tissue cough sneeze']
--------
older_adult
--------
asthma
    ***
    found dup: wash_hands
        ['wash hand often soap water least second use hand sanitizer contain least alcohol']
--------
covid_with_newborn
    ***
    found dup: wash_hands
        ['wash hand soap water least second soap water available use hand sanitizer least alcohol', 'wash hand soap water least second holding caring newborn soap water available use hand sanitizer least alcohol', 'wash hand soap water least second touching newborn soap water available use hand sanitizer least alcohol', 'wash hand breastfeeding']


In [20]:
finalDf.sample(25)

Unnamed: 0,category,text_orig
0,covid_with_newborn,Stay 6 feet from others
2404,asthma,Take everyday precautions like washing your ha...
1666,older_adult,Contact Your Healthcare Provider & Seek Care
1567,older_adult,Call your healthcare provider about underlying...
0,sick,Wear a mask
2308,covid_with_newborn,Place your baby on his or her back for all sle...
0,covid_with_newborn,Avoid crowds
0,covid_with_newborn,Cover coughs and sneezes
1684,older_adult,be aware that a single reading higher than 10...
2298,covid_with_newborn,Check your baby for jaundice (yellow color in ...


## Self-similarity comparison
In reviewing the final dataset we found duplicate/highly overlapping instructions within the specific instructions
For example, "Keep track of your symptoms." and "Monitor your symptoms." are 
very similar instructions given under the title: "What to Do If You Are Sick".  
We need to remove the duplicates within each category.

In [21]:
def pruneFinal(finalDf, stoplist, threshold):
    returnDf = pd.DataFrame()

    markedRules = []
    uniqueCategories = finalDf.category.unique()
    finalDfIndex = 0
    docStart = 0
    #split dataframe into categories, so rules can be compared only within category
    for category in uniqueCategories:
        # set up for gensim
        selection = finalDf.loc[finalDf['category'] == category]
        documents = selection['text_orig'].values.tolist()
        texts = [
            [word for word in document.lower().split() if word not in stoplist]
            for document in documents
        ]
        frequency = defaultdict(int)
        for text in texts:
            for token in text:
                frequency[token] += 1
        texts = [
            [token for token in text if frequency[token] > 1]
            for text in texts
        ]
        dictionary = corpora.Dictionary(texts)
        corpus = [dictionary.doc2bow(text) for text in texts]
        lsi = models.LsiModel(corpus, id2word=dictionary, num_topics=2)
        # compare similarities of each rule to every other rule in the set
        docIndex = 0
        for i, gen in enumerate(documents):
            vec_bow = dictionary.doc2bow(gen.lower().split())
            vec_lsi = lsi[vec_bow]  # convert the query to LSI space
            index = similarities.MatrixSimilarity(lsi[corpus])
            sims = index[vec_lsi]  
            overThreshold = [simVal for simVal in list(enumerate(sims)) if simVal[1] > threshold and (simVal[0] + docStart) not in markedRules]

            if len(overThreshold) > 1:
                # add index to list of indexes to remove
                markedRules.append(finalDfIndex)
            docIndex += 1
            finalDfIndex += 1
        docStart += docIndex
        
        finalDf = finalDf.reset_index(drop=True)
        removeDf = finalDf.index.isin(markedRules)
        #print(finalDf.index)
    # return the rules that are not marked
    return finalDf[~removeDf]

finalDf = pruneFinal(finalDf, en_stops, 0.9999) 
finalDf = finalDf.drop_duplicates()

### format final output per HTM spec and save to csv

In [22]:
finalDf.columns = ['situation', 'rules']

fileName = './submission/Challenge2_submission.csv'

finalDf.to_csv(fileName, index=False)

## Show final number of rules per situation

In [23]:
"""
check counts by category
"""
print(finalDf.groupby('situation').count())

                    rules
situation                
asthma                 10
covid_with_newborn     11
older_adult            26
sick                   18
