
In epidemiology, a case fatality rate (CFR) — sometimes called case fatality risk — is the proportion of people diagnosed with a certain disease who die from it within a certain period of time. A CFR is conventionally expressed as a percentage and is a measure of disease severity. CFRs are most often used for diseases with discrete, limited time courses, such as outbreaks of acute infections. A CFR can only be considered final when all the cases have been resolved (the patient has either died or recovered). For example, during an outbreak with a high daily increase in cases and a long resolution time, the preliminary CFR would be substantially lower than the final CFR.


# Set up the notebook

In [1]:

# Toggle IPython's pretty printing (the recorded run reports it was switched OFF)
%pprint

Pretty printing has been turned OFF


In [2]:

import sys

# Insert at 1, 0 is the script path (or '' in REPL)
# Adds ../py to the module search path; presumably that is where the
# stats_scraping_utils and storage modules imported below live.
sys.path.insert(1, '../py')

import numpy as np
import pandas as pd
import re
from stats_scraping_utils import StatsScrapingUtilities
from storage import Storage

# Shared helpers for the rest of the notebook: `s` is used to store objects
# (s.store_objects) and `ssu` to fetch page tables (ssu.get_page_tables).
s = Storage()
ssu = StatsScrapingUtilities(s=s)


## Build the Case Fatality Rate Dataset

In [3]:

# Fetch the tables on the Wikipedia CFR page and keep a working copy of the first one
rv_explanation_str = 'Case Fatality Rate'
url = 'https://en.wikipedia.org/wiki/List_of_human_disease_case_fatality_rates'
print(f'The {rv_explanation_str} data is from {url}.')
tables_list = ssu.get_page_tables(url)
CFRs_df = tables_list[0].copy()

# Show the raw column labels and a transposed random sample of seven rows
print(list(CFRs_df.columns))
display(CFRs_df.sample(n=7).transpose())

The Case Fatality Rate data is from https://en.wikipedia.org/wiki/List_of_human_disease_case_fatality_rates.
[(0, (70, 6))]
['Disease', 'Type', 'Treatment[clarification needed]', 'CFR', 'Notes', 'Reference(s)']


Unnamed: 0,68,16,15,6,52,47,26
Disease,"Hand, foot and mouth disease, children < 5 yea...",Ebola virus disease – specifically EBOV,"Tularemia, pneumonic",Primary amoebic meningoencephalitis,"Hepatitis A, adults > 50 years old","Cholera, in Africa",Middle Eastern Respiratory Syndrome (MERS)
Type,Viral,Viral,Bacterial,Amoebic infection,Viral,Bacterial,Viral
Treatment[clarification needed],,Unvaccinated & Untreated,Untreated,No cure,Unvaccinated,,
CFR,0.01%,[25–90]%,≤ 60%,[98–99]%,≈1.8%,≈[2–3]%,34%
Notes,,Prognosis improved by early supportive treatme...,,Amphotericin B has shown efficacy in the limit...,,"With proper treatment, may be less than 1%, wh...",Galidesivir has shown promise in treating Coro...
Reference(s),[53],[17][18],[11]: 78,,[9]: 278,[37][38][39][40][41],[25]


In [4]:

# Cast and split columns
CFRs_df.columns = ['disease_name', 'disease_type', 'treatment_by', 'CFR', 'disease_notes', 'references']
dash_regex = re.compile('[–-]')
numeric_regex = re.compile(r'[0-9\.]+')


def parse_cfr_bound(cfr_value, bound_index):
    """Extract one bound of a scraped CFR cell as a numeric string.

    Parameters
    ----------
    cfr_value : object
        Raw cell of the CFR column, e.g. '34%', '≤ 60%' or '< [5–30]%'.
    bound_index : int
        0 for the lower bound, 1 for the upper bound.

    Returns
    -------
    str
        The digits-and-dots text of the requested bound, or '' when none is
        found (pd.to_numeric then coerces it to NaN).
    """
    cfr_str = str(cfr_value)
    if not dash_regex.search(cfr_str):
        # Single value: both bounds are the number in front of the percent sign
        return re.sub(r'[^0-9\.]+', '', cfr_str.split('%')[0])

    # Range: the lower bound is the last number before the first dash, the
    # upper bound is the first number after it. Picking numeric tokens directly
    # keeps ranges written with spaces around the dash ('5 – 30%') parseable.
    numbers_list = numeric_regex.findall(dash_regex.split(cfr_str)[bound_index])
    if not numbers_list:
        return ''
    return numbers_list[bound_index - 1]


for i, cn in enumerate(['CFR_low', 'CFR_high']):
    # Build the text column first, then convert once, so strings are never
    # written into a float column
    bound_strs = CFRs_df.CFR.map(lambda x: parse_cfr_bound(x, i))
    CFRs_df[cn] = pd.to_numeric(bound_strs, errors='coerce', downcast='float')
display(CFRs_df.sample(7).T)

Unnamed: 0,44,35,53,66,2,39,9
disease_name,Yellow fever,Leptospirosis,Coronavirus disease 2019 (COVID-19),"Influenza A, typical pandemics",Sleeping Sickness,Severe acute respiratory syndrome (SARS),"Anthrax, specifically the pulmonary form"
disease_type,Viral,Bacterial,Viral,Viral,Parasitic,Viral,Bacterial
treatment_by,Unvaccinated,,Unvaccinated &Treated with unspecific treatments,,Untreated,,Unvaccinated & Untreated
CFR,7.5%,< [5–30]%,0.5-1%,< 0.1%,≈100%,11%,> 85%
disease_notes,,,Depends largely on the age group of the person...,,,Galidesivir has shown promise in treating Coro...,Early treatments lower the CFR to 45% as seen ...
references,[36],[9]: 352,[46],[42],[4],[31],[11]: 88
CFR_low,7.5,5.0,0.5,0.1,100.0,11.0,85.0
CFR_high,7.5,30.0,1.0,0.1,100.0,11.0,85.0


In [5]:

# Add Monkeypox
# Compare against the column's values: `'Monkeypox' in series` would test the
# index labels instead, so the guard would never prevent a duplicate row.
if not CFRs_df.disease_name.eq('Monkeypox').any():
    row_dict = {
        'disease_name': 'Monkeypox',
        'disease_type': 'Viral',
        'treatment_by': 'Vaccinated',
        'CFR': '3%',
        'disease_notes': np.nan,
        'references': np.nan,
        'CFR_low': 1.0,
        'CFR_high': 3.7
    }
    # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
    CFRs_df = pd.concat([CFRs_df, pd.DataFrame([row_dict])], ignore_index=True)
    s.store_objects(CFRs_df=CFRs_df)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\covid19\saves\pkl\CFRs_df.pkl


In [6]:

# Translate each disease name through ssu.disease_name_dict into the short name
# used as the merge key; names without an entry are kept unchanged
CFRs_df['short_disease_name'] = [
    ssu.disease_name_dict.get(name, name) for name in CFRs_df.disease_name
]

In [7]:

# Hand-set the CFR bounds for newborn varicella and show the resulting row.
# Reference: https://academic.oup.com/jid/article/182/2/383/2190935
# "In 1990–1994, adults had a risk 25 times greater and
# infants had a risk 4 times greater of dying from varicella
# than did children 1–4 years old (table 4)."
# NOTE(review): the scraped CFR text for this row is '≈30%', above the 3.0–11.9
# set here — confirm the source figures are percentages of cases.
mask_series = CFRs_df.disease_name.eq('Varicella (chickenpox), in newborns')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [3.0, 11.9]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Varicella (chickenpox), in newborns', 'disease_type': 'Viral', 'treatment_by': 'Untreated', 'CFR': '≈30%', 'disease_notes': 'Where the mothers develop the disease between 5 days prior to, or 2 days after delivery.', 'references': '[9]:\u200a110', 'CFR_low': 3.0, 'CFR_high': 11.9, 'short_disease_name': 'Varicella'}

In [8]:

# Hand-set the CFR bounds for adult varicella and show the resulting row.
# Reference: https://academic.oup.com/jid/article/182/2/383/2190935
# "In 1990–1994, adults had a risk 25 times greater and
# infants had a risk 4 times greater of dying from varicella
# than did children 1–4 years old (table 4)."
# NOTE(review): the scraped CFR text for this row is '0.02%', far below the
# 15.8–26.8 set here — confirm the source figures are percentages of cases.
mask_series = CFRs_df.disease_name.eq('Varicella (chickenpox), adults')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [15.8, 26.8]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Varicella (chickenpox), adults', 'disease_type': 'Viral', 'treatment_by': 'Unvaccinated', 'CFR': '0.02%', 'disease_notes': nan, 'references': '[9]:\u200a110', 'CFR_low': 15.8, 'CFR_high': 26.8, 'short_disease_name': 'Varicella'}

In [9]:

# Hand-set the CFR bounds for childhood varicella and show the resulting row.
# Reference: https://academic.oup.com/jid/article/182/2/383/2190935
# "In 1990–1994, adults had a risk 25 times greater and
# infants had a risk 4 times greater of dying from varicella
# than did children 1–4 years old (table 4)."
# NOTE(review): the scraped CFR text for this row is '0.001%', far below the
# 0.4–1.3 set here — confirm the source figures are percentages of cases.
mask_series = CFRs_df.disease_name.eq('Varicella (chickenpox), children')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [0.4, 1.3]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Varicella (chickenpox), children', 'disease_type': 'Viral', 'treatment_by': 'Unvaccinated', 'CFR': '0.001%', 'disease_notes': nan, 'references': '[9]:\u200a110', 'CFR_low': 0.4, 'CFR_high': 1.3, 'short_disease_name': 'Varicella'}

In [10]:

# Hand-set the CFR bounds for infant pertussis in developing countries and show the row.
# Reference: http://apps.who.int/iris/bitstream/handle/10665/69159/WHO_IVB_05.15.pdf;jsessionid=36AECC863764153C468684BC59955A8D?sequence=1
# "Age adjusted case fatality rates from the literature were
# used and assigned to the mortality stratum; for children under 1 year of age CFR’s of
# 3.7 were used in high incidence countries and 0.2 for lower incidence countries."
mask_series = CFRs_df.disease_name.eq('Pertussis (whooping cough), infants in developing countries')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [0.2, 3.7]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Pertussis (whooping cough), infants in developing countries', 'disease_type': 'Bacterial', 'treatment_by': 'Unvaccinated', 'CFR': '≈3.7%', 'disease_notes': nan, 'references': '[9]:\u200a456', 'CFR_low': 0.2, 'CFR_high': 3.7, 'short_disease_name': 'Pertussis'}

In [11]:

# Hand-set the CFR bounds for childhood pertussis in developing countries and show the row.
# Reference: http://apps.who.int/iris/bitstream/handle/10665/69159/WHO_IVB_05.15.pdf;jsessionid=36AECC863764153C468684BC59955A8D?sequence=1
# "In low coverage countries, CFR in children 1–4 years of age was estimated at
# 1% [Crowcroft 2001]. Population size needed to estimate a 0.5%–2% CFR in children
# assuming an outbreak with an attack rate of 10% and that 15% of the
# population is under 5 years of age."
mask_series = CFRs_df.disease_name.eq('Pertussis (whooping cough), children in developing countries')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [0.5, 2]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Pertussis (whooping cough), children in developing countries', 'disease_type': 'Bacterial', 'treatment_by': 'Unvaccinated', 'CFR': '≈1%', 'disease_notes': 'For children 1–4 years old.', 'references': '[9]:\u200a456', 'CFR_low': 0.5, 'CFR_high': 2.0, 'short_disease_name': 'Pertussis'}

In [12]:

# Hand-set the CFR bounds for HIV-negative tuberculosis and show the resulting row.
# Reference: https://www.sciencedirect.com/science/article/pii/S1386653219301441
# "CFR was significantly higher in patients with HIV than in HIV-negative controls (OR 4.10, 95%CI: 2.63–6.27, I²: 93.7%).
# The risk was significantly higher among children ≤5 years (OR 5.51, 95%CI 2.83–10.74)
# compared to people aged >5 years (OR 1.48, 95%CI 1.17–1.89); p = 0.0002."
# NOTE(review): 1.17 and 1.89 match the odds-ratio confidence interval quoted
# above, not a CFR percentage (the scraped CFR text is '43%') — confirm these bounds.
mask_series = CFRs_df.disease_name.eq('Tuberculosis, HIV Negative')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [1.17, 1.89]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Tuberculosis, HIV Negative', 'disease_type': 'Bacterial', 'treatment_by': 'Vaccinated', 'CFR': '43%', 'disease_notes': 'Vaccines have been developed but have been frequently dismissed for having received controversial and improper testing on African populations.', 'references': '[23]', 'CFR_low': 1.17, 'CFR_high': 1.89, 'short_disease_name': 'Tuberculosis'}

In [13]:

# Hand-set the lower CFR bound for AIDS/HIV infection and show the resulting row;
# CFR_high is not touched here.
# Reference: https://www.news-medical.net/health/What-is-Case-Fatality-Rate-(CFR).aspx
# "HIV/AIDS – 80%"
mask_series = CFRs_df.disease_name.eq('AIDS/HIV infection')
CFRs_df.loc[mask_series, 'CFR_low'] = 80
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'AIDS/HIV infection', 'disease_type': 'Viral', 'treatment_by': 'Untreated', 'CFR': '90%', 'disease_notes': nan, 'references': '[9]:\u200a1', 'CFR_low': 80.0, 'CFR_high': 90.0, 'short_disease_name': 'HIV'}

In [14]:

# Hand-set the CFR bounds for SARS to the overall span of the quoted ratios (5% to 19%)
# and show the resulting row.
# Reference: https://www.cidrap.umn.edu/news-perspective/2003/05/estimates-sars-death-rates-revised-upward
# "One method the WHO used was to calculate the case-fatality ratio
# using only the cases whose final outcome was known.
# With this method, ratios ranged from 11% to 17% for Hong Kong,
# 13% to 15% for Singapore, 15% to 19% for Canada, and 5% to 13% for China."
mask_series = CFRs_df.disease_name.eq('Severe acute respiratory syndrome (SARS)')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [5, 19]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Severe acute respiratory syndrome (SARS)', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '11%', 'disease_notes': 'Galidesivir has shown promise in treating Coronaviridae.', 'references': '[31]', 'CFR_low': 5.0, 'CFR_high': 19.0, 'short_disease_name': 'SARS'}

In [15]:

# Hand-set the CFR bounds for hantavirus infection and show the resulting row.
# Reference: https://www.who.int/emergencies/disease-outbreak-news/item/20-february-2017-seoulvirus-usa-and-canada-en
# "The case fatality rate (CFR) among humans who develop HFRS due to Seoul virus ranges from 1-2%."
# NOTE(review): the quote covers HFRS due to Seoul virus only, while the scraped
# CFR text for this row is '36%' — confirm the 1–2 range is meant for the whole row.
mask_series = CFRs_df.disease_name.eq('Hantavirus infection')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [1, 2]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Hantavirus infection', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '36%', 'disease_notes': 'Ribavirin may be a drug for HPS and HFRS but its effectiveness remains unknown, still, spontaneous recovery is possible with supportive treatment.', 'references': nan, 'CFR_low': 1.0, 'CFR_high': 2.0, 'short_disease_name': 'Hantavirus'}

In [16]:

# Hand-set the CFR bounds for MERS and show the resulting row.
# Reference: https://idpjournal.biomedcentral.com/articles/10.1186/s40249-021-00853-0/tables/1
# (table column "Number of deaths (CFR, %)")
mask_series = CFRs_df.disease_name.eq('Middle Eastern Respiratory Syndrome (MERS)')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [16.1, 35.1]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Middle Eastern Respiratory Syndrome (MERS)', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '34%', 'disease_notes': 'Galidesivir has shown promise in treating Coronaviridae', 'references': '[25]', 'CFR_low': 16.1, 'CFR_high': 35.1, 'short_disease_name': 'MERS'}

In [17]:

# Hand-set the upper CFR bound for the Hong Kong flu and show the resulting row;
# CFR_low is not touched here.
# Reference: https://en.wikipedia.org/wiki/Hong_Kong_flu
# "The World Health Organization estimated the case fatality rate of Hong Kong flu to be lower than 0.2%."
mask_series = CFRs_df.disease_name.eq('Hong Kong (1968–69) flu')
CFRs_df.loc[mask_series, 'CFR_high'] = 0.2
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Hong Kong (1968–69) flu', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '≈0.1%', 'disease_notes': nan, 'references': '[52]', 'CFR_low': 0.10000000149011612, 'CFR_high': 0.2, 'short_disease_name': '1968 Flu'}

In [18]:

# Hand-set the CFR bounds for the Asian flu and show the resulting row.
# Reference: https://en.wikipedia.org/wiki/1957%E2%80%931958_influenza_pandemic
# "On the symposium of Asian influenza in 1958, a range of CFR from 0.01% to 0.33% was provided, most frequently in between 0.02% and 0.05%."
mask_series = CFRs_df.disease_name.eq('Asian (1956–58) flu')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [0.01, 0.33]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Asian (1956–58) flu', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '≈0.1%', 'disease_notes': nan, 'references': '[52]', 'CFR_low': 0.01, 'CFR_high': 0.33, 'short_disease_name': '1956 Flu'}

In [19]:

# Hand-set the CFR bounds for H5N1 influenza and show the resulting row.
# Reference: https://en.wikipedia.org/wiki/Influenza_A_virus_subtype_H5N1
# (wikitable "Confirmed human cases and mortality rate of avian influenza (H5N1) – 2003–2021")
mask_series = CFRs_df.disease_name.eq('Influenza A virus subtype H5N1')
CFRs_df.loc[mask_series, ['CFR_low', 'CFR_high']] = [10.3, 90.5]
CFRs_df.loc[mask_series].to_dict('records')[0]

{'disease_name': 'Influenza A virus subtype H5N1', 'disease_type': 'Viral', 'treatment_by': nan, 'CFR': '≈60%', 'disease_notes': nan, 'references': '[15]', 'CFR_low': 10.3, 'CFR_high': 90.5, 'short_disease_name': 'H5N1 Flu'}

In [20]:

# Save the hand-corrected table under the name 'CFRs_df' (the recorded run pickles it to CFRs_df.pkl)
s.store_objects(CFRs_df=CFRs_df)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\covid19\saves\pkl\CFRs_df.pkl



----

In [21]:

# Stand-alone check: reload the stored R0 and CFR tables and find the diseases
# whose low and high bounds are identical (i.e. no real range is recorded)
import sys
sys.path.insert(1, '../py')
from urllib.parse import quote_plus
from storage import Storage

s = Storage()
R0s_df = s.load_object('R0s_df')
CFRs_df = s.load_object('CFRs_df')

# Join the two tables on the short name
merged_df = R0s_df.merge(
    CFRs_df, on='short_disease_name', suffixes=['_R0', '_CFR']
).set_index('short_disease_name', drop=True)

# Explanatory variable (R0) and response variable (CFR) bound columns
ev_low_column_name, ev_high_column_name = 'R0_low', 'R0_high'
rv_low_column_name, rv_high_column_name = 'CFR_low', 'CFR_high'
columns_list = [ev_low_column_name, ev_high_column_name, rv_low_column_name, rv_high_column_name]

# Key the rows by the CFR table's disease name and keep only complete rows
df = merged_df.reset_index(drop=False).set_index('disease_name_CFR', drop=True)[columns_list].dropna()

# Zero width means the R0 bounds coincide; zero height means the CFR bounds coincide
deficient_cfrs_list = [
    name for name, low, high in zip(df.index, df['CFR_low'], df['CFR_high'])
    if high - low == 0
]
deficient_r0s_list = [
    name for name, low, high in zip(df.index, df['R0_low'], df['R0_high'])
    if high - low == 0
]

# Print a web-search URL for the first disease without a CFR range (only the first one)
if deficient_cfrs_list:
    deficient_cfr = deficient_cfrs_list[0]
    url_str = f'{deficient_cfr.lower()} cfr range'
    url_str = f'https://www.google.com/search?client=firefox-b-1-d&q={quote_plus(url_str)}'
    print(url_str)

In [22]:

# Show every disease name in the table as a plain list (e.g. to look up the exact label of a row)
CFRs_df.disease_name.tolist()

['Transmissible spongiform encephalopathies', 'Rabies', 'Sleeping Sickness', 'Visceral leishmaniasis', 'Granulomatous amoebic encephalitis', 'AIDS/HIV infection', 'Primary amoebic meningoencephalitis', 'Glanders, septicemic', 'Smallpox Variola major – specifically the malignant (flat) or hemorrhagic type', 'Anthrax, specifically the pulmonary form', 'Macanine alphaherpesvirus 1', 'Aspergillosis, invasive pulmonary form', 'Smallpox, Variola major – in pregnant women', 'Influenza A virus subtype H5N1', 'Mucormycosis (Black fungus)', 'Tularemia, pneumonic', 'Ebola virus disease – specifically EBOV', 'Marburg virus disease – all outbreaks combined', 'Cryptococcal meningitis', 'Anthrax, gastrointestinal, intestinal type', 'Plague, pneumonic', 'Tetanus, Generalized', 'Tuberculosis, HIV Negative', 'Plague, septicemic', 'Baylisascariasis', 'Hantavirus infection', 'Middle Eastern Respiratory Syndrome (MERS)', 'Eastern equine encephalitis virus', 'Bubonic plague', 'Anthrax, gastrointestinal, oro

In [23]:

# Show the scraped CFR text next to the numeric bounds for every row whose
# name mentions H5N1 (case-insensitive)
columns_list = ['disease_name', 'CFR', 'CFR_low', 'CFR_high']
mask_series = CFRs_df.disease_name.map(lambda name: 'h5n1' in name.lower())
CFRs_df.loc[mask_series, columns_list]

Unnamed: 0,disease_name,CFR,CFR_low,CFR_high
13,Influenza A virus subtype H5N1,≈60%,10.3,90.5
