In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm # colormaps
import ipywidgets as wdg
import datetime
import json

In [None]:
import os
# Show the kernel's current working directory in the cell output.
# NOTE(review): this looks like a leftover debugging aid (the relative 'data/' and
# 'resources/' paths used in this notebook are resolved against this directory) —
# confirm it is meant to be visible in the published dashboard.
display(os.getcwd())

In [None]:
# Inject dashboard-wide CSS by wrapping a <style> element in an HTML widget.
# The rules set the font family, link/strong colours and the two heading classes
# ("dash-title", "dash-subtitle") that the HTML widgets in this notebook refer to.
wdg.HTML("""\
<style>
html * {
    font-family: Nexa, Arial;
}

a {
    color: #764796;
}

a:hover {
  text-decoration: underline;
}

a:active {
  text-decoration: underline;
}

.dash-title {
    color: #764796;
    font-size: 3em;
    line-height: 150%;
}    

.dash-subtitle {
    color: #00AEB3;
}

strong {
    color: #764796;
}
</style>
""")


In [None]:
# Small logo, reused next to every section heading.
# The file is read inside a `with` block so the handle is closed as soon as the
# bytes have been copied into the widget (the bare open(...).read() leaked it).
with open('resources/mebo.png','rb') as fh:
    logo=wdg.Image(
        value=fh.read(),
        format='png',
        width=60,
        height=48
        )

# Full-size logo, placed in the 'logo' grid area of the page header.
with open('resources/mebomine.png','rb') as fh:
    full_logo=wdg.Image(
        value=fh.read(),
        format='png',
        layout=wdg.Layout(width="auto", grid_area='logo', margin='0 0 0 0', object_fit='contain')
        )

# Dashboard title, placed in the 'title' grid area.
title=wdg.HTML('<h1 class="dash-title">Covid-19 OHB Patient Concern and Comorbidity Watch</h1>',
               layout=wdg.Layout(grid_area='title', width="84%", margin="0 0 0 0"))


# Two-row header: the logo sits top-right, the title bottom-left.
# This stays the last expression of the cell so that it is rendered as its output.
wdg.GridBox(children=[full_logo, title],
            layout=wdg.Layout(
                width='100%',
                height='auto',
                grid_template_rows='auto auto',
                grid_template_columns='84% 15%',
                grid_template_areas='''
                ". logo"
                "title ."
                ''')
           )

In [None]:
# default figure size and resolution for every plot in the notebook
plt.rcParams['figure.figsize'] = [6, 4]
plt.rcParams['figure.dpi'] = 100 # 200 e.g. is really fine, but slower
# colour map shared by all plots; plt.get_cmap is used because matplotlib.cm.get_cmap
# was deprecated in Matplotlib 3.7 and removed in 3.9
cmap=plt.get_cmap('Set3')

In [None]:
# Shared widget styling: `style` is passed as style= to the controls below,
# `txtlayout` is the layout of the explanatory text shown next to a graph.
style=dict(description_width='initial')
txtlayout=wdg.Layout(margin='3em 0 0 0', width='50%')

In [None]:
# Load the dataset-wide totals used in the introduction.
# The encoding is given explicitly so that the JSON is decoded the same way on every
# platform instead of falling back to the locale's default encoding.
with open("data/dataset_totals.dash.json","rt",encoding="utf-8") as FILE:
    totals=json.load(FILE)

# Introductory paragraph; the totals are interpolated with thousands separators.
intro=("We survey three major multi-topic Online Health Boards (OHBs) based in the US and the UK "+
       "and scan them for threads and posts related to the Covid-19 pandemic. We map patient concerns "+
       "related to Covid symptoms, treatments and comorbidities, including mental health symptoms. "+
       f"Our data covers the period <strong>from {totals['startdate']} to {totals['enddate']}</strong> and includes "+
       f"<strong>{totals['posts']:,} posts</strong> from <strong>{totals['activeusers']:,} active users</strong>,"+
       f" organised in <strong>{totals['threads']:,} threads</strong>.")

# last expression of the cell: rendered as its output
wdg.HTML(intro)
       

In [None]:
# Section heading: small logo followed by the subtitle.
title=wdg.HTML(value='<h2 class="dash-subtitle">Active Covid-19 threads and posting behaviour</h2>')
wdg.HBox(children=[logo, title])

In [None]:
# Introductory paragraph for the posting-behaviour section.
text=wdg.HTML(value="""We track the number of posts about Covid, the numbers of thread 
started and the number of active Covid threads in the date range of interest. 
While a clear peak is visible in early March, interest remains consistent 
throughout the period. As we show, a sizeable proportion of users (from a 
peak of around 12% at the outset of the first wave down to around 5% in 
the later period) choose to start posting in a Covid thread.""")
display(text)

In [None]:
# Explanatory text displayed to the right of the post graph.
comment="""The number of posts containing references to Covid grows rapidly in early March and peaks around 
the beginning  of lockdown in most countries, but remains consistent throughout the period. The decline is 
less pronounced if we look at Covid related posts as a fraction of total posts, suggesting a general reduction
in posting activity rather than reduced interest for the topic."""

# Post statistics; the plotting callback reads the 'covid' and 'all' columns.
postdf=pd.read_json("data/post_statistics.dash.json")

# Toggle between absolute numbers and the fraction of all posts.
gtype=wdg.ToggleButtons(
    description='Covid posts: show as ',
    options=['Number', 'Percentage'],
    tooltips=['Weekly no. of Covid posts', 'Covid posts as a fraction of total'],
    style=style,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)

# Output area for the plot, stacked under the toggle; the text goes beside them.
postgraph=wdg.Output()
content=wdg.VBox(children=[gtype, postgraph])
entry=wdg.HBox(children=[content, wdg.HTML(value=comment, layout=txtlayout)])

def postGraph(gtype):
    """Redraw the Covid post curve inside the ``postgraph`` output widget.

    gtype -- selected toggle label. "Number" plots the 'covid' column of
             ``postdf`` as is; any other value plots it divided by the
             'all' column.
    """
    postgraph.clear_output(wait=True)
    with postgraph:
        series=postdf['covid'] if gtype=="Number" else postdf['covid'].div(postdf['all'])
        series.plot(cmap=cmap)
        plt.show()
    
# Call postGraph with the current toggle value and again on every change;
# the function draws into `postgraph`, which is part of `entry`.
wdg.interactive_output(postGraph, dict(gtype=gtype))

display(entry)



In [None]:
# Explanatory text displayed to the left of the thread graph.
comment="""Likewise the number of Covid threads started per week peaks around the time of lockdown, and then 
declines rapidly. Many of these threads are however very short (one or two posts). The decline in the number 
of active threads is however slower, indicating some consolidation of the discussion around major threads."""

# original note: add total number of posts and threads and users
threadsdf=pd.read_json("data/thread_statistics.dash.json")
active_threads=pd.read_json("data/active_threads.dash.json", typ='series')

# Toggle between threads started (absolute / fraction of all) and active threads.
gtype=wdg.ToggleButtons(
    description='Covid threads started: ',
    options=['Number', 'Percentage', 'Active'],
    tooltips=['No. of Covid threads started', 'Covid threads started as % of all threads', 'Active Covid threads'],
    style=style,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)

# Output area for the plot, stacked under the toggle; here the text goes first.
threadgraph=wdg.Output()
content=wdg.VBox(children=[gtype, threadgraph])
side_text=wdg.HTML(value=comment, layout=wdg.Layout(width='50%', margin='5em 0 0 0'))
entry=wdg.HBox(children=[side_text, content])

def threadGraph(gtype):
    """Redraw the Covid thread curve inside the ``threadgraph`` output widget.

    gtype -- selected toggle label. "Number" plots the 'covid' column of
             ``threadsdf``, "Percentage" plots it divided by the 'all'
             column, and any other value plots ``active_threads``.
    """
    threadgraph.clear_output(wait=True)
    with threadgraph:
        if gtype=="Number":
            series=threadsdf['covid']
        elif gtype=="Percentage":
            series=threadsdf['covid'].div(threadsdf['all'])
        else:
            series=active_threads
        series.plot(cmap=cmap)
        plt.show()
    
# Call threadGraph with the current toggle value and again on every change;
# the function draws into `threadgraph`, which is part of `entry`.
wdg.interactive_output(threadGraph, dict(gtype=gtype))

display(entry)


In [None]:
# Explanatory text displayed to the right of the first-post graph.
comment="""The number of users who write their first contribution in a Covid thread likewise shows a peak 
and then stabilises at around 5% of total. Especially during the early phase of the pandemic, these users may
have been "lurkers" who have been spurred into posting by the Covid emergency."""

# original note: add total number of posts and threads and users
firstpostdf=pd.read_json("data/first_post_thread_topic.dash.json")

# Toggle between absolute numbers and the fraction of all first-time posters.
gtype=wdg.ToggleButtons(
    description='New users posting on Covid: ',
    options=['Number', 'Percentage'],
    tooltips=['No. of 1st time posters writing about Covid',
              '% of 1st time posters writing about Covid'],
    style=style,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)

# Output area for the plot, stacked under the toggle; the text goes beside them.
firstpostgraph=wdg.Output()
content=wdg.VBox(children=[gtype, firstpostgraph])
entry=wdg.HBox(children=[content, wdg.HTML(value=comment, layout=txtlayout)])

def firstPostGraph(gtype):
    """Redraw the first-time-poster curve inside ``firstpostgraph``.

    gtype -- selected toggle label. "Number" plots the 'covid' column of
             ``firstpostdf`` as is; any other value plots it divided by
             the 'all' column.
    """
    firstpostgraph.clear_output(wait=True)
    with firstpostgraph:
        series=firstpostdf['covid'] if gtype=="Number" else firstpostdf['covid'].div(firstpostdf['all'])
        series.plot(cmap=cmap)
        plt.show()
    
# Call firstPostGraph with the current toggle value and again on every change;
# the function draws into `firstpostgraph`, which is part of `entry`.
wdg.interactive_output(firstPostGraph, dict(gtype=gtype))

display(entry)

In [None]:
# Section heading: small logo followed by the subtitle.
title=wdg.HTML(value='<h2 class="dash-subtitle">Threads about comorbid conditions</h2>')
wdg.HBox(children=[logo, title])

In [None]:
# Introductory paragraph for the comorbidity section.
txt="""We scan thread titles for signs that they are about Covid and broad categories of comorbid conditions.
We consider mental health problems, heart disease/stroke, diabetes, cancer, autoimmune diseases, and 
respiratory diseases. Use the interactive controls to explore this dataset. As can be seen, heart 
disease and stroke were frequently cited comorbidities in the early days of the pandemic; towards 
the end of the period we cover, the discussion of comorbidities shifts towards cancer, respiratory 
and autoimmune diseases. Mental health conditions remain important throughout the period."""

display(wdg.HTML(value=txt))

In [None]:
# Comorbidity tables for the pie chart: plain counts first, then the weighted variant.
comorbiditydf, comorbidityweighteddf = (
    pd.read_json(path)
    for path in ("data/thread_comorbidity_counts.dash.json",
                 "data/thread_comorbidity_weighted.dash.json")
)

In [None]:



# Conditions offered in the multi-select; all of them start out selected.
comorbidity_names=['mental', 'heart/stroke', 'diabetes', 'cancer', 'autoimmune', 'respiratory']
sectors=wdg.SelectMultiple(
    options=list(comorbidity_names),
    value=list(comorbidity_names),
    disabled=False
)

# (label, value) pairs for the range slider, one per row of the comorbidity table;
# the label is the index entry formatted as e.g. "Mar-05".
datestrings = [(stamp.strftime('%b-%d'), stamp) for stamp in comorbiditydf.index]
# NOTE(review): `tooltips` is an argument of ToggleButtons; it is unclear whether
# SelectionRangeSlider honours it — confirm against the installed ipywidgets version.
dates=wdg.SelectionRangeSlider(
    options=datestrings,
    index=(0, len(datestrings)-1), # start with the full range selected
    continuous_update=False,
    disabled=False,
    tooltips='Select date range for pie chart'
)

# Whether the 'other/none' category is added to the pie chart.
other=wdg.ToggleButton(
    value=False,
    icon='check', # (FontAwesome names without the `fa-` prefix)
    tooltip='Include threads mentioning any other comorbidity or none',
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)

# Selects which comorbidity table the pie chart reads.
weighting=wdg.RadioButtons(
    options=['one single unit', 'its length in posts'],
    value='one single unit', # initial selection
    disabled=False
)

# Layout of one row of the form: label on the left, control on the right.
form_item_layout = wdg.Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)

# Width (in slider positions) of the date window shown while the animation runs.
ANIMATION_WINDOW=30

# event handler for animation
def on_value_change(event):
    """Move the date-range slider in response to a change of the Play widget.

    event -- traitlets change dictionary; event['new'] is the new Play value,
             used as the start position of the window.

    A start of 0 selects the whole date range; any other start selects the
    window [start, start+ANIMATION_WINDOW], clamped to the last slider position.
    Assigning dates.index changes the slider value, and the pie chart is redrawn
    by the observer that interactive_output registers on `dates`, so no explicit
    pieChart call is made here (the previous explicit call drew every frame twice).
    """
    last=len(datestrings)-1
    start=event['new']
    stop=min(start+ANIMATION_WINDOW, last) if start>0 else last
    # run inside the pie chart's output widget so anything emitted while updating
    # the slider shows up there
    with pieplot:
        dates.index=(start, stop)

# The largest start position still leaves room for a full window; max(0, ...) keeps
# the Play widget valid when there are fewer dates than one window.
animate=wdg.Play(value=0, min=0, max=max(0, len(datestrings)-1-ANIMATION_WINDOW),
                 step=7, interval=1000, disabled=False)
animate.observe(on_value_change, names='value')

# One form row per control: (caption, widget), in display order.
form_rows = [
    ('Date range (2020)', dates),
    ('Animate date range', animate),
    ('Comorbid conditions', sectors),
    ('Include threads with other/no comorbidity', other),
    ('Count each thread as', weighting),
]
form_items = [
    wdg.Box([wdg.Label(value=caption), control], layout=form_item_layout)
    for caption, control in form_rows
]

# Column of form rows, taking the left half of the entry.
form = wdg.Box(
    children=form_items,
    layout=wdg.Layout(
        display='flex',
        flex_flow='column',
        align_items='stretch',
        width='50%',
        margin='2em 2em 0 0'
    )
)

# Fixed-height output area that the pie chart is drawn into.
pieplot=wdg.Output(
    layout=wdg.Layout(
        display='flex',
        flex_flow='column',
        align_items='stretch',
        height='22em'
    )
)

entry=wdg.HBox(children=[form, pieplot])

def pieChart(sectors,dates, other, weighting):
    """Draw the comorbidity pie chart inside the ``pieplot`` output widget.

    sectors   -- names of the selected conditions (columns of the data table)
    dates     -- pair (first, last) of index labels delimiting the rows to sum
    other     -- if true, the 'other/none' column is added to the selection
    weighting -- selected weighting label; labels containing "unit" use
                 ``comorbiditydf``, all others use ``comorbidityweighteddf``

    A hint is printed instead of a chart when at most one sector is selected
    or when the selected counts sum to zero.
    """
    datadf=comorbiditydf if "unit" in weighting else comorbidityweighteddf
    # column totals over the selected rows
    ccountsdf=datadf.loc[dates[0]:dates[1], :].sum()
    sectors=list(sectors) + (['other/none'] if other else [])
    selected=ccountsdf[sectors]
    labels=selected.index
    counts=selected.to_numpy()
    pieplot.clear_output(wait=True)
    with pieplot:
        if len(sectors)<=1:
            print ("\n\n\nUse CTRL-Click to select more than one condition")
            return
        if selected.sum()==0:
            print("\n\n\nNo data - please select a wider date range")
            return
        # one colour per sector, taken from the shared colour map
        colours = cmap(range(len(sectors)))
        plt.pie(counts, labels=labels, colors=colours)
        plt.show()


    
# Call pieChart with the current control values and again whenever one of them changes;
# the function draws into `pieplot`, which is part of `entry`.
wdg.interactive_output(
    pieChart,
    dict(sectors=sectors, dates=dates, other=other, weighting=weighting)
)
display(entry)


In [None]:
# Section heading: small logo followed by the subtitle.
title=wdg.HTML(value='<h2 class="dash-subtitle">Concerns of posters</h2>')
wdg.HBox(children=[logo, title])

In [None]:
# Introductory paragraph for the posters' concerns section.
txt="""We survey users who post in Covid threads throughout the period. We scan a three-week window 
centred around each post and check if the author has in that time posted anywhere on the OHB about ICU,
Covid symptoms, treatments such as remdesivir or the controversial hydroxychloroquine, the use of masks,
vaccines, but also mental health symptoms such as anxiety, worrying, depression.  Mental health symptoms
are predominant across the entire period, but especially towards the beginning.  Mentions of symptoms,
the use of masks and the search for a vaccine are also common topics. In early May, about 10% of posters
express concerns about intensive care, while the discussion of hydroxychloroquine and the (less mediatic)
remdesivir remains limited."""

wdg.HTML(value=txt)

In [None]:
# Concerns table; the plotting callback reads one column per concern plus 'author'.
concernsdf=pd.read_json(path_or_buf="data/covid_poster_concerns.dash.json")

In [None]:
# Concerns offered in the multi-select; all of them start out selected.
concern_names=['mental health', 'intensive care', 'symptoms', 'masks', 'vaccine', 'quinine', 'remdesivir']
concerns=wdg.SelectMultiple(
    description="Posters' concerns",
    options=list(concern_names),
    value=list(concern_names),
    style=style,
    disabled=False
)

# Display name -> column name, for the entries where the two differ;
# names missing from this table are used as column names unchanged.
ttable=dict([
    ('mental health', 'mental'),
    ('intensive care', 'intensive'),
    ('symptoms', 'symptom'),
    ('masks', 'mask'),
])

# Toggle between absolute numbers and the fraction of posters.
gtype=wdg.ToggleButtons(
    description='Show posts as: ',
    options=['Number', 'Percentage'],
    tooltips=['Number of posters mentioning concern', 'Posters mentioning concern as fraction of total'],
    style=style,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)

# Controls stacked on the right, plot output on the left.
form=wdg.VBox(children=[concerns, gtype], layout=wdg.Layout(margin='2em 0 0 0'))

concernsplot=wdg.Output()

entry=wdg.HBox(children=[concernsplot, form])


# plotting callback for the concerns section (absolute numbers or fraction of posters)
def concernsGraph(concerns, gtype):
    """Redraw the concerns curves inside the ``concernsplot`` output widget.

    concerns -- selected display names; each is translated through ``ttable``
                (or used unchanged if absent) to a column of ``concernsdf``
    gtype    -- selected toggle label. 'Number' plots the columns as is; any
                other value plots them divided row-wise by the 'author' column.

    A hint is printed instead of a plot when nothing is selected.
    """
    columns=[ttable[name] if name in ttable else name for name in concerns]
    concernsplot.clear_output(wait=True)
    with concernsplot:
        if not columns:
            print("\n\nClick on a list item to select a concern ")
            print("(CTRL-Click to select more than one)\t")
            return
        selected=concernsdf[columns]
        if gtype!='Number':
            selected=selected.div(concernsdf['author'], axis=0)
        selected.plot(cmap=cmap)
        plt.show()
        

# Call concernsGraph with the current control values and again whenever one changes;
# the function draws into `concernsplot`, which is part of `entry`.
wdg.interactive_output(concernsGraph, dict(concerns=concerns, gtype=gtype))
display(entry)


In [None]:
# Bibliography: heading first, then the reference list as the cell's output.
title=wdg.HTML(value='<h2 class="dash-subtitle">Bibliography</h2>')
display(title)
wdg.HTML(value="""<ul>
<li>Rashmi Patel¹, Fabrizio Smeraldi¹, Maryam Abdollahyan, Jessica Irving, Conrad Bessant: 
Analysis of mental and physical disorders associated with COVID-19 in online health forums: a natural 
language processing study, BMJ Open 2021;11:e056601,  
<a href="https://bmjopen.bmj.com/content/11/11/e056601">doi: 10.1136/bmjopen-2021-056601</a>.</li>
<li>Rashmi Patel¹, Fabrizio Smeraldi¹, Maryam Abdollahyan, Jessica Irving, Conrad Bessant: 
Investigating mental and physical disorders associated with COVID-19 in online health forums, medRxiv, 
December 2020, 
<a href="https://doi.org/10.1101/2020.12.14.20248155">doi: 10.1101/2020.12.14.20248155v1</a>.
</li>
</ul>
(¹ Joint first authors)""")

In [None]:
# Contact: heading first, then the contact note as the cell's output.
title=wdg.HTML(value='<h2 class="dash-subtitle">Contact</h2>')
display(title)
wdg.HTML(value="""For more information or comments about this dashboard, 
         feel free to contact us through the 
         <a href="https://mebomine.com/">Mebomine website</a>.""")

In [None]:
# Acknowledgements: heading first, then the lead-in sentence as the cell's output.
title=wdg.HTML(value='<h2 class="dash-subtitle">Acknowledgements</h2>')
display(title)
wdg.HTML(value="""We gratefully acknowledge the support of:""")

In [None]:
# Layout shared by all supporter logos.
logolayout=wdg.Layout(margin='0 16px 0 0',
                      object_fit="contain") # preserve aspect ratio

def _sponsor_logo(filename, width):
    """Return a PNG image widget for the file resources/<filename>.

    filename -- name of the image file inside the 'resources' directory
    width    -- value for the widget's width (e.g. "8%")

    The file is read inside a `with` block so its handle is closed right away;
    the previous open(...).read() calls left one handle open per logo.
    """
    with open('resources/'+filename,'rb') as fh:
        return wdg.Image(
            value=fh.read(),
            format='png',
            width=width,
            layout=logolayout
            )

capai=_sponsor_logo('cap-ai.png', "8%")
ati=_sponsor_logo('alanturing.png', "8%")
azure=_sponsor_logo('azure.png', "8%")
epsrc=_sponsor_logo('epsrc.png', "8%")
# the two Queen Mary logos are given a larger width than the others
qmw=_sponsor_logo('qmul.png', "12%")
qmi=_sponsor_logo('qmi.png', "12%")
kings=_sponsor_logo('kcl.png', "8%")
nhs=_sponsor_logo('nhs.png', "8%")

# single row of logos, in display order
ack=wdg.HBox([ati, azure, capai, epsrc, kings, nhs, qmi, qmw])
display(ack)

In [None]:
# Footer credit with a mailto link, rendered as the cell's output.
wdg.HTML(value="""Designed and developed by 
                    <a href="mailto:fabrizio@mebomine.com">Fabrizio Smeraldi</a>.""")