Display a dual-axis bar/line chart
- X axis: months
- Y axis (bars): monthly publication counts
- Y axis (line): monthly COVID-19 cases



In [None]:
import pandas as pd
import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load both raw tables from disk; row 0 of each CSV supplies the column names.
# The first path becomes papersDF and the second becomes covidDF.
papersDF, covidDF = (
    pd.read_csv(csv_path, header=0)
    for csv_path in ('data/Papers.csv', 'data/covid-19-all.csv')
)

Aggregate dataframes

Papers
- Month
- Number published in month

Covid
- Month
- Total confirmed cases in month

In [None]:
# Convert the date columns from their CSV text form into pandas datetimes so
# they can be compared against a cutoff and grouped by calendar month later on.
papersDF = papersDF.assign(
    Date=lambda frame: pd.to_datetime(frame['Date']),
    CreatedDate=lambda frame: pd.to_datetime(frame['CreatedDate']),
)
covidDF = covidDF.assign(Date=lambda frame: pd.to_datetime(frame['Date']))

In [None]:
# Keep only publications dated on or before 30 Sep 2020 (drops the most recent
# and any future-dated entries), then order the rows chronologically.
papersDF = (
    papersDF
    .loc[lambda frame: frame['Date'] <= datetime.datetime(2020, 9, 30)]
    .sort_values(by='Date')
)
# Preview the latest rows that survived the cutoff.
papersDF.tail()


In [None]:
# Apply the same 30 Sep 2020 cutoff to the case data so both series end in the
# same month, then order the rows chronologically.
covidDF = (
    covidDF
    .loc[lambda frame: frame['Date'] <= datetime.datetime(2020, 9, 30)]
    .sort_values(by='Date')
)
# Preview the latest rows that survived the cutoff.
covidDF.tail()

In [None]:
# Narrow the papers table to the two columns the monthly aggregation needs:
# the paper identifier (to be counted) and its date (to be grouped on).
papersAggDF = papersDF.loc[:, ['PaperId', 'Date']]
papersAggDF.head()

In [None]:
# Narrow the case table to the value column and the date to group on,
# replacing any missing entries with 0.
covidAggDF = covidDF.loc[:, ['Confirmed', 'Date']].fillna(value=0)
covidAggDF.head()

In [None]:
# Count publications per calendar month (rows are bucketed by month-end date),
# then add a 'Month' label of the form '<month>/<year>' (no zero padding) that
# the chart uses for its x axis.
monthlyPublishTotalsDF = (
    papersAggDF
    .groupby(pd.Grouper(key='Date', freq='M'))
    .count()
    .rename(columns={'PaperId': 'Paper Count'})
    .assign(
        Month=lambda totals: totals.index.month.astype(str) + '/' + totals.index.year.astype(str)
    )
)
monthlyPublishTotalsDF.head(30)

In [None]:
# Total confirmed cases per calendar month.
# Use sum(), not count(): count() returns the number of rows that fall in each
# month, which is not a case figure, whereas the chart labels this series
# 'Confirmed COVID19 cases'.
# NOTE(review): this assumes each row's 'Confirmed' value is additive. If the
# source file reports running (cumulative) totals, a different aggregation is
# needed - confirm against the data source.
monthlyCovidTotalsDF = covidAggDF.groupby(pd.Grouper(key='Date', freq='M')).sum()
# Label of the form '<month>/<year>' (no zero padding), used as the x axis and
# matching the label format used for the publication counts.
monthlyCovidTotalsDF['Month'] = monthlyCovidTotalsDF.index.month.astype(str) + '/' + monthlyCovidTotalsDF.index.year.astype(str)
monthlyCovidTotalsDF.head(10)

Use Plotly to create the chart
- X axis: months
- Y axis (bars): monthly publication counts
- Y axis (line): monthly COVID-19 cases

In [None]:
# Build a single-panel figure with a secondary y axis so the two series, which
# are on very different scales, can share the same month axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Bars on the primary (left) y axis: publications per month.
fig.add_trace(
    go.Bar(
        name='Unique publications',
        x=monthlyPublishTotalsDF['Month'],
        y=monthlyPublishTotalsDF['Paper Count']
    ),
    secondary_y=False
)

# Line on the secondary (right) y axis: COVID-19 figure per month.
fig.add_trace(
    go.Scatter(
        name='Confirmed COVID19 cases',
        x=monthlyCovidTotalsDF['Month'],
        y=monthlyCovidTotalsDF['Confirmed'],
        mode='lines'
    ),
    secondary_y=True
)

# Layout: title the figure and every axis so the chart can be read on its own
# and each trace can be matched to its scale.
fig.update_layout(title='Global publications and COVID19 cases by month')
fig.update_xaxes(title_text='Month')
fig.update_yaxes(title_text='Unique publications', secondary_y=False)
fig.update_yaxes(title_text='Confirmed COVID19 cases', secondary_y=True)

fig.show()