In [1]:
import requests
import json
import os
import sqlalchemy
import pandas as pd
import mdapi_functions as md
from IPython.display import display, Markdown
import markdown
from bokeh.plotting import figure, show
from bokeh.models import (Span, TabPanel, Tabs, ColumnDataSource, DataCube,
                          GroupingInfo, StringFormatter, SumAggregator,
                          TableColumn, HoverTool)
from bokeh.io import output_notebook
from math import pi
from datetime import datetime
import datacite_api_functions as dcf
import numpy as np
import data_doc_helper as dh

In [23]:
# Build the project helper object for the "IAPT" dataset and keep its
# `dataset` attribute. The following cell uses `dataset` to pick matching
# table names and to fill in the table caption.
# NOTE(review): `dataset` is presumably the dataset name as a string
# (the rendered caption shows "IAPT") - confirm against data_doc_helper.
ds = dh.NHSEDataSet("IAPT")
dataset = ds.dataset

In [26]:
# Render the per-cohort participant-count table for `dataset` as a Bokeh
# DataCube grouped by table name. MHSDS only gets a notice instead of a table.
if dataset in ["MHSDS"]:
    display(Markdown("Cohort counts for the {} dataset will be issued in due course.".format(dataset)))

else:
    output_notebook(hide_banner=True)

    # Metadata rows for NHSD-sourced tables whose name starts with the dataset
    # name, reduced to one row per table.
    df = md.get_md_api_dsvs()
    df = df[(df["source"] == "nhsd") & (df["table"].str.startswith(dataset))].drop_duplicates(subset="table")

    # This table is explicitly left out of the counts.
    df = df[df["table"] != "CSDS_group_sessions"]

    # For the HES datasets, narrow the prefix match above to the single table
    # whose name equals the dataset name.
    if dataset in ["HESAPC", "HESOP", "HESAE"]:
        df = df[df["table"] == dataset]

    # Parallel, row-aligned lists feeding the ColumnDataSource:
    # one entry per (table, cohort) pair.
    tbl_names = []
    cohort_labels = []
    participant_counts = []
    metrics_tables = []

    for table_name in df["table"]:
        dfcc = md.get_nhse_cohort_counts(table_name)
        # .copy() gives an independent frame, so the column assignment below
        # does not write into a filtered slice (avoids SettingWithCopyWarning).
        dfcc = dfcc[~dfcc["cohort"].isin(["GENSCOT", "NICOLA", "SABRE"])].copy()
        # Suppressed counts arrive as the string "<10"; store them as NaN so the
        # column is numeric. The table formatter renders NaN back as "<10".
        dfcc["count"] = dfcc["count"].replace("<10", np.nan).astype(float)
        metrics_tables.append(dfcc)

        # Accumulate here rather than concatenating afterwards, so the cell
        # still works when no tables match (pd.concat raises on an empty list).
        tbl_names += len(dfcc) * [table_name]
        cohort_labels += dfcc["cohort"].to_list()
        participant_counts += dfcc["count"].to_list()

    source = ColumnDataSource(data=dict(
        d0=tbl_names,
        d1=cohort_labels,
        px=participant_counts,
    ))

    # Empty source passed to the DataCube as its `target`.
    target = ColumnDataSource(data=dict(row_indices=[], labels=[]))

    formatter = StringFormatter(font_style='bold')

    columns = [
        TableColumn(field='d1', title='{} Dataset'.format(dataset), width=80, sortable=False, formatter=formatter),
        TableColumn(field='px', title='Participant Count', width=40, sortable=False, formatter=StringFormatter(text_align='right', nan_format='<10')),
    ]

    # Group rows by table name (d0) and sum the participant counts per group.
    grouping = [
        GroupingInfo(getter='d0', aggregators=[SumAggregator(field_='px')]),
    ]

    cube = DataCube(source=source, columns=columns, grouping=grouping, target=target)
    display(Markdown("Click on the plus sign to see the number of participants represented in each dataset."))
    display(Markdown("**Table 2:** Participants from each LPS represented in the {} dataset in the UK LLC TRE. **Note:** Individual cohort counts of less than 10 are suppressed to 0 and are therefore excluded from total participant counts for datasets".format(dataset)))
    show(cube)

Click on the plus sign to see the number of participants represented in each dataset.

**Table 2:** Participants from each LPS represented in the IAPT dataset in the UK LLC TRE. **Note:** Individual cohort counts of less than 10 are suppressed to 0 and are therefore excluded from total participant counts for datasets