## Import Libraries & Setup

In [1]:
import os
from functools import reduce
from sqlalchemy import create_engine
import psycopg2
import numpy as np
import pandas as pd
import datetime as dt
import warnings
# Keep notebook output readable: hide library warnings and let pandas show
# wide/long frames without truncating columns, rows or cell text.
warnings.filterwarnings('ignore')
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, 999)

In [7]:
# Report parameters (note: all the timestamps stored in the database are in the UTC timezone).
# datetime.utcnow() is deprecated since Python 3.12, so take an aware UTC "now" and strip
# the tzinfo: `today` stays a naive UTC datetime, exactly as it was before.
today = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
# The report is based on the previous day's data; formatted as 'YYYY-MM-DD'.
yday = (today - dt.timedelta(days=1)).strftime('%Y-%m-%d')

## Credentials & Connecting (PDR PostgreSQL)

In [65]:
# Insert your username into the textbox and hit enter.
# Surrounding whitespace (e.g. from a copy/paste) is stripped so it cannot end up
# inside the connection URL that is built from this value.
username = input("Please enter your username: ").strip()

Please enter your username: username


In [64]:
# Insert your password into the textbox and hit enter.
# getpass() is used instead of input() so the password is neither echoed while
# typing nor saved in the notebook's cell output.
from getpass import getpass

password = getpass("Please enter your password: ")

Please enter your password: password


In [5]:
# Engine for the PostgreSQL database reached at localhost:7005 (database "drc").
# The credentials are percent-encoded before being placed in the URL: characters
# such as '@', ':' or '/' in a password would otherwise be parsed as URL syntax.
from urllib.parse import quote_plus

db_postgres = create_engine(
    'postgresql://' + quote_plus(username) + ':' + quote_plus(password) + '@localhost:7005/drc')

## Data

In [None]:
# Load the PEO overview rows for the report date (yday) into `data`.
# The date is bound through `params`, never formatted into the SQL text.
# (A second read_sql call without `params` used to follow here; it re-ran the query
# with the %s placeholder unbound and overwrote the correct result, so it was removed.)
query = 'select * from pdr.fn_report_peo_overview_v3_2(%s)'
args = [yday]
data = pd.read_sql(query, db_postgres, params=args)
data.head()

## Participant Enrollment Overview (PEO) - Tab 1

### Enrollment Status (non-overlapping)
      - REGISTERED = 0
      - PARTICIPANT = 1
      - FULLY_CONSENTED = 2 (PARTICIPANT PLUS EHR)
      - ENROLLED_PARTICIPANT = 3
      - CORE_MINUS_PM = 4
      - CORE_PARTICIPANT = 5
      - PMB_ELIGIBLE = 6

In [14]:
# Tab 1 headline totals: distinct participants per organization type and awardee.
tab1_keys = ['organization_type', 'awardee_name']

# Every participant_id present in the extract is counted as registered.
Total_Registered = (
    data.groupby(tab1_keys)['participant_id']
    .nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Registered Individuals'})
    .sort_values(tab1_keys)
)

# Only rows flagged total_participants_consented == 1 are counted as participants.
consented_rows = data.loc[data['total_participants_consented'] == 1]
Total_Participants_Consented = (
    consented_rows.groupby(tab1_keys)['participant_id']
    .nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Participants'})
    .sort_values(tab1_keys)
)

total_df = pd.merge(Total_Registered, Total_Participants_Consented, on=tab1_keys, how='left')

# Place both columns under an empty top-level header.
total_df.columns = pd.MultiIndex.from_tuples([('', label) for label in total_df.columns])

# Totals row, then relabel its blank index entry and fix the organization-type order.
total_df.loc['Overall', :] = total_df.sum().values
total_df = total_df.rename(index={'': 'All Awardees'})
total_df = total_df.reindex(['Overall', 'HPO', 'FQHC', 'DV', 'VA', 'HPO-Lite', 'UNSET'],
                            level='organization_type')
pd.set_option('display.float_format', '{:,.0f}'.format)

In [19]:
# ---- Tab 1: enrollment-status counts per (organization_type, awardee_name) ----
# Each pivot counts distinct participant_id values per status value.
# Status columns are picked with reindex() rather than [...] selection: a status that
# no participant currently holds then yields an empty column (filled with 0 below)
# instead of raising a KeyError.
peo_keys = ['organization_type', 'awardee_name']
peo_statuses = ['REGISTERED', 'PARTICIPANT', 'CORE_MINUS_PM',
                'CORE_PARTICIPANT', 'ENROLLED_PARTICIPANT', 'PMB_ELIGIBLE']
peo_tally_statuses = ['CORE_MINUS_PM', 'CORE_PARTICIPANT']

# enrollment from yesterday
enrollment_df = data.pivot_table(values='participant_id', index=peo_keys,
                                 columns='enrollment_status', aggfunc='nunique')
enrollment_df = enrollment_df.reindex(columns=peo_statuses).rename(
    columns={'REGISTERED': 'Registered Individuals', 'PARTICIPANT': 'Participants',
             'CORE_MINUS_PM': 'Total Core -PM',
             'CORE_PARTICIPANT': 'Total Core Participant',
             'ENROLLED_PARTICIPANT': 'Enrolled Participants',
             'PMB_ELIGIBLE': 'PM&B Eligible'})

# enrollment since previous report run
enroll_sinceLast_rp_df = data.pivot_table(values='participant_id', index=peo_keys,
                                          columns='enrollment_status_since_last_report',
                                          aggfunc='nunique')
enroll_sinceLast_rp_df = enroll_sinceLast_rp_df.reindex(columns=peo_statuses).rename(
    columns={'REGISTERED': 'Registered_prev', 'PARTICIPANT': 'Participant_prev',
             'CORE_MINUS_PM': 'Core -PM_prev',
             'CORE_PARTICIPANT': 'Core Participant_prev',
             'ENROLLED_PARTICIPANT': 'Enrolled_prev',
             'PMB_ELIGIBLE': 'PMB_prev'})

# weekly tally (only the two core statuses are used further down)
weekly_tally_df = data.pivot_table(values='participant_id', index=peo_keys,
                                   columns='weekly_tally', aggfunc='nunique')
weekly_tally_df = weekly_tally_df.reindex(columns=peo_tally_statuses).rename(
    columns={'CORE_MINUS_PM': 'CPM_prev_week', 'CORE_PARTICIPANT': 'CP_prev_week'})

# previous weekly tally
prev_weekly_tally_df = data.pivot_table(values='participant_id', index=peo_keys,
                                        columns='prev_weekly_tally', aggfunc='nunique')
prev_weekly_tally_df = prev_weekly_tally_df.reindex(columns=peo_tally_statuses).rename(
    columns={'CORE_MINUS_PM': 'CPM_prev_two_week', 'CORE_PARTICIPANT': 'CP_prev_two_week'})

# merge (left joins keep every awardee present in the current enrollment pivot)
enroll_full_df = reduce(lambda x, y: pd.merge(x, y, on=peo_keys, how='left'),
                        [enrollment_df, enroll_sinceLast_rp_df, weekly_tally_df, prev_weekly_tally_df])
# Totals row. NOTE(review): the rename of '' below implies the enlarged row gets an
# empty awardee label — confirm against the rendered table.
enroll_full_df.loc['Overall', :] = enroll_full_df.sum().values
enroll_full_df = enroll_full_df.rename(index={'': 'All Awardees'})
enroll_full_df = enroll_full_df.fillna(0)
pd.set_option('display.float_format', '{:,.0f}'.format)

# Delta columns: (new column, minuend, subtrahend)
for delta_col, minuend, subtrahend in [
        ('Registered Since Last Report', 'Registered Individuals', 'Registered_prev'),
        ('Participant Since Last Report', 'Participants', 'Participant_prev'),
        ('Enrolled Since Last Report', 'Enrolled Participants', 'Enrolled_prev'),
        ('PMB Since Last Report', 'PM&B Eligible', 'PMB_prev'),
        ('Since Last Report', 'Total Core -PM', 'Core -PM_prev'),
        ('CP Since Last Report', 'Total Core Participant', 'Core Participant_prev'),
        ('Weekly tally', 'Total Core -PM', 'CPM_prev_week'),
        ('CP Weekly Tally', 'Total Core Participant', 'CP_prev_week'),
        ('Last CP Weekly Tally', 'CP_prev_week', 'CP_prev_two_week')]:
    enroll_full_df[delta_col] = enroll_full_df[minuend] - enroll_full_df[subtrahend]

# Keep only the report columns, in display order; this also discards every helper
# (*_prev / *_week) column, so no separate drop() is needed.
enroll_full_df = enroll_full_df[
    ['Registered Individuals', 'Registered Since Last Report', 'Participants', 'Participant Since Last Report',
     'Enrolled Participants', 'Enrolled Since Last Report',
     'PM&B Eligible', 'PMB Since Last Report', 'Total Core -PM', 'Since Last Report', 'Weekly tally',
     'Total Core Participant', 'CP Since Last Report', 'CP Weekly Tally', 'Last CP Weekly Tally']]

# Group every column under the section header.
enroll_full_df.columns = pd.MultiIndex.from_tuples(
    [('Enrollment Status (non-overlapping)', label) for label in enroll_full_df.columns])

###  UBR of Core Participants
   - Overall	
   - Racial Identity	
   - Age	
   - Sex	
   - SGM	
   - Income
   - Education	
   - Geography
   - Disability
   - Healthcare Access & Utilization


In [21]:
def _tab1_core_ubr_counts(flag_column, label):
    """Count distinct CORE_PARTICIPANT ids whose `flag_column` equals 1.

    Returns a one-column frame named `label`, indexed and sorted by
    (organization_type, awardee_name). Reads the notebook-level `data` frame.
    """
    keys = ['organization_type', 'awardee_name']
    flagged = data.loc[(data['enrollment_status'] == 'CORE_PARTICIPANT') & (data[flag_column] == 1)]
    counts = flagged.groupby(keys)['participant_id'].nunique().to_frame()
    return counts.rename(columns={'participant_id': label}).sort_values(keys)


# One frame per UBR flag column, restricted to core participants.
ubr_overall_c = _tab1_core_ubr_counts('ubr_overall', 'Overall')
ubr_ethnicity_c = _tab1_core_ubr_counts('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_c = _tab1_core_ubr_counts('ubr_age_at_consent', 'Age')
ubr_sex_c = _tab1_core_ubr_counts('ubr_sex', 'Sex')
ubr_sexual_gender_minority_c = _tab1_core_ubr_counts('ubr_sexual_gender_minority', 'SGM')
ubr_income_c = _tab1_core_ubr_counts('ubr_income', 'Income')
ubr_education_c = _tab1_core_ubr_counts('ubr_education', 'Education')
ubr_geography_c = _tab1_core_ubr_counts('ubr_geography', 'Geography')
ubr_disability_c = _tab1_core_ubr_counts('ubr_disability', 'Disability')
ubr_healthcare_access_c = _tab1_core_ubr_counts('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [22]:
# UBR share of core participants per awardee.
# Total_Registered only serves as the left-most frame so that every awardee gets a row;
# its own column is dropped again right after the joins.
ubr_core_keys = ['organization_type', 'awardee_name']
ubr_core_df = Total_Registered
for ubr_core_part in [ubr_overall_c, ubr_ethnicity_c, ubr_age_at_consent_c, ubr_sex_c,
                      ubr_sexual_gender_minority_c, ubr_income_c, ubr_education_c, ubr_geography_c,
                      ubr_disability_c, ubr_healthcare_access_c]:
    ubr_core_df = pd.merge(ubr_core_df, ubr_core_part, on=ubr_core_keys, how='left')
ubr_core_df = ubr_core_df.drop(columns=['Total Registered Individuals'])

# Totals row, with its blank index entry relabelled.
ubr_core_df.loc['Overall', :] = ubr_core_df.sum().values
ubr_core_df = ubr_core_df.rename(index={'': 'All Awardees'})

# Turn counts into shares by dividing each row by its core-participant total.
core_participant_totals = enroll_full_df[
    ('Enrollment Status (non-overlapping)', 'Total Core Participant')].astype(float)
ubr_core_df = ubr_core_df.div(core_participant_totals, axis=0)
ubr_core_df = ubr_core_df.fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# Group every column under the section header.
ubr_core_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Core Participants', label) for label in ubr_core_df.columns])

### UBR of Enrolled Participants
   - Overall	
   - Racial Identity	
   - Age	
   - Sex	
   - SGM	
   - Income
   - Education	
   - Geography
   - Disability
   - Healthcare Access & Utilization

In [23]:
def _tab1_enrolled_ubr_counts(flag_column, label):
    """Count distinct ENROLLED_PARTICIPANT ids whose `flag_column` equals 1.

    Returns a one-column frame named `label`, indexed and sorted by
    (organization_type, awardee_name). Reads the notebook-level `data` frame.
    """
    keys = ['organization_type', 'awardee_name']
    flagged = data.loc[(data['enrollment_status'] == 'ENROLLED_PARTICIPANT') & (data[flag_column] == 1)]
    counts = flagged.groupby(keys)['participant_id'].nunique().to_frame()
    return counts.rename(columns={'participant_id': label}).sort_values(keys)


# One frame per UBR flag column, restricted to enrolled participants
# (the `_cpm` suffix is kept because later cells refer to these names).
ubr_overall_cpm = _tab1_enrolled_ubr_counts('ubr_overall', 'Overall')
ubr_ethnicity_cpm = _tab1_enrolled_ubr_counts('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_cpm = _tab1_enrolled_ubr_counts('ubr_age_at_consent', 'Age')
ubr_sex_cpm = _tab1_enrolled_ubr_counts('ubr_sex', 'Sex')
ubr_sexual_gender_minority_cpm = _tab1_enrolled_ubr_counts('ubr_sexual_gender_minority', 'SGM')
ubr_income_cpm = _tab1_enrolled_ubr_counts('ubr_income', 'Income')
ubr_education_cpm = _tab1_enrolled_ubr_counts('ubr_education', 'Education')
ubr_geography_cpm = _tab1_enrolled_ubr_counts('ubr_geography', 'Geography')
ubr_disability_cpm = _tab1_enrolled_ubr_counts('ubr_disability', 'Disability')
ubr_healthcare_access_cpm = _tab1_enrolled_ubr_counts('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [24]:
# UBR share of enrolled participants per awardee.
# Total_Registered only serves as the left-most frame so that every awardee gets a row;
# its own column is dropped again right after the joins.
ubr_enrolled_keys = ['organization_type', 'awardee_name']
ubr_enrolled_df = Total_Registered
for ubr_enrolled_part in [ubr_overall_cpm, ubr_ethnicity_cpm, ubr_age_at_consent_cpm,
                          ubr_sex_cpm, ubr_sexual_gender_minority_cpm, ubr_income_cpm, ubr_education_cpm,
                          ubr_geography_cpm, ubr_disability_cpm, ubr_healthcare_access_cpm]:
    ubr_enrolled_df = pd.merge(ubr_enrolled_df, ubr_enrolled_part, on=ubr_enrolled_keys, how='left')
ubr_enrolled_df = ubr_enrolled_df.drop(columns=['Total Registered Individuals'])

# Totals row, with its blank index entry relabelled.
ubr_enrolled_df.loc['Overall', :] = ubr_enrolled_df.sum().values
ubr_enrolled_df = ubr_enrolled_df.rename(index={'': 'All Awardees'})

# Turn counts into shares by dividing each row by its enrolled-participant total.
enrolled_participant_totals = enroll_full_df[
    ('Enrollment Status (non-overlapping)', 'Enrolled Participants')].astype(float)
ubr_enrolled_df = ubr_enrolled_df.div(enrolled_participant_totals, axis=0)
ubr_enrolled_df = ubr_enrolled_df.fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# Group every column under the section header.
ubr_enrolled_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Enrolled Participants', label) for label in ubr_enrolled_df.columns])

In [25]:
# Both UBR sections side by side: core-participant shares first, enrolled second.
ubr_dfs = ubr_core_df.merge(ubr_enrolled_df, on=['organization_type', 'awardee_name'], how='left')

###  Gender Identity/Racial Identity/Age

In [27]:
# Load the gender / race / age rows for the report date (yday) into `demog`.
args = [yday]
query = 'select * from pdr.fn_report_peo_gender_race_age(%s)'
demog = pd.read_sql(sql=query, con=db_postgres, params=args)
demog.head()

Unnamed: 0,participant_id,organization_type,awardee_name,organization_name,site_name,gender_identity,race_ethnicity,age_group
0,100002184,HPO,Southern Consortium,University of Mississippi Medical Center,"University of Mississippi Medical Center Main, Jackson",Man,White,18-24
1,100005046,HPO,New York,New York Presbyterian Columbia University,Columbia University Irving Medical Center (CUIMC),Woman,Black or African American,55-64
2,100005674,DV,ExamOne,ExamOne,ExamOne_MEA3,Woman,Asian,25-34
3,100006166,HPO,Arizona,Banner Health,Banner Boswell Medical Center,Woman,"Hispanic, Latino, or Spanish",45-54
4,100007606,DV,San Diego Blood Bank,DV San Diego Blood Bank,San Diego Blood Bank Gateway Donor Center (SDBB DV),Woman,"Hispanic, Latino, or Spanish",65-74


In [28]:
# One pivot per demographic attribute: distinct participants per awardee (rows)
# and per category value (columns).
gender_df, race_df, age_df = (
    demog.pivot_table(values='participant_id', index=['organization_type', 'awardee_name'],
                      columns=demog_attribute, aggfunc='nunique').reset_index()
    for demog_attribute in ('gender_identity', 'race_ethnicity', 'age_group')
)

In [29]:
def _tab1_demog_section(frame, header, categories=None):
    """Shape one demographic pivot into a report section.

    frame      -- pivot with 'organization_type' and 'awardee_name' as ordinary columns.
    header     -- top-level column label for the section.
    categories -- optional ordered list of category columns to keep. Categories that are
                  missing from `frame` are added as empty columns (reindex) instead of
                  raising a KeyError, so a category with no participants cannot break
                  the report. When None, all columns of `frame` are kept as they are.

    Returns the frame indexed by (organization_type, awardee_name), with two-level
    columns (header, category) and an appended 'Overall' totals row.
    """
    section = frame.set_index(['organization_type', 'awardee_name'])
    if categories is not None:
        section = section.reindex(columns=categories)
    section.columns = pd.MultiIndex.from_tuples([(header, label) for label in section.columns])
    # Totals row; its blank index entry is relabelled 'All Awardees'.
    section.loc['Overall', :] = section.sum().values
    return section.rename(index={'': 'All Awardees'})


# Gender
gender_df = _tab1_demog_section(
    gender_df, 'Gender Identity',
    ['Man', 'Non-Binary', 'Other/Addl. Options', 'Transgender', 'Woman',
     'Multiple Selections', 'Skipped', 'Prefer not to say'])

# Race
race_df = _tab1_demog_section(
    race_df, 'Racial Identity',
    ['Asian', 'Black and Hispanic, Latino, or Spanish',
     'Black or African American', 'Hispanic, Latino, or Spanish', 'Middle Eastern or North African',
     'More than one race', 'More than one race and Hispanic, Latino, or Spanish',
     'Native Hawaiian or other Pacific Islander', 'One other race and Hispanic, Latino, or Spanish',
     'Other race', 'Prefer not to say', 'White', 'White and Hispanic, Latino, or Spanish', 'Skipped'])

# Age (all age groups present in the data are kept, in pivot order)
age_df = _tab1_demog_section(age_df, 'Age')

In [30]:
# All demographics: gender, then race, then age, joined on the awardee index.
demog_keys = ['organization_type', 'awardee_name']
demog_df = gender_df
for demog_part in (race_df, age_df):
    demog_df = pd.merge(demog_df, demog_part, on=demog_keys, how='left')
# Categories nobody selected at an awardee show as 0 rather than blank.
demog_df = demog_df.fillna(0)
pd.set_option('display.float_format', '{:,.0f}'.format)

### Combined dfs

In [31]:
# Join the four Tab 1 sections left-to-right on the awardee index.
tab1_section_keys = ['organization_type', 'awardee_name']
all_dfs_tab1 = total_df
for tab1_section in (enroll_full_df, ubr_dfs, demog_df):
    all_dfs_tab1 = pd.merge(all_dfs_tab1, tab1_section, on=tab1_section_keys, how='left')

all_dfs_tab1.index = all_dfs_tab1.index.set_names(['Type', 'Awardee'])

# Short display labels for organization types and awardees.
tab1_index_labels = {
    'HPO': 'RMC',
    'UNSET': 'UNPAIRED',
    'No organization set': 'Unpaired',
    'United States Department of Veteran Affairs': 'VA',
    'Cherokee Health Systems': 'Cherokee',
    'Community Health Center, Inc': 'Community Health Center',
    'Eau Claire Cooperative Health Center': 'Eau Claire',
    'Hudson River Health Care, Inc.': 'HRHCare',
    'Jackson-Hinds Comprehensive Health Center': 'Jackson-Hinds',
    'San Ysidro Health Center': 'San Ysidro',
    'California Precision Medicine Consortium': 'California',
    'New England Precision Medicine Consortium': 'New England',
    'Pittsburgh': 'PITT',
    'Southern Consortium': 'Southern',
    'Trans-American Consortium for the Health Care Systems Research Network (TACH)': 'Trans-America',
    'University of Texas Health Science Center at Houston': 'UT_HEALTH',
    'Virginia Commonwealth University': 'VCU',
    'Washington University in St. Louis': 'WASH U',
    'Wisconsin Consortium': 'Wisconsin',
    'Quest Labs': 'Quest',
}
all_dfs_tab1 = all_dfs_tab1.rename(index=tab1_index_labels)

In [32]:
# Organize and rename some columns: both "... Since Last Report" variants are shown
# under the same short label.
tab1_final = all_dfs_tab1.rename(
    columns={'Registered Since Last Report': 'Since Last Report', 'Participant Since Last Report': 'Since Last Report'})
tab1_final.index = tab1_final.index.rename(['Type', 'Awardee / Organization / Sites'])

# The 0-17 age bucket has no data, so add it as an all-zero column in front of the
# other 'Age' columns. The position is looked up instead of hard-coded (it used to be
# the literal 59), so it stays right when the number of columns before 'Age' changes;
# the insert is skipped if the bucket already exists, where insert() would raise.
age_0_17 = ('Age', '0-17')
if age_0_17 not in tab1_final.columns:
    section_headers = [col[0] if isinstance(col, tuple) else col for col in tab1_final.columns]
    age_position = section_headers.index('Age') if 'Age' in section_headers else len(section_headers)
    tab1_final.insert(loc=age_position, column=age_0_17, value=0)

## Participant Enrollment Overview (PEO) - Tab 2 (Including Orgs & Sites)

### Organizations

#### Enrollment Status (non-overlapping)

In [33]:
# Organization-level working copy of `data`; rows without an organization are labelled
# 'zUnpaired' (NOTE(review): the leading 'z' presumably makes them sort last — confirm).
data_org = data.assign(organization_name=data['organization_name'].fillna('zUnpaired'))

In [34]:
# Tab 2 headline totals: distinct participants per organization type, awardee and organization.
tab2_keys = ['organization_type', 'awardee_name', 'organization_name']

# Every participant_id present in the extract is counted as registered.
Total_Registered = (
    data_org.groupby(tab2_keys)['participant_id']
    .nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Registered Individuals'})
    .sort_values(tab2_keys)
    .astype(float)
)

# Only rows flagged total_participants_consented == 1 are counted as participants.
consented_org_rows = data_org.loc[data_org['total_participants_consented'] == 1]
Total_Participants_Consented = (
    consented_org_rows.groupby(tab2_keys)['participant_id']
    .nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Participants'})
    .sort_values(tab2_keys)
    .astype(float)
)

total_df = pd.merge(Total_Registered, Total_Participants_Consented, on=tab2_keys, how='left')

# Place both columns under an empty top-level header.
total_df.columns = pd.MultiIndex.from_tuples([('', label) for label in total_df.columns])

# Fix the display order of the organization types.
total_df = total_df.reindex(['Overall', 'HPO', 'FQHC', 'DV', 'VA', 'HPO-Lite', 'UNSET'],
                            level='organization_type')
pd.set_option('display.float_format', '{:,.0f}'.format)

In [35]:
# ---- Tab 2: enrollment-status counts per (organization_type, awardee_name, organization_name) ----
# Each pivot counts distinct participant_id values per status value.
# Status columns are picked with reindex() rather than [...] selection: a status that
# no participant currently holds then yields an empty column (filled with 0 below)
# instead of raising a KeyError.
org_peo_keys = ['organization_type', 'awardee_name', 'organization_name']
org_peo_statuses = ['REGISTERED', 'PARTICIPANT', 'CORE_MINUS_PM',
                    'CORE_PARTICIPANT', 'ENROLLED_PARTICIPANT', 'PMB_ELIGIBLE']
org_peo_tally_statuses = ['CORE_MINUS_PM', 'CORE_PARTICIPANT']

# enrollment from yesterday
enrollment_df = data_org.pivot_table(values='participant_id', index=org_peo_keys,
                                     columns='enrollment_status', aggfunc='nunique')
enrollment_df = enrollment_df.reindex(columns=org_peo_statuses).rename(
    columns={'REGISTERED': 'Registered Individuals', 'PARTICIPANT': 'Participants',
             'CORE_MINUS_PM': 'Total Core -PM',
             'CORE_PARTICIPANT': 'Total Core Participant',
             'ENROLLED_PARTICIPANT': 'Enrolled Participants',
             'PMB_ELIGIBLE': 'PM&B Eligible'})

# enrollment since previous report run
enroll_sinceLast_rp_df = data_org.pivot_table(values='participant_id', index=org_peo_keys,
                                              columns='enrollment_status_since_last_report',
                                              aggfunc='nunique')
enroll_sinceLast_rp_df = enroll_sinceLast_rp_df.reindex(columns=org_peo_statuses).rename(
    columns={'REGISTERED': 'Registered_prev', 'PARTICIPANT': 'Participant_prev',
             'CORE_MINUS_PM': 'Core -PM_prev',
             'CORE_PARTICIPANT': 'Core Participant_prev',
             'ENROLLED_PARTICIPANT': 'Enrolled_prev',
             'PMB_ELIGIBLE': 'PMB_prev'})

# weekly tally (only the two core statuses are used further down)
weekly_tally_df = data_org.pivot_table(values='participant_id', index=org_peo_keys,
                                       columns='weekly_tally', aggfunc='nunique')
weekly_tally_df = weekly_tally_df.reindex(columns=org_peo_tally_statuses).rename(
    columns={'CORE_MINUS_PM': 'CPM_prev_week', 'CORE_PARTICIPANT': 'CP_prev_week'})

# previous weekly tally
prev_weekly_tally_df = data_org.pivot_table(values='participant_id', index=org_peo_keys,
                                            columns='prev_weekly_tally', aggfunc='nunique')
prev_weekly_tally_df = prev_weekly_tally_df.reindex(columns=org_peo_tally_statuses).rename(
    columns={'CORE_MINUS_PM': 'CPM_prev_two_week', 'CORE_PARTICIPANT': 'CP_prev_two_week'})

# merge (left joins keep every organization present in the current enrollment pivot)
enroll_full_df = reduce(
    lambda x, y: pd.merge(x, y, on=org_peo_keys, how='left'),
    [enrollment_df, enroll_sinceLast_rp_df, weekly_tally_df, prev_weekly_tally_df])
# Totals row. NOTE(review): the rename of '' below implies the enlarged row gets empty
# awardee / organization labels — confirm against the rendered table.
enroll_full_df.loc['Overall', :] = enroll_full_df.sum().values
enroll_full_df = enroll_full_df.rename(index={'': 'All Awardees'})
enroll_full_df = enroll_full_df.fillna(0)
pd.set_option('display.float_format', '{:,.0f}'.format)

# Delta columns: (new column, minuend, subtrahend)
for delta_col, minuend, subtrahend in [
        ('Registered Since Last Report', 'Registered Individuals', 'Registered_prev'),
        ('Participant Since Last Report', 'Participants', 'Participant_prev'),
        ('Enrolled Since Last Report', 'Enrolled Participants', 'Enrolled_prev'),
        ('PMB Since Last Report', 'PM&B Eligible', 'PMB_prev'),
        ('Since Last Report', 'Total Core -PM', 'Core -PM_prev'),
        ('CP Since Last Report', 'Total Core Participant', 'Core Participant_prev'),
        ('Weekly tally', 'Total Core -PM', 'CPM_prev_week'),
        ('CP Weekly Tally', 'Total Core Participant', 'CP_prev_week'),
        ('Last CP Weekly Tally', 'CP_prev_week', 'CP_prev_two_week')]:
    enroll_full_df[delta_col] = enroll_full_df[minuend] - enroll_full_df[subtrahend]

# Keep only the report columns, in display order; this also discards every helper
# (*_prev / *_week) column, so no separate drop() is needed.
enroll_full_df = enroll_full_df[
    ['Registered Individuals', 'Registered Since Last Report', 'Participants', 'Participant Since Last Report',
     'Enrolled Participants', 'Enrolled Since Last Report',
     'PM&B Eligible', 'PMB Since Last Report', 'Total Core -PM', 'Since Last Report',
     'Weekly tally', 'Total Core Participant', 'CP Since Last Report', 'CP Weekly Tally', 'Last CP Weekly Tally']]

# Group every column under the section header.
enroll_full_df.columns = pd.MultiIndex.from_tuples(
    [('Enrollment Status (non-overlapping)', label) for label in enroll_full_df.columns])

#### UBR of Core Participants

In [36]:
def _tab2_core_ubr_counts(flag_column, label):
    """Count distinct CORE_PARTICIPANT ids whose `flag_column` equals 1.

    Returns a one-column frame named `label`, indexed and sorted by
    (organization_type, awardee_name, organization_name). Reads the
    notebook-level `data_org` frame.
    """
    keys = ['organization_type', 'awardee_name', 'organization_name']
    flagged = data_org.loc[
        (data_org['enrollment_status'] == 'CORE_PARTICIPANT') & (data_org[flag_column] == 1)]
    counts = flagged.groupby(keys)['participant_id'].nunique().to_frame()
    return counts.rename(columns={'participant_id': label}).sort_values(keys)


# One frame per UBR flag column, restricted to core participants.
ubr_overall_c = _tab2_core_ubr_counts('ubr_overall', 'Overall')
ubr_ethnicity_c = _tab2_core_ubr_counts('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_c = _tab2_core_ubr_counts('ubr_age_at_consent', 'Age')
ubr_sex_c = _tab2_core_ubr_counts('ubr_sex', 'Sex')
ubr_sexual_gender_minority_c = _tab2_core_ubr_counts('ubr_sexual_gender_minority', 'SGM')
ubr_income_c = _tab2_core_ubr_counts('ubr_income', 'Income')
ubr_education_c = _tab2_core_ubr_counts('ubr_education', 'Education')
ubr_geography_c = _tab2_core_ubr_counts('ubr_geography', 'Geography')
ubr_disability_c = _tab2_core_ubr_counts('ubr_disability', 'Disability')
ubr_healthcare_access_c = _tab2_core_ubr_counts('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [37]:
# Organization-level "UBR of Core Participants": each category count divided by
# the organization's 'Total Core Participant' figure.
_org_keys = ['organization_type', 'awardee_name', 'organization_name']
_core_count_frames = [ubr_overall_c, ubr_ethnicity_c, ubr_age_at_consent_c, ubr_sex_c,
                      ubr_sexual_gender_minority_c, ubr_income_c, ubr_education_c, ubr_geography_c,
                      ubr_disability_c, ubr_healthcare_access_c]

# Total_Registered is the left-most frame of the join chain; its own column is
# removed again once every count frame has been left-joined onto it.
ubr_core_df = Total_Registered
for _counts in _core_count_frames:
    ubr_core_df = pd.merge(ubr_core_df, _counts, on=_org_keys, how='left')
ubr_core_df = ubr_core_df.drop(columns=['Total Registered Individuals'])

# Row-wise division (axis=0 aligns on the index); missing ratios become 0.
_core_totals = enroll_full_df[('Enrollment Status (non-overlapping)', 'Total Core Participant')].astype(float)
ubr_core_df = ubr_core_df.div(_core_totals, axis=0).fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# Nest every category under the 'UBR of Core Participants' column group.
ubr_core_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Core Participants', category) for category in ubr_core_df.columns])

#### UBR of Enrolled Participants

In [38]:
def _org_enrolled_ubr_count(flag_column, label):
    """Count distinct ENROLLED_PARTICIPANT ids per organization where `flag_column` equals 1.

    Returns a one-column frame named `label`, indexed by
    (organization_type, awardee_name, organization_name).
    """
    org_keys = ['organization_type', 'awardee_name', 'organization_name']
    enrolled_rows = data_org.loc[data_org['enrollment_status'] == 'ENROLLED_PARTICIPANT']
    flagged_rows = enrolled_rows.loc[enrolled_rows[flag_column] == 1]
    distinct_ids = flagged_rows.groupby(org_keys)['participant_id'].nunique()
    return distinct_ids.to_frame().rename(columns={'participant_id': label}).sort_values(org_keys)


# One count frame per UBR category (Enrolled Participants, organization level).
ubr_overall_cpm = _org_enrolled_ubr_count('ubr_overall', 'Overall')
ubr_ethnicity_cpm = _org_enrolled_ubr_count('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_cpm = _org_enrolled_ubr_count('ubr_age_at_consent', 'Age')
ubr_sex_cpm = _org_enrolled_ubr_count('ubr_sex', 'Sex')
ubr_sexual_gender_minority_cpm = _org_enrolled_ubr_count('ubr_sexual_gender_minority', 'SGM')
ubr_income_cpm = _org_enrolled_ubr_count('ubr_income', 'Income')
ubr_education_cpm = _org_enrolled_ubr_count('ubr_education', 'Education')
ubr_geography_cpm = _org_enrolled_ubr_count('ubr_geography', 'Geography')
ubr_disability_cpm = _org_enrolled_ubr_count('ubr_disability', 'Disability')
ubr_healthcare_access_cpm = _org_enrolled_ubr_count('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [39]:
# Organization-level "UBR of Enrolled Participants": each category count divided
# by the organization's 'Enrolled Participants' figure.
_org_keys = ['organization_type', 'awardee_name', 'organization_name']
ubr_enrolled_df = reduce(
    lambda left, right: pd.merge(left, right, on=_org_keys, how='left'),
    [Total_Registered, ubr_overall_cpm, ubr_ethnicity_cpm, ubr_age_at_consent_cpm, ubr_sex_cpm,
     ubr_sexual_gender_minority_cpm, ubr_income_cpm, ubr_education_cpm, ubr_geography_cpm, ubr_disability_cpm,
     ubr_healthcare_access_cpm])
# Total_Registered only anchors the left joins; its own column is not a UBR metric.
ubr_enrolled_df = ubr_enrolled_df.drop(['Total Registered Individuals'], axis=1)

# Row-wise division (axis=0 aligns on the index); missing ratios become 0.
ubr_enrolled_df = ubr_enrolled_df.div(
    enroll_full_df[('Enrollment Status (non-overlapping)', 'Enrolled Participants')].astype(float), axis=0)
ubr_enrolled_df = ubr_enrolled_df.fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# The group label must be 'UBR of Enrolled Participants' -- the same label the
# site-level table uses -- so that the organization and site tables share
# columns when they are concatenated into tab2_all. The previous label
# ('UBR of Core -PM Participants') did not match the denominator used above.
# NOTE(review): confirm all_dfs_tab1 uses the same group label.
ubr_enrolled_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Enrolled Participants', category) for category in ubr_enrolled_df.columns])

In [40]:
# Both UBR metrics side by side: core percentages on the left, enrolled
# percentages left-joined on the organization keys.
ubr_dfs = pd.merge(ubr_core_df, ubr_enrolled_df,
                   on=['organization_type', 'awardee_name', 'organization_name'],
                   how='left')

#### Gender Identity/Racial Identity/Age

In [41]:
# Organization-level working copy of the demographics. Rows without an
# organization get the placeholder 'zUnpaired' (renamed to 'Unpaired' when the
# final table is assembled).
demog_org = demog.assign(organization_name=demog['organization_name'].fillna('zUnpaired'))

In [42]:
def _org_demog_pivot(category_column):
    """Distinct participant counts per organization, one column per value of `category_column`."""
    wide = demog_org.pivot_table(values='participant_id',
                                 index=['organization_type', 'awardee_name', 'organization_name'],
                                 columns=category_column,
                                 aggfunc='nunique')
    return wide.reset_index()


gender_df = _org_demog_pivot('gender_identity')
race_df = _org_demog_pivot('race_ethnicity')
age_df = _org_demog_pivot('age_group')

In [43]:
_org_keys = ['organization_type', 'awardee_name', 'organization_name']


def _label_org_demog(frame, group, ordered_columns=None):
    """Index `frame` by the organization keys and nest its columns under `group`.

    When `ordered_columns` is given, only those value columns are kept, in that
    order (a missing column raises KeyError).
    """
    if ordered_columns is not None:
        frame = frame[_org_keys + ordered_columns]
    frame = frame.set_index(_org_keys)
    frame.columns = pd.MultiIndex.from_tuples([(group, value) for value in frame.columns])
    return frame


# Gender
gender_df = _label_org_demog(
    gender_df, 'Gender Identity',
    ['Man', 'Non-Binary', 'Other/Addl. Options', 'Transgender', 'Woman', 'Multiple Selections', 'Skipped',
     'Prefer not to say'])

# Race
race_df = _label_org_demog(
    race_df, 'Racial Identity',
    ['Asian', 'Black and Hispanic, Latino, or Spanish', 'Black or African American',
     'Hispanic, Latino, or Spanish', 'Middle Eastern or North African', 'More than one race',
     'More than one race and Hispanic, Latino, or Spanish', 'Native Hawaiian or other Pacific Islander',
     'One other race and Hispanic, Latino, or Spanish', 'Other race', 'Prefer not to say', 'White',
     'White and Hispanic, Latino, or Spanish', 'Skipped'])

# Age (keeps whatever age-group columns the pivot produced, in their existing order)
age_df = _label_org_demog(age_df, 'Age')

In [44]:
# All Demographics: gender on the left, race and age left-joined on the
# organization keys; empty cells are reported as 0.
_org_keys = ['organization_type', 'awardee_name', 'organization_name']
demog_df = (gender_df
            .merge(race_df, on=_org_keys, how='left')
            .merge(age_df, on=_org_keys, how='left')
            .fillna(0))
pd.set_option('display.float_format', '{:,.0f}'.format)

#### Combined dfs

In [45]:
# Organization-level report table: totals, enrollment, UBR and demographics
# left-joined in that order, with empty cells reported as 0.
_org_keys = ['organization_type', 'awardee_name', 'organization_name']
all_dfs_tab2_orgs = total_df
for _section in (enroll_full_df, ubr_dfs, demog_df):
    all_dfs_tab2_orgs = pd.merge(all_dfs_tab2_orgs, _section, on=_org_keys, how='left')
all_dfs_tab2_orgs = all_dfs_tab2_orgs.fillna(0)

# Display names for the index levels.
all_dfs_tab2_orgs.index = all_dfs_tab2_orgs.index.set_names(['Type', 'Awardee', 'Organization'])

# Short display labels for index values.
_display_labels = {
    'HPO': 'RMC',
    'UNSET': 'UNPAIRED',
    'No organization set': 'Unpaired',
    'United States Department of Veteran Affairs': 'VA',
    'Cherokee Health Systems': 'Cherokee',
    'Community Health Center, Inc': 'Community Health Center',
    'Eau Claire Cooperative Health Center': 'Eau Claire',
    'Hudson River Health Care, Inc.': 'HRHCare',
    'Jackson-Hinds Comprehensive Health Center': 'Jackson-Hinds',
    'San Ysidro Health Center': 'San Ysidro',
    'California Precision Medicine Consortium': 'California',
    'New England Precision Medicine Consortium': 'New England',
    'Pittsburgh': 'PITT',
    'Southern Consortium': 'Southern',
    'Trans-American Consortium for the Health Care Systems Research Network (TACH)': 'Trans-America',
    'University of Texas Health Science Center at Houston': 'UT_HEALTH',
    'Virginia Commonwealth University': 'VCU',
    'Washington University in St. Louis': 'WASH U',
    'Wisconsin Consortium': 'Wisconsin',
    'Quest Labs': 'Quest',
}
all_dfs_tab2_orgs = all_dfs_tab2_orgs.rename(index=_display_labels)

### Sites

#### Enrollment Status (non-overlapping)

In [46]:
# Site-level working copy of the report data. Rows without a site get the
# placeholder 'zUnpaired' (renamed to 'Unpaired' when the final table is assembled).
data_site = data.assign(site_name=data['site_name'].fillna('zUnpaired'))

In [47]:
# Site-level headline totals: distinct registered individuals and distinct
# consented participants per (organization_type, awardee, organization, site).
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']

Total_Registered = (
    data_site.groupby(_site_keys)['participant_id'].nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Registered Individuals'})
    .sort_values(_site_keys)
    .astype(float))

# The mask is taken from data_site itself (previously it came from `data` and
# only selected the right rows because the two frames share an index).
Total_Participants_Consented = (
    data_site.loc[data_site['total_participants_consented'] == 1]
    .groupby(_site_keys)['participant_id'].nunique()
    .to_frame()
    .rename(columns={'participant_id': 'Total Participants'})
    .sort_values(_site_keys)
    .astype(float))

# Keep every site that has registrations, even when it has no consented participants.
total_df = pd.merge(Total_Registered, Total_Participants_Consented, on=_site_keys, how='left')

# Totals sit under an empty top-level column group.
total_df.columns = pd.MultiIndex.from_tuples([('', c) for c in total_df.columns])

# Fixed presentation order of organization types.
total_df = total_df.reindex(['Overall', 'HPO', 'FQHC', 'DV', 'VA', 'HPO-Lite', 'UNSET'], level='organization_type')
pd.set_option('display.float_format', '{:,.0f}'.format)

In [48]:
# Site-level enrollment table. Four pivots of distinct participant counts
# (current status, status at the previous report, and two weekly snapshots) are
# joined, then "since last report" / weekly deltas are derived from them.

# Current enrollment status: distinct participants per site and status.
enrollment_df = data_site.pivot_table(values='participant_id',
                                      index=['organization_type', 'awardee_name', 'organization_name', 'site_name'],
                                      columns='enrollment_status', aggfunc='nunique').reset_index()
# Keep only the reported statuses (KeyError if one is absent from the data).
enrollment_df = enrollment_df[
    ['organization_type', 'awardee_name', 'organization_name', 'site_name', 'REGISTERED', 'PARTICIPANT',
     'CORE_MINUS_PM', 'CORE_PARTICIPANT', 'ENROLLED_PARTICIPANT', 'PMB_ELIGIBLE']]
enrollment_df = enrollment_df.rename(
    columns={'REGISTERED': 'Registered Individuals', 'PARTICIPANT': 'Participants',
             'CORE_MINUS_PM': 'Total Core -PM',
             'CORE_PARTICIPANT': 'Total Core Participant',
             'ENROLLED_PARTICIPANT': 'Enrolled Participants',
             'PMB_ELIGIBLE': 'PM&B Eligible'}).set_index(
    ['organization_type', 'awardee_name', 'organization_name', 'site_name'])

# Same pivot on the status recorded in 'enrollment_status_since_last_report';
# columns get a '_prev' suffix so they can be subtracted from the current ones.
enroll_sinceLast_rp_df = data_site.pivot_table(values='participant_id',
                                               index=['organization_type', 'awardee_name', 'organization_name',
                                                      'site_name'], columns='enrollment_status_since_last_report',
                                               aggfunc='nunique').reset_index()
enroll_sinceLast_rp_df = enroll_sinceLast_rp_df[
    ['organization_type', 'awardee_name', 'organization_name', 'site_name', 'REGISTERED', 'PARTICIPANT',
     'CORE_MINUS_PM', 'CORE_PARTICIPANT', 'ENROLLED_PARTICIPANT', 'PMB_ELIGIBLE']]
enroll_sinceLast_rp_df = enroll_sinceLast_rp_df.rename(
    columns={'REGISTERED': 'Registered_prev', 'PARTICIPANT': 'Participant_prev',
             'CORE_MINUS_PM': 'Core -PM_prev',
             'CORE_PARTICIPANT': 'Core Participant_prev',
             'ENROLLED_PARTICIPANT': 'Enrolled_prev',
             'PMB_ELIGIBLE': 'PMB_prev'}).set_index(
    ['organization_type', 'awardee_name', 'organization_name', 'site_name'])

# Pivot on the 'weekly_tally' status; only the two core columns are renamed.
weekly_tally_df = data_site.pivot_table(values='participant_id',
                                        index=['organization_type', 'awardee_name', 'organization_name', 'site_name'],
                                        columns='weekly_tally', aggfunc='nunique')
weekly_tally_df = weekly_tally_df.rename(columns={'CORE_MINUS_PM': 'CPM_prev_week', 'CORE_PARTICIPANT': 'CP_prev_week'})

# Pivot on the 'prev_weekly_tally' status (the snapshot before 'weekly_tally').
prev_weekly_tally_df = data_site.pivot_table(values='participant_id',
                                        index=['organization_type', 'awardee_name', 'organization_name', 'site_name'],
                                        columns='prev_weekly_tally', aggfunc='nunique')
prev_weekly_tally_df = prev_weekly_tally_df.rename(columns={'CORE_MINUS_PM': 'CPM_prev_two_week', 'CORE_PARTICIPANT': 'CP_prev_two_week'})

# Left-join the snapshots onto the current counts, keyed by the four site keys.
enroll_full_df = reduce(
    lambda x, y: pd.merge(x, y, on=['organization_type', 'awardee_name', 'organization_name', 'site_name'], how='left'),
    [enrollment_df, enroll_sinceLast_rp_df, weekly_tally_df, prev_weekly_tally_df])
# Store the column sums under the label 'Overall'.
# NOTE(review): the index has four levels here; confirm how pandas expands the
# single label 'Overall' and that the rename of '' below targets the result.
enroll_full_df.loc['Overall', :] = enroll_full_df.sum().values
enroll_full_df = enroll_full_df.rename(index={'': 'All Awardees'})
# Statuses with no participants are NaN after the pivots/joins; treat them as 0
# so the subtractions below are well defined.
enroll_full_df = enroll_full_df.fillna(0)
pd.set_option('display.float_format', '{:,.0f}'.format)

# Deltas: current count minus the corresponding earlier snapshot.
enroll_full_df['Registered Since Last Report'] = enroll_full_df['Registered Individuals'] - enroll_full_df[
    'Registered_prev']
enroll_full_df['Participant Since Last Report'] = enroll_full_df['Participants'] - enroll_full_df[
    'Participant_prev']
enroll_full_df['Enrolled Since Last Report'] = enroll_full_df['Enrolled Participants'] - enroll_full_df[
    'Enrolled_prev']
enroll_full_df['PMB Since Last Report'] = enroll_full_df['PM&B Eligible'] - enroll_full_df[
    'PMB_prev']
# 'Since Last Report' and 'Weekly tally' (no prefix) both refer to Core -PM.
enroll_full_df['Since Last Report'] = enroll_full_df['Total Core -PM'] - enroll_full_df['Core -PM_prev']
enroll_full_df['CP Since Last Report'] = enroll_full_df['Total Core Participant'] - enroll_full_df[
    'Core Participant_prev']
enroll_full_df['Weekly tally'] = enroll_full_df['Total Core -PM'] - enroll_full_df['CPM_prev_week']
enroll_full_df['CP Weekly Tally'] = enroll_full_df['Total Core Participant'] - enroll_full_df['CP_prev_week']
enroll_full_df['Last CP Weekly Tally'] = enroll_full_df['CP_prev_week'] - enroll_full_df['CP_prev_two_week']
# Drop some helper columns; the explicit selection that follows removes every
# remaining column that is not listed there.
enroll_full_df = enroll_full_df.drop(
    ['Registered_prev', 'Participant_prev', 'Core -PM_prev', 'Core Participant_prev',
     'CPM_prev_week', 'CP_prev_week'], axis=1)
# Final column set and order of the enrollment section.
# NOTE(review): 'Participants' and 'Enrolled Since Last Report' are computed
# above but not selected, and 'Participant Since Last Report' follows
# 'Enrolled Participants' -- confirm this pairing is intended.
enroll_full_df = enroll_full_df[
    ['Registered Individuals', 'Registered Since Last Report', 'Enrolled Participants', 'Participant Since Last Report',
     'PM&B Eligible', 'PMB Since Last Report',
     'Total Core -PM', 'Since Last Report',
     'Weekly tally', 'Total Core Participant', 'CP Since Last Report', 'CP Weekly Tally', 'Last CP Weekly Tally']]

# Nest every column under the 'Enrollment Status (non-overlapping)' group:
# first rename each flat label to a (group, label) tuple ...
for c in enroll_full_df.columns:
    enroll_full_df = enroll_full_df.rename(columns={c: ('Enrollment Status (non-overlapping)', c)})

# ... then turn the tuples into a two-level MultiIndex.
enroll_full_df.columns = pd.MultiIndex.from_tuples(tuple(enroll_full_df.columns))

####  UBR of Core Participants

In [49]:
def _site_core_ubr_count(flag_column, label):
    """Count distinct CORE_PARTICIPANT ids per site where `flag_column` equals 1.

    Returns a one-column frame named `label`, indexed by
    (organization_type, awardee_name, organization_name, site_name).
    """
    site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
    core_rows = data_site.loc[data_site['enrollment_status'] == 'CORE_PARTICIPANT']
    flagged_rows = core_rows.loc[core_rows[flag_column] == 1]
    distinct_ids = flagged_rows.groupby(site_keys)['participant_id'].nunique()
    return distinct_ids.to_frame().rename(columns={'participant_id': label}).sort_values(site_keys)


# One count frame per UBR category (Core Participants, site level).
ubr_overall_c = _site_core_ubr_count('ubr_overall', 'Overall')
ubr_ethnicity_c = _site_core_ubr_count('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_c = _site_core_ubr_count('ubr_age_at_consent', 'Age')
ubr_sex_c = _site_core_ubr_count('ubr_sex', 'Sex')
ubr_sexual_gender_minority_c = _site_core_ubr_count('ubr_sexual_gender_minority', 'SGM')
ubr_income_c = _site_core_ubr_count('ubr_income', 'Income')
ubr_education_c = _site_core_ubr_count('ubr_education', 'Education')
ubr_geography_c = _site_core_ubr_count('ubr_geography', 'Geography')
ubr_disability_c = _site_core_ubr_count('ubr_disability', 'Disability')
ubr_healthcare_access_c = _site_core_ubr_count('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [50]:
# Site-level "UBR of Core Participants": each category count divided by the
# site's 'Total Core Participant' figure.
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
_core_count_frames = [ubr_overall_c, ubr_ethnicity_c, ubr_age_at_consent_c, ubr_sex_c,
                      ubr_sexual_gender_minority_c, ubr_income_c, ubr_education_c, ubr_geography_c,
                      ubr_disability_c, ubr_healthcare_access_c]

# Total_Registered is the left-most frame of the join chain; its own column is
# removed again once every count frame has been left-joined onto it.
ubr_core_df = Total_Registered
for _counts in _core_count_frames:
    ubr_core_df = pd.merge(ubr_core_df, _counts, on=_site_keys, how='left')
ubr_core_df = ubr_core_df.drop(columns=['Total Registered Individuals'])

# Row-wise division (axis=0 aligns on the index); missing ratios become 0.
_core_totals = enroll_full_df[('Enrollment Status (non-overlapping)', 'Total Core Participant')].astype(float)
ubr_core_df = ubr_core_df.div(_core_totals, axis=0).fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# Nest every category under the 'UBR of Core Participants' column group.
ubr_core_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Core Participants', category) for category in ubr_core_df.columns])

#### UBR of Enrolled Participants

In [51]:
def _site_enrolled_ubr_count(flag_column, label):
    """Count distinct ENROLLED_PARTICIPANT ids per site where `flag_column` equals 1.

    Rows are always taken from `data_site`, whose missing `site_name` values
    were replaced by 'zUnpaired'. Selecting from the raw `data` frame instead
    (as the 'Sex' category previously did) leaves those site names as NaN, and
    groupby drops NaN keys by default, so such participants would be missing
    from that one category.

    Returns a one-column frame named `label`, indexed by
    (organization_type, awardee_name, organization_name, site_name).
    """
    site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
    selected = data_site.loc[
        (data_site['enrollment_status'] == 'ENROLLED_PARTICIPANT') & (data_site[flag_column] == 1)]
    distinct_ids = selected.groupby(site_keys)['participant_id'].nunique()
    return distinct_ids.to_frame().rename(columns={'participant_id': label}).sort_values(site_keys)


# One count frame per UBR category (Enrolled Participants, site level).
ubr_overall_cpm = _site_enrolled_ubr_count('ubr_overall', 'Overall')
ubr_ethnicity_cpm = _site_enrolled_ubr_count('ubr_ethnicity', 'Racial Identity')
ubr_age_at_consent_cpm = _site_enrolled_ubr_count('ubr_age_at_consent', 'Age')
ubr_sex_cpm = _site_enrolled_ubr_count('ubr_sex', 'Sex')
ubr_sexual_gender_minority_cpm = _site_enrolled_ubr_count('ubr_sexual_gender_minority', 'SGM')
ubr_income_cpm = _site_enrolled_ubr_count('ubr_income', 'Income')
ubr_education_cpm = _site_enrolled_ubr_count('ubr_education', 'Education')
ubr_geography_cpm = _site_enrolled_ubr_count('ubr_geography', 'Geography')
ubr_disability_cpm = _site_enrolled_ubr_count('ubr_disability', 'Disability')
ubr_healthcare_access_cpm = _site_enrolled_ubr_count('ubr_healthcare_access', 'Healthcare Access & Utilization')

In [52]:
# Site-level "UBR of Enrolled Participants": each category count divided by the
# site's 'Enrolled Participants' figure.
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
_enrolled_count_frames = [ubr_overall_cpm, ubr_ethnicity_cpm, ubr_age_at_consent_cpm, ubr_sex_cpm,
                          ubr_sexual_gender_minority_cpm, ubr_income_cpm, ubr_education_cpm,
                          ubr_geography_cpm, ubr_disability_cpm, ubr_healthcare_access_cpm]

# Total_Registered is the left-most frame of the join chain; its own column is
# removed again once every count frame has been left-joined onto it.
ubr_enrolled_df = Total_Registered
for _counts in _enrolled_count_frames:
    ubr_enrolled_df = pd.merge(ubr_enrolled_df, _counts, on=_site_keys, how='left')
ubr_enrolled_df = ubr_enrolled_df.drop(columns=['Total Registered Individuals'])

# Row-wise division (axis=0 aligns on the index); missing ratios become 0.
_enrolled_totals = enroll_full_df[('Enrollment Status (non-overlapping)', 'Enrolled Participants')].astype(float)
ubr_enrolled_df = ubr_enrolled_df.div(_enrolled_totals, axis=0).fillna(0)
pd.set_option('display.float_format', '{:.2%}'.format)

# Nest every category under the 'UBR of Enrolled Participants' column group.
ubr_enrolled_df.columns = pd.MultiIndex.from_tuples(
    [('UBR of Enrolled Participants', category) for category in ubr_enrolled_df.columns])

In [53]:
# Both UBR metrics side by side: core percentages on the left, enrolled
# percentages left-joined on the site keys.
ubr_dfs = pd.merge(ubr_core_df, ubr_enrolled_df,
                   on=['organization_type', 'awardee_name', 'organization_name', 'site_name'],
                   how='left')

####  Gender Identity/Racial Identity/Age

In [54]:
# Site-level working copy of the demographics. Rows without a site get the
# placeholder 'zUnpaired'. The fill reads from demog_site itself rather than
# from the organization-level copy, so this cell no longer depends on
# demog_org having been created first.
demog_site = demog.copy()
demog_site['site_name'] = demog_site['site_name'].fillna('zUnpaired')

In [55]:
def _site_demog_pivot(category_column):
    """Distinct participant counts per site, one column per value of `category_column`."""
    wide = demog_site.pivot_table(values='participant_id',
                                  index=['organization_type', 'awardee_name', 'organization_name', 'site_name'],
                                  columns=category_column,
                                  aggfunc='nunique')
    return wide.reset_index()


gender_df = _site_demog_pivot('gender_identity')
race_df = _site_demog_pivot('race_ethnicity')
age_df = _site_demog_pivot('age_group')

In [56]:
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']


def _label_site_demog(frame, group, ordered_columns=None):
    """Index `frame` by the site keys and nest its columns under `group`.

    When `ordered_columns` is given, only those value columns are kept, in that
    order (a missing column raises KeyError).
    """
    if ordered_columns is not None:
        frame = frame[_site_keys + ordered_columns]
    frame = frame.set_index(_site_keys)
    frame.columns = pd.MultiIndex.from_tuples([(group, value) for value in frame.columns])
    return frame


# Gender Identity
gender_df = _label_site_demog(
    gender_df, 'Gender Identity',
    ['Man', 'Non-Binary', 'Other/Addl. Options', 'Transgender', 'Woman', 'Multiple Selections', 'Skipped',
     'Prefer not to say'])

# Race Identity
race_df = _label_site_demog(
    race_df, 'Racial Identity',
    ['Asian', 'Black and Hispanic, Latino, or Spanish', 'Black or African American',
     'Hispanic, Latino, or Spanish', 'Middle Eastern or North African', 'More than one race',
     'More than one race and Hispanic, Latino, or Spanish', 'Native Hawaiian or other Pacific Islander',
     'One other race and Hispanic, Latino, or Spanish', 'Other race', 'Prefer not to say', 'White',
     'White and Hispanic, Latino, or Spanish', 'Skipped'])

# Age (keeps whatever age-group columns the pivot produced, in their existing order)
age_df = _label_site_demog(age_df, 'Age')

In [57]:
# All Demographics: gender on the left, race and age left-joined on the site
# keys; empty cells are reported as 0.
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
demog_df = (gender_df
            .merge(race_df, on=_site_keys, how='left')
            .merge(age_df, on=_site_keys, how='left')
            .fillna(0))
pd.set_option('display.float_format', '{:,.0f}'.format)

#### Combined dfs

In [58]:
# Site-level report table: totals, enrollment, UBR and demographics left-joined
# in that order, with empty cells reported as 0.
_site_keys = ['organization_type', 'awardee_name', 'organization_name', 'site_name']
all_dfs_tab2_sites = total_df
for _section in (enroll_full_df, ubr_dfs, demog_df):
    all_dfs_tab2_sites = pd.merge(all_dfs_tab2_sites, _section, on=_site_keys, how='left')
all_dfs_tab2_sites = all_dfs_tab2_sites.fillna(0)

# Display names for the index levels.
all_dfs_tab2_sites.index = all_dfs_tab2_sites.index.set_names(['Type', 'Awardee', 'Organization', 'Site'])

# Short display labels for index values.
_display_labels = {
    'HPO': 'RMC',
    'UNSET': 'UNPAIRED',
    'No organization set': 'Unpaired',
    'United States Department of Veteran Affairs': 'VA',
    'Cherokee Health Systems': 'Cherokee',
    'Community Health Center, Inc': 'Community Health Center',
    'Eau Claire Cooperative Health Center': 'Eau Claire',
    'Hudson River Health Care, Inc.': 'HRHCare',
    'Jackson-Hinds Comprehensive Health Center': 'Jackson-Hinds',
    'San Ysidro Health Center': 'San Ysidro',
    'California Precision Medicine Consortium': 'California',
    'New England Precision Medicine Consortium': 'New England',
    'Pittsburgh': 'PITT',
    'Southern Consortium': 'Southern',
    'Trans-American Consortium for the Health Care Systems Research Network (TACH)': 'Trans-America',
    'University of Texas Health Science Center at Houston': 'UT_HEALTH',
    'Virginia Commonwealth University': 'VCU',
    'Washington University in St. Louis': 'WASH U',
    'Wisconsin Consortium': 'Wisconsin',
    'Quest Labs': 'Quest',
}
all_dfs_tab2_sites = all_dfs_tab2_sites.rename(index=_display_labels)

In [59]:
# Merge & formatting for tab 2: stack the tab-1 table, the organization table
# and the site table into one frame indexed by Type / Awardee / Organization / Site.

# Move the Organization (and Site) index levels into columns. With col_level=1
# the new labels land on the second column level, i.e. ('', 'Organization') and
# ('', 'Site'), which is how they are addressed below.
tab2_orgs = all_dfs_tab2_orgs.reset_index(level=2, col_level=1)
tab2_sites = all_dfs_tab2_sites.reset_index(level=[2, 3], col_level=1)

# custom_sort = (all_dfs_tab1.columns)
# sort=False keeps the column order of the inputs instead of sorting the union.
tab2_all = pd.concat([all_dfs_tab1, tab2_orgs, tab2_sites], sort=False)
# Rows that have no Organization/Site value (NaN) are placed first within each
# awardee via na_position='first'.
# NOTE(review): presumably these are the awardee-level rows from all_dfs_tab1
# and the organization-level rows -- confirm against all_dfs_tab1.
tab2_all = tab2_all.sort_index().reset_index().sort_values(['Awardee', ('', 'Organization'), ('', 'Site')],
                                                           na_position='first')
tab2_all = tab2_all.set_index(['Type', 'Awardee', ('', 'Organization'), ('', 'Site')])
# Keep exactly the columns of tab 1, in tab 1's order.
tab2_all = tab2_all[all_dfs_tab1.columns]
# Fixed presentation order of the organization types.
tab2_all = tab2_all.reindex(['Overall', 'RMC', 'FQHC', 'DV', 'VA', 'HPO-Lite', 'UNPAIRED'], level='Type')
# Replace the tuple-style level names left over from set_index with plain names.
tab2_all.index.names = ['Type', 'Awardee', 'Organization', 'Site']
# tab2_all[('Age', '0-17')] = tab2_all[('Age', '0-17')].fillna(0)

# Restore the display label of the 'zUnpaired' placeholder, then drop the rows
# keyed ('UNPAIRED', 'Unpaired', 'Unpaired').
tab2_all = tab2_all.rename(index={'zUnpaired': 'Unpaired'})
tab2_all.drop(('UNPAIRED', 'Unpaired', 'Unpaired'), axis=0, inplace=True)

# There is no data for the 0-17 age bucket: add the column filled with 0.
# NOTE(review): position 59 is hard-coded and depends on the current column
# layout; DataFrame.insert also raises if ('Age', '0-17') already exists.
tab2_all.insert(loc=59, column=('Age', '0-17'), value=0)

In [62]:
# Write both report tabs to '<yday>_PEO-Participant_Enrollment_Overview.xlsx'.
file_name = yday +'_'+'PEO-Participant_Enrollment_Overview'+'.xlsx'

# The context manager saves and closes the workbook on exit.
# ExcelWriter.save() was removed in pandas 2.0, so it must not be called here.
with pd.ExcelWriter(file_name, engine='xlsxwriter') as writer:
    # startrow=2 leaves the first two sheet rows free for the title and the date.
    tab1_final.to_excel(writer, sheet_name = 'PEO Report', startrow=2)
    tab2_all.to_excel(writer, sheet_name = 'Including Orgs & Sites', startrow=2)
    wb = writer.book
    ws1 = writer.sheets['PEO Report']
    ws2 = writer.sheets['Including Orgs & Sites']

    # Cell formats
    num_format = wb.add_format({'num_format': '#,##0'})
    pct_format = wb.add_format({'num_format': '0.00%'})
    idx_format = wb.add_format({'align': 'left', 'valign': 'top'})
    a1_format = wb.add_format({'font_size': 12, 'bold': True})

    # Sheet 1: two index columns (A:B), then count, percentage and count ranges.
    ws1.write('A1', 'Participant Enrollment Overview', a1_format)
    ws1.write('A2', 'Updated: ')
    ws1.write('B2', yday)
    ws1.set_column('A:A', 10, idx_format)
    ws1.set_column('B:B', 26, idx_format)
    ws1.set_column('C:S', 8.5, num_format)
    ws1.set_column('T:AM', 8.5, pct_format)
    ws1.set_column('AN:BN', 8.5, num_format)

    # Sheet 2: four index columns (A:D), so every data range is shifted right
    # by two columns relative to sheet 1.
    ws2.write('A1', 'Participant Enrollment Overview', a1_format)
    ws2.write('A2', 'Updated: ')
    ws2.write('B2', yday)
    ws2.set_column('A:A', 10, idx_format)
    ws2.set_column('B:B', 26, idx_format)
    ws2.set_column('C:C', 56, idx_format)
    ws2.set_column('D:D', 90, idx_format)
    ws2.set_column('E:U', 8.5, num_format)
    ws2.set_column('V:AO', 8.5, pct_format)
    ws2.set_column('AP:BP', 8.5, num_format)