# Prerequisite to run this notebook:
- **PMI-Ops account**
- **Postgres credentials**
- **Google Cloud SDK Software Installed**

# <b style="color:blue; font-size:15px;">The data are sourced from the program data repository (PDR), a dataset derived from the raw data repository (RDR) that is designed for aggregate reporting. Please take caution in sharing this notebook outside of the <i>All of Us</i> Research Program Consortium.</b>

# - <em style="color:grey"><b>Note</b>: Before running this notebook, you need to open the connection and leave Google Cloud SDK Shell open. So, you can connect to your IDE</em>
        
    - Copy/paste the link below to your Google Cloud SDK Shell and hit `Enter`
            
      <b>cloud_sql_proxy -instances=aou-pdr-data-prod:us-central1:prod-pdr-5deb-lhty=tcp:7000, aou-pdr-data-prod:us-central1:prod-pdr-alpha-replica=tcp:7005</b> 

In [338]:
import os
import sys
from functools import reduce
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, String, MetaData, Integer
from sqlalchemy import inspect
import psycopg2
import random 
import string
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime
from datetime import date
from datetime import time
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import time
import xlsxwriter

In [339]:
# Insert your username into the textbox and hit enter
username = input("Please enter your username: ")

Please enter your username: ckilpatrick


In [340]:
# Insert your password into the textbox and hit enter
password = input("Please enter your password: ")

Please enter your password: Pieing19


In [341]:
db_postgres = create_engine('postgresql://'+username+':'+password+'@localhost:7005/drc')

## Questions output 

In [342]:
query =''' 

WITH surveys AS (
    WITH survey_tab AS (
        VALUES ('2020-05-29 00:00:00+00:00'::timestamp, 'May'),
               ('2020-06-26 00:00:00+00:00'::timestamp, 'June'),
               ('2020-09-25 00:00:00+00:00'::timestamp, 'July'),
               ('2020-12-03 00:00:00+00:00'::timestamp, 'November'),
               ('2021-01-04 00:00:00+00:00'::timestamp, 'December'),
               ('2021-03-05 00:00:00+00:00'::timestamp, 'February'),
               ('2021-08-19 00:00:00+00:00'::timestamp, 'Minute Summer'),
               ('2021-10-27 00:00:00+00:00'::timestamp, 'Minute Fall'),
               ('2021-12-21 00:00:00+00:00'::timestamp, 'Minute Winter'),
               ('2022-01-20 00:00:00+00:00'::timestamp, 'Minute New Year')
    )
    SELECT column1 AS survey_ts, column2 AS survey
    FROM survey_tab
),
 participants AS (
 SELECT b.*,
       (SELECT CASE WHEN b.cope_feb_resp_status_id = 1 and b.cope_feb_resp_vac = 'Yes' and b.cope_feb_resp_vac_doses = 2 THEN
            (SELECT STRING_AGG(s1.survey::text, ',' ORDER BY s1.survey_ts) FROM surveys s1
              WHERE s1.survey_ts >= b.basics_ts AND s1.survey_ts NOT IN
                        ('2021-08-19 00:00:00+00:00'::timestamp, '2021-10-27 00:00:00+00:00'::timestamp))
           WHEN b.cope_spring_resp_status_id = 1 and b.cope_spring_resp_vac_doses = 2 THEN
            (SELECT STRING_AGG(s1.survey::text, ',' ORDER BY s1.survey_ts) FROM surveys s1
              WHERE s1.survey_ts >= b.basics_ts AND s1.survey_ts != '2021-08-19 00:00:00+00:00'::timestamp)
           ELSE
            (SELECT STRING_AGG(s1.survey::text, ',' ORDER BY s1.survey_ts) FROM surveys s1
              WHERE s1.survey_ts >= b.basics_ts) END) as cope_month_elig,
       (SELECT s2.survey_ts FROM surveys s2
          WHERE s2.survey_ts >= b.basics_ts ORDER BY s2.survey_ts limit 1) as first_cope_elig_ts
 FROM (
    SELECT a.*,
       -- February COPE Survey
       -- cope_feb_resp_status_id = (null=not completed, 1=completed, 2=partial response)
       (SELECT pm2.mod_response_status_id FROM pdr.mv_participant_module pm2
          WHERE pm2.participant_id = a.participant_id AND pm2.mod_module = 'cope_feb' AND
                pm2.mod_status_id = 1 and pm2.mod_response_status_id > 0
          ORDER BY pm2.mod_authored DESC LIMIT 1) AS cope_feb_resp_status_id,
       (SELECT (CASE WHEN mod.cdc_covid_xx = 'COPE_A_44' THEN 'Yes'
                     WHEN mod.cdc_covid_xx = 'COPE_A_13' THEN 'No'
                     WHEN mod.cdc_covid_xx = 'COPE_A_334' THEN 'Not_Sure' END)
            FROM pdr.mv_mod_cope_feb mod
            WHERE mod.participant_id = a.participant_id and mod.status_id = 1
            ORDER BY mod.authored DESC LIMIT 1) as cope_feb_resp_vac,
       (SELECT (CASE WHEN mod.cdc_covid_xx_a = 'cope_a_332' THEN 1
                     WHEN mod.cdc_covid_xx_a = 'cope_a_333' THEN 2 END)
            FROM pdr.mv_mod_cope_feb mod
            WHERE mod.participant_id = a.participant_id and mod.status_id = 1
            ORDER BY mod.authored DESC LIMIT 1) as cope_feb_resp_vac_doses,
       -- COPE Spring Minute Survey
       -- cope_feb_resp_status_id = (null=not completed, 1=completed, 2=partial response)
       (SELECT pm2.mod_response_status_id FROM pdr.mv_participant_module pm2
          WHERE pm2.participant_id = a.participant_id AND pm2.mod_module = 'cope_vaccine1' AND
                pm2.mod_status_id = 1 and pm2.mod_response_status_id > 0
          ORDER BY pm2.mod_authored DESC LIMIT 1) AS cope_spring_resp_status_id,
       (SELECT (CASE WHEN mod.cdc_covid_xx_seconddose = 'COPE_A_44' THEN 2
                     WHEN mod.cdc_covid_xx_firstdose = 'COPE_A_44' THEN 1 ELSE 0 END)
            FROM pdr.mv_mod_cope_vaccine1 mod
            WHERE mod.participant_id = a.participant_id and mod.status_id = 1
            ORDER BY mod.authored DESC LIMIT 1) as cope_spring_resp_vac_doses
    FROM (
        SELECT p.participant_id,
           p.suspension_time,
           (SELECT MIN(pm.mod_authored) FROM pdr.mv_participant_module pm
               WHERE p.participant_id = pm.participant_id AND pm.mod_module = 'TheBasics' AND
                     pm.mod_status_id = 1 AND pm.mod_response_status_id = 1) as basics_ts
          FROM pdr.mv_participant p
    ) a
    WHERE a.basics_ts IS NOT NULL
  ) b
)
SELECT * FROM participants;


'''
data = pd.read_sql(query, db_postgres)

In [343]:
#data.head

<bound method NDFrame.head of                                              question  total    ubr    rbr  \
0   Think about the place you live. Do you have pr...  81685  58538  23146   
1   In the last month, how often have you found th...  84376  60707  23668   
2                     I am unhappy being so withdrawn  84150  60521  23628   
3   "Since you speak a language other than English...  10346   8505   1841   
4   I am spiritually touched by the beauty of crea...  84288  60672  23615   
5   You are treated with less courtesy than other ...  85491  61630  23860   
6   The crime rate in my neighborhood makes it uns...  82437  59105  23331   
7   In the last month, how often have you felt tha...  84905  61127  23777   
8             Vandalism is common in my neighborhood.  83596  60092  23503   
9                                I lack companionship  85360  61500  23859   
10  Someone to help with daily chores if you were ...  83651  60072  23578   
11  There is too much alcohol use 

In [345]:
numbers = data.select_dtypes(include=['number'])
div_total = numbers['total']/numbers['total'].max()
div_ubr = numbers['ubr']/numbers['ubr'].max()
div_rbr = numbers['rbr']/numbers['rbr'].max()
div_race = numbers['race']/numbers['race'].max()
div_edu = numbers['edu']/numbers['edu'].max()
div_income = numbers['income']/numbers['income'].max()
percents = data['Completion Rate'],data['UBR CR'],data['RBR CR'],data['UBR Race CR'],data['UBR Edu CR'] ,data['Income_divided'] = [div_total,div_ubr,div_rbr,div_race,div_edu,div_income]

#data.head

<bound method NDFrame.head of                                              question  total    ubr    rbr  \
0   Think about the place you live. Do you have pr...  81685  58538  23146   
1   In the last month, how often have you found th...  84376  60707  23668   
2                     I am unhappy being so withdrawn  84150  60521  23628   
3   "Since you speak a language other than English...  10346   8505   1841   
4   I am spiritually touched by the beauty of crea...  84288  60672  23615   
5   You are treated with less courtesy than other ...  85491  61630  23860   
6   The crime rate in my neighborhood makes it uns...  82437  59105  23331   
7   In the last month, how often have you felt tha...  84905  61127  23777   
8             Vandalism is common in my neighborhood.  83596  60092  23503   
9                                I lack companionship  85360  61500  23859   
10  Someone to help with daily chores if you were ...  83651  60072  23578   
11  There is too much alcohol use 

## UBR Statstics

In [346]:
query_new = '''
select
distinct d.participant_id,
    CASE WHEN ubr_overall = 1 THEN 1 ELSE 0 END AS ubr_overall,
    CASE WHEN ubr_overall = 0 THEN 1 ELSE 0 END AS rbr_overall,
    ubr_sex,
    ubr_gender_identity,
    ubr_ethnicity,
    ubr_geography,
    ubr_income,
    ubr_age_at_consent,
    ubr_education,
    CASE
        WHEN age_at_consent < 26 THEN '18-25'
        WHEN age_at_consent < 36 THEN '26-35'
        WHEN age_at_consent < 46 THEN '36-45'
        WHEN age_at_consent < 56 THEN '46-55'
        WHEN age_at_consent < 66 THEN '56-65'
        WHEN age_at_consent < 76 THEN '66-75'
        WHEN age_at_consent < 86 THEN '76-85'
        WHEN age_at_consent >= 86 THEN '86+'
    END AS age,
authored ::date,
sign_up_time ::date,
    education,
    income,
    primary_language,
    sex,
    CASE WHEN sexual_orientation LIKE '%%,%%' THEN 'Two_or_More' ELSE sexual_orientation END AS sexual_orientation,
    CASE WHEN race LIKE '%%,%%' THEN 'Two_or_More' ELSE race END AS ethnicity,
    CASE WHEN gender LIKE '%%,%%' THEN 'Two_or_More' ELSE gender END AS gender_identity,
    CASE WHEN s.status_id = 1 THEN 1 ELSE 0 END AS sdoh_flag,
    CASE WHEN mod.ppi1_flag = 1 AND mod.ppi2_flag = 1 AND mod.ppi3_flag = 1 THEN 1 END AS ppi1_3_flag

FROM drc.ssad_dm.mv_participant_display d
LEFT JOIN pdr.mv_mod_sdoh s on s.participant_id = d.participant_id
LEFT JOIN (SELECT
                    participant_id,
                    MAX(CASE WHEN mod_module = 'TheBasics' THEN 1 END) AS ppi1_flag,
                    MAX(CASE WHEN mod_module = 'OverallHealth' THEN 1 END) AS ppi2_flag,
                    MAX(CASE WHEN mod_module = 'Lifestyle' THEN 1 END) AS ppi3_flag
                FROM pdr.mv_participant_module mod
where mod.mod_status_id = 1
      AND mod.mod_response_status_id = 1
                GROUP BY participant_id) mod on mod.participant_id = d.participant_id


'''
ubrs = pd.read_sql(query_new, db_postgres)


In [347]:
#ubrs.head()

In [348]:
ubrs['authored'] =pd.to_datetime(ubrs['authored'])
ubrs['sign_up_time'] =pd.to_datetime(ubrs['sign_up_time'])

In [349]:
flag= ubrs.loc[(ubrs['ppi1_3_flag'] == 1)]
flags =  ubrs.loc[(ubrs['ppi1_3_flag'] == 1) & (ubrs['sdoh_flag'] == 1)]
flag_nov= flag[(flag['sign_up_time'] <= '2021-11-30')]
flag_dec= flag[(flag['sign_up_time'] <= '2021-12-31')]
flag_jan= flag[(flag['sign_up_time'] <= '2022-1-31')]
flags_nov = flags[(flags['authored'] > '2021-11-01') & (flags['authored'] < '2021-11-30')]
flags_dec= flags[(flags['authored'] > '2021-12-01') & (flags['authored'] < '2021-12-31')]
flags_jan= flags[(flags['authored'] > '2022-1-01') & (flags['authored'] < '2022-1-31')]

In [350]:
#total_eligible
Total_eligible = flag['participant_id'].nunique()
UBR_overall= flag[flag['ubr_overall']==1]['participant_id'].nunique() 
RBR_overall=  flag[flag['rbr_overall']==1]['participant_id'].nunique() 
UBR_sex =  flag[flag['ubr_sex']==1]['participant_id'].nunique() 
UBR_age_at_consent =  flag[flag['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_gender_identity =   flag[flag['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_ethnicity  =  flag[flag['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_geography  =  flag[flag['ubr_geography']==1]['participant_id'].nunique() 
UBR_income  =  flag[flag['ubr_income']==1]['participant_id'].nunique() 
UBR_education  = flag[flag['ubr_education']==1]['participant_id'].nunique() 
UBR_all= [UBR_age_at_consent,UBR_geography,RBR_overall,UBR_gender_identity,Total_eligible,UBR_overall,UBR_sex,UBR_income,UBR_ethnicity,UBR_education]
#UBR_all

In [351]:
#total_completed
Total_completed = flag[flag['sdoh_flag']==1]['participant_id'].nunique() 
UBR_overall_completed = flags[flags['ubr_overall']==1]['participant_id'].nunique() 
RBR_overall_completed= flags[flags['rbr_overall']==1]['participant_id'].nunique()
UBR_sex_completed = flags[flags['ubr_sex']==1]['participant_id'].nunique()
UBR_age_at_consent_completed =  flags[flags['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_gender_identity_completed =   flags[flags['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_ethnicity_completed  =  flags[flags['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_geography_completed  =  flags[flags['ubr_geography']==1]['participant_id'].nunique() 
UBR_income_completed  = flags[flags['ubr_income']==1]['participant_id'].nunique()
UBR_education_completed  = flags[flags['ubr_education']==1]['participant_id'].nunique() 
UBR_all_completed= [UBR_age_at_consent_completed,UBR_geography_completed,RBR_overall_completed,UBR_gender_identity_completed,Total_completed,UBR_overall_completed,UBR_sex_completed,UBR_income_completed,UBR_ethnicity_completed,UBR_education_completed]
#UBR_all_completed

In [352]:
#total_eligible November
Total_Nov_eligible = flag_nov['participant_id'].nunique()
UBR_Nov_overall= flag_nov[flag_nov['ubr_overall']==1]['participant_id'].nunique() 
RBR_Nov_overall=  flag_nov[flag_nov['rbr_overall']==1]['participant_id'].nunique() 
UBR_Nov_sex =  flag_nov[flag_nov['ubr_sex']==1]['participant_id'].nunique() 
UBR_Nov_age_at_consent =  flag_nov[flag_nov['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_Nov_gender_identity =   flag_nov[flag_nov['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_Nov_ethnicity  =  flag_nov[flag_nov['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_Nov_geography  =  flag_nov[flag_nov['ubr_geography']==1]['participant_id'].nunique() 
UBR_Nov_income  =  flag_nov[flag_nov['ubr_income']==1]['participant_id'].nunique() 
UBR_Nov_education  = flag_nov[flag_nov['ubr_education']==1]['participant_id'].nunique() 
UBR_all_nov= [UBR_Nov_age_at_consent,UBR_Nov_geography,RBR_Nov_overall,UBR_Nov_gender_identity,Total_Nov_eligible,UBR_Nov_overall,UBR_Nov_sex,UBR_Nov_income,UBR_Nov_ethnicity,UBR_Nov_education]
#UBR_all_nov


In [353]:
#total_eligible December
Total_DEC_eligible = flag_dec['participant_id'].nunique()
UBR_DEC_overall= flag_dec[flag_dec['ubr_overall']==1]['participant_id'].nunique() 
RBR_DEC_overall=  flag_dec[flag_dec['rbr_overall']==1]['participant_id'].nunique() 
UBR_DEC_sex =  flag_dec[flag_dec['ubr_sex']==1]['participant_id'].nunique() 
UBR_DEC_age_at_consent =  flag_dec[flag_dec['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_DEC_gender_identity =   flag_dec[flag_dec['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_DEC_ethnicity  =  flag_dec[flag_dec['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_DEC_geography  =  flag_dec[flag_dec['ubr_geography']==1]['participant_id'].nunique() 
UBR_DEC_income  =  flag_dec[flag_dec['ubr_income']==1]['participant_id'].nunique() 
UBR_DEC_education  = flag_dec[flag_dec['ubr_education']==1]['participant_id'].nunique() 
UBR_DEC_all= [UBR_DEC_age_at_consent,UBR_DEC_geography,RBR_DEC_overall,UBR_DEC_gender_identity,Total_DEC_eligible,UBR_DEC_overall,UBR_DEC_sex,UBR_DEC_income,UBR_DEC_ethnicity,UBR_DEC_education]
#UBR_DEC_all



In [354]:
#total_eligible January
Total_JAN_eligible = flag_jan['participant_id'].nunique()
UBR_JAN_overall= flag_jan[flag_jan['ubr_overall']==1]['participant_id'].nunique() 
RBR_JAN_overall=  flag_jan[flag_jan['rbr_overall']==1]['participant_id'].nunique() 
UBR_JAN_sex =  flag_jan[flag_jan['ubr_sex']==1]['participant_id'].nunique() 
UBR_JAN_age_at_consent =  flag_jan[flag_jan['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_JAN_gender_identity =   flag_jan[flag_jan['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_JAN_ethnicity  =  flag_jan[flag_jan['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_JAN_geography  =  flag_jan[flag_jan['ubr_geography']==1]['participant_id'].nunique() 
UBR_JAN_income  =  flag_jan[flag_jan['ubr_income']==1]['participant_id'].nunique() 
UBR_JAN_education  = flag_jan[flag_jan['ubr_education']==1]['participant_id'].nunique() 
UBR_JAN_all= [UBR_JAN_age_at_consent,UBR_JAN_geography,RBR_JAN_overall,UBR_JAN_gender_identity,Total_JAN_eligible,UBR_JAN_overall,UBR_JAN_sex,UBR_JAN_income,UBR_JAN_ethnicity,UBR_JAN_education]
#UBR_JAN_all



In [355]:
#total_completed November
Total_NOV_COMP = flags_nov['participant_id'].nunique()
UBR_NOV_COMP_overall= flags_nov[flags_nov['ubr_overall']==1]['participant_id'].nunique() 
RBR_NOV_COMP_overall=  flags_nov[flags_nov['rbr_overall']==1]['participant_id'].nunique() 
UBR_NOV_COMP_sex =  flags_nov[flags_nov['ubr_sex']==1]['participant_id'].nunique() 
UBR_NOV_COMP_age_at_consent =  flags_nov[flags_nov['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_NOV_COMP_gender_identity =   flags_nov[flags_nov['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_NOV_COMP_ethnicity  =  flags_nov[flags_nov['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_NOV_COMP_geography  =  flags_nov[flags_nov['ubr_geography']==1]['participant_id'].nunique() 
UBR_NOV_COMP_income  =  flags_nov[flags_nov['ubr_income']==1]['participant_id'].nunique() 
UBR_NOV_COMP_education  = flags_nov[flags_nov['ubr_education']==1]['participant_id'].nunique() 
UBR_NOV_COMP_all= [UBR_NOV_COMP_age_at_consent,UBR_NOV_COMP_geography,RBR_NOV_COMP_overall,UBR_NOV_COMP_gender_identity,Total_NOV_COMP_eligible,UBR_NOV_COMP_overall,UBR_NOV_COMP_sex,UBR_NOV_COMP_income,UBR_NOV_COMP_ethnicity,UBR_NOV_COMP_education]
#UBR_NOV_COMP_all



In [356]:
#total_completed December
Total_DEC_COMP = flags_dec['participant_id'].nunique()
UBR_DEC_COMP_overall= flags_dec[flags_dec['ubr_overall']==1]['participant_id'].nunique() 
RBR_DEC_COMP_overall=  flags_dec[flags_dec['rbr_overall']==1]['participant_id'].nunique() 
UBR_DEC_COMP_sex =  flags_dec[flags_dec['ubr_sex']==1]['participant_id'].nunique() 
UBR_DEC_COMP_age_at_consent =  flags_dec[flags_dec['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_DEC_COMP_gender_identity =   flags_dec[flags_dec['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_DEC_COMP_ethnicity  =  flags_dec[flags_dec['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_DEC_COMP_geography  =  flags_dec[flags_dec['ubr_geography']==1]['participant_id'].nunique() 
UBR_DEC_COMP_income  =  flags_dec[flags_dec['ubr_income']==1]['participant_id'].nunique() 
UBR_DEC_COMP_education  = flags_dec[flags_dec['ubr_education']==1]['participant_id'].nunique() 
UBR_DEC_COMP_all= [UBR_DEC_COMP_age_at_consent,UBR_DEC_COMP_geography,RBR_DEC_COMP_overall,UBR_DEC_COMP_gender_identity,Total_DEC_COMP,UBR_DEC_COMP_overall,UBR_DEC_COMP_sex,UBR_DEC_COMP_income,UBR_DEC_COMP_ethnicity,UBR_DEC_COMP_education]
#UBR_DEC_COMP_all



In [357]:
#total_completed January
Total_JAN_COMP = flags_jan['participant_id'].nunique()
UBR_JAN_COMP_overall= flags_jan[flags_jan['ubr_overall']==1]['participant_id'].nunique() 
RBR_JAN_COMP_overall=  flags_jan[flags_jan['rbr_overall']==1]['participant_id'].nunique() 
UBR_JAN_COMP_sex =  flags_jan[flags_jan['ubr_sex']==1]['participant_id'].nunique() 
UBR_JAN_COMP_age_at_consent =  flags_jan[flags_jan['ubr_age_at_consent']==1]['participant_id'].nunique() 
UBR_JAN_COMP_gender_identity =   flags_jan[flags_jan['ubr_gender_identity']==1]['participant_id'].nunique() 
UBR_JAN_COMP_ethnicity  =  flags_jan[flags_jan['ubr_ethnicity']==1]['participant_id'].nunique() 
UBR_JAN_COMP_geography  =  flags_jan[flags_jan['ubr_geography']==1]['participant_id'].nunique() 
UBR_JAN_COMP_income  =  flags_jan[flags_jan['ubr_income']==1]['participant_id'].nunique() 
UBR_JAN_COMP_education  = flags_jan[flags_jan['ubr_education']==1]['participant_id'].nunique() 
UBR_JAN_COMP_all= [UBR_JAN_COMP_age_at_consent,UBR_JAN_COMP_geography,RBR_JAN_COMP_overall,UBR_JAN_COMP_gender_identity,Total_JAN_COMP,UBR_JAN_COMP_overall,UBR_JAN_COMP_sex,UBR_JAN_COMP_income,UBR_JAN_COMP_ethnicity,UBR_JAN_COMP_education]
#UBR_JAN_COMP_all



In [358]:
Total_Percents = [i / j for i, j in zip(UBR_all_completed, UBR_all)]
Total_Percents_Jan = [i / j for i, j in zip(UBR_JAN_COMP_all, UBR_JAN_all)]
Total_Percents_Dec = [i / j for i, j in zip(UBR_DEC_COMP_all, UBR_DEC_all)]
Total_Percents_Nov = [i / j for i, j in zip(UBR_NOV_COMP_all, UBR_all_nov)]
overall =pd.DataFrame([UBR_all,UBR_all_completed,Total_Percents,UBR_all_nov,UBR_NOV_COMP_all,Total_Percents_Nov,UBR_DEC_all,UBR_DEC_COMP_all,Total_Percents_Dec,UBR_JAN_all,UBR_JAN_COMP_all,Total_Percents_Jan])
overall.columns = ['ubr_age_at_consent','ubr_geography','rbr_overall','ubr_gender','Survey Participants','ubr_overall','ubr_sex','ubr_income', 'ubr_ethnicity','ubr_education']
new =overall.transpose()
new.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#new

In [359]:
#age_table
age_all = flag.age.value_counts()
age_completed = flags.age.value_counts()
age_divide = age_completed/age_all
age_all_nov = flag_nov.age.value_counts()
age_completed_nov = flags_nov.age.value_counts()
age_divide_nov =age_completed_nov/age_all_nov
age_all_dec = flag_dec.age.value_counts()
age_completed_dec = flags_dec.age.value_counts()
age_divide_dec =age_completed_dec/age_all_dec
age_all_jan = flag_jan.age.value_counts()
age_completed_jan = flags_jan.age.value_counts()
age_divide_jan =age_completed_jan/age_all_jan
percents_age = pd.concat([age_all,age_completed,age_divide,age_all_nov,age_completed_nov,age_divide_nov,age_all_dec,age_completed_dec,age_divide_dec,age_all_jan,age_completed_jan,age_divide_jan], axis =1 )
percents_age.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_age


In [360]:
#education_table
education_all = flag.education.value_counts()
education_completed = flags.education.value_counts()
education_divide = education_completed/education_all
education_all_nov = flag_nov.education.value_counts()
education_completed_nov = flags_nov.education.value_counts()
education_divide_nov =education_completed_nov/education_all_nov
education_all_dec = flag_dec.education.value_counts()
education_completed_dec = flags_dec.education.value_counts()
education_divide_dec =education_completed_dec/education_all_dec
education_all_jan = flag_jan.education.value_counts()
education_completed_jan = flags_jan.education.value_counts()
education_divide_jan =education_completed_jan/education_all_jan
percents_education = pd.concat([education_all,education_completed,education_divide,education_all_nov,education_completed_nov,education_divide_nov,education_all_dec,education_completed_dec,education_divide_dec,education_all_jan,education_completed_jan,education_divide_jan], axis =1 )
percents_education.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_education


In [361]:
#income_table
income_all = flag.income.value_counts()
income_completed = flags.income.value_counts()
income_divide = income_completed/income_all
income_all_nov = flag_nov.income.value_counts()
income_completed_nov = flags_nov.income.value_counts()
income_divide_nov =income_completed_nov/income_all_nov
income_all_dec = flag_dec.income.value_counts()
income_completed_dec = flags_dec.income.value_counts()
income_divide_dec =income_completed_dec/income_all_dec
income_all_jan = flag_jan.income.value_counts()
income_completed_jan = flags_jan.income.value_counts()
income_divide_jan =income_completed_jan/income_all_jan
percents_income = pd.concat([income_all,income_completed,income_divide,income_all_nov,income_completed_nov,income_divide_nov,income_all_dec,income_completed_dec,income_divide_dec,income_all_jan,income_completed_jan,income_divide_jan], axis =1 )
percents_income.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_income


In [362]:
#primary_language_table
primary_language_all = flag.primary_language.value_counts()
primary_language_completed = flags.primary_language.value_counts()
primary_language_divide = primary_language_completed/primary_language_all
primary_language_all_nov = flag_nov.primary_language.value_counts()
primary_language_completed_nov = flags_nov.primary_language.value_counts()
primary_language_divide_nov =primary_language_completed_nov/primary_language_all_nov
primary_language_all_dec = flag_dec.primary_language.value_counts()
primary_language_completed_dec = flags_dec.primary_language.value_counts()
primary_language_divide_dec =primary_language_completed_dec/primary_language_all_dec
primary_language_all_jan = flag_jan.primary_language.value_counts()
primary_language_completed_jan = flags_jan.primary_language.value_counts()
primary_language_divide_jan =primary_language_completed_jan/primary_language_all_jan
percents_primary_language = pd.concat([primary_language_all,primary_language_completed,primary_language_divide,primary_language_all_nov,primary_language_completed_nov,primary_language_divide_nov,primary_language_all_dec,primary_language_completed_dec,primary_language_divide_dec,primary_language_all_jan,primary_language_completed_jan,primary_language_divide_jan], axis =1 )
percents_primary_language.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_primary_language


In [363]:
#sex_table
sex_all = flag.sex.value_counts()
sex_completed = flags.sex.value_counts()
sex_divide = sex_completed/sex_all
sex_all_nov = flag_nov.sex.value_counts()
sex_completed_nov = flags_nov.sex.value_counts()
sex_divide_nov =sex_completed_nov/sex_all_nov
sex_all_dec = flag_dec.sex.value_counts()
sex_completed_dec = flags_dec.sex.value_counts()
sex_divide_dec =sex_completed_dec/sex_all_dec
sex_all_jan = flag_jan.sex.value_counts()
sex_completed_jan = flags_jan.sex.value_counts()
sex_divide_jan =sex_completed_jan/sex_all_jan
percents_sex = pd.concat([sex_all,sex_completed,sex_divide,sex_all_nov,sex_completed_nov,sex_divide_nov,sex_all_dec,sex_completed_dec,sex_divide_dec,sex_all_jan,sex_completed_jan,sex_divide_jan], axis =1 )
percents_sex.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_sex


In [364]:
#sexual_orientation_table
sexual_orientation_all = flag.sexual_orientation.value_counts()
sexual_orientation_completed = flags.sexual_orientation.value_counts()
sexual_orientation_divide = sexual_orientation_completed/sexual_orientation_all
sexual_orientation_all_nov = flag_nov.sexual_orientation.value_counts()
sexual_orientation_completed_nov = flags_nov.sexual_orientation.value_counts()
sexual_orientation_divide_nov =sexual_orientation_completed_nov/sexual_orientation_all_nov
sexual_orientation_all_dec = flag_dec.sexual_orientation.value_counts()
sexual_orientation_completed_dec = flags_dec.sexual_orientation.value_counts()
sexual_orientation_divide_dec =sexual_orientation_completed_dec/sexual_orientation_all_dec
sexual_orientation_all_jan = flag_jan.sexual_orientation.value_counts()
sexual_orientation_completed_jan = flags_jan.sexual_orientation.value_counts()
sexual_orientation_divide_jan =sexual_orientation_completed_jan/sexual_orientation_all_jan
percents_sexual_orientation = pd.concat([sexual_orientation_all,sexual_orientation_completed,sexual_orientation_divide,sexual_orientation_all_nov,sexual_orientation_completed_nov,sexual_orientation_divide_nov,sexual_orientation_all_dec,sexual_orientation_completed_dec,sexual_orientation_divide_dec,sexual_orientation_all_jan,sexual_orientation_completed_jan,sexual_orientation_divide_jan], axis =1 )
percents_sexual_orientation.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_sexual_orientation


In [365]:
#ethnicity_table
ethnicity_all = flag.ethnicity.value_counts()
ethnicity_completed = flags.ethnicity.value_counts()
ethnicity_divide = ethnicity_completed/ethnicity_all
ethnicity_all_nov = flag_nov.ethnicity.value_counts()
ethnicity_completed_nov = flags_nov.ethnicity.value_counts()
ethnicity_divide_nov =ethnicity_completed_nov/ethnicity_all_nov
ethnicity_all_dec = flag_dec.ethnicity.value_counts()
ethnicity_completed_dec = flags_dec.ethnicity.value_counts()
ethnicity_divide_dec =ethnicity_completed_dec/ethnicity_all_dec
ethnicity_all_jan = flag_jan.ethnicity.value_counts()
ethnicity_completed_jan = flags_jan.ethnicity.value_counts()
ethnicity_divide_jan =ethnicity_completed_jan/ethnicity_all_jan
percents_ethnicity = pd.concat([ethnicity_all,ethnicity_completed,ethnicity_divide,ethnicity_all_nov,ethnicity_completed_nov,ethnicity_divide_nov,ethnicity_all_dec,ethnicity_completed_dec,ethnicity_divide_dec,ethnicity_all_jan,ethnicity_completed_jan,ethnicity_divide_jan], axis =1 )
percents_ethnicity.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_ethnicity

In [366]:
#gender_identity_table
gender_identity_all = flag.gender_identity.value_counts()
gender_identity_completed = flags.gender_identity.value_counts()
gender_identity_divide = gender_identity_completed/gender_identity_all
gender_identity_all_nov = flag_nov.gender_identity.value_counts()
gender_identity_completed_nov = flags_nov.gender_identity.value_counts()
gender_identity_divide_nov =gender_identity_completed_nov/gender_identity_all_nov
gender_identity_all_dec = flag_dec.gender_identity.value_counts()
gender_identity_completed_dec = flags_dec.gender_identity.value_counts()
gender_identity_divide_dec =gender_identity_completed_dec/gender_identity_all_dec
gender_identity_all_jan = flag_jan.gender_identity.value_counts()
gender_identity_completed_jan = flags_jan.gender_identity.value_counts()
gender_identity_divide_jan =gender_identity_completed_jan/gender_identity_all_jan
percents_gender_identity = pd.concat([gender_identity_all,gender_identity_completed,gender_identity_divide,gender_identity_all_nov,gender_identity_completed_nov,gender_identity_divide_nov,gender_identity_all_dec,gender_identity_completed_dec,gender_identity_divide_dec,gender_identity_all_jan,gender_identity_completed_jan,gender_identity_divide_jan], axis =1 )
percents_gender_identity.columns = ['Eligible All','Completed All','Percent All','Eligible Novemeber','Completed November','Percent November','Eligible December','Completed December','Percent December','Eligible January','Completed January','Percent January']
#percents_gender_identity


In [367]:
one = new.append(percents_age)
two = one.append(percents_education)
three= two.append(percents_income)
four = three.append(percents_primary_language)
five = four.append(percents_sex)
six = five.append(percents_sexual_orientation)
seven = six.append(percents_ethnicity)
all_values = seven.append(percents_gender_identity)
all_values

Unnamed: 0,Eligible All,Completed All,Percent All,Eligible Novemeber,Completed November,Percent November,Eligible December,Completed December,Percent December,Eligible January,Completed January,Percent January
ubr_age_at_consent,94050.0,32481.0,0.345359,91021.0,22697.0,0.24936,92780.0,2539.0,0.027366,94050.0,6668.0,0.070898
ubr_geography,29125.0,9200.0,0.31588,27920.0,6000.0,0.2149,28582.0,958.0,0.033518,29125.0,1936.0,0.066472
rbr_overall,80694.0,23979.0,0.29716,78044.0,15688.0,0.201015,79668.0,2396.0,0.030075,80694.0,5316.0,0.065879
ubr_gender,10173.0,2317.0,0.22776,9735.0,1375.0,0.141243,9990.0,307.0,0.030731,10173.0,529.0,0.052
Survey Participants,393182.0,86418.0,0.219791,381591.0,56315.0,0.147579,388389.0,8713.0,0.022434,393182.0,19117.0,0.048621
ubr_overall,312488.0,62439.0,0.199812,303547.0,40629.0,0.133847,308721.0,6317.0,0.020462,312488.0,13801.0,0.044165
ubr_sex,249.0,33.0,0.13253,248.0,23.0,0.092742,249.0,4.0,0.016064,249.0,5.0,0.02008
ubr_income,103894.0,10829.0,0.104231,102134.0,6570.0,0.064327,103100.0,1272.0,0.012338,103894.0,2602.0,0.025045
ubr_ethnicity,180675.0,18184.0,0.100645,176591.0,10664.0,0.060388,178973.0,2269.0,0.012678,180675.0,4640.0,0.025681
ubr_education,36392.0,1495.0,0.04108,35871.0,820.0,0.02286,36159.0,215.0,0.005946,36392.0,398.0,0.010936


# Saving to Excel

In [372]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
file_date = datetime.today().strftime('%Y.%m.%d')
file_name = 'overall' + file_date + '.xlsx'
writer = pd.ExcelWriter(file_name, engine='xlsxwriter')
all_values.to_excel(writer, sheet_name = 'SDOH Report', startrow=3)
data.to_excel(writer, sheet_name = 'Questions', index = False, startrow=3)
wb = writer.book
ws1 = writer.sheets['SDOH Report']
ws2 = writer.sheets['Questions']
# Cell formats
num_format = wb.add_format({'num_format': '#,##0'})
pct_format = wb.add_format({'num_format': '0.0%'})
idx_format = wb.add_format({'align': 'left', 'valign': 'top'})
a1_format = wb.add_format({'font_size': 24, 'bold': True})

ws1.write('A1', 'SDOH Completion Rates', a1_format)
ws1.set_column('A:A', 10, idx_format)
ws1.set_column('B:B', 26, idx_format)
ws1.set_column('B:C', 8.5, num_format)
ws1.set_column('D:D', 8.5, pct_format)
ws1.set_column('E:F', 8.5, num_format)
ws1.set_column('G:G', 8.5, pct_format)
ws1.set_column('H:I', 8.5, num_format)
ws1.set_column('J:J', 8.5, pct_format)
ws1.set_column('K:L', 8.5, num_format)
ws1.set_column('M:M', 8.5, pct_format)

ws2.write('A1', 'Participant Enrollment Overview', a1_format)
ws2.set_column('A:A', 10, idx_format)
ws2.set_column('B:G', 8.5, num_format)
ws2.set_column('H:M', 8.5, pct_format)

writer.save()