# Coronavirus Data Analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
spark.sparkContext.applicationId

'application_1635565815347_0210'

In [3]:
pd.options.display.max_columns = 45

In [4]:
from foresight.discern import broadcast_discern, push_discern, pop_discern

pd.set_option('display.max_colwidth', None)

# Location of the discernontology database
root = 's3://consult-datalab-persistence-s3-data/discernontology/v1/'  

contextid = ['da0acee6-1e2e-4384-9fc7-a58c259d0c50', '5E259FD575B54D4982D32D4E92DCA831', '8FBD43EF0885489AA9FF961D66294839'] 

# Loop that removes all hypthens and captializes all letters. Sets up the file location
for contextid in contextid:
    contextid = contextid.upper().replace("-", "")
    push_discern(spark, contextid, discern_root=root)
    print(contextid)

defaultcontext = 'DA0ACEE61E2E43849FC7A58C259D0C50'   
defaultcontext1 = '5E259FD575B54D4982D32D4E92DCA831' 
defaultcontext2 = '8FBD43EF0885489AA9FF961D66294839'

DA0ACEE61E2E43849FC7A58C259D0C50
5E259FD575B54D4982D32D4E92DCA831
8FBD43EF0885489AA9FF961D66294839


In [5]:
db = "real_world_data_2021_Q2"
spark.sql("USE {}".format(db))
print("Using database: {},".format(db))

Using database: real_world_data_2021_Q2,


In [6]:
spark.sql('show tables').toPandas()

Unnamed: 0,database,tableName,isTemporary
0,real_world_data_2021_q2,allergy,False
1,real_world_data_2021_q2,clinical_event,False
2,real_world_data_2021_q2,condition,False
3,real_world_data_2021_q2,demographics,False
4,real_world_data_2021_q2,encounter,False
5,real_world_data_2021_q2,immunization,False
6,real_world_data_2021_q2,lab,False
7,real_world_data_2021_q2,measurement,False
8,real_world_data_2021_q2,medication,False
9,real_world_data_2021_q2,medication_administration,False


### Create Variables for Queries

In [7]:
# Look back days variables
daysInterval = 365
print(daysInterval)
maxDays = 365
print(maxDays)
oneYearInterval=365
print(oneYearInterval)

# Username variable
user_id = 'rc047407'           ####<-------------------- change to your user id (Ex: tl068507)
userName = 'user_' + user_id
print(userName)

# Algorithm name variable
algorithmName='CoronavirusLengthOfStay_HighLevel'
print(algorithmName)

365
365
365
user_rc047407
CoronavirusLengthOfStay_HighLevel


In [8]:
spark.sql("""CREATE SCHEMA IF NOT EXISTS {userName}""".format(**{
    'userName':userName
})).show()

++
||
++
++



In [9]:
spark.sql("""select * from encounter """).printSchema()

root
 |-- encounterid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- reasonforvisit: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- financialclass: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullabl

In [10]:
spark.sql("""select * from demographics""").printSchema()

root
 |-- personid: string (nullable = true)
 |-- birthdate: string (nullable = true)
 |-- dateofdeath: string (nullable = true)
 |-- gender: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- birthsex: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |

In [11]:
spark.sql("""select * from tenant_attributes""").printSchema()

root
 |-- tenant: string (nullable = true)
 |-- zip_code: string (nullable = true)
 |-- bed_size: string (nullable = true)
 |-- speciality: string (nullable = true)
 |-- segment: string (nullable = true)



In [10]:
## Intensive care service
spark.sql(f"""select distinct hospitalservice.standard.primaryDisplay 
             from encounter e
             where HAS_ANY_CONCEPT_IN_CONTEXT(e.encounterTypes.type,
              array('INPATIENT_VISIT_ENC'),
                               '{defaultcontext}' )
      
                               
            and  ( exists  (   SELECT 1 
                               FROM {db}.condition c
                                WHERE e.personId = c.personId
                                  and e.encounterId = c.encounterId
                                  and e.tenant = c.tenant
                                  and ( HAS_ANY_CONCEPT_IN_CONTEXT(c.conditionCode, array('CORONAVIRUS_COVID_19_DISEASE_CLIN',
                                        'CORONAVIRUS_COVID_19_POSITIVE_CLIN'), '{defaultcontext}' ) )
                               --   and ( HAS_ANY_CONCEPT_IN_CONTEXT(c.conditionCode, array('CORONAVIRUS_COVID_19_DISEASE_CLIN',
                               --         'CORONAVIRUS_COVID_19_POSITIVE_CLIN', 'CORONAVIRUS_COVID_19_EXPOSURE_TO_CLIN',
                               --         'CORONAVIRUS_COVID_19_TRAVEL_TO_AREA_AFFECTED_CLIN'), '{defaultcontext}' ) )
                            )
               or exists (   SELECT 1 
                              FROM {db}.lab r
                               WHERE e.encounterId = r.encounterId
                                  and HAS_ANY_CONCEPT_IN_CONTEXT(r.labCode, array('SARS_CORONAVIRUS_2_COVID_19_OBSTYPE'), '{defaultcontext}' )    ---  SARS_CORONAVIRUS_2_COVID_19_OBSTYPE  Negative
                                            AND ( has_any_concept_in_context(r.typedvalue.codifiedValues.values.value,
                                               array('POSITIVE_QUAL'), 
                                                    '5E259FD575B54D4982D32D4E92DCA831' )
                                              )    
                
                            )         
                 )  
                 
         and hospitalservice.standard.primaryDisplay = "Intensive care service"       
                             
""").toPandas()

KeyboardInterrupt: 

In [None]:
spark.sql("""select * from lab """).printSchema()

In [12]:
spark.sql("""select * from questionnaire """).printSchema()

root
 |-- questionnaireid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- name: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- status: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |  

In [11]:
spark.sql("""select distinct tenant
                    ,personId

                from demographics
                
                order by 2,1
             
                limit(25)
                """).toPandas()

Unnamed: 0,tenant,personId
0,13,00000007-4c76-45de-8b82-8052b3564944
1,40,00000077-3603-4025-bb94-2cb28f0232e2
2,77,0000008b-4c47-498d-b0db-4dcbad1e4e84
3,37,000000a7-0c12-4af2-abd9-dedeb2c59d7b
4,6,000000c5-0fec-4cb7-b397-fb88e4a0851b
5,45,00000116-2fb9-4b5a-8ce2-4a3ce130ba90
6,56,00000137-d832-47c9-a3a0-4e7c2e6ecd75
7,77,00000142-71f8-4345-8cbc-5ed346a2bfa1
8,35,00000240-2272-4c71-b7e5-11f6e04c68b4
9,44,00000258-0a18-49d2-b7f8-c13790a2ae99


In [12]:
# spark.sql("""select gender.standard.primaryDisplay as gender
#                     ,birthsex.standard.primaryDisplay as birthsex
#                     ,maritalstatus.standard.primaryDisplay as maritalstatus
#                     ,races.standard.primaryDisplay as races
#                     ,zip_code
#                     ,birthDate
#                 from demographics
             
#                 limit(25)
#                 """).toPandas()

In [13]:
spark.sql("""select * from demographics""").printSchema()

root
 |-- personid: string (nullable = true)
 |-- birthdate: string (nullable = true)
 |-- dateofdeath: string (nullable = true)
 |-- gender: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- birthsex: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |

In [14]:
spark.sql("""select * from encounter""").printSchema()

root
 |-- encounterid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- reasonforvisit: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- financialclass: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullabl

In [15]:
spark.sql("""select * from procedure""").printSchema()

root
 |-- procedureid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- encounterid: string (nullable = true)
 |-- procedurecode: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- modifiercodes: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- standard: struct (nullable = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |    |    |-- standardCodings: arr

In [16]:
spark.sql("""select * from medication""").printSchema()

root
 |-- medicationid: string (nullable = true)
 |-- encounterid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- intendeddispenser: string (nullable = true)
 |-- startdate: string (nullable = true)
 |-- intendedadministrator: string (nullable = true)
 |-- doseunit: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- stopdate: string (nullable = true)
 |-- category: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (n

In [17]:
spark.sql("""select * from tenant_attributes """).printSchema()

root
 |-- tenant: string (nullable = true)
 |-- zip_code: string (nullable = true)
 |-- bed_size: string (nullable = true)
 |-- speciality: string (nullable = true)
 |-- segment: string (nullable = true)



In [18]:
spark.sql("""select * from tenant_attributes order by 1""").toPandas()

Unnamed: 0,tenant,zip_code,bed_size,speciality,segment
0,1,5,500-999,IDN/Regional Health Authority,Regional Hospital
1,10,6,500-999,Hospital,Regional Hospital
2,100,4,<100,Critical Access Hospital,Community Healthcare
3,101,2,200-299,Hospital,Childrens
4,102,6,<100,Community Hospital,Critical Access
...,...,...,...,...,...
96,95,7,<100,Critical Access Hospital,Community Healthcare
97,96,6,500-999,Academic Medical Center,Academic
98,97,6,<100,Critical Access Hospital,Community Healthcare
99,98,6,<100,Hospital,Community Healthcare


####  ZIP Codes Beginning With	States
    0	Connecticut, Massachusetts, Maine, New Hampshire, New Jersey, Puerto Rico, Rhode Island, Vermont, Virgin Islands, Army Post Office Europe, Fleet Post Office Europe
    1	Delaware, New York, Pennsylvania
    2	District of Columbia, Maryland, North Carolina, South Carolina, Virginia, West Virginia
    3	Alabama, Florida, Georgia, Mississippi, Tennessee, Army Post Office Americas, Fleet Post Office Americas
    4	Indiana, Kentucky, Michigan, Ohio
    5	Iowa, Minnesota, Montana, North Dakota, South Dakota, Wisconsin
    6	Illinois, Kansas, Missouri, Nebraska
    7	Arkansas, Louisiana, Oklahoma, Texas
    8	Arizona, Colorado, Idaho, New Mexico, Nevada, Utah, Wyoming
    9	Alaska, American Samoa, California, Guam, Hawaii, Marshall Islands, Federated States of Micronesia, Northern Mariana Islands, Oregon, Palau, Washington, Army Post Office Pacific, Fleet Post Office Pacific

In [11]:
hospitalDf = spark.sql(f"""
    SELECT distinct
        e.tenant
        ,t.bed_size as tenant_bed_size
        ,t.speciality as tenant_speciality
        ,t.segment as tenant_segment
        ,t.zip_code as tenant_zip_code
        ,e.personId
        ,e.encounterId
        ,e.serviceDate
        ,e.dischargeDate
        ,datediff(e.dischargeDate, e.serviceDate)+1 as length_of_stay
        ,d.zip_code as zip_code
        ,if (d.deceased = true, 1, 0) as deceased
        ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.dischargedisposition,
              array('DISCHARGED_TO_SKILLED_NURSING_FACILITY_DCDIS'),
                               '{defaultcontext}' ), 1, 0) as discharged_to_skilled_nursing_facility
        ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.dischargedisposition,
              array('HOSPICE_DCDIS'),
                               '{defaultcontext}' ), 1, 0) as discharged_to_hospice   
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.dischargedisposition,
              array('EXPIRED_DCDIS'),
                               '{defaultcontext2}' ), 1, 0) as died_in_the_hospital
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.dischargedisposition,
              array('DISCHARGED_TO_INPATIENT_REHAB_FACILITY_DCDIS'),
                               '{defaultcontext1}' ), 1, 0) as discharged_to_rehab_facility
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.dischargedisposition,
              array('DISCHARGED_TO_LONG_TERM_CARE_HOSPITAL_DCDIS', 'LONG_TERM_CARE_HOSPITAL_DCDIS'),
                               '{defaultcontext1}' ), 1, 0) as discharged_to_long_term_care_facility  
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.encounterTypes.type,
              array('INTENSIVE_CARE_VISIT_ENC'),
                               '{defaultcontext}' ), 1, 0) as intensive_care_visit 
       ,if(hospitalservice.standard.primaryDisplay = "Intensive care service", 1, 0) as intensive_care_medical_service                             
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.encounterTypes.type,
              array('PALLIATIVE_CARE_ENC'),
                               '{defaultcontext}' ), 1, 0) as palliative_care_visit 
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.hospitalServices.service,
              array('PALLIATIVE_CARE_MEDSERV'),
                               '{defaultcontext}' ), 1, 0) as palliative_care_medical_service                          
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.encounterTypes.type,
              array('HOSPICE_CARE_ENC'),
                               '{defaultcontext}' ), 1, 0) as hospice_care_visit   
       ,if( HAS_ANY_CONCEPT_IN_CONTEXT(e.hospitalServices.service,
              array('HOSPICE_CARE_MEDSERV'),
                               '{defaultcontext}' ), 1, 0) as hospice_care_medical_service                            
       ,ROUND((datediff(date_add(current_date(), - 0), d.birthDate))/365,0) as age
       ,CASE  
            WHEN HAS_ANY_CONCEPT_IN_CONTEXT(d.gender, array('FEMALE_GEN'),'{defaultcontext}' ) THEN 'female' 
            WHEN HAS_ANY_CONCEPT_IN_CONTEXT(d.gender, array('MALE_GEN'),'{defaultcontext}' ) THEN 'male'
            ELSE 'unknown'
         END as gender   

        FROM {db}.encounter e

        JOIN {db}.demographics d
            ON e.personId = d.personId
              and e.tenant = d.tenant
               
        JOIN {db}.tenant_attributes t
            ON  e.tenant = t.tenant            
                                   
        WHERE  ( HAS_ANY_CONCEPT_IN_CONTEXT(e.encounterTypes.type, array('INPATIENT_VISIT_ENC'), '{defaultcontext}' ) 
                  or HAS_ANY_CONCEPT_IN_CONTEXT(e.type, array('INPATIENT_VISIT_ENC'), '{defaultcontext}' ) )
            and e.dischargeDate is not null
            and e.serviceDate <= e.dischargeDate
            
            and  ( exists  (   SELECT 1 
                               FROM {db}.condition c
                                WHERE e.personId = c.personId
                                  and e.encounterId = c.encounterId
                                  and e.tenant = c.tenant
                                  and ( HAS_ANY_CONCEPT_IN_CONTEXT(c.conditionCode, array('CORONAVIRUS_COVID_19_DISEASE_CLIN',
                                        'CORONAVIRUS_COVID_19_POSITIVE_CLIN'), '{defaultcontext}' ) )
                               )
               or exists (   SELECT 1 
                              FROM {db}.lab r
                               WHERE e.encounterId = r.encounterId
                                  and HAS_ANY_CONCEPT_IN_CONTEXT(r.labCode, array('SARS_CORONAVIRUS_2_COVID_19_OBSTYPE'), '{defaultcontext}' )    ---  SARS_CORONAVIRUS_2_COVID_19_OBSTYPE  Negative
                                            AND ( has_any_concept_in_context(r.typedvalue.codifiedValues.values.value,
                                               array('POSITIVE_QUAL'), 
                                                    '5E259FD575B54D4982D32D4E92DCA831' )
                                              )    
                
                            )         
                 )      
       

""")

In [12]:
# Drop existing table, if there was one 
spark.sql(f"""DROP TABLE IF EXISTS {userName}.{algorithmName}_Hospital""").show() 

++
||
++
++



In [13]:
hospitalDf.write.saveAsTable(f"""{userName}.{algorithmName}_Hospital """)

In [14]:
spark.sql(f"""select *
   from {userName}.{algorithmName}_Hospital
 
    """).count()

316061

In [15]:
spark.sql(f"""select * from {userName}.{algorithmName}_Hospital """).printSchema()

root
 |-- tenant: integer (nullable = true)
 |-- tenant_bed_size: string (nullable = true)
 |-- tenant_speciality: string (nullable = true)
 |-- tenant_segment: string (nullable = true)
 |-- tenant_zip_code: string (nullable = true)
 |-- personId: string (nullable = true)
 |-- encounterId: string (nullable = true)
 |-- serviceDate: string (nullable = true)
 |-- dischargeDate: string (nullable = true)
 |-- length_of_stay: integer (nullable = true)
 |-- zip_code: string (nullable = true)
 |-- deceased: integer (nullable = true)
 |-- discharged_to_skilled_nursing_facility: integer (nullable = true)
 |-- discharged_to_hospice: integer (nullable = true)
 |-- died_in_the_hospital: integer (nullable = true)
 |-- discharged_to_rehab_facility: integer (nullable = true)
 |-- discharged_to_long_term_care_facility: integer (nullable = true)
 |-- intensive_care_visit: integer (nullable = true)
 |-- intensive_care_medical_service: integer (nullable = true)
 |-- palliative_care_visit: integer (nullab

In [12]:
spark.sql(f"""select *
   from {userName}.{algorithmName}_Hospital
 --  where length_of_stay > 0
 --    and discharged_to_skilled_nursing_facility = 0
     where intensive_care_medical_service = 1
      and zip_code > 0
 order by 5,2,3,4,6,7 
 
 limit(10)
    """).toPandas()

Unnamed: 0,tenant,tenant_bed_size,tenant_speciality,tenant_segment,tenant_zip_code,personId,encounterId,serviceDate,dischargeDate,length_of_stay,zip_code,deceased,discharged_to_skilled_nursing_facility,discharged_to_hospice,died_in_the_hospital,discharged_to_rehab_facility,discharged_to_long_term_care_facility,intensive_care_visit,intensive_care_medical_service,palliative_care_visit,palliative_care_medical_service,hospice_care_visit,hospice_care_medical_service,age,gender
0,51,500-999,Hospital,IDN,0,291432ee-939d-41f6-b0e0-ae7056e8b81b,b34b1d06-47d2-432b-824e-2a018f06aa75,2020-02-22T16:49:00+00:00,2020-03-01T01:04:00+00:00,9,1,1,0,0,1,0,0,0,1,0,0,0,0,77.0,male
1,90,500-999,Academic Medical Center,Academic,1,023501a2-9f7c-4a44-a6db-403951f1ea04,0032a916-4e5c-46d8-a52a-b520558cfb3c,2020-07-08T05:04:00+00:00,2020-07-23T23:28:00+00:00,16,1,1,0,0,1,0,0,0,1,0,0,0,0,80.0,male
2,90,500-999,Academic Medical Center,Academic,1,02c2363a-8e6d-4a55-bf07-54d1884c5755,ea7bca30-5e5b-43cb-9555-0f33e7d3a791,2020-04-02T05:58:44+00:00,2020-04-03T20:32:00+00:00,2,1,1,0,0,1,0,0,0,1,0,0,0,0,79.0,male
3,90,500-999,Academic Medical Center,Academic,1,02c2363a-8e6d-4a55-bf07-54d1884c5755,ea7bca30-5e5b-43cb-9555-0f33e7d3a791,2020-04-02T05:58:44+00:00,2020-04-03T20:32:00+00:00,2,1,0,0,0,1,0,0,0,1,0,0,0,0,79.0,male
4,90,500-999,Academic Medical Center,Academic,1,03a407a3-d6e1-42d1-910e-db803b19a3b9,75a9b685-16fe-4660-bc10-e9e4e6bbd0e8,2020-02-27T16:23:44+00:00,2020-03-11T02:30:00+00:00,14,1,1,0,0,1,0,0,0,1,0,0,0,0,58.0,male
5,90,500-999,Academic Medical Center,Academic,1,0407ee63-3872-4087-8e02-c488974817f8,9eba4bad-8593-44c7-a167-0f81cc22ea54,2020-03-30T00:25:25+00:00,2020-04-17T02:08:00+00:00,19,1,1,0,0,1,0,0,0,1,0,0,0,0,64.0,male
6,90,500-999,Academic Medical Center,Academic,1,06d32803-df4f-4cfb-98ba-4782661779cc,e4486cbc-ef2d-4c8e-9e7b-629e0484bb1d,2019-03-03T01:33:00+00:00,2019-03-06T05:45:00+00:00,4,1,1,0,0,1,0,0,0,1,0,0,0,0,77.0,female
7,90,500-999,Academic Medical Center,Academic,1,06d32803-df4f-4cfb-98ba-4782661779cc,e4486cbc-ef2d-4c8e-9e7b-629e0484bb1d,2019-03-03T01:33:00+00:00,2019-03-06T05:45:00+00:00,4,1,0,0,0,1,0,0,0,1,0,0,0,0,77.0,female
8,90,500-999,Academic Medical Center,Academic,1,07232c63-82bb-4c71-ab24-660be1f5ec9f,dadbf9ef-4b36-4649-af16-75c3b4443c39,2020-03-16T14:19:32+00:00,2020-03-30T10:45:00+00:00,15,1,1,0,0,1,0,0,0,1,0,0,0,0,29.0,male
9,90,500-999,Academic Medical Center,Academic,1,07232c63-82bb-4c71-ab24-660be1f5ec9f,dadbf9ef-4b36-4649-af16-75c3b4443c39,2020-03-16T14:19:32+00:00,2020-03-30T10:45:00+00:00,15,1,0,0,0,1,0,0,0,1,0,0,0,0,29.0,male


In [17]:
spark.sql(f"""
select age_range
       ,total_population
       
       ,total_not_vaccinated 
       ,round(total_not_vaccinated/total_population,4)*100 as percent_not_vaccinated
       
       ,total_before_vaccination
       ,round(total_before_vaccination/total_population,4)*100 as percent_total_before_vaccination
       ,total_after_vaccination
       ,round(total_after_vaccination/total_population,4)*100 as percent_total_after_vaccination
      
       ,total_deceased
       ,round(total_deceased/total_population,4)*100 as percent_deceased
       ,total_died_in_the_hospital
       ,round(total_died_in_the_hospital/total_population,4)*100 as percent_deceased
       ,total_discharged_to_hospice
       ,round(total_discharged_to_hospice/total_population,4)*100 as percent_discharged_to_hospice
       ,total_discharged_to_skilled_nursing_facility
       ,round(total_discharged_to_skilled_nursing_facility/total_population,4)*100 as percent_discharged_to_skilled_nursing_facility
       ,total_discharged_to_rehab_facility
       ,round(total_discharged_to_rehab_facility/total_population,4)*100 as percent_discharged_to_rehab_facility
       ,total_discharged_to_long_term_care_facility
       ,round(total_discharged_to_long_term_care_facility/total_population,4)*100 as percent_discharged_to_long_term_care_facility
       ,total_male
       ,round(total_male/total_population,4)*100 as percent_male
       ,total_female
       ,round(total_female/total_population,4)*100 as percent_female
       ,total_unknown_gender
       ,round(total_unknown_gender/total_population,4)*100 as percent_unknown_gender
   from (    
        select stats.*
              ,(total_population - total_male - total_female) as total_unknown_gender
              ,(total_population - total_vaccinated ) as total_not_vaccinated
              
        from (
           select
              case
                when age >= 90 then "90+"
                when age >= 85 and age <= 89 then "85-89"
                when age >= 80 and age <= 84 then "80-85"
                when age >= 75 and age <= 79 then "75-79"
                when age >= 70 and age <= 74 then "70-74"
                when age >= 65 and age <= 69 then "65-69"
                when age >= 60 and age <= 64 then "60-65"
                when age >= 55 and age <= 59 then "55-59"
                when age >= 50 and age <= 54 then "50-54"
                when age >= 45 and age <= 49 then "45-49"
                when age >= 40 and age <= 44 then "40-45"
                when age >= 35 and age <= 39 then "35-39"
                when age >= 30 and age <= 34 then "30-34"
                when age >= 25 and age <= 29 then "25-29"
                when age >= 20 and age <= 24 then "20-25"
                when age >= 15 and age <= 19 then "15-19"
                when age >= 10 and age <= 14 then "10-14"
                when age >=  5 and age <=  9 then "05-09"
                else "00-04"
        --          else "18-25"
              end as age_range
              ,count(distinct after_vaccination.personId) as total_after_vaccination
              ,count(distinct before_vaccination.personId) as total_before_vaccination
              ,count(distinct vaccinated.personId) as total_vaccinated
              ,count(distinct hospital.personId) as total_population
              ,count(distinct deceased.personId) as total_deceased
              ,count(distinct died_in_the_hospital.personId) as total_died_in_the_hospital
              ,count(distinct discharged_to_hospice.personId) as total_discharged_to_hospice
              ,count(distinct discharged_to_skilled_nursing_facility.personId) as total_discharged_to_skilled_nursing_facility
              ,count(distinct discharged_to_rehab_facility.personId) as total_discharged_to_rehab_facility
              ,count(distinct discharged_to_long_term_care_facility.personId) as total_discharged_to_long_term_care_facility
              ,count(distinct male.personId) as total_male
              ,count(distinct female.personId) as total_female

            from  {userName}.{algorithmName}_Hospital hospital

            left join {userName}.{algorithmName}_Vaccine after_vaccination
                     on hospital.personid = after_vaccination.personId
                       and ( datediff(hospital.serviceDate, after_vaccination.vaccine_date) > 0
                           and after_vaccination.vaccine_date is not null)  
                           
            left join {userName}.{algorithmName}_Vaccine before_vaccination
                     on hospital.personid = before_vaccination.personId
                       and ( datediff(hospital.serviceDate, before_vaccination.vaccine_date) <= 0
                            or before_vaccination.vaccine_date is null)                 
                
            left join {userName}.{algorithmName}_Vaccine vaccinated
                     on hospital.personid = vaccinated.personId
                
           
            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and ( deceased=1 or died_in_the_hospital=1 ) ) deceased
                on deceased.personId = hospital.personId   

            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and died_in_the_hospital=1 ) died_in_the_hospital
                on died_in_the_hospital.personId = hospital.personId    

            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and discharged_to_hospice=1 ) discharged_to_hospice
                on discharged_to_hospice.personId = hospital.personId   

            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and discharged_to_skilled_nursing_facility=1 ) discharged_to_skilled_nursing_facility
                on discharged_to_skilled_nursing_facility.personId = hospital.personId    

            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and discharged_to_rehab_facility=1 ) discharged_to_rehab_facility
                on discharged_to_rehab_facility.personId = hospital.personId       

            left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital
                    where age >= 0
                       and discharged_to_long_term_care_facility=1 ) discharged_to_long_term_care_facility
                on discharged_to_long_term_care_facility.personId = hospital.personId               

             left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital h
                    where age >= 0
                       and gender='male'
                       and not exists (select 1 
                                         from {userName}.{algorithmName}_Hospital h2
                                         where h.personId = h2.personId
                                               and h2.age >= 0
                                               and h2.gender='female')) male
                on male.personId = hospital.personId   

             left join  (
                  select distinct personId
                   from {userName}.{algorithmName}_Hospital h
                    where age >= 0
                       and gender='female'
                       and not exists (select 1 
                                         from {userName}.{algorithmName}_Hospital h2
                                         where h.personId = h2.personId
                                               and h2.age >= 0
                                               and h2.gender='male'))  female
                on female.personId = hospital.personId  



          --    where age >= 18 

              group by 1
           ) stats   
      ) stats2

 order by 1 desc
    """).toPandas()

Unnamed: 0,age_range,total_population,total_not_vaccinated,percent_not_vaccinated,total_before_vaccination,percent_total_before_vaccination,total_after_vaccination,percent_total_after_vaccination,total_deceased,percent_deceased,total_died_in_the_hospital,percent_deceased.1,total_discharged_to_hospice,percent_discharged_to_hospice,total_discharged_to_skilled_nursing_facility,percent_discharged_to_skilled_nursing_facility,total_discharged_to_rehab_facility,percent_discharged_to_rehab_facility,total_discharged_to_long_term_care_facility,percent_discharged_to_long_term_care_facility,total_male,percent_male,total_female,percent_female,total_unknown_gender,percent_unknown_gender
0,90+,9560,9415,98.48,137,1.43,12,0.13,3335,34.88,2558,26.76,1136,11.88,2703,28.27,193,2.02,100,1.05,4024,42.09,5517,57.71,19,0.2
1,85-89,10986,10744,97.8,228,2.08,25,0.23,3508,31.93,2852,25.96,829,7.55,2584,23.52,237,2.16,150,1.37,5308,48.32,5653,51.46,25,0.23
2,80-85,14526,14151,97.42,357,2.46,34,0.23,4109,28.29,3485,23.99,689,4.74,2840,19.55,313,2.15,210,1.45,7441,51.23,7046,48.51,39,0.27
3,75-79,17820,17359,97.41,437,2.45,34,0.19,4446,24.95,3823,21.45,560,3.14,2873,16.12,342,1.92,334,1.87,9514,53.39,8251,46.3,55,0.31
4,70-74,18737,18248,97.39,470,2.51,26,0.14,3988,21.28,3475,18.55,350,1.87,2329,12.43,342,1.83,392,2.09,10153,54.19,8547,45.62,37,0.2
5,65-69,18546,18078,97.48,448,2.42,24,0.13,3321,17.91,2964,15.98,243,1.31,1846,9.95,337,1.82,364,1.96,9993,53.88,8504,45.85,49,0.26
6,60-65,17756,17392,97.95,358,2.02,11,0.06,2606,14.68,2317,13.05,163,0.92,1298,7.31,247,1.39,257,1.45,9855,55.5,7846,44.19,55,0.31
7,55-59,15084,14785,98.02,291,1.93,14,0.09,1673,11.09,1517,10.06,83,0.55,786,5.21,205,1.36,223,1.48,8568,56.8,6485,42.99,31,0.21
8,50-54,12479,12259,98.24,216,1.73,11,0.09,1016,8.14,921,7.38,53,0.42,480,3.85,145,1.16,135,1.08,7179,57.53,5266,42.2,34,0.27
9,45-49,9114,8937,98.06,173,1.9,8,0.09,650,7.13,589,6.46,22,0.24,194,2.13,93,1.02,76,0.83,5148,56.48,3947,43.31,19,0.21


In [19]:
spark.sql(f"""
   select *
     from {userName}.{algorithmName}_Vaccine v
    
""").toPandas()

Unnamed: 0,tenant,personId,encounterId,encounterType,serviceDate,vaccine_date,vaccinated
0,10,cc9e21bc-5628-4964-a029-84706b3d8b30,c01c29a9-f58f-450d-bc7b-4abeaf4db5d2,Outpatient,2021-04-13T14:00:00+00:00,2021-03-17T15:58:16+00:00,1
1,63,083bfe43-23cb-4e59-85ab-f42bfb86be74,5a0f5dec-998d-4f32-b2ee-7ff7749774ea,Attending clinic,2021-02-24T18:00:00+00:00,2021-02-24T15:06:00+00:00,1
2,99,a09e5e4a-b77e-4416-a378-4900a955257b,3c1075e7-f73b-4c66-8538-818dfd8355b9,Seen in clinic,2020-12-10T20:37:35+00:00,2021-01-25T14:20:00+00:00,1
3,36,e704b4c9-eda3-4f34-9fec-8fde3eefe974,2db0b4f3-8ee4-44e0-915a-e9de82f0ef2f,Attending clinic,2021-03-10T22:25:00+00:00,2021-03-10T22:26:11+00:00,1
4,77,124ae674-7d6a-4d9f-91e6-f940f2381092,2b03828c-3741-411a-9a03-4984ea19fdfc,Attending clinic,2021-03-25T18:00:00+00:00,2021-03-25T18:19:00+00:00,1
...,...,...,...,...,...,...,...
6336,65,55e2e01f-d7b4-4803-a286-e17a1a4abaf2,79654dcd-61ac-4065-98d6-b9451d607f77,Seen in clinic,2021-02-01T17:45:00+00:00,2021-01-30T00:03:00+00:00,1
6337,10,dd4c8be5-570e-44ec-aa41-130e2eaf350c,3e70a55e-1e4f-49bc-93a1-7c5eb8c8218e,Outpatient,2021-03-30T12:39:11+00:00,2021-03-30T12:41:35+00:00,1
6338,14,b647a8af-766a-492b-bb64-9d9fbaf341f0,8cab3cc5-ed6b-45ac-8f32-2353be3c61cc,,2021-03-13T15:11:00+00:00,2021-03-13T15:16:22+00:00,1
6339,63,20a01ff1-8fb8-43fe-be10-3d63e5093a87,7af6acb6-922d-45ce-b36f-ec00d133367e,Attending clinic,2021-03-03T16:00:00+00:00,2021-03-03T16:31:00+00:00,1


In [10]:
spark.sql(f"""select *
   from {userName}.{algorithmName}_Hospital

    """).printSchema()

root
 |-- tenant: integer (nullable = true)
 |-- tenant_bed_size: string (nullable = true)
 |-- tenant_speciality: string (nullable = true)
 |-- tenant_segment: string (nullable = true)
 |-- tenant_zip_code: string (nullable = true)
 |-- personId: string (nullable = true)
 |-- encounterId: string (nullable = true)
 |-- serviceDate: string (nullable = true)
 |-- dischargeDate: string (nullable = true)
 |-- length_of_stay: integer (nullable = true)
 |-- zip_code: string (nullable = true)
 |-- deceased: integer (nullable = true)
 |-- discharged_to_skilled_nursing_facility: integer (nullable = true)
 |-- discharged_to_hospice: integer (nullable = true)
 |-- died_in_the_hospital: integer (nullable = true)
 |-- discharged_to_rehab_facility: integer (nullable = true)
 |-- discharged_to_long_term_care_facility: integer (nullable = true)
 |-- intensive_care_visit: integer (nullable = true)
 |-- intensive_care_medical_service: integer (nullable = true)
 |-- palliative_care_visit: integer (nullab

In [15]:
spark.sql(f"""select --tenant_bed_size
                    --,tenant_speciality
                    --,tenant_segment
                    tenant_zip_code
               --     ,tenant_bed_size
                    ,count(distinct tenant) number_of_hospitals
                    ,count(distinct personId) number_of_covid_patients
   from {userName}.{algorithmName}_Hospital
   group by 1 
   order by 1 
    """).toPandas()

Unnamed: 0,tenant_zip_code,number_of_hospitals,number_of_covid_patients
0,0,7,8545
1,1,4,38870
2,2,8,21205
3,3,7,19106
4,4,8,5478
5,5,10,4154
6,6,24,12981
7,7,10,16141
8,8,7,30818
9,9,11,20401


In [17]:
spark.sql(f"""select zip_code
                     ,count(distinct tenant)
   from tenant_attributes
   group by 1 
   order by 1 
    """).toPandas()

Unnamed: 0,zip_code,count(DISTINCT tenant)
0,0,7
1,1,4
2,2,8
3,3,7
4,4,8
5,5,11
6,6,26
7,7,11
8,8,7
9,9,12


In [13]:
spark.sql(f"""select segment
                     ,count(distinct tenant)
   from tenant_attributes
   group by 1 
   order by 1 
    """).toPandas()

Unnamed: 0,segment,count(DISTINCT tenant)
0,Academic,9
1,Childrens,6
2,Community Healthcare,21
3,Community Hospital,11
4,Critical Access,7
5,IDN,20
6,Reginal Health System,1
7,Regional Hospital,25
8,Specialty Hospital,1


In [15]:
spark.sql(f"""select count(distinct tenant)
   from tenant_attributes

    """).toPandas()

Unnamed: 0,count(DISTINCT tenant)
0,101


In [17]:
spark.sql(f"""select bed_size
                     ,count(distinct tenant)
   from tenant_attributes
   group by 1 
   order by 1 
    """).toPandas()

Unnamed: 0,bed_size,count(DISTINCT tenant)
0,,2
1,100-199,5
2,200-299,8
3,300-499,15
4,500-999,20
5,<100,40
6,>=1000,11


In [12]:
spark.sql(f"""select --tenant_bed_size
                    --,tenant_speciality
                    --,tenant_segment
                    tenant_zip_code
               --     ,tenant_bed_size
               --     ,count(distinct tenant) number_of_hospitals
                    ,count(distinct personId) number_of_covid_patients
   from {userName}.{algorithmName}_Hospital
   group by 1 
   order by 1 
    """).toPandas()

Unnamed: 0,tenant_zip_code,number_of_covid_patients
0,0,8545
1,1,38870
2,2,21205
3,3,19106
4,4,5478
5,5,4154
6,6,12981
7,7,16141
8,8,30818
9,9,20401


In [48]:
spark.sql(f"""

    select  sum(female) as female, sum(male) as male, sum(unknown) as unknown, sum(patients) as patients
    from (

           select gen.age_range
             ,sum(if(gender=='female',gen.patients,0)) as female
             ,sum(if(gender=='male',gen.patients,0)) as male
             ,tot.patients - sum(if(gender=='female',gen.patients,0)) - sum(if(gender=='male',gen.patients,0)) as unknown
             ,tot.patients
           from
           (
           select
              case
                when age >= 90 then "90+"
                when age >= 85 and age < 90 then "85-89"
                when age >= 80 and age < 85 then "80-85"
                when age >= 75 and age < 80 then "75-79"
                when age >= 70 and age < 75 then "70-74"
                when age >= 65 and age < 70 then "65-69"
                when age >= 60 and age < 65 then "60-65"
                when age >= 55 and age < 60 then "55-59"
                when age >= 50 and age < 55 then "50-54"
                when age >= 45 and age < 50 then "45-49"
                when age >= 40 and age < 45 then "40-45"
                when age >= 35 and age < 40 then "35-39"
                when age >= 30 and age < 35 then "30-34"
                when age >= 25 and age < 30 then "25-29"
                when age >= 20 and age < 25 then "20-25"
                when age >= 12 and age < 20 then "12-19"
                when age >= 0 and age < 12 then "0-12"
                when age <0 then "negative"
                when age is null then "none"
        --        when age >=  5 and age < 10 then "05-09"
        --        else "00-04"
        --          else "18-25"
                end as age_range
                ,gender  
                ,count(distinct personId) patients
                from ( select age, first(gender) as gender, personId
                           from {userName}.{algorithmName}_Hospital 
                           group by 1, 3
                            ) gen2
               group by 1, gender
               order by 1  
               ) gen
                
          join ( select
              case
                when age >= 90 then "90+"
                when age >= 85 and age < 90 then "85-89"
                when age >= 80 and age < 85 then "80-85"
                when age >= 75 and age < 80 then "75-79"
                when age >= 70 and age < 75 then "70-74"
                when age >= 65 and age < 70 then "65-69"
                when age >= 60 and age < 65 then "60-65"
                when age >= 55 and age < 60 then "55-59"
                when age >= 50 and age < 55 then "50-54"
                when age >= 45 and age < 50 then "45-49"
                when age >= 40 and age < 45 then "40-45"
                when age >= 35 and age < 40 then "35-39"
                when age >= 30 and age < 35 then "30-34"
                when age >= 25 and age < 30 then "25-29"
                when age >= 20 and age < 25 then "20-25"
                when age >= 12 and age < 20 then "12-19"
                when age >= 0 and age < 12 then "0-12"
                when age <0 then "negative"
                when age is null then "none"
        --        when age >=  5 and age < 10 then "05-09"
        --        else "00-04"
        --          else "18-25"
                end as age_range
                ,count(distinct personId) as patients
               from {userName}.{algorithmName}_Hospital
               group by 1) tot
               on gen.age_range = tot.age_range
               group by 1, tot.patients
               order by 1
     )     
     
""").toPandas()

Unnamed: 0,female,male,unknown,patients
0,85270,91954,8070,185294


In [10]:
spark.sql(f"""select
               gender                
               ,count(distinct personId) number_of_covid_patients
               from {userName}.{algorithmName}_Hospital
               group by 1 
               order by 1 
               
""").toPandas()

Unnamed: 0,gender,number_of_covid_patients
0,female,85725
1,male,92329
2,unknown,9044
