# Coronavirus Data Analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Display the application id of the active Spark session (useful for locating this run's logs).
spark.sparkContext.applicationId

'application_1631937024592_0300'

In [3]:
# Let pandas render up to 35 columns before it starts eliding the middle ones.
pd.set_option('display.max_columns', 35)

In [4]:
from foresight.discern import broadcast_discern, push_discern, pop_discern

### The new Care Management default context is 5e259fd5-75b5-4d49-82d3-2d4e92dca831. Use it for any new client, and
###  for any algorithm whose wiki is not line-for-line with Cerner Standard.

# Never truncate cell contents when pandas renders a frame.
pd.set_option('display.max_colwidth', None)

# Location of the discernontology database
root = 's3://consult-datalab-persistence-s3-data/discernontology/v1/'   ##<------------- change it to your stack without the v1/

# Context ids to register, in whatever hyphenation/case they were copied in.
# Kept under its own name so the loop below never overwrites the list; the
# original `for contextid in contextid:` left `contextid` bound to a string,
# and re-running the cell then iterated over that string's characters.
context_ids = ['da0acee6-1e2e-4384-9fc7-a58c259d0c50', '5E259FD575B54D4982D32D4E92DCA831']

# Normalize each id (remove all hyphens, capitalize all letters) and push it
# with the ontology root. NOTE(review): push_discern presumably registers the
# context with the Spark session so HAS_ANY_CONCEPT_IN_CONTEXT can resolve it
# -- confirm against the foresight documentation.
for raw_context_id in context_ids:
    # `contextid` still ends up holding the last normalized id, as before.
    contextid = raw_context_id.upper().replace("-", "")
    push_discern(spark, contextid, discern_root=root)
    print(contextid)

# Normalized ids interpolated into the SQL queries further down.
defaultcontext = 'DA0ACEE61E2E43849FC7A58C259D0C50'   
defaultcontext1 = '5E259FD575B54D4982D32D4E92DCA831' 

DA0ACEE61E2E43849FC7A58C259D0C50
5E259FD575B54D4982D32D4E92DCA831


In [5]:
# Database that unqualified table names resolve against for the rest of the notebook.
db = "real_world_data_2021_Q2"

use_database_sql = "USE {}".format(db)
spark.sql(use_database_sql)

# Echo the selection so the active database is visible in the rendered notebook.
print("Using database: {},".format(db))

Using database: real_world_data_2021_Q2,


In [6]:
# List the tables of the current database, converted to pandas for rich display.
spark.sql('show tables').toPandas()

Unnamed: 0,database,tableName,isTemporary
0,real_world_data_2021_q2,allergy,False
1,real_world_data_2021_q2,clinical_event,False
2,real_world_data_2021_q2,condition,False
3,real_world_data_2021_q2,demographics,False
4,real_world_data_2021_q2,encounter,False
5,real_world_data_2021_q2,immunization,False
6,real_world_data_2021_q2,lab,False
7,real_world_data_2021_q2,measurement,False
8,real_world_data_2021_q2,medication,False
9,real_world_data_2021_q2,medication_administration,False


### Create Variables for Queries

In [7]:
# Look-back windows (in days); all three currently share the same value.
daysInterval = maxDays = oneYearInterval = 365
print(daysInterval, maxDays, oneYearInterval, sep='\n')

# Schema name is derived from the user id.
user_id = 'rc047407'           ####<-------------------- change to your user id (Ex: tl068507)
userName = 'user_' + user_id
print(userName)

# Prefix of the tables this notebook reads from and writes to.
algorithmName = 'CoronavirusLengthOfStay_HighLevel'
print(algorithmName)

365
365
365
user_rc047407
CoronavirusLengthOfStay_HighLevel


In [8]:
# Make sure the per-user schema exists before any table is written into it.
create_schema_sql = """CREATE SCHEMA IF NOT EXISTS {userName}""".format(userName=userName)
spark.sql(create_schema_sql).show()

++
||
++
++



In [9]:
# Inspect the columns of the hospital cohort table that the procedure query joins against.
hospital_select_sql = f"""select * FROM {userName}.{algorithmName}_Hospital h  """
hospital_df = spark.sql(hospital_select_sql)
hospital_df.printSchema()

root
 |-- tenant: integer (nullable = true)
 |-- tenant_bed_size: string (nullable = true)
 |-- tenant_speciality: string (nullable = true)
 |-- tenant_segment: string (nullable = true)
 |-- tenant_zip_code: string (nullable = true)
 |-- personId: string (nullable = true)
 |-- encounterId: string (nullable = true)
 |-- serviceDate: string (nullable = true)
 |-- dischargeDate: string (nullable = true)
 |-- length_of_stay: integer (nullable = true)
 |-- zip_code: string (nullable = true)
 |-- deceased: integer (nullable = true)
 |-- discharged_to_skilled_nursing_facility: integer (nullable = true)
 |-- discharged_to_hospice: integer (nullable = true)
 |-- died_in_the_hospital: integer (nullable = true)
 |-- discharged_to_rehab_facility: integer (nullable = true)
 |-- discharged_to_long_term_care_facility: integer (nullable = true)
 |-- age: double (nullable = true)
 |-- gender: string (nullable = true)



In [10]:
# Print the schema of the `procedure` table; the unqualified name resolves
# against the database chosen by the earlier `USE` statement.
spark.sql("""select * from procedure """).printSchema()

root
 |-- procedureid: string (nullable = true)
 |-- personid: string (nullable = true)
 |-- encounterid: string (nullable = true)
 |-- procedurecode: struct (nullable = true)
 |    |-- standard: struct (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- codingSystemId: string (nullable = true)
 |    |    |-- primaryDisplay: string (nullable = true)
 |    |-- standardCodings: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |-- modifiercodes: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- standard: struct (nullable = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- codingSystemId: string (nullable = true)
 |    |    |    |-- primaryDisplay: string (nullable = true)
 |    |    |-- standardCodings: arr

In [11]:
# Build the transplant-procedure feature frame.
#
# Each distinct row pairs a hospital-cohort row with one procedure of the same
# person and tenant whose code matches the bone-marrow or solid-organ
# transplant concepts in `defaultcontext`. The join is on personId + tenant
# only (not encounterId), so procedures from any encounter of that person match.
# Procedures whose status matches the entered-in-error / cancelled / deferred /
# unauthorized / voided / rejected concepts are excluded; those concepts are
# looked up in `defaultcontext1`, which replaces the id that was previously
# hard-coded in the query (same value, now defined in one place).
#
# Derived columns:
#   procedure_performed_in_past_days / _years -- days (and days / 365) between
#       the procedure's serviceStartDate and current_date(); falls back to the
#       hospital serviceDate when length(serviceStartDate) <= 3 or it is null
#       (presumably meaning the start date is missing -- TODO confirm).
#   had_bone_marrow_transplant_or_transfusion, had_solid_organ_transplant --
#       1/0 flags for the respective concept.
#
# NOTE(review): because the elapsed time is measured against current_date(),
# these two columns change with the day the notebook is run; confirm that
# "days before today" rather than "days before admission" is intended.
procedureDf = spark.sql(f"""


    SELECT distinct 
        h.tenant
        ,h.personId
        ,h.serviceDate
        ,h.dischargeDate
        ,p.procedureId
        ,p.serviceStartDate
        ,p.serviceEndDate
        ,if(length(p.serviceStartDate)>3, datediff(current_date(), p.serviceStartDate),
                     datediff(current_date(),h.serviceDate)) as procedure_performed_in_past_days
        ,round((if(length(p.serviceStartDate)>3, datediff(current_date(), p.serviceStartDate),
                     datediff(current_date(),h.serviceDate)) ) / 365, 2) as procedure_performed_in_past_years
        ,if(HAS_ANY_CONCEPT_IN_CONTEXT(p.procedureCode,
                            array('BONE_MARROW_TRANSPLANT_OR_TRANSFUSION_PROC'),
                            '{defaultcontext}'),1,0) as had_bone_marrow_transplant_or_transfusion
        ,if(HAS_ANY_CONCEPT_IN_CONTEXT(p.procedureCode,
                            array('SOLID_ORGAN_TRANSPLANT_STATUS_PROC'),
                            '{defaultcontext}'),1,0) as had_solid_organ_transplant    


  
        FROM {userName}.{algorithmName}_Hospital h  

       

        JOIN {db}.procedure p
            ON  h.personId = p.personId
                and h.tenant = p.tenant
                
                AND HAS_ANY_CONCEPT_IN_CONTEXT(p.procedureCode,
                            array('BONE_MARROW_TRANSPLANT_OR_TRANSFUSION_PROC','SOLID_ORGAN_TRANSPLANT_STATUS_PROC'),
                               '{defaultcontext}' ) 
                AND NOT HAS_ANY_CONCEPT_IN_CONTEXT(p.status, array('ENTERED_IN_ERROR_PROBSTAT', 'ERROR_ENTRY_DELETED_QUAL', 
                                                       'CANCELED_PROBSTAT', 'CANCELED_QUAL', 'DEFERRED_QUAL',
                                                       'UNAUTHORIZED_QUAL', 'VOIDED_QUAL', 'REJECTED_QUAL'), '{defaultcontext1}')  
                
                            
      
                       
   
   
""")

In [12]:
# Remove any previous copy of the procedure table so it can be written afresh.
drop_procedure_table_sql = f"""DROP TABLE IF EXISTS {userName}.{algorithmName}_Procedure"""
spark.sql(drop_procedure_table_sql).show()

++
||
++
++



In [13]:
# Persist the procedure features as a table in the user's schema.
# The default save mode raises if the table already exists, which made this
# cell fail when re-run on its own; overwrite mode keeps it idempotent whether
# or not the DROP TABLE cell was executed first.
procedureDf.write.mode("overwrite").saveAsTable(f"""{userName}.{algorithmName}_Procedure""")

In [14]:
# Number of rows in the procedure feature frame.
# NOTE(review): procedureDf is not cached in the visible cells, so this
# presumably re-executes the join rather than reading the saved table -- confirm.
procedureDf.count()

1178

In [15]:
# Preview the first 25 saved procedure rows, ordered by the first four selected
# columns (tenant, personId, serviceDate, dischargeDate). procedureId is not
# selected here, so rows that differ only by procedure look identical.
procedure_preview_sql = f"""select p.tenant
                    ,p.personId
                    ,p.serviceDate
                    ,p.dischargeDate
                    ,p.serviceStartDate
                    ,p.serviceEndDate
                    ,procedure_performed_in_past_days
                    ,procedure_performed_in_past_years
                    ,had_bone_marrow_transplant_or_transfusion
                    ,had_solid_organ_transplant 
            from {userName}.{algorithmName}_Procedure p
            
          order by 1,2,3,4
        limit(25)
"""
procedure_preview_df = spark.sql(procedure_preview_sql)
procedure_preview_df.toPandas()

Unnamed: 0,tenant,personId,serviceDate,dischargeDate,serviceStartDate,serviceEndDate,procedure_performed_in_past_days,procedure_performed_in_past_years,had_bone_marrow_transplant_or_transfusion,had_solid_organ_transplant
0,1,3c7f1e75-36ac-4909-b07e-f47932ce103f,2020-09-27T04:43:00+00:00,2020-11-03T23:25:56+00:00,,,362,0.99,0,1
1,2,73b9b3e6-fa81-43fe-8fbc-6ebdb064160a,2021-02-17T20:56:50+00:00,2021-02-20T00:10:00+00:00,2018-11-05T07:00:00+00:00,,1054,2.89,0,1
2,6,45f9b669-e3b5-47a5-ad87-9900fce38bbb,2020-12-09T00:00:00,2020-12-21T00:00:00,2020-12-09T00:00:00,2020-12-09T00:00:00,289,0.79,1,0
3,6,77275659-3175-4e03-9676-e53ce09f89f2,2020-11-24T19:57:00+00:00,2020-11-30T18:56:00+00:00,2012-07-13T04:00:00+00:00,,3360,9.21,0,1
4,6,77275659-3175-4e03-9676-e53ce09f89f2,2020-11-24T19:57:00+00:00,2020-11-30T18:56:00+00:00,2012-07-13T04:00:00+00:00,,3360,9.21,0,1
5,6,7d12960a-1387-4b64-9e11-701f200bb998,2021-02-23T20:00:00+00:00,2021-03-06T21:40:00+00:00,,,213,0.58,0,1
6,6,7de05e47-a2c5-4a4c-bd8f-68ded7cac6a3,2020-07-20T00:00:00,2020-07-30T00:00:00,,,431,1.18,0,1
7,6,7de05e47-a2c5-4a4c-bd8f-68ded7cac6a3,2020-07-20T18:36:00+00:00,2020-07-30T16:09:00+00:00,,,431,1.18,0,1
8,6,818676c2-8bcf-479e-9838-03e1c0430291,2020-09-22T00:00:00,2020-09-26T00:00:00,2020-08-12T00:00:00,2020-08-12T00:00:00,408,1.12,1,0
9,6,818676c2-8bcf-479e-9838-03e1c0430291,2020-09-22T21:29:00+00:00,2020-09-26T20:46:00+00:00,2020-08-12T00:00:00,2020-08-12T00:00:00,408,1.12,1,0
