## Cross-Dataset Analysis

In [1]:
dataset_id = " reporting"
!bq --location=US mk --dataset {dataset_id}

Dataset 'studied-brand-266702:reporting' successfully created.


### Implementation of cross-dataset queries

#### Query #1: Compare the number of adverse event cases by age in the US in 2018, through a full outer join between the drug dataset (dataset 2) and the vaccine dataset (dataset 1); only ages present on the drug side are kept

In [103]:
%%bigquery
-- 2018 case counts per patient age: drug (faers) side next to vaccine (vaers) side.
with drug_by_age as (
    -- Distinct drug cases per age, limited to US events dated within 2018.
    select
        AGE_YRS as Age,
        count(distinct p.CASE_ID) as Drug_Cases
    from faers_modeled.Patient_SQL_Final as p
    inner join faers_modeled.Adverse_Event as e
        on p.CASE_ID = e.CASE_ID
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by AGE_YRS
),
vaccine_by_age as (
    -- Distinct vaccine cases per age with an onset date within 2018
    -- (no country filter is applied on this side).
    select
        AGE_YRS as Age,
        count(distinct p.VAERS_ID) as Vaccine_Cases
    from vaers_modeled.Patient as p
    inner join vaers_modeled.Adverse_Event_Beam_DF as e
        on p.VAERS_ID = e.VAERS_ID
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by AGE_YRS
)
select
    drug_by_age.Age,
    drug_by_age.Drug_Cases,
    vaccine_by_age.Vaccine_Cases
from drug_by_age
full join vaccine_by_age
    on drug_by_age.Age = vaccine_by_age.Age
-- Only rows carrying a drug-side age survive this filter, so vaccine-only ages
-- and drug rows whose age is NULL are dropped even though the join is a full join.
where drug_by_age.Age is not null
order by Age

Unnamed: 0,Age,Drug_Cases,Vaccine_Cases
0,0,196,760.0
1,1,132,1575.0
2,2,136,511.0
3,3,127,193.0
4,4,108,995.0
...,...,...,...
100,100,5,1.0
101,101,3,
102,102,1,
103,103,1,


#### Create database view from cross-dataset query above (Age vs Drug_Cases and Vaccine_Cases) for Google Data Studio.

In [104]:
%%bigquery
-- View: 2018 case counts per patient age, drug (faers) side next to vaccine (vaers) side.
-- Table paths are fully qualified with the project id and backtick-quoted.
create or replace view reporting.Cases_By_Age as
with drug_by_age as (
    -- Distinct drug cases per age, limited to US events dated within 2018.
    select
        AGE_YRS as Age,
        count(distinct p.CASE_ID) as Drug_Cases
    from `studied-brand-266702.faers_modeled.Patient_SQL_Final` as p
    inner join `studied-brand-266702.faers_modeled.Adverse_Event` as e
        on p.CASE_ID = e.CASE_ID
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by AGE_YRS
),
vaccine_by_age as (
    -- Distinct vaccine cases per age with an onset date within 2018
    -- (no country filter is applied on this side).
    select
        AGE_YRS as Age,
        count(distinct p.VAERS_ID) as Vaccine_Cases
    from `studied-brand-266702.vaers_modeled.Patient` as p
    inner join `studied-brand-266702.vaers_modeled.Adverse_Event_Beam_DF` as e
        on p.VAERS_ID = e.VAERS_ID
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by AGE_YRS
)
select
    drug_by_age.Age,
    drug_by_age.Drug_Cases,
    vaccine_by_age.Vaccine_Cases
from drug_by_age
full join vaccine_by_age
    on drug_by_age.Age = vaccine_by_age.Age
-- Only rows carrying a drug-side age survive this filter, so vaccine-only ages
-- and drug rows whose age is NULL are dropped even though the join is a full join.
where drug_by_age.Age is not null
order by Age

#### Query #2: Compare number of cases, deaths and hospitalizations by month in the US in 2018, through an inner join between drug dataset (dataset 2) and vaccine dataset (dataset 1)  

In [97]:
%%bigquery
-- 2018 cases, deaths and hospitalizations per calendar month, drug vs. vaccine.
with drug_by_month as (
    -- US drug events dated within 2018, bucketed by month number.
    -- Drug_Cases counts distinct case ids; the countif columns count matching rows.
    select
        extract(month from EVENT_DATE) as Month,
        count(distinct CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization
    from faers_modeled.Adverse_Event
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by Month
),
vaccine_by_month as (
    -- Vaccine events with an onset date within 2018, bucketed by month number.
    select
        extract(month from ONSET_DATE) as Month,
        count(distinct VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization
    from vaers_modeled.Adverse_Event_Beam_DF
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by Month
)
select
    drug_by_month.Month,
    drug_by_month.Drug_Cases,
    drug_by_month.Drug_Death,
    drug_by_month.Drug_Hospitalization,
    vaccine_by_month.Vaccine_Cases,
    vaccine_by_month.Vaccine_Death,
    vaccine_by_month.Vaccine_Hospitalization
from drug_by_month
-- Inner join: a month appears only when both sides have rows for it.
inner join vaccine_by_month
    on drug_by_month.Month = vaccine_by_month.Month
order by Month

Unnamed: 0,Month,Drug_Cases,Drug_Death,Drug_Hospitalization,Vaccine_Cases,Vaccine_Death,Vaccine_Hospitalization
0,1,1722,101,313,1730,8,57
1,2,1775,106,311,1790,7,57
2,3,2156,95,365,1994,5,48
3,4,2400,130,430,2809,3,68
4,5,2912,162,523,2872,5,91
5,6,3721,211,696,2771,3,58
6,7,25787,994,4186,2787,1,76
7,8,7739,449,1483,3430,3,78
8,9,11901,810,2467,4225,4,133
9,10,17130,1168,3223,5562,15,158


#### Create database view from cross-dataset query above for Google Data Studio.

In [98]:
%%bigquery
-- View: 2018 cases, deaths and hospitalizations per calendar month, drug vs. vaccine.
-- Table paths are fully qualified with the project id and backtick-quoted.
create or replace view reporting.Cases_By_Month as
with drug_by_month as (
    -- US drug events dated within 2018, bucketed by month number.
    -- Drug_Cases counts distinct case ids; the countif columns count matching rows.
    select
        extract(month from EVENT_DATE) as Month,
        count(distinct CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization
    from `studied-brand-266702.faers_modeled.Adverse_Event`
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by Month
),
vaccine_by_month as (
    -- Vaccine events with an onset date within 2018, bucketed by month number.
    select
        extract(month from ONSET_DATE) as Month,
        count(distinct VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization
    from `studied-brand-266702.vaers_modeled.Adverse_Event_Beam_DF`
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by Month
)
select
    drug_by_month.Month,
    drug_by_month.Drug_Cases,
    drug_by_month.Drug_Death,
    drug_by_month.Drug_Hospitalization,
    vaccine_by_month.Vaccine_Cases,
    vaccine_by_month.Vaccine_Death,
    vaccine_by_month.Vaccine_Hospitalization
from drug_by_month
-- Inner join: a month appears only when both sides have rows for it.
inner join vaccine_by_month
    on drug_by_month.Month = vaccine_by_month.Month
order by Month

#### Query #3: Compare number of cases, deaths, hospitalizations, life threats and disabilities by gender in the US in 2018, through an inner join between drug dataset (dataset 2) and vaccine dataset (dataset 1)  

In [105]:
%%bigquery
-- 2018 cases and outcome tallies per patient sex ('M' / 'F' only), drug vs. vaccine.
with drug_by_sex as (
    -- US drug events dated within 2018, joined to their patient rows.
    -- Drug_Cases counts distinct case ids; the countif columns count joined rows per outcome code.
    select
        SEX as Sex,
        count(distinct p.CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization,
        countif(OUTCOME = 'LT') as Drug_Life_Threat,
        countif(OUTCOME = 'DS') as Drug_Disability
    from faers_modeled.Patient_SQL_Final as p
    inner join faers_modeled.Adverse_Event as e
        on p.CASE_ID = e.CASE_ID
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
        and SEX in ('M', 'F')
    group by SEX
),
vaccine_by_sex as (
    -- Vaccine events with an onset date within 2018, joined to their patient rows.
    select
        SEX as Sex,
        count(distinct p.VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization,
        countif(L_THREAT = true) as Vaccine_Life_Threat,
        countif(DISABLE = true) as Vaccine_Disability
    from vaers_modeled.Patient as p
    inner join vaers_modeled.Adverse_Event_Beam_DF as e
        on p.VAERS_ID = e.VAERS_ID
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
        and SEX in ('M', 'F')
    group by SEX
)
select
    drug_by_sex.Sex,
    drug_by_sex.Drug_Cases,
    drug_by_sex.Drug_Death,
    drug_by_sex.Drug_Hospitalization,
    drug_by_sex.Drug_Life_Threat,
    drug_by_sex.Drug_Disability,
    vaccine_by_sex.Vaccine_Cases,
    vaccine_by_sex.Vaccine_Death,
    vaccine_by_sex.Vaccine_Hospitalization,
    vaccine_by_sex.Vaccine_Life_Threat,
    vaccine_by_sex.Vaccine_Disability
from drug_by_sex
inner join vaccine_by_sex
    on drug_by_sex.Sex = vaccine_by_sex.Sex
order by Sex

Unnamed: 0,Sex,Drug_Cases,Drug_Death,Drug_Hospitalization,Drug_Life_Threat,Drug_Disability,Vaccine_Cases,Vaccine_Death,Vaccine_Hospitalization,Vaccine_Life_Threat,Vaccine_Disability
0,F,58077,2401,9459,639,714,21337,27,465,89,192
1,M,32628,2820,7462,577,367,10233,36,472,121,123


#### Create database view from cross-dataset query above for Google Data Studio.

In [106]:
%%bigquery
-- View: 2018 cases and outcome tallies per patient sex ('M' / 'F' only), drug vs. vaccine.
-- Table paths are fully qualified with the project id and backtick-quoted.
create or replace view reporting.Cases_By_Gender as
with drug_by_sex as (
    -- US drug events dated within 2018, joined to their patient rows.
    -- Drug_Cases counts distinct case ids; the countif columns count joined rows per outcome code.
    select
        SEX as Sex,
        count(distinct p.CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization,
        countif(OUTCOME = 'LT') as Drug_Life_Threat,
        countif(OUTCOME = 'DS') as Drug_Disability
    from `studied-brand-266702.faers_modeled.Patient_SQL_Final` as p
    inner join `studied-brand-266702.faers_modeled.Adverse_Event` as e
        on p.CASE_ID = e.CASE_ID
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
        and SEX in ('M', 'F')
    group by SEX
),
vaccine_by_sex as (
    -- Vaccine events with an onset date within 2018, joined to their patient rows.
    select
        SEX as Sex,
        count(distinct p.VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization,
        countif(L_THREAT = true) as Vaccine_Life_Threat,
        countif(DISABLE = true) as Vaccine_Disability
    from `studied-brand-266702.vaers_modeled.Patient` as p
    inner join `studied-brand-266702.vaers_modeled.Adverse_Event_Beam_DF` as e
        on p.VAERS_ID = e.VAERS_ID
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
        and SEX in ('M', 'F')
    group by SEX
)
select
    drug_by_sex.Sex,
    drug_by_sex.Drug_Cases,
    drug_by_sex.Drug_Death,
    drug_by_sex.Drug_Hospitalization,
    drug_by_sex.Drug_Life_Threat,
    drug_by_sex.Drug_Disability,
    vaccine_by_sex.Vaccine_Cases,
    vaccine_by_sex.Vaccine_Death,
    vaccine_by_sex.Vaccine_Hospitalization,
    vaccine_by_sex.Vaccine_Life_Threat,
    vaccine_by_sex.Vaccine_Disability
from drug_by_sex
inner join vaccine_by_sex
    on drug_by_sex.Sex = vaccine_by_sex.Sex
order by Sex

#### Query #4 (Additional): Compare number of cases, deaths and hospitalizations by date in the US in 2018, through an inner join between drug dataset (dataset 2) and vaccine dataset (dataset 1)

In [101]:
%%bigquery
-- 2018 cases, deaths and hospitalizations per calendar date, drug vs. vaccine.
with drug_by_date as (
    -- US drug events dated within 2018, one row per event date.
    -- Drug_Cases counts distinct case ids; the countif columns count matching rows.
    select
        EVENT_DATE as Date,
        count(distinct CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization
    from faers_modeled.Adverse_Event
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by EVENT_DATE
),
vaccine_by_date as (
    -- Vaccine events with an onset date within 2018, one row per onset date.
    select
        ONSET_DATE as Date,
        count(distinct VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization
    from vaers_modeled.Adverse_Event_Beam_DF
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by ONSET_DATE
)
select
    drug_by_date.Date,
    drug_by_date.Drug_Cases,
    drug_by_date.Drug_Death,
    drug_by_date.Drug_Hospitalization,
    vaccine_by_date.Vaccine_Cases,
    vaccine_by_date.Vaccine_Death,
    vaccine_by_date.Vaccine_Hospitalization
from drug_by_date
-- Inner join: a date appears only when both sides have rows for it.
inner join vaccine_by_date
    on drug_by_date.Date = vaccine_by_date.Date
order by Date

Unnamed: 0,Date,Drug_Cases,Drug_Death,Drug_Hospitalization,Vaccine_Cases,Vaccine_Death,Vaccine_Hospitalization
0,2018-01-01,77,2,11,70,1,7
1,2018-01-02,34,5,7,32,0,2
2,2018-01-03,38,2,7,45,0,2
3,2018-01-04,34,3,7,58,0,0
4,2018-01-05,35,7,5,45,1,2
...,...,...,...,...,...,...,...
360,2018-12-27,84,1,4,32,0,0
361,2018-12-28,89,0,5,28,0,0
362,2018-12-29,23,1,0,23,0,0
363,2018-12-30,15,0,2,4,0,0


#### Create database view from cross-dataset query above for Google Data Studio.

In [102]:
%%bigquery
-- View: 2018 cases, deaths and hospitalizations per calendar date, drug vs. vaccine.
-- Table paths are fully qualified with the project id and backtick-quoted.
create or replace view reporting.Cases_By_Date as
with drug_by_date as (
    -- US drug events dated within 2018, one row per event date.
    -- Drug_Cases counts distinct case ids; the countif columns count matching rows.
    select
        EVENT_DATE as Date,
        count(distinct CASE_ID) as Drug_Cases,
        countif(OUTCOME = 'DE') as Drug_Death,
        countif(OUTCOME = 'HO') as Drug_Hospitalization
    from `studied-brand-266702.faers_modeled.Adverse_Event`
    where COUNTRY = 'US'
        and EVENT_DATE between '2018-1-1' and '2018-12-31'
    group by EVENT_DATE
),
vaccine_by_date as (
    -- Vaccine events with an onset date within 2018, one row per onset date.
    select
        ONSET_DATE as Date,
        count(distinct VAERS_ID) as Vaccine_Cases,
        countif(DIED = true) as Vaccine_Death,
        countif(HOSPITAL = true) as Vaccine_Hospitalization
    from `studied-brand-266702.vaers_modeled.Adverse_Event_Beam_DF`
    where ONSET_DATE between '2018-1-1' and '2018-12-31'
    group by ONSET_DATE
)
select
    drug_by_date.Date,
    drug_by_date.Drug_Cases,
    drug_by_date.Drug_Death,
    drug_by_date.Drug_Hospitalization,
    vaccine_by_date.Vaccine_Cases,
    vaccine_by_date.Vaccine_Death,
    vaccine_by_date.Vaccine_Hospitalization
from drug_by_date
-- Inner join: a date appears only when both sides have rows for it.
inner join vaccine_by_date
    on drug_by_date.Date = vaccine_by_date.Date
order by Date