### Model VAERS Dataset

In [1]:
# Name of the BigQuery dataset that will hold the modeled VAERS tables.
dataset_id = "vaers_modeled"
# Create the dataset in the US location with the bq CLI; IPython expands {dataset_id}.
!bq --location=US mk --dataset {dataset_id}

Dataset 'studied-brand-266702:vaers_modeled' successfully created.


### Split staging Events table into separate entities: Patient table and Adverse_Event table 

#### Create Patient table

In [1]:
%%bigquery
-- Build the Patient entity from the staging Events table.
-- PATIENT_ID is a surrogate key numbered in VAERS_ID order.
-- No DISTINCT: ROW_NUMBER() already makes every output row unique, so DISTINCT
-- could never remove a row and only added a pointless dedupe pass.
create or replace table vaers_modeled.Patient as
select
    ROW_NUMBER() OVER (ORDER BY VAERS_ID) as PATIENT_ID,
    VAERS_ID,
    STATE,
    CAST(AGE_YRS AS INT64) as AGE_YRS,
    SEX,
    ALLERGIES
from vaers_staging.Events

In [2]:
%%bigquery
-- Preview the contents of the modeled Patient table.
select *
from vaers_modeled.Patient

Unnamed: 0,PATIENT_ID,VAERS_ID,STATE,AGE_YRS,SEX,ALLERGIES
0,871,733449,,,M,
1,2428,735423,,,M,
2,2699,735795,,,U,
3,3023,736218,,,U,
4,3193,736460,,,M,
...,...,...,...,...,...,...
49160,30861,772806,WY,74.0,F,tegretol
49161,33041,775306,WY,75.0,F,no known allergies
49162,7228,742332,WY,77.0,U,
49163,43629,787434,WY,79.0,U,


#### Create Adverse_Event table

In [4]:
%%bigquery
-- Build the Adverse_Event entity: the event/outcome columns of staging Events,
-- keyed by the report's VAERS_ID.
create or replace table vaers_modeled.Adverse_Event as
select
    VAERS_ID,
    ONSET_DATE,
    RECOVD,
    DIED,
    DATEDIED,
    L_THREAT,
    OFC_VISIT,
    ER_VISIT,
    ER_ED_VISIT,
    HOSPITAL,
    HOSPDAYS,
    X_STAY,
    DISABLE,
    BIRTH_DEFECT,
    OTHER_MEDS,
    CUR_ILL,
    HISTORY,
    PRIOR_VAX
from vaers_staging.Events

In [5]:
%%bigquery
-- Preview the contents of the modeled Adverse_Event table.
select *
from vaers_modeled.Adverse_Event

Unnamed: 0,VAERS_ID,ONSET_DATE,RECOVD,DIED,DATEDIED,L_THREAT,OFC_VISIT,ER_VISIT,ER_ED_VISIT,HOSPITAL,HOSPDAYS,X_STAY,DISABLE,BIRTH_DEFECT,OTHER_MEDS,CUR_ILL,HISTORY,PRIOR_VAX
0,732483,,U,,,,,,,,,,,,,,,
1,733092,,U,,,,,,,,,,,,,,,
2,734264,,N,,,,True,,,,,,,,,,,
3,734585,,U,,,,,,,,,,,,,,,
4,734883,,U,,,,,,,,,,,,STERILE DILUENT,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49160,752214,2018-05-12,,,,,,,,,,,,,,None. Routine follow up,Back pain; Bilat knee pain; HTN/hyperlipidemia...,
49161,748671,2018-05-12,Y,,,,,,,,,,,,COSAMIN DS; Aspirin,Stomach virus 3 weeks prior,Arthritis,
49162,751451,2018-05-12,U,,,,,,,,,,,,SINGULAIR; aspirin; multivitamin; fish oil,Upset stomach 2 days before,Asthma,
49163,749668,2018-05-12,Y,,,,,,,,,,,,XARELTO 20 MG PO QD SERTRALINE 100 MG PO QAM P...,Small rash prior to vaccination- not mentioned...,Unknown,


### Generate a primary key for the Symptom table. No other changes are made to the table because Symptom is already an entity by itself.

In [8]:
%%bigquery
-- Copy staging Symptoms into the modeled dataset and add a surrogate key.
-- A report (VAERS_ID) can span several symptom rows, so ordering by VAERS_ID
-- alone leaves ties and SYMPTOM_ID could change between runs. The symptom
-- columns are added as tie-breakers so the numbering is reproducible.
create or replace table vaers_modeled.Symptom as
select
    ROW_NUMBER() OVER (
        ORDER BY VAERS_ID, SYMPTOM1, SYMPTOM2, SYMPTOM3, SYMPTOM4, SYMPTOM5
    ) as SYMPTOM_ID,
    VAERS_ID,
    SYMPTOM1,
    SYMPTOM2,
    SYMPTOM3,
    SYMPTOM4,
    SYMPTOM5
from vaers_staging.Symptoms

In [9]:
%%bigquery
-- Preview the contents of the modeled Symptom table.
select *
from vaers_modeled.Symptom

Unnamed: 0,SYMPTOM_ID,VAERS_ID,SYMPTOM1,SYMPTOM2,SYMPTOM3,SYMPTOM4,SYMPTOM5
0,17,732230,Drug administered at inappropriate site,Injected limb mobility decreased,Injection site pain,Pain,
1,41,732299,Abdominal pain,Arthralgia,Blood thyroid stimulating hormone normal,C-reactive protein normal,Dizziness
2,140,732484,Chest discomfort,Dysgeusia,Dysphagia,Pain,
3,218,732592,Autoimmune disorder,Crohn's disease,Iritis,Paraesthesia,
4,418,732819,Cold sweat,Dizziness postural,Electrocardiogram normal,Eyelid injury,Feeling abnormal
...,...,...,...,...,...,...,...
60186,27786,762672,Injection site erythema,Injection site induration,Injection site warmth,,
60187,40713,775306,Injection site erythema,Injection site induration,Injection site warmth,,
60188,49596,783702,Injection site erythema,Injection site induration,Injection site mass,Injection site pain,Injection site swelling
60189,51879,785892,Injection site erythema,Injection site induration,Injection site warmth,,


### Split staging Vaccines table into separate entities: Vaccine and Manufacturer

#### Create Vaccine table

In [10]:
%%bigquery
-- Build the Vaccine entity: one row per distinct (VAX_NAME, VAX_TYPE) pair in
-- staging Vaccines, with a surrogate key VAX_ID.
-- The window orders by both grouping columns so that VAX_ID stays
-- deterministic even if one VAX_NAME appears under more than one VAX_TYPE.
create or replace table vaers_modeled.Vaccine as
select
    ROW_NUMBER() OVER (ORDER BY VAX_NAME, VAX_TYPE) as VAX_ID,
    VAX_NAME,
    VAX_TYPE
from vaers_staging.Vaccines
group by VAX_NAME, VAX_TYPE

In [11]:
%%bigquery
-- Preview the modeled Vaccine table, sorted alphabetically by vaccine name.
select *
from vaers_modeled.Vaccine
order by
    VAX_NAME

Unnamed: 0,VAX_ID,VAX_NAME,VAX_TYPE
0,1,"ADENOVIRUS TYPES 4 & 7, LIVE, ORAL (NO BRAND N...",ADEN_4_7
1,2,ANTHRAX (BIOTHRAX),ANTH
2,3,ANTHRAX (NO BRAND NAME),ANTH
3,4,BCG (NO BRAND NAME),BCG
4,5,BCG (TICE),BCG
...,...,...,...
127,128,YELLOW FEVER (STAMARIL),YF
128,129,YELLOW FEVER (YF-VAX),YF
129,130,ZOSTER (NO BRAND NAME),VARZOS
130,131,ZOSTER (SHINGRIX),VARZOS


#### Create Manufacturer table

In [12]:
%%bigquery
-- Build the Manufacturer entity: one row per distinct VAX_MANU value in
-- staging Vaccines, numbered alphabetically to form the surrogate key MANU_ID.
create or replace table vaers_modeled.Manufacturer as
with distinct_manufacturers as (
    select distinct VAX_MANU
    from vaers_staging.Vaccines
)
select
    ROW_NUMBER() OVER (ORDER BY VAX_MANU) as MANU_ID,
    VAX_MANU
from distinct_manufacturers

In [13]:
%%bigquery
-- Preview the modeled Manufacturer table, sorted alphabetically by manufacturer.
select *
from vaers_modeled.Manufacturer
order by
    VAX_MANU

Unnamed: 0,MANU_ID,VAX_MANU
0,1,"BERNA BIOTECH, LTD."
1,2,CONNAUGHT LTD.
2,3,CSL LIMITED
3,4,DYNAVAX TECHNOLOGIES CORPORATION
4,5,EMERGENT BIOSOLUTIONS
5,6,GLAXOSMITHKLINE BIOLOGICALS
6,7,INTERCELL AG
7,8,MASS. PUB HLTH BIOL LAB
8,9,"MEDIMMUNE VACCINES, INC."
9,10,MERCK & CO. INC.


### Join attributes from staging Events, staging Vaccines, modeled Vaccine and modeled Manufacturer to form new entity: Vaccination

#### The Vaccination table will also serve as the junction table for the modeled Vaccine and Manufacturer tables

In [14]:
%%bigquery
-- Build the Vaccination entity, which is also the junction between Vaccine and
-- Manufacturer: one row per distinct combination of report, vaccine,
-- manufacturer, and administration details.
--
-- Changes from the earlier version:
--   * Vaccine is keyed by (VAX_NAME, VAX_TYPE), so the lookup matches on both
--     columns (treating two NULL types as equal). Matching on VAX_NAME alone
--     would attach every VAX_ID sharing that name and create spurious rows.
--   * A report can list several vaccines, so ORDER BY e.VAERS_ID alone leaves
--     ties and VACCINATION_ID could change between runs. The window now orders
--     by every grouped column, which makes the numbering reproducible.
create or replace table vaers_modeled.Vaccination as
select
    ROW_NUMBER() OVER (
        ORDER BY
            e.VAERS_ID, mv.VAX_ID, m.MANU_ID, v.VAX_ROUTE, v.VAX_SITE,
            e.VAX_DATE, e.V_ADMINBY, e.V_FUNDBY
    ) as VACCINATION_ID,
    e.VAERS_ID,
    e.VAX_DATE,
    mv.VAX_ID,
    m.MANU_ID,
    e.V_ADMINBY,
    e.V_FUNDBY,
    v.VAX_ROUTE,
    v.VAX_SITE
from vaers_staging.Events e
inner join vaers_staging.Vaccines v
    on e.VAERS_ID = v.VAERS_ID
left join vaers_modeled.Vaccine mv
    on mv.VAX_NAME = v.VAX_NAME
    and (
        mv.VAX_TYPE = v.VAX_TYPE
        or (mv.VAX_TYPE is null and v.VAX_TYPE is null)
    )
left join vaers_modeled.Manufacturer m
    on m.VAX_MANU = v.VAX_MANU
group by
    e.VAERS_ID, e.VAX_DATE, mv.VAX_ID, m.MANU_ID,
    e.V_ADMINBY, e.V_FUNDBY, v.VAX_ROUTE, v.VAX_SITE

In [15]:
%%bigquery
-- Preview the contents of the modeled Vaccination table.
select *
from vaers_modeled.Vaccination

Unnamed: 0,VACCINATION_ID,VAERS_ID,VAX_DATE,VAX_ID,MANU_ID,V_ADMINBY,V_FUNDBY,VAX_ROUTE,VAX_SITE
0,1,732217,2017-12-05,122,22,PHM,,,
1,73,732349,2017-09-19,87,22,UNK,,UN,UN
2,90,732368,2017-06-21,27,22,PVT,,SYR,LG
3,358,732701,2018-01-03,120,18,PUB,,SYR,AR
4,1172,733508,2016-11-17,61,12,OTH,OTH,IM,UN
...,...,...,...,...,...,...,...,...,...
62332,58983,790935,,132,10,PVT,,UN,
62333,60024,792025,,132,10,PHM,,UN,UN
62334,60214,792224,,132,10,PUB,,UN,UN
62335,60668,792692,,132,10,OTH,,UN,


### Identify Primary Key (PK) for each modeled table

#### Patient table : PK is PATIENT_ID

In [16]:
%%bigquery
-- Total number of rows in Patient.
select
    count(*) as total_records
from vaers_modeled.Patient

Unnamed: 0,total_records
0,49165


In [17]:
%%bigquery
-- Distinct PATIENT_ID values; matches the row count only when PATIENT_ID is
-- unique and non-null.
select
    count(distinct PATIENT_ID) as distinct_id
from vaers_modeled.Patient

Unnamed: 0,distinct_id
0,49165


#### Adverse_Event table: PK is VAERS_ID

In [18]:
%%bigquery
-- Total number of rows in Adverse_Event.
select
    count(*) as total_records
from vaers_modeled.Adverse_Event

Unnamed: 0,total_records
0,49165


In [19]:
%%bigquery
-- Distinct VAERS_ID values; matches the row count only when VAERS_ID is
-- unique and non-null.
select
    count(distinct VAERS_ID) as distinct_id
from vaers_modeled.Adverse_Event

Unnamed: 0,distinct_id
0,49165


#### Symptom table: PK is SYMPTOM_ID

In [20]:
%%bigquery
-- Total number of rows in Symptom.
select
    count(*) as total_records
from vaers_modeled.Symptom

Unnamed: 0,total_records
0,60191


In [21]:
%%bigquery
-- Distinct SYMPTOM_ID values; matches the row count only when SYMPTOM_ID is
-- unique and non-null.
select
    count(distinct SYMPTOM_ID) as distinct_id
from vaers_modeled.Symptom

Unnamed: 0,distinct_id
0,60191


#### Vaccine table: PK is VAX_ID

In [22]:
%%bigquery
-- Total number of rows in Vaccine.
select
    count(*) as total_records
from vaers_modeled.Vaccine

Unnamed: 0,total_records
0,132


In [23]:
%%bigquery
-- Distinct VAX_ID values; matches the row count only when VAX_ID is unique
-- and non-null.
select
    count(distinct VAX_ID) as distinct_id
from vaers_modeled.Vaccine

Unnamed: 0,distinct_id
0,132


#### Manufacturer table: PK is MANU_ID

In [24]:
%%bigquery
-- Total number of rows in Manufacturer.
select
    count(*) as total_records
from vaers_modeled.Manufacturer

Unnamed: 0,total_records
0,22


In [25]:
%%bigquery
-- Distinct MANU_ID values; matches the row count only when MANU_ID is unique
-- and non-null.
select
    count(distinct MANU_ID) as distinct_id
from vaers_modeled.Manufacturer

Unnamed: 0,distinct_id
0,22


#### Vaccination table: PK is VACCINATION_ID

In [26]:
%%bigquery
-- Total number of rows in Vaccination.
select
    count(*) as total_records
from vaers_modeled.Vaccination

Unnamed: 0,total_records
0,62337


In [27]:
%%bigquery
-- Distinct VACCINATION_ID values; matches the row count only when
-- VACCINATION_ID is unique and non-null.
select
    count(distinct VACCINATION_ID) as distinct_id
from vaers_modeled.Vaccination

Unnamed: 0,distinct_id
0,62337


### Check for presence of duplicate records in each modeled table

In [28]:
%%bigquery
-- Row count of Patient, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Patient

Unnamed: 0,total_records
0,49165


In [29]:
%%bigquery
-- Count distinct Patient rows, ignoring the generated surrogate key.
-- PATIENT_ID comes from ROW_NUMBER(), so it differs on every row; leaving it in
-- DISTINCT * would make this count equal the row count even when the
-- underlying patient data is duplicated.
select
    count(*) as distinct_records
from (
    select distinct * except (PATIENT_ID)
    from vaers_modeled.Patient
)

Unnamed: 0,distinct_records
0,49165


In [30]:
%%bigquery
-- Row count of Adverse_Event, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Adverse_Event

Unnamed: 0,total_records
0,49165


In [31]:
%%bigquery
-- Count fully distinct Adverse_Event rows; a value below the row count means
-- the table contains exact duplicate rows.
select
    count(*) as distinct_records
from (
    select distinct *
    from vaers_modeled.Adverse_Event
)

Unnamed: 0,distinct_records
0,49165


In [32]:
%%bigquery
-- Row count of Symptom, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Symptom

Unnamed: 0,total_records
0,60191


In [33]:
%%bigquery
-- Count distinct Symptom rows, ignoring the generated surrogate key.
-- SYMPTOM_ID comes from ROW_NUMBER(), so it differs on every row; leaving it in
-- DISTINCT * would make this count equal the row count even when a
-- (VAERS_ID, SYMPTOM1..5) row is repeated.
select
    count(*) as distinct_records
from (
    select distinct * except (SYMPTOM_ID)
    from vaers_modeled.Symptom
)

Unnamed: 0,distinct_records
0,60191


In [34]:
%%bigquery
-- Row count of Vaccine, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Vaccine

Unnamed: 0,total_records
0,132


In [35]:
%%bigquery
-- Count distinct Vaccine rows, ignoring the generated surrogate key.
-- VAX_ID comes from ROW_NUMBER(), so it differs on every row; leaving it in
-- DISTINCT * would hide any repeated (VAX_NAME, VAX_TYPE) pair.
select
    count(*) as distinct_records
from (
    select distinct * except (VAX_ID)
    from vaers_modeled.Vaccine
)

Unnamed: 0,distinct_records
0,132


In [36]:
%%bigquery
-- Row count of Manufacturer, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Manufacturer

Unnamed: 0,total_records
0,22


In [37]:
%%bigquery
-- Count distinct Manufacturer rows, ignoring the generated surrogate key.
-- MANU_ID comes from ROW_NUMBER(), so it differs on every row; leaving it in
-- DISTINCT * would hide any repeated VAX_MANU value.
select
    count(*) as distinct_records
from (
    select distinct * except (MANU_ID)
    from vaers_modeled.Manufacturer
)

Unnamed: 0,distinct_records
0,22


In [38]:
%%bigquery
-- Row count of Vaccination, the baseline for the duplicate check.
select
    count(*) as total_records
from vaers_modeled.Vaccination

Unnamed: 0,total_records
0,62337


In [39]:
%%bigquery
-- Count distinct Vaccination rows, ignoring the generated surrogate key.
-- VACCINATION_ID comes from ROW_NUMBER(), so it differs on every row; leaving
-- it in DISTINCT * would hide any repeated vaccination record.
select
    count(*) as distinct_records
from (
    select distinct * except (VACCINATION_ID)
    from vaers_modeled.Vaccination
)

Unnamed: 0,distinct_records
0,62337


### >> No duplicates found in any modeled table

### Check for referential integrity violations 

In [40]:
%%bigquery
-- Count Patient rows whose VAERS_ID has no matching row in Adverse_Event.
select
    count(*) as count_of_missing_PK
from vaers_modeled.Patient p
where not exists (
    select 1
    from vaers_modeled.Adverse_Event e
    where e.VAERS_ID = p.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [41]:
%%bigquery
-- Count Symptom rows whose VAERS_ID has no matching row in Adverse_Event.
select
    count(*) as count_of_missing_PK
from vaers_modeled.Symptom s
where not exists (
    select 1
    from vaers_modeled.Adverse_Event e
    where e.VAERS_ID = s.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [42]:
%%bigquery
-- Count Vaccination rows whose VAERS_ID has no matching row in Adverse_Event.
select
    count(*) as count_of_missing_PK
from vaers_modeled.Vaccination v
where not exists (
    select 1
    from vaers_modeled.Adverse_Event e
    where e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [43]:
%%bigquery
-- Count Vaccination rows whose VAX_ID has no matching row in Vaccine
-- (rows with a NULL VAX_ID are counted as well).
select
    count(*) as count_of_missing_PK
from vaers_modeled.Vaccination vt
where not exists (
    select 1
    from vaers_modeled.Vaccine v
    where v.VAX_ID = vt.VAX_ID
)

Unnamed: 0,count_of_missing_PK
0,0


In [44]:
%%bigquery
-- Count Vaccination rows whose MANU_ID has no matching row in Manufacturer
-- (rows with a NULL MANU_ID are counted as well).
select
    count(*) as count_of_missing_PK
from vaers_modeled.Vaccination vt
where not exists (
    select 1
    from vaers_modeled.Manufacturer m
    where m.MANU_ID = vt.MANU_ID
)

Unnamed: 0,count_of_missing_PK
0,0


### >> No referential integrity violations in any modeled table

## Beam pipeline

### Standardize data in Adverse_Event table using Beam pipeline (Direct Runner)

In [45]:
# Execute the Beam script for Adverse_Event in this kernel (DirectRunner, per the log output below).
%run Adverse_Event_beam.py

  experiments = p.options.view_as(DebugOptions).experiments or []
INFO:apache_beam.runners.direct.direct_runner:Running pipeline with DirectRunner.
INFO:apache_beam.internal.gcp.auth:Setting socket default timeout to 60 seconds.
INFO:apache_beam.internal.gcp.auth:socket default timeout is 60.0 seconds.
INFO:oauth2client.transport:Attempting refresh to obtain initial access_token
INFO:apache_beam.io.gcp.bigquery_tools:Using location 'US' from table <TableReference
 datasetId: 'vaers_modeled'
 projectId: 'studied-brand-266702'
 tableId: 'Adverse_Event'> referenced by query SELECT * FROM vaers_modeled.Adverse_Event limit 50


Current RECOVD:  N
New RECOVD:  False
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None
Current RECOVD:  U
New RECOVD:  None


INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.11 seconds.
INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.10 seconds.
INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.10 seconds.
INFO:apache_beam.io.filebasedsink:Starting finalize_write threads with num_shards: 1 (skipped: 0), batches: 1, num_threads: 1
INFO:apache_beam.io.filebasedsink:Renamed 1 shards in 0.11 seconds.
INFO:apache_beam.io.gcp.bigquery_tools:Created table studied-brand-266702.vaers_modeled.Adverse_Event_Beam with schema <TableSchema
 fields: [<TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAERS_ID'
 type: 'INTEGER'

### Verify presence of Primary Key in Beam result table

#### Adverse_Event_Beam table: PK is VAERS_ID

In [46]:
%%bigquery
-- Total number of rows in Adverse_Event_Beam.
select
    count(*) as total_records
from vaers_modeled.Adverse_Event_Beam

Unnamed: 0,total_records
0,50


In [47]:
%%bigquery
-- Distinct VAERS_ID values in Adverse_Event_Beam; matches the row count only
-- when VAERS_ID is unique and non-null.
select
    count(distinct VAERS_ID) as distinct_id
from vaers_modeled.Adverse_Event_Beam

Unnamed: 0,distinct_id
0,50


#### Adverse_Event_Beam table is a parent table and does not have any Foreign Key

### Standardize data in Vaccination table using Beam pipeline (Direct Runner)

In [48]:
# Execute the Beam script for Vaccination in this kernel (DirectRunner, per the log output below).
%run Vaccination_beam.py

  experiments = p.options.view_as(DebugOptions).experiments or []
INFO:apache_beam.runners.direct.direct_runner:Running pipeline with DirectRunner.
INFO:apache_beam.io.gcp.bigquery_tools:Using location 'US' from table <TableReference
 datasetId: 'vaers_modeled'
 projectId: 'studied-brand-266702'
 tableId: 'Vaccination'> referenced by query SELECT * FROM vaers_modeled.Vaccination limit 50


Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None None None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None None None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None None None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None None None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None UN None
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK UN UN
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  OTH IM AR
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  OTH IM AR
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None SYR AR
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK SYR AR
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None SYR AR
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK SYR AR
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None IM LA
New V_FUNDBY, VAX_ROUTE and VAX_SITE:  UNK IM LA
Current V_FUNDBY, VAX_ROUTE and VAX_SITE:  None IM L

INFO:apache_beam.io.gcp.bigquery_tools:Created table studied-brand-266702.vaers_modeled.Vaccination_Beam with schema <TableSchema
 fields: [<TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VACCINATION_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAERS_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_DATE'
 type: 'DATE'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'MANU_ID'
 type: 'INTEGER'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'V_ADMINBY'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'V_FUNDBY'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_ROUTE'
 type: 'STRING'>, <TableFieldSchema
 fields: []
 mode: 'NULLABLE'
 name: 'VAX_SITE'
 type: 'STRING'>]>. Result: <Table
 creationTime: 1587878594849
 etag: 'KiFEYKRd+2hGyS44gSxoyw=='
 id: 'studied-bra

### Verify presence of Primary Key in Beam result table

#### Vaccination_Beam table: PK is VACCINATION_ID

In [49]:
%%bigquery
-- Total number of rows in Vaccination_Beam.
select
    count(*) as total_records
from vaers_modeled.Vaccination_Beam

Unnamed: 0,total_records
0,50


In [50]:
%%bigquery
-- Distinct VACCINATION_ID values in Vaccination_Beam; matches the row count
-- only when VACCINATION_ID is unique and non-null.
select
    count(distinct VACCINATION_ID) as distinct_id
from vaers_modeled.Vaccination_Beam

Unnamed: 0,distinct_id
0,50


#### Vaccination_Beam table: FK is VAERS_ID from Adverse_Event table

In [51]:
%%bigquery
-- Count Vaccination_Beam rows whose VAERS_ID has no matching row in Adverse_Event.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam v
where not exists (
    select 1
    from vaers_modeled.Adverse_Event e
    where e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam table: FK is VAX_ID from Vaccine table

In [52]:
%%bigquery
-- Count Vaccination_Beam rows whose VAX_ID has no matching row in Vaccine.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam v
where not exists (
    select 1
    from vaers_modeled.Vaccine va
    where va.VAX_ID = v.VAX_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam table: FK is MANU_ID from Manufacturer table

In [53]:
%%bigquery
-- Count Vaccination_Beam rows whose MANU_ID has no matching row in Manufacturer.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam v
where not exists (
    select 1
    from vaers_modeled.Manufacturer m
    where m.MANU_ID = v.MANU_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


### Standardize data in Adverse_Event table using Beam pipeline (Dataflow Runner)

In [54]:
# Execute the Adverse_Event Beam script; this section's heading says it targets the Dataflow runner.
%run Adverse_Event_beam_dataflow.py

  kms_key=transform.kms_key))


### Verify presence of Primary Key in Beam result table

#### Adverse_Event_Beam_DF table: PK is VAERS_ID

In [55]:
%%bigquery
-- Total number of rows in Adverse_Event_Beam_DF.
select
    count(*) as total_records
from vaers_modeled.Adverse_Event_Beam_DF

Unnamed: 0,total_records
0,49165


In [56]:
%%bigquery
-- Distinct VAERS_ID values in Adverse_Event_Beam_DF; matches the row count
-- only when VAERS_ID is unique and non-null.
select
    count(distinct VAERS_ID) as distinct_id
from vaers_modeled.Adverse_Event_Beam_DF

Unnamed: 0,distinct_id
0,49165


#### Adverse_Event_Beam_DF table is a parent table and does not have any Foreign Key

### Standardize data in Vaccination table using Beam pipeline (Dataflow Runner)

In [66]:
# Execute the Vaccination Beam script; this section's heading says it targets the Dataflow runner.
%run Vaccination_beam_dataflow.py

  kms_key=transform.kms_key))


### Verify presence of Primary Key in Beam result table

#### Vaccination_Beam_DF table: PK is VACCINATION_ID

In [67]:
%%bigquery
-- Total number of rows in Vaccination_Beam_DF.
select
    count(*) as total_records
from vaers_modeled.Vaccination_Beam_DF

Unnamed: 0,total_records
0,62337


In [68]:
%%bigquery
-- Distinct VACCINATION_ID values in Vaccination_Beam_DF; matches the row count
-- only when VACCINATION_ID is unique and non-null.
select
    count(distinct VACCINATION_ID) as distinct_id
from vaers_modeled.Vaccination_Beam_DF

Unnamed: 0,distinct_id
0,62337


#### Vaccination_Beam_DF table: FK is VAERS_ID from Adverse_Event table

In [69]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose VAERS_ID has no matching row in Adverse_Event.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam_DF v
where not exists (
    select 1
    from vaers_modeled.Adverse_Event e
    where e.VAERS_ID = v.VAERS_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam_DF table: FK is VAX_ID from Vaccine table

In [70]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose VAX_ID has no matching row in Vaccine.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam_DF v
where not exists (
    select 1
    from vaers_modeled.Vaccine va
    where va.VAX_ID = v.VAX_ID
)

Unnamed: 0,count_of_invalid_FK
0,0


#### Vaccination_Beam_DF table: FK is MANU_ID from Manufacturer table

In [71]:
%%bigquery
-- Count Vaccination_Beam_DF rows whose MANU_ID has no matching row in Manufacturer.
select
    count(*) as count_of_invalid_FK
from vaers_modeled.Vaccination_Beam_DF v
where not exists (
    select 1
    from vaers_modeled.Manufacturer m
    where m.MANU_ID = v.MANU_ID
)

Unnamed: 0,count_of_invalid_FK
0,0
