<a href="https://colab.research.google.com/github/DEP04929/ESICMDatathon2026/blob/main/HRandVentparam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pre-requisites for Amsterdam UMC DB


In [26]:
# sets *your* project id
PROJECT_ID = "esicmdatathon2026" #@param {type:"string"}


In [27]:
# sets default dataset for AmsterdamUMCdb
DATASET_PROJECT_ID = 'amsterdamumcdb' #@param {type:"string"}
DATASET_ID = 'van_gogh_2026_datathon' #@param {type:"string"}
LOCATION = 'eu' #@param {type:"string"}

In [28]:
import os
from google.colab import auth

# all libraries check this environment variable, so set it:
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID

auth.authenticate_user()
print('Authenticated')


Authenticated


In [29]:
%load_ext google.colab.data_table
from google.colab.data_table import DataTable

# change default limits:
DataTable.max_columns = 50
DataTable.max_rows = 30000


The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_table


In [30]:
from google.cloud.bigquery import magics
from google.cloud import bigquery

# sets the default query job configuration
def_config = bigquery.job.QueryJobConfig(default_dataset=DATASET_PROJECT_ID + "." + DATASET_ID)
magics.context.default_query_job_config = def_config


In [31]:
import pandas as pd
import numpy as np

import matplotlib as plt
import seaborn as sns
sns.set_style('darkgrid')

# Exploring data
Am trying to follow :
https://github.com/AmsterdamUMC/AmsterdamUMCdb/blob/master/omop/sql/ddl/bigquery.sql

Is it possible to approx SDNN even if the HR is only per hour?

In [32]:
%%bigquery test --project esicmdatathon2026
select person_id, measurement_date, stddev_samp(value_as_number) as sd
from measurement m
inner join concept c
on m.measurement_concept_id = c.concept_id
where measurement_concept_id in ( 21490872 --Heart rate.beat-to-beat by EKG
) and value_as_number > 0 and value_as_number < 300
group by person_id, measurement_date

Query is running:   0%|          |

Downloading:   0%|          |

# Vent data

In [33]:
%%bigquery ventpat --project $PROJECT_ID
with rr as
(
  with br as ( select person_id, measurement_datetime
  , case when measurement_concept_id = 1175625 then 'BRspont'
       when measurement_concept_id = 3007469 then 'BRset'
       when measurement_concept_id = 3012410 then 'TV'
       when measurement_concept_id = 3022875 then 'PEEP'
       when measurement_concept_id in ( 3025408, 2000000204) then 'FiO2'
       end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
         1175625 -- Breath rate spontaneous
        , 3007469 -- Breath rate setting Ventilator
        , 3012410 -- Tidal volume setting Ventilator
        , 3022875 -- Positive end expiratory pressure setting Ventilator
        , 3025408 -- Oxygen/Inspired gas Respiratory system by O2 Analyzer --on ventilator
        , 2000000204 -- fio2 setting
  )
  and value_as_number is not null
  and value_as_number > 0 and value_as_number < 200
  )
  select * from br
  PIVOT (max(value_as_number) for concept_name in ('BRspont', 'BRset', 'TV','PEEP', 'FiO2'))
),
hr as (
select person_id, measurement_datetime, value_as_number
from measurement m
where measurement_concept_id in (
 	21490872 --Heart rate.beat-to-beat by EKG
)
and person_id in (select distinct person_id from rr )
and value_as_number > 0 and value_as_number < 300
),
trach as (
  select person_id, measurement_datetime, value_as_number as trachsize
  from measurement m
  where measurement_concept_id in ( 36305611  -- Tracheostomy tube diameter
  )
),
 d as (
  select person_id, death_datetime
  from death
  where person_id in (select person_id from rr)
 )
select rr.*, hr.value_as_number as hr_ekg, trach.trachsize, d.death_datetime
from rr inner join hr
on rr.person_id = hr.person_id
and rr.measurement_datetime = hr.measurement_datetime
left outer join trach
on rr.person_id = trach.person_id
and rr.measurement_datetime = trach.measurement_datetime
left outer join d
on rr.person_id = d.person_id
order by rr.person_id, rr.measurement_datetime


Query is running:   0%|          |

Downloading:   0%|          |

In [34]:
ventpat.describe()

Unnamed: 0,person_id,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize
count,7492881.0,1258917.0,4561037.0,66658.0,6059615.0,7411687.0,7492881.0,36.0
mean,32902.183049,20.79563,16.96402,97.734218,6.249544,56.63092,79.72638,7.611111
std,18967.448439,9.767422,6.462887,61.83742,2.308582,22.58185,20.8766,0.644882
min,3.0,0.1,1.0,0.15,0.1,0.45,1.0,6.0
25%,16570.0,15.0,12.9,41.0,5.0,40.0,65.0,7.0
50%,32866.0,20.7,15.0,95.0,5.0,50.0,77.0,8.0
75%,49297.0,27.0,19.9,156.0,8.0,70.0,92.0,8.0
max,65396.0,171.0,198.8,199.0,128.0,104.0,293.0,8.0


In [45]:
ventpat[ventpat['trachsize']>1] # random patient check

Unnamed: 0,person_id,measurement_datetime,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize,death_datetime
197238,1661,2017-01-19 06:26:00+00:00,30.0,30.0,,6.0,22.0,91.0,8.0,NaT
938778,8360,2017-01-22 14:55:00+00:00,27.1,27.1,,8.0,40.0,72.0,8.0,2017-02-28 21:40:00+00:00
938779,8360,2017-01-22 14:55:00+00:00,27.1,27.1,,8.0,40.0,72.0,8.0,2017-02-28 21:40:00+00:00
1003040,8919,2017-01-11 10:33:00+00:00,27.8,,,0.2,25.1,78.0,8.0,NaT
1262960,11277,2017-01-15 10:36:00+00:00,13.0,13.0,,8.0,35.0,53.0,8.0,NaT
1671477,14722,2017-01-07 06:13:00+00:00,,19.0,,5.0,21.0,77.0,8.0,NaT
1724882,15254,2017-01-17 19:41:00+00:00,16.0,16.0,,10.0,40.0,93.0,7.0,2017-02-03 19:41:00+00:00
2205040,19498,2017-01-01 00:53:00+00:00,1.9,20.7,,6.0,60.2,71.0,8.0,NaT
2427471,21390,2017-01-18 01:48:00+00:00,29.7,,,5.0,30.0,99.0,8.0,2017-03-20 02:48:00+00:00
2622552,22954,2017-01-01 03:16:00+00:00,,,,,70.1,75.0,8.0,NaT


In [40]:
ventpat[ventpat['person_id']==16570] # random patient check

Unnamed: 0,person_id,measurement_datetime,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize,death_datetime
1872963,16570,2017-01-01 00:45:00+00:00,,,,,70.1,78.0,,NaT
1872964,16570,2017-01-01 01:45:00+00:00,,,,,90.1,77.0,,NaT
1872965,16570,2017-01-01 02:45:00+00:00,,,,,70.0,71.0,,NaT
1872966,16570,2017-01-01 03:45:00+00:00,,,,,70.0,72.0,,NaT
1872967,16570,2017-01-01 04:45:00+00:00,,,,,75.1,76.0,,NaT
...,...,...,...,...,...,...,...,...,...,...
1873623,16570,2017-02-01 14:45:00+00:00,,,,,40.1,88.0,,NaT
1873624,16570,2017-02-01 15:45:00+00:00,,,,,40.0,91.0,,NaT
1873625,16570,2017-02-01 16:45:00+00:00,,,,,40.0,89.0,,NaT
1873626,16570,2017-02-01 17:45:00+00:00,,,,,40.0,90.0,,NaT


In [46]:
ventpat[ventpat['person_id']==61822] # random patient check

Unnamed: 0,person_id,measurement_datetime,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize,death_datetime
7052445,61822,2017-01-01 00:55:00+00:00,10.9,15.0,,5.0,40.1,113.0,,NaT
7052446,61822,2017-01-01 02:55:00+00:00,18.3,,,5.0,30.4,132.0,,NaT
7052447,61822,2017-01-01 04:55:00+00:00,17.2,,,5.0,30.1,128.0,,NaT
7052448,61822,2017-01-01 05:55:00+00:00,13.2,,,5.1,30.0,127.0,,NaT
7052449,61822,2017-01-01 09:55:00+00:00,5.3,15.0,,5.0,30.0,132.0,,NaT
...,...,...,...,...,...,...,...,...,...,...
7053149,61822,2017-02-05 13:55:00+00:00,,,,6.0,25.0,112.0,,NaT
7053150,61822,2017-02-05 14:55:00+00:00,,,,6.0,25.0,115.0,,NaT
7053151,61822,2017-02-05 15:55:00+00:00,,,,6.0,25.0,98.0,,NaT
7053152,61822,2017-02-06 04:55:00+00:00,,,,6.0,25.0,117.0,,NaT


In [35]:
ventpat[ventpat['person_id']==64906] # random patient check

Unnamed: 0,person_id,measurement_datetime,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize,death_datetime
7419205,64906,2017-01-01 00:48:00+00:00,,20.0,,8.0,100.0,59.0,,2018-03-04 21:04:00+00:00
7419206,64906,2017-01-01 00:48:00+00:00,,20.0,,8.0,100.0,59.0,,2018-03-04 21:04:00+00:00
7419207,64906,2017-01-01 04:48:00+00:00,,15.0,,8.0,80.0,81.0,,2018-03-04 21:04:00+00:00
7419208,64906,2017-01-01 04:48:00+00:00,,15.0,,8.0,80.0,81.0,,2018-03-04 21:04:00+00:00
7419209,64906,2017-01-01 05:48:00+00:00,,18.0,,8.0,30.0,81.0,,2018-03-04 21:04:00+00:00
...,...,...,...,...,...,...,...,...,...,...
7420992,64906,2018-03-04 18:48:00+00:00,46.0,46.0,,5.0,30.0,114.0,,2018-03-04 21:04:00+00:00
7420993,64906,2018-03-04 19:48:00+00:00,35.0,35.0,,5.0,30.0,97.0,,2018-03-04 21:04:00+00:00
7420994,64906,2018-03-04 19:48:00+00:00,35.0,35.0,,5.0,30.0,97.0,,2018-03-04 21:04:00+00:00
7420995,64906,2018-03-04 20:48:00+00:00,26.0,26.0,,5.0,30.0,88.0,,2018-03-04 21:04:00+00:00


In [47]:
ventpat[ventpat['person_id']==8919] # random patient check

Unnamed: 0,person_id,measurement_datetime,BRspont,BRset,TV,PEEP,FiO2,hr_ekg,trachsize,death_datetime
1002747,8919,2017-01-01 18:30:00+00:00,,,,5.0,74.0,80.0,,NaT
1002748,8919,2017-01-01 18:31:00+00:00,,,,5.0,74.0,82.0,,NaT
1002749,8919,2017-01-01 18:32:00+00:00,,,,5.0,74.0,80.0,,NaT
1002750,8919,2017-01-01 18:33:00+00:00,,,,5.0,74.0,82.0,,NaT
1002751,8919,2017-01-01 18:34:00+00:00,,,,5.0,74.0,86.0,,NaT
...,...,...,...,...,...,...,...,...,...,...
1003104,8919,2017-01-11 23:10:00+00:00,,,,5.0,45.0,103.0,,NaT
1003105,8919,2017-01-11 23:11:00+00:00,,,,5.0,42.0,99.0,,NaT
1003106,8919,2017-01-11 23:12:00+00:00,,,,5.0,40.0,97.0,,NaT
1003107,8919,2017-01-12 01:33:00+00:00,24.3,,,0.1,35.2,73.0,,NaT


# BGA

In [36]:
%%bigquery bga --project $PROJECT_ID
with br as ( select person_id, measurement_datetime
  , case when measurement_concept_id = 3010421 then 'pH'
     when measurement_concept_id = 3027315 then 'PaO2'
		 when measurement_concept_id = 3013290 then 'PaCO2'
		 when measurement_concept_id = 3006576 then 'HCO3'
		 when measurement_concept_id = 3012501 then 'BE'
		 when measurement_concept_id = 3047181 then 'Lactate'
       end as concept_name
  , value_as_number
  from measurement m
  where measurement_concept_id in (
         3010421 -- pH of Blood
											,3027315 -- Oxygen [Partial pressure] in Blood
											,3013290 -- Carbon dioxide [Partial pressure] in Blood
											,3006576 -- Bicarbonate [Moles/volume] in Blood
											,3012501 -- Base excess in Blood by calculation
											,3047181 -- Lactate [Moles/volume] in Blood
  )
  and value_as_number is not null
  )
  select * from br
  PIVOT (max(value_as_number) for concept_name in ('Lactate', 'pH', 'PaO2', 'PaCO2', 'HCO3','BE'))




Query is running:   0%|          |

Downloading:   0%|          |

In [37]:
bga.describe()

Unnamed: 0,person_id,Lactate,pH,PaO2,PaCO2,HCO3,BE
count,1334565.0,184323.0,637678.0,652377.0,348036.0,638037.0,648109.0
mean,32528.830641,2.095386,7.997381,45.639425,5.882058,28.51162,2.895174
std,18811.125844,5.991713,57.384907,42.98131,1.715303,2503.819,4.66604
min,1.0,-1.3,0.0,0.1,0.9,-24.1,-165.999986
25%,16307.0,1.0,7.35,10.4,4.8,22.1,0.7
50%,32748.0,1.4,7.41,19.9,5.5,25.0,2.7
75%,48428.0,2.2,7.45,75.8,6.5,28.5,5.3
max,65396.0,2357.000001,7522.0,741.0,32.9,1999999.0,283.299999


# Here you can add your code to explore the data

The BGA times are different from the ventilator times. So a join is not possible. To be considered are 1) summarising by day to determine patterns? 2) standard deviations by day? by shift morning/day/night? Missing are demographics, reason for admission.   