In [None]:
import pandas as pd
from google.cloud import bigquery
# BigQuery client; no explicit project or credentials are passed here.
client = bigquery.Client()

# Placeholders to fill in before running the notebook:
#   DATASET is interpolated into the transformation query further down,
#   FILE_ID is used as the single source URI of the unit-mapping CSV.
DATASET = ''
FILE_ID = ''

# Name under which the external CSV is exposed to queries.
table_id = 'unit_mapping'

# Column layout of the unit-mapping CSV as (name, BigQuery type) pairs.
unit_mapping_columns = [
    ('measurement_concept_id', 'INTEGER'),
    ('unit_concept_id', 'INTEGER'),
    ('set_unit_concept_id', 'INTEGER'),
    ('transform_value_as_number', 'STRING'),
]

# Describe the CSV file as an external data source for query jobs.
external_config = bigquery.ExternalConfig('CSV')
external_config.source_uris = ['{file_name}'.format(file_name=FILE_ID)]
external_config.schema = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in unit_mapping_columns
]
external_config.options.skip_leading_rows = 1  # skip the first row (header)

In [None]:
# Count the measurement rows that match a unit-mapping entry whose
# set_unit_concept_id differs from the row's current unit_concept_id.
count_sql_template = '''SELECT count(*) FROM `{}` 
join `aou-res-curation-output-prod.R2019Q2R1.measurement` using (measurement_concept_id, unit_concept_id)
where unit_concept_id!=set_unit_concept_id
'''
# The placeholder is filled with table_id, the name of the unit-mapping table.
sql = count_sql_template.format(table_id)


In [None]:
# Make the external CSV definition available to the query under `table_id`.
external_tables = {table_id: external_config}
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = external_tables

# Submit the count query; this issues the API request and returns the job.
query_job = client.query(sql, job_config=job_config)

In [None]:
# Block until the count query completes and load its rows into a DataFrame.
results = query_job.to_dataframe()  # Waits for query to finish
# Bare expression so the notebook renders the DataFrame as the cell output.
results


In [None]:
# Query that joins measurement data to the unit mapping and does the work:
#   * value_as_number, range_low and range_high are converted according to the
#     formula named in transform_value_as_number;
#   * unit_concept_id is replaced by set_unit_concept_id when a mapping exists;
#   * rows without a mapping (LEFT JOIN miss) pass through unchanged.
#
# The "(1/x)" branch guards against a zero input for all three converted
# columns. Previously only value_as_number was guarded, so a zero range_low or
# range_high raised a division-by-zero error and failed the whole query. The
# guard returns 0 for a zero input, matching the existing value_as_number rule.
#
# NOTE(review): the join targets `{dataset_id}.unit_mapping`, a dataset-qualified
# table, rather than the external table registered under `table_id` -- confirm
# that a unit_mapping table exists in DATASET.
sql = '''SELECT
  measurement_id,
  person_id,
  measurement_concept_id,
  measurement_date,
  measurement_datetime,
  measurement_type_concept_id,
  operator_concept_id,
  CASE transform_value_as_number
    WHEN "(1/x)" THEN IF (value_as_number = 0, 0, 1/value_as_number)
    WHEN "(x-32)*(5/9)" THEN (value_as_number-32)*(5/9)
    WHEN "*0.02835" THEN value_as_number * 0.02835
    WHEN "*0.394" THEN value_as_number * 0.394
    WHEN "*0.4536" THEN value_as_number * 0.4536
    WHEN "*1" THEN value_as_number * 1
    WHEN "*10" THEN value_as_number * 10
    WHEN "*10^(-1)" THEN value_as_number * 0.1
    WHEN "*10^(-2)" THEN value_as_number * 0.01
    WHEN "*10^(3)" THEN value_as_number * 1000
    WHEN "*10^(-3)" THEN value_as_number * 0.001
    WHEN "*10^(6)" THEN value_as_number * 1000000
    WHEN "*10^(-6)" THEN value_as_number * 0.000001
    -- when transform_value_as_number is null due to left join
    ELSE value_as_number
END
  AS value_as_number,
  value_as_concept_id,
  COALESCE(set_unit_concept_id, unit_concept_id) AS unit_concept_id,
  CASE transform_value_as_number
    -- guard against division by zero, same rule as value_as_number
    WHEN "(1/x)" THEN IF (range_low = 0, 0, 1/range_low)
    WHEN "(x-32)*(5/9)" THEN (range_low-32)*(5/9)
    WHEN "*0.02835" THEN range_low * 0.02835
    WHEN "*0.394" THEN range_low * 0.394
    WHEN "*0.4536" THEN range_low * 0.4536
    WHEN "*1" THEN range_low * 1
    WHEN "*10" THEN range_low * 10
    WHEN "*10^(-1)" THEN range_low * 0.1
    WHEN "*10^(-2)" THEN range_low * 0.01
    WHEN "*10^(3)" THEN range_low * 1000
    WHEN "*10^(-3)" THEN range_low * 0.001
    WHEN "*10^(6)" THEN range_low * 1000000
    WHEN "*10^(-6)" THEN range_low * 0.000001
    -- when transform_value_as_number is null due to left join
    ELSE range_low
END
  AS range_low,
  CASE transform_value_as_number
    -- guard against division by zero, same rule as value_as_number
    WHEN "(1/x)" THEN IF (range_high = 0, 0, 1/range_high)
    WHEN "(x-32)*(5/9)" THEN (range_high-32)*(5/9)
    WHEN "*0.02835" THEN range_high * 0.02835
    WHEN "*0.394" THEN range_high * 0.394
    WHEN "*0.4536" THEN range_high * 0.4536
    WHEN "*1" THEN range_high * 1
    WHEN "*10" THEN range_high * 10
    WHEN "*10^(-1)" THEN range_high * 0.1
    WHEN "*10^(-2)" THEN range_high * 0.01
    WHEN "*10^(3)" THEN range_high * 1000
    WHEN "*10^(-3)" THEN range_high * 0.001
    WHEN "*10^(6)" THEN range_high * 1000000
    WHEN "*10^(-6)" THEN range_high * 0.000001
    -- when transform_value_as_number is null due to left join
    ELSE range_high
END
  AS range_high,
  provider_id,
  visit_occurrence_id,
  measurement_source_value,
  measurement_source_concept_id,
  unit_source_value,
  value_source_value
FROM
    `{dataset_id}.measurement`
LEFT JOIN
  `{dataset_id}.unit_mapping`
USING
  (measurement_concept_id,
    unit_concept_id)
'''.format(dataset_id=DATASET)
# For validation, the following clauses can be appended to the query above:
#   where transform_value_as_number!='*1'
#   limit 100

In [None]:
# Attach the external CSV definition to this job under the name `table_id`.
external_tables = {table_id: external_config}
job_config = bigquery.QueryJobConfig()
job_config.table_definitions = external_tables

# Submit the transformation query; this issues the API request.
query_job = client.query(sql, job_config=job_config)


In [None]:
# Block until the transformation query completes and load its rows into a
# DataFrame.
# NOTE(review): the query carries no LIMIT, so every returned row is loaded --
# confirm the result size is acceptable before running against a full dataset.
results = query_job.to_dataframe()  # Waits for query to finish
# Bare expression so the notebook renders the DataFrame as the cell output.
results
