In [None]:
from pathlib import Path
from ebmdatalab import bq

from utils import read_sql_query
from config import DATA_DIR

# BigQuery connection settings for this notebook.
# NOTE(review): neither constant is referenced in the cells visible here —
# presumably they are consumed further down or by a helper; confirm.
PROJECT_ID = "ebmdatalab"
# Relative location of the service-account key file (a path only, not the key).
CREDENTIALS_PATH = "notebooks/cred/bq-service-account.json"


In [None]:
# Read the SQL text for the andexanet extract via the project helper, then run
# it through ebmdatalab's cached reader, with DATA_DIR/andexanet.csv as the
# CSV path handed to the reader.
query = read_sql_query("andexanet.sql")
df = bq.cached_read(
    sql=query,
    csv_path=Path(DATA_DIR, "andexanet.csv"),
    # NOTE(review): use_cache=False presumably forces a fresh BigQuery run on
    # every execution instead of reusing the saved CSV — confirm this is wanted.
    use_cache=False,
)
 

In [None]:
# Display the first rows of `df` as a quick sanity check of the loaded result.
df.head()

In [None]:
# Build the list of organisations appearing in scmd.dose, one row per
# *resolved* organisation, together with its most recent A&E flag.
#
# Steps performed by the query:
#   1. resolved_orgs    - map every dose.ods_code to its successor code/name
#                         when scmd.ods_mapped records one (the literal string
#                         'None' is treated the same as NULL), otherwise keep
#                         the code/name found on the dose row.
#   2. latest_ae_status - for each ods_code in scmd.org_ae_status, keep the
#                         has_ae value whose latest period is the most recent.
#   3. final SELECT     - attach the A&E flag to the resolved organisation.
#
# The A&E join is made on the resolved code. Joining on the pre-successor
# code (ods.ods_code) would give a merged organisation its predecessor's
# status, could emit several rows for one resolved code with conflicting
# has_ae values, and would lose the status entirely for dose rows that have
# no match in scmd.ods_mapped (ods.ods_code is NULL there).
all_ods_codes_query = """
WITH resolved_orgs AS (
    SELECT DISTINCT
        CASE
            WHEN ods.successor_ods_code IS NOT NULL AND ods.successor_ods_code != 'None'
            THEN ods.successor_ods_code
            ELSE dose.ods_code
        END AS ods_code,
        CASE
            WHEN ods.successor_ods_code IS NOT NULL AND ods.successor_ods_code != 'None'
            THEN successor_org.ods_name
            ELSE dose.ods_name
        END AS ods_name
    FROM
        scmd.dose AS dose
    LEFT JOIN
        scmd.ods_mapped AS ods
    ON
        dose.ods_code = ods.ods_code
    LEFT JOIN
        scmd.ods_mapped AS successor_org
    ON
        ods.successor_ods_code = successor_org.ods_code
),
latest_ae_status AS (
    SELECT ods_code, has_ae
    FROM (
        SELECT
            ods_code,
            has_ae,
            ROW_NUMBER() OVER (PARTITION BY ods_code ORDER BY MAX(period) DESC) AS rn
        FROM scmd.org_ae_status
        GROUP BY ods_code, has_ae
    ) AS ranked_status
    WHERE rn = 1
)
SELECT
    resolved_orgs.ods_code,
    resolved_orgs.ods_name,
    latest_ae_status.has_ae
FROM
    resolved_orgs
LEFT JOIN
    latest_ae_status
ON
    resolved_orgs.ods_code = latest_ae_status.ods_code
"""

all_ods_codes = bq.cached_read(
    sql=all_ods_codes_query,
    csv_path=Path(DATA_DIR, "all_ods_codes.csv"),
    use_cache=False,
)
# NOTE(review): cached_read is already given this same csv_path, so this
# explicit write looks redundant — confirm against ebmdatalab before removing.
all_ods_codes.to_csv(Path(DATA_DIR, "all_ods_codes.csv"), index=False)
all_ods_codes.head()