# Imports

In [None]:
# Install additional modules (one time effort per cloud environment)
!pip install --upgrade import_ipynb data_repo_client urllib3 xmltodict

In [1]:
## imports and environment variables

# Imports
import import_ipynb
import pandas as pd
import os
import re
import json
import data_repo_client
from google.cloud import bigquery
import ingest_pipeline_utilities as utils
import build_file_inventory as bfi
import identify_supplementary_files as isf
import logging
from time import sleep
import datetime
from google.cloud import storage
import math
import csv
import numpy as np

# Logging: timestamped, level-tagged messages at INFO and above
logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %I:%M:%S %p",
    format="%(asctime)s - %(levelname)s: %(message)s",
)

# pandas display options: show frames untruncated (no row/column/column-width limits)
for _option_name, _option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ("display.max_colwidth", None),
    ('display.width', 1000),
    ('display.colheader_justify', 'center'),
    ('display.precision', 3),
):
    pd.set_option(_option_name, _option_value)

# Workspace details read from the environment (a missing variable raises KeyError)
ws_name, ws_project, ws_bucket = (
    os.environ[_env_key]
    for _env_key in ("WORKSPACE_NAME", "WORKSPACE_NAMESPACE", "WORKSPACE_BUCKET")
)
# Bucket name without the leading gs:// scheme
ws_bucket_name = re.sub('^gs://', '', ws_bucket)


Version 1.0.44: 10/8/2024 12:19pm - Nate Calvanese - Fixed bug where workspaces without tags would not run.
Version 1.0.9: 2/25/2023 3:15pm - Nate Calvanese - Replaced FAPI with utils functions
Version 2.0.4: 4/12/2024 2:30pm - Nate Calvanese - Fixed target path logic to remove unsupported characters
Version 1.0.10: 1/12/2024 11:25am - Nate Calvanese - Made max_combined_rec_ref_size configurable
Version 1.0.16: 8/13/2024 4:37pm - Nate Calvanese - Updated bug in COALESCE logic and added SPLIT transform function
Version 2.0.7: 12/13/2023 1:13pm -- Replaced deprecated df append with pd.concat
Version 1.0.3: 03/12/2024 12:12pm - Nate Calvanese - Fixed a bug introduced in V1.0.2 update
Version 1.0.3: 12/11/2023 1:25pm - Nate Calvanese - Fixed bug in query logic to correct source_datarepo_row_ids
Version 1.0.2: 10/4/2023 10:40am - Nate Calvanese - Updated query logic and added validation


# Create new snapshot

## Script to create new full view snapshot

In [2]:
# Parameters
params = {}
params["profile_id"] = "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61"
params["snapshot_readers_list"] = ["azul-anvil-prod@firecloud.org", "auth-domain"]
params["anvil_schema_version"] = "SRC"

# Loop through datasets and create new snapshot
dataset_id_run_list = [
    '64594d5d-0429-4d89-bdf6-2f92dcd19d80',
]
results = []
for dataset_id in dataset_id_run_list:
    # Look up the dataset details needed to name and configure the snapshot
    try:
        api_client = utils.refresh_tdr_api_client()
        datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
        dataset_info = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
        dataset_name = dataset_info["name"]
        phs_id = dataset_info["phs_id"]
        consent_name = dataset_info["properties"]["consent_name"]
        auth_domains = dataset_info["properties"]["auth_domains"]
    except Exception as e:
        # Record the failure instead of silently dropping the dataset from the results table
        logging.error(f"Error retrieving dataset {dataset_id} from TDR. Skipping snapshot creation. Error: {str(e)}")
        results.append([dataset_id, "Error", ""])
        continue
    if not dataset_name:
        logging.error(f"Dataset {dataset_id} has no name. Skipping snapshot creation.")
        results.append([dataset_id, "Error", ""])
        continue

    # Build the snapshot parameters and run the snapshot pipeline
    params["ws_bucket"] = ws_bucket
    params["dataset_id"] = dataset_id
    params["dataset_name"] = dataset_name
    params["phs_id"] = phs_id
    params["consent_name"] = consent_name
    params["auth_domains"] = auth_domains
    params["pipeline_results"] = []
    current_datetime_string = datetime.datetime.now().strftime("%Y%m%d%H%M")
    params["snapshot_name"] = params["dataset_name"] + "_" + params["anvil_schema_version"] + "_" + current_datetime_string
    utils.create_and_share_snapshot(params)

    # Any pipeline step whose status contains "Error" marks the whole run as failed
    int_df_results = pd.DataFrame(params["pipeline_results"], columns = ["Dataset", "Time", "Step", "Task", "Status", "Message"])
    errors = int_df_results[int_df_results["Status"].str.contains("Error")]
    if len(errors) > 0:
        results.append([dataset_id, "Error", ""])
        continue

    # Pull the new snapshot ID out of the "Create and Share Snapshot" task message
    snapshot_messages = str(int_df_results[int_df_results["Task"]=="Create and Share Snapshot"]["Message"])
    snapshot_id_match = re.search("{'id': '([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'", snapshot_messages)
    if snapshot_id_match:
        results.append([dataset_id, "Success", snapshot_id_match[1]])
    else:
        # No errors were reported but the ID could not be parsed; don't crash the remaining runs
        logging.warning(f"Snapshot created for dataset {dataset_id}, but the snapshot ID could not be found in the pipeline results.")
        results.append([dataset_id, "Success", ""])
results_df = pd.DataFrame(results, columns = ["dataset_id", "run_status", "snapshot_id"])
display(results_df)


12/10/2024 02:44:12 PM - INFO: Creating full-view snapshot.
12/10/2024 02:44:12 PM - INFO: Attempting to lookup consent code using PHS: 3018 and Consent Name: NRES.
12/10/2024 02:44:13 PM - INFO: Submitting snapshot request.
TDR Job ID: UXCCd1_LQpaZU2Fu5yQpiA
12/10/2024 02:45:44 PM - INFO: Snapshot Creation succeeded: {'id': 'f7dcb9f3-82d4-4fbe-b02b-5d2354e1eeb9', 'name': 'AnVIL_ENCORE_RS293_SRC_202412101444', 'description': 'Full view snapshot of AnVIL_ENCORE_RS293', 'createdDate': '2024-12-10T14:44:43.104277Z', 'profileId': 'e0e03e48-5b96-45ec-baa4-8cc1ebf74c61', 'storage': [{'region': 'us-central1', 'cloudResource': 'bigquery', 'cloudPlatform': 'gcp'}, {'region': 'us-east4', 'cloudResource': 'firestore', 'cloudPlatform': 'gcp'}, {'region': 'us-central1', 'cloudResource': 'bucket', 'cloudPlatform': 'gcp'}], 'secureMonitoringEnabled': False, 'consentCode': 'NRES', 'phsId': 'phs003018', 'cloudPlatform': 'gcp', 'dataProject': 'datarepo-6b8b6d50', 'storageAccount': None, 'selfHosted': Tr

Unnamed: 0,dataset_id,run_status,snapshot_id
0,64594d5d-0429-4d89-bdf6-2f92dcd19d80,Success,f7dcb9f3-82d4-4fbe-b02b-5d2354e1eeb9


## Verify Snapshots Have Properly Formatted DRS URI

In [None]:
def validate_snapshot_drs_format(snapshot_id):
    """Check that the file_ref values in a snapshot's anvil_file table contain the expected DRS URI prefix.

    Args:
        snapshot_id: ID of the TDR snapshot to validate.

    Returns:
        "Success" when every non-null file_ref contains the expected prefix,
        otherwise a string beginning with "Failure" that describes the problem.
        Failures retrieving the snapshot or querying BigQuery are logged and
        reported through the returned string rather than raised.
    """

    # Retrieve snapshot information
    api_client = utils.refresh_tdr_api_client()
    snapshots_api = data_repo_client.SnapshotsApi(api_client=api_client)
    try:
        response = snapshots_api.retrieve_snapshot(id=snapshot_id, include=["ACCESS_INFORMATION"]).to_dict()
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_dataset = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        # The results table only carries a short status, so log the cause for diagnosis
        logging.error(f"Error retrieving snapshot info for snapshot {snapshot_id}: {str(e)}")
        return "Failure - Issue Retrieving Snapshot Info"

    # Count the non-null file_ref values and how many of them contain the expected DRS prefix
    # NOTE(review): "_" is a single-character wildcard in LIKE, so 'v2_' also matches 'v2' followed by any character - confirm this is acceptable
    client = bigquery.Client()
    query = """SELECT COUNT(file_ref) AS rec_cnt, COUNT(CASE WHEN file_ref LIKE '%drs://drs.anv0:v2_%' THEN file_ref END) AS valid_cnt
                FROM `{project}.{dataset}.anvil_file`""".format(project=bq_project, dataset=bq_dataset)
    try:
        df = client.query(query).result().to_dataframe()
        rec_cnt = df["rec_cnt"].values[0]
        valid_cnt = df["valid_cnt"].values[0]
    except Exception as e:
        logging.error(f"Error querying BigQuery for snapshot {snapshot_id}: {str(e)}")
        return "Failure - BigQuery Error"

    if rec_cnt == valid_cnt:
        return "Success"
    return f"Failure: Only {valid_cnt} of {rec_cnt} records properly formatted"

# Loop through snapshots and validate the DRS URI format of anvil_file.file_ref
snapshot_id_list = [
'c3d22305-b3f2-4561-a5b9-bed82ee742f4',
'9fe2abd4-70b4-4eee-b00d-38726ced8620',
'5329c25e-ccad-435d-9250-6fcc3ff88472',
'ced601b2-9a11-40e9-8067-241e5a5996ed',
'8165245c-2003-4ec7-bf57-731959022d47',
'737d454c-88be-477f-ae2c-ef473e2106ce',
'3bdbad9e-f9d4-4442-8606-791d490bf0af',
'cd19195f-25a0-44b1-b47d-ec99141833fc',
'b897e519-ba8b-4758-a263-6d57bd3b8e2b',
'1d385cfc-4bed-4f52-8f7b-ea54fc44b4f7',
'02d25240-823f-4b1d-8562-95385716a453',
'1974a21b-c409-4736-a3d7-e195fa96c4eb',
'99b46287-4790-492c-8a12-bea33f0f927c',
'c6ef5822-3929-4ae7-b5bc-dc27528bf226',
'08d19a7e-b868-4766-9f7e-d879d972cbd7',
'35186e6d-2728-4a8e-b0ad-6b34d0fe480c',
'b0d176bf-d094-4e33-a34b-b83a94de86ea',
'cc6bacc8-29fa-4d97-8856-79f52ea50c6f',
'85b721da-ad8e-4d82-93f0-0988f94af22e',
'407c7800-3ab4-4b13-ba45-c6c13c1c2278',
'2529f127-cff5-43ff-b879-06bc0e3468ff',
'b511be0b-7dc5-4767-a891-37f43d04a5a5',
'a7e031c3-62d4-46db-b2e2-0bdca6bbad65',
'5bba97dc-d6ab-4329-912f-148c8b807056',
'9cf61d88-d096-4981-b0c6-99db77554c01',
'4c722626-c559-4f5a-84bd-8d7d46983e1e',
'7c237e08-3329-4e64-bd2a-063be290e78b',
'4117144f-92e7-454f-9263-dad5e128cadb',
'ce2e7235-26e6-470f-8e05-298193b7f53d',
'6df525e1-b143-4e6f-b667-80c783ae1b66',
'92666b7c-4d50-4530-88e9-ea2d3da9d07a',
'42644c25-fa23-4b4e-8fcc-907cd8dcef60',
'155c11a9-638a-45c8-b172-7cf2e3e16fe6',
'b3da9fec-08ad-4496-a9ac-1411388fb5cc',
'0de07296-e3ff-4fe6-9183-9f421484197c',
'1b6273c6-7769-4daf-abee-93b11b322c73',
'ea50255a-45a4-4846-82e3-02b4f46f5b17',
'eb7045e1-2286-49f1-bce6-21b5d7fa5c32',
'b763c288-4132-434a-a6c9-25ad51b9d961',
'b67702a8-307d-4b20-835e-c0245d0761e5',
'88548251-e59e-4bc3-b71a-f1e9e2369919',
'd3dc5627-503b-48a5-ad79-31ab6c2fd417',
'ec14f8cd-5b1b-4124-a235-f11159984c7c',
'6d9e1212-4fa6-4632-be8a-75c45a474dd3',
'667eac9b-4e90-413d-80f3-d857b9829ab7',
'c091ea30-1862-4b1f-8e92-087b441472c3',
'43c86818-9bfe-46f2-9ae4-4a55a7baef1f',
'ebdaca04-ef29-42f3-8486-a94dade81bf8',
'f8781fbf-5fef-4481-8819-3df1bc724b7f',
'830df9ed-e4a6-4c9a-a97a-aa080fb030e4',
'84703c54-a9dd-400c-9701-2fc40922e3e3',
'c1c674dd-056a-470c-8874-bf70d8fae3a8',
'6a5b3be6-d1de-4f23-a431-b08e7ab231b8',
'ffe34538-3ddd-48de-b4a2-94f9b2dad086',
'2c6de04e-104d-42c8-8448-97d74985dacb',
'2a1882d9-88ca-4849-bcc1-f6914f593407',
'bf2f4106-cee9-419c-b4d1-d7b03a6293d5',
'a6c36f5e-b86c-4164-85ae-8bf0df2e4a90',
'7c19d852-e36a-4353-afea-10e501601d9a',
'00297802-e20a-413f-b389-a6f764b6600e',
'b8a455eb-827d-43a0-a89b-5d017747140f',
'3e85b06a-a6ea-4ce8-a655-44b1fce12138',
'9321b908-f2e4-437b-b53e-ed81754dcace',
'172bada7-f1c5-41c4-836d-05381beaed9a',
'133e902c-5ff0-4119-8078-db3e15006844',
'452bcafd-ab45-4e24-b5e0-13fcf22b0755',
'5e547934-c339-410e-a013-dfefed50f4b8',
'ffa84feb-ca0e-43d3-a04d-a402a8e24a3b',
'ff27037e-cb52-44ef-8979-f6e7ac3ed6f6',
'c853d4c0-d4be-433d-964e-e30bdc35480e',
'8fbe2def-b8ad-4b2d-90c9-0dd4517c67e1',
'03e54581-8fd3-47c3-9143-55368d2e4e86',
'9efae3c7-904c-48a8-939a-e82b46005ae1',
'5955a235-5be6-47bc-8303-ed0c4e68f501',
'e04edfef-69f8-47ff-8df9-dfff0e9218d2',
'f2a7be5a-4f7a-4a96-935e-ca7592855b45',
'7c90289b-be3e-4c9b-917a-d5e27d95dc15',
'0f46a588-b4ff-4a69-99e9-0a0bcf052522',
'cdd689fd-10f3-4cfa-b738-46549e689cac',
'eb7948be-1007-4b0e-b9b6-a5c40bbb9596',
'f20753f0-d09a-4b47-bffe-8f24ec354761',
'4cff04f4-eff9-4a62-bc6e-691accfbd328',
'9a61b980-4a33-465a-bc50-1aba00bc2cf6',
'90fe2016-e79c-456c-a5f9-3a31149fcd65',
'a4c62d7f-34f0-4e2e-9e46-c762d3ab0ff2',
'28dc8121-5e55-46c2-8313-681de2298986',
'dcc578ed-44bb-458f-8ff5-a78ca83f4616',
'aa42debe-3747-4dcd-8bc9-24eb90673fa5',
'5208772d-21f9-46b0-8167-0b05b57296b8',
'a2da748b-fec8-4e10-88ee-de32cbe8dee1',
'26df2a34-b10d-4361-ba2b-d9f966d09f61',
'dd00a8ba-ac49-481b-8d79-0e440adafd77',
'0df983d7-ed5e-44d2-acf1-686822b0cc7e',
'28559e94-ed57-48c8-bc8b-6cc4ad659a61',
'8b385bd3-52aa-48b9-be33-41f4d3fd4531',
'ce1bf5c3-525e-455d-a1e9-dd5f3d68c9d3',
'd0a6aa4c-821c-4bba-b53b-4f230ca3cda4',
'd9e817a2-6657-433b-8b2f-73790561725c',
'33c854eb-d228-4a82-8324-5e455ed1e447',
]
# Validate every snapshot first, then build the summary table once
# (building it inside the loop repeated the work and left results_df undefined for an empty list)
results = []
for snapshot_id in snapshot_id_list:
    status = validate_snapshot_drs_format(snapshot_id)
    results.append([snapshot_id, status])
results_df = pd.DataFrame(results, columns = ["snapshot_id", "validation_status"])
display(results_df)

# Add and populate anvil_file.is_supplementary

## Script to patch dataset

In [None]:
# Set base parameters
params = {}
params["profile_id"] = "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61"

# Loop through datasets and process is_supplementary field
dataset_id_list = [
'8b2b1c92-66cf-403c-8eb0-03b523d1550e',
'595b6755-e7ae-4e83-af2e-693c089aeec3',
'84ac0d05-4be5-43e9-973e-ef999144d802',
'732eaae3-b509-4a7a-8961-09d861e55253',
'544f643d-b19f-4aa0-a6ec-a90e1a8681d6',
'f85ea65e-1943-4bd6-a541-71c5d8465ca9',
'280c5d6f-39a3-4d1d-aad2-a174451cd9b2',
]
results = []
for dataset_id in dataset_id_list:
    logging.info(f"Patching dataset_id: {dataset_id}")
    # The output directory is dataset-specific, so it is reset on every iteration
    params["t_output_dir"] = "ingest_pipeline/output/transformed/anvil/{}/table_data".format(dataset_id)
    output, status = isf.identify_supplementary_files(params, dataset_id)
    results.append([dataset_id, status, output])
# Build the summary table once, after all datasets are processed
# (building it inside the loop left results_df undefined for an empty list)
results_df = pd.DataFrame(results, columns = ["dataset_id", "run_status", "output"])
display(results_df)


## Script to validate patch worked properly

In [None]:
def validate_supp_file_flg(dataset_id):
    """Validate the anvil_file.is_supplementary field of a TDR dataset.

    Three checks run in order, stopping at the first failure:
      1. The dataset schema has an is_supplementary column on anvil_file.
      2. is_supplementary is non-null on every anvil_file row.
      3. No activity links a supplementary generated file to a biosample, and no
         activity links a generated file and a used file whose is_supplementary
         values differ.

    Args:
        dataset_id: ID of the TDR dataset to validate.

    Returns:
        "Success" when all checks pass, otherwise a string beginning with
        "Failure" that describes the first failed check. TDR and BigQuery
        errors are logged and reported through the returned string rather than raised.
    """

    # Retrieve dataset information
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        response = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION"]).to_dict()
        schema_tables = response["schema"]["tables"]
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_dataset = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        logging.error(f"Error retrieving dataset info for dataset {dataset_id}: {str(e)}")
        return "Failure - Issue Retrieving Dataset Info"

    # Determine if field exists for dataset, continue if so, fail otherwise
    field_found = any(
        col["name"] == "is_supplementary"
        for table in schema_tables if table["name"] == "anvil_file"
        for col in table["columns"]
    )
    if not field_found:
        return "Failure - is_supplementary field not found"

    client = bigquery.Client()

    # Check field population: COUNT(is_supplementary) skips NULLs, so the counts differ when any row is unpopulated
    query = """SELECT COUNT(*) AS rec_cnt, COUNT(is_supplementary) AS populated_cnt
                    FROM `{project}.{dataset}.anvil_file`""".format(project=bq_project, dataset=bq_dataset)
    try:
        df = client.query(query).result().to_dataframe()
        fully_populated = df["rec_cnt"].values[0] == df["populated_cnt"].values[0]
    except Exception as e:
        logging.error(f"Error checking is_supplementary population for dataset {dataset_id}: {str(e)}")
        return "Failure - BigQuery Error"
    if not fully_populated:
        return "Failure - is_supplementary field not populated"

    # Check field logic: expand every activity into (generated file, used file, biosample) links,
    # tag both files with their is_supplementary value, and classify each link
    validation_query = """
            WITH activity_agg
            AS
            (
              SELECT used_biosample_id, generated_file_id, used_file_id FROM `{project}.{dataset}.anvil_activity`
              UNION ALL 
              SELECT [] AS used_biosample_id, generated_file_id, used_file_id FROM `{project}.{dataset}.anvil_alignmentactivity`
              UNION ALL 
              SELECT used_biosample_id, generated_file_id, [] AS used_file_id FROM `{project}.{dataset}.anvil_assayactivity`
              UNION ALL 
              SELECT used_biosample_id, generated_file_id, [] AS used_file_id FROM `{project}.{dataset}.anvil_sequencingactivity`
              UNION ALL 
              SELECT [] AS used_biosample_id, generated_file_id, used_file_id FROM `{project}.{dataset}.anvil_variantcallingactivity`
            ),
            activity_exp 
            AS
            (
              SELECT file_id, int_file_id, biosample_id
              FROM activity_agg
                  LEFT JOIN UNNEST(used_biosample_id) AS biosample_id
                  LEFT JOIN UNNEST(generated_file_id) as file_id
                  LEFT JOIN UNNEST(used_file_id) as int_file_id
            ),
            activity_exp_tagged
            AS
            (
              SELECT a.file_id, b.is_supplementary AS file_id_supp, int_file_id, c.is_supplementary AS int_file_id_supp, biosample_id
              FROM activity_exp a
                  LEFT JOIN  `{project}.{dataset}.anvil_file` b
                  ON a.file_id = b.file_id
                  LEFT JOIN  `{project}.{dataset}.anvil_file` c
                  ON a.int_file_id = c.file_id 
            )
            SELECT CASE WHEN file_id_supp = TRUE AND biosample_id IS NOT NULL THEN 'Supplemental File Linked to BioSample' WHEN (file_id_supp = TRUE AND int_file_id_supp = FALSE) OR (file_id_supp = FALSE AND int_file_id_supp = TRUE) THEN 'Supplemental File Linked to Non-Supplemental File' ELSE 'No Issue Found' END AS finding, COUNT(*) AS occurrences
            FROM activity_exp_tagged
            GROUP by finding
            """.format(project=bq_project, dataset=bq_dataset)
    try:
        df = client.query(validation_query).result().to_dataframe()
        # Round-trip through JSON so the counts are plain Python ints
        records_json = json.loads(df.to_json(orient='records'))
    except Exception as e:
        logging.error(f"Error validating is_supplementary logic for dataset {dataset_id}: {str(e)}")
        return "Failure - BigQuery Error"

    # A finding missing from the query result means zero occurrences
    occurrences_by_finding = {record["finding"]: record["occurrences"] for record in records_json}
    supp_linked_to_biosample = occurrences_by_finding.get("Supplemental File Linked to BioSample", 0)
    supp_linked_to_nonsupp = occurrences_by_finding.get("Supplemental File Linked to Non-Supplemental File", 0)
    non_issue_links = occurrences_by_finding.get("No Issue Found", 0)
    if supp_linked_to_biosample > 0 or supp_linked_to_nonsupp > 0:
        return f"Failure - Errors found when validating supplementary files flagged in the TDR dataset: Supplemental Files Linked to a Biosample: {str(supp_linked_to_biosample)} Supplemental Files Linked to a Non-Supplemental File: {str(supp_linked_to_nonsupp)} Links with No Issues: {str(non_issue_links)}"
    return "Success"

# Loop through datasets and validate is_supplementary field
dataset_id_list = [
'8b2b1c92-66cf-403c-8eb0-03b523d1550e',
'595b6755-e7ae-4e83-af2e-693c089aeec3',
'84ac0d05-4be5-43e9-973e-ef999144d802',
'732eaae3-b509-4a7a-8961-09d861e55253',
'544f643d-b19f-4aa0-a6ec-a90e1a8681d6',
'f85ea65e-1943-4bd6-a541-71c5d8465ca9',
'280c5d6f-39a3-4d1d-aad2-a174451cd9b2',
]
results = []
for dataset_id in dataset_id_list:
    logging.info(f"Validating dataset_id: {dataset_id}")
    status = validate_supp_file_flg(dataset_id)
    results.append([dataset_id, status])
# Build the summary table once, after all datasets are validated
# (building it inside the loop left results_df undefined for an empty list)
results_df = pd.DataFrame(results, columns = ["dataset_id", "validation_status"])
display(results_df)

# Attempt to populate anvil_donor.organism_type

## Script to patch dataset

In [None]:
def populate_organism_type(dataset_id):
    """Rebuild anvil_donor with a derived organism_type and re-ingest it into the TDR dataset.

    organism_type is set to 'Homo sapiens' on every donor row when any
    'library:reference' value in the dataset's workspace_attributes table matches
    one of the listed reference-build patterns; otherwise it is NULL. The rebuilt
    records are written as newline-delimited JSON, copied to the workspace bucket
    (module-level ws_bucket) with gsutil, and ingested with the "replace" update strategy.

    Args:
        dataset_id: ID of the TDR dataset to patch.

    Returns:
        "Success" when the file was built, uploaded and ingested; "Failure"
        otherwise (details are logged).
    """
    import subprocess  # local import: only needed for the gsutil upload below

    logging.info(f"Processing anvil_donor.organism_type for Dataset ID = {dataset_id}")

    # Retrieve dataset information
    logging.info("Retrieving necessary information from TDR.")
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        response = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION"]).to_dict()
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_dataset = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        logging.error("Error retrieving information from TDR. Exiting function. Error: {}".format(e))
        return "Failure"

    # Re-process anvil_donor data to include organism_type (where available)
    logging.info("Re-processing existing anvil_donor data to include organism_type value.")
    client = bigquery.Client()
    target_file = "anvil_donor.json"
    destination_dir = "ingest_pipeline/output/transformed/anvil/{}/table_data".format(dataset_id)
    query = """SELECT donor_id, 
    (SELECT MAX(CASE WHEN REGEXP_CONTAINS(value, '(h37|h38|h39|hg16|hg17|hg18|hg19|hs37|hs38|b37)') THEN 'Homo sapiens' END) AS organism_type FROM `{project}.{dataset}.workspace_attributes` WHERE attribute = 'library:reference') AS organism_type,
    part_of_dataset_id, phenotypic_sex, reported_ethnicity, genetic_ancestry, source_datarepo_row_ids
    FROM `{project}.{dataset}.anvil_donor`""".format(project=bq_project, dataset=bq_dataset)
    try:
        df = client.query(query).result().to_dataframe()
        records_list = json.loads(df.to_json(orient='records'))
        # Newline-delimited JSON: one record per line, no trailing newline
        with open(target_file, 'w') as outfile:
            outfile.write('\n'.join(json.dumps(record) for record in records_list))
        # check=True turns a failed copy into an exception, so a failed upload is no longer reported as success
        subprocess.run(
            ["gsutil", "cp", target_file, "{}/{}/".format(ws_bucket, destination_dir)],
            check=True, capture_output=True, text=True,
        )
        logging.info("Successfully created new anvil_donor.json file.")
    except subprocess.CalledProcessError as e:
        # Include gsutil's stderr, which str(e) alone does not carry
        logging.error("Error creating new anvil_donor.json file. Exiting function. Error: {}".format(e.stderr or str(e)))
        return "Failure"
    except Exception as e:
        logging.error("Error creating new anvil_donor.json file. Exiting function. Error: {}".format(str(e)))
        return "Failure"
    finally:
        # Always remove the local scratch file, including on failure
        if os.path.exists(target_file):
            os.remove(target_file)

    # Ingest updated anvil_donor data
    logging.info("Ingesting updated anvil_donor data into TDR dataset.")
    source_full_file_path = "{}/{}/{}".format(ws_bucket, destination_dir, "anvil_donor.json")
    ingest_request = {
        "table": "anvil_donor",
        "profile_id": "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61",
        "ignore_unknown_values": True,
        "resolve_existing_files": True,
        "updateStrategy": "replace",
        "format": "json",
        "load_tag": "Ingest for {}".format(dataset_id),
        "path": source_full_file_path
    }
    # Up to two attempts in total: the initial ingest plus one retry after a 10 second pause
    attempt_counter = 0
    while True:
        try:
            api_client = utils.refresh_tdr_api_client()
            datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
            ingest_request_result, job_id = utils.wait_for_tdr_job(datasets_api.ingest_dataset(id=dataset_id, ingest=ingest_request))
            logging.info("Ingest from file anvil_donor.json succeeded: {}".format(str(ingest_request_result)[0:1000]))
            break
        except Exception as e:
            logging.error("Error on Dataset Ingest: {}".format(str(e)))
            attempt_counter += 1
            if attempt_counter < 2:
                logging.info("Retrying Dataset Ingest (attempt #{})...".format(str(attempt_counter)))
                sleep(10)
                continue
            else:
                logging.error("Maximum number of retries exceeded. Exiting function.")
                return "Failure"

    # Return success message if no failures recorded
    logging.info("Function completed successfully.")
    return "Success"

# Loop through datasets and populate anvil_donor.organism_type
dataset_id_list = [
'd74b26d5-24bb-4696-84c3-bcd1f5f90b08',
]
results = []
for dataset_id in dataset_id_list:
    status = populate_organism_type(dataset_id)
    results.append([dataset_id, status])
# Build the summary table once, after all datasets are processed
# (building it inside the loop left results_df undefined for an empty list)
results_df = pd.DataFrame(results, columns = ["dataset_id", "run_status"])
display(results_df)


## Script to examine organism_type population

In [None]:
def validate_organism_type(dataset_id):
    """Report whether anvil_donor.organism_type is populated on any row of a TDR dataset.

    Args:
        dataset_id: ID of the TDR dataset to examine.

    Returns:
        "Success - Field Populated" when at least one anvil_donor row has a
        non-null organism_type, "Success - Field Not Populated" when none do, or
        a string beginning with "Failure" when the dataset lookup or the
        BigQuery query fails (the cause is logged).
    """

    # Retrieve dataset information (only the BigQuery location is needed)
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        response = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION"]).to_dict()
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_dataset = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        logging.error(f"Error retrieving dataset info for dataset {dataset_id}: {str(e)}")
        return "Failure - Issue Retrieving Dataset Info"

    # Count the donor rows with a non-null organism_type
    client = bigquery.Client()
    query = """SELECT COUNT(organism_type) AS populated_cnt
                FROM `{project}.{dataset}.anvil_donor`""".format(project=bq_project, dataset=bq_dataset)
    try:
        df = client.query(query).result().to_dataframe()
        populated_cnt = df["populated_cnt"].values[0]
    except Exception as e:
        logging.error(f"Error querying organism_type population for dataset {dataset_id}: {str(e)}")
        return "Failure - BigQuery Error"

    if populated_cnt > 0:
        return "Success - Field Populated"
    return "Success - Field Not Populated"

# Loop through datasets and examine anvil_donor.organism_type population
dataset_id_list = [
'd74b26d5-24bb-4696-84c3-bcd1f5f90b08',
]
results = []
for dataset_id in dataset_id_list:
    status = validate_organism_type(dataset_id)
    results.append([dataset_id, status])
# Build the summary table once, after all datasets are checked
# (building it inside the loop left results_df undefined for an empty list)
results_df = pd.DataFrame(results, columns = ["dataset_id", "validation_status"])
display(results_df)

# Update references to md5-added files

In [None]:
# Function to collect all datarepo rows for a particular table within a dataset
def collect_all_datarepo_rows(dataset_id, table_name):
    """Return the datarepo_row_id of every row in one table of a TDR dataset.

    Args:
        dataset_id: ID of the TDR dataset.
        table_name: Name of the table to read within the dataset's BigQuery dataset.

    Returns:
        List of datarepo_row_id values, one per row returned by the query.

    Raises:
        Exception: If the dataset's BigQuery location cannot be retrieved from
            TDR, or if the BigQuery query fails. The error is logged first.
    """
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        response = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION"]).to_dict()
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_schema = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        logging.error("Error retrieving BQ project and schema: {}".format(str(e)))
        # The query cannot be built without the BigQuery location; fail here with the real cause
        # rather than later with an unrelated UnboundLocalError
        raise Exception(e) from e
    client = bigquery.Client()
    query = "SELECT datarepo_row_id FROM `{project}.{schema}.{table}`".format(project = bq_project, schema = bq_schema, table = table_name)
    try:
        return [row["datarepo_row_id"] for row in client.query(query)]
    except Exception as e:
        logging.error("Error retrieving datarepo_row_id list: {}".format(str(e)))
        raise Exception(e) from e

# Function to delete rows from a dataset
def delete_old_records(dataset_id, table, datarepo_row_ids):
    """Soft-delete the given rows from one table of a TDR dataset.

    Args:
        dataset_id: ID of the TDR dataset.
        table: Name of the table the rows belong to.
        datarepo_row_ids: Row IDs to delete. When empty (or None), nothing is
            submitted and the function only logs that there was nothing to delete.

    Returns:
        None.

    Raises:
        Exception: If the deletion request fails. The error is logged first.
    """
    logging.info(f"Attempting to delete original {table} records.")
    if not datarepo_row_ids:
        logging.info("No datarepo_row_ids specified for deletion.")
        return

    data_deletion_payload = {
        "deleteType": "soft",
        "specType": "jsonArray",
        "tables": [{
          "tableName": table,
          "jsonArraySpec": {
            "rowIds": datarepo_row_ids
          }
        }]
    }
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        data_deletion_result, job_id = utils.wait_for_tdr_job(datasets_api.apply_dataset_data_deletion(id=dataset_id, data_deletion_request=data_deletion_payload))
        logging.info("Result: {}".format(data_deletion_result))
    except Exception as e:
        # Failures are logged at ERROR level so they are not lost among INFO messages
        logging.error("Error: {}".format(str(e)))
        raise Exception(e) from e

def ingest_updated_records(profile_id, dataset_id, table, records_dict, max_retries=0):
    """Ingest a set of records into one table of a TDR dataset using the "replace" update strategy.

    Args:
        profile_id: TDR billing profile ID placed in the ingest request.
        dataset_id: ID of the TDR dataset to ingest into.
        table: Name of the target table.
        records_dict: Records to ingest; sent as the "records" value of an
            "array"-format ingest request.
        max_retries: Number of additional attempts after a failed ingest, each
            preceded by a 10 second pause. Defaults to 0 (no retries), which
            matches the previous behavior where the retry branch was unreachable.

    Returns:
        None.

    Raises:
        Exception: If the ingest still fails after the allowed attempts. Each
            failure is logged first.
    """
    logging.info(f"Submitting ingest for updated {table} records.")

    # Build, submit, and monitor ingest request
    ingest_request = {
        "table": table,
        "profile_id": profile_id,
        "ignore_unknown_values": True,
        "resolve_existing_files": True,
        "updateStrategy": "replace",
        "format": "array",
        "bulkMode": False,
        "load_tag": f"File ref fields patch for {table} in {dataset_id}",
        "records": records_dict
    }
    attempt_counter = 0
    while True:
        try:
            # Refresh the client on every attempt so a retry does not reuse an expired token
            api_client = utils.refresh_tdr_api_client()
            datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
            ingest_request_result, job_id = utils.wait_for_tdr_job(datasets_api.ingest_dataset(id=dataset_id, ingest=ingest_request))
            logging.info("Ingest succeeded: {}".format(str(ingest_request_result)[0:1000]))
            return
        except Exception as e:
            logging.error("Error on ingest: {}".format(str(e)))
            attempt_counter += 1
            if attempt_counter <= max_retries:
                logging.info("Retrying ingest (attempt #{})...".format(str(attempt_counter)))
                sleep(10)
                continue
            logging.error("Maximum number of retries exceeded. Logging error.")
            raise Exception(e) from e
                
def _query_records_as_dicts(query):
    """Run `query` in BigQuery and return its rows as a list of dicts.

    Rows are round-tripped through DataFrame.to_json(orient='records') and json.loads,
    so values come back as plain JSON-compatible Python objects. Returns an empty list
    when the query yields no rows. Any BigQuery/pandas exception propagates to the caller.
    """
    client = bigquery.Client()
    res = client.query(query).result()
    if res.total_rows > 0:
        logging.info(f"{res.total_rows} records to process.")
        return json.loads(res.to_dataframe().to_json(orient='records'))
    logging.info("No records to process.")
    return []


def update_recs_w_file_refs(dataset_id):
    """Rebuild file references / md5 hashes for one TDR dataset, then re-run the T pipeline.

    Steps (each step returns a "Failure - ..." string instead of raising):
      1. Read the dataset schema and BigQuery location from TDR.
      2. For every table except file_inventory and anvil_file that has fileref columns,
         select the rows referencing a file_inventory entry whose md5_hash is NULL, rebuild
         each fileref as {sourcePath, targetPath} from datarepo_load_history, re-ingest the
         rows, and delete the old row IDs.
      3. Do the same for file_inventory itself, filling md5_hash from the GCS blob metadata.
      4. Delete all rows from the anvil_% tables.
      5. Re-run the T pipeline with data validation skipped.

    NOTE(review): the schema's `array_of` flag is collected but never used, so array-valued
    fileref columns get the same scalar SQL as single-valued ones -- confirm none exist.

    Args:
        dataset_id: ID of the TDR dataset to process.

    Returns:
        "Success", or one of "Failure - Pre-processing", "Failure - Table Processing",
        "Failure - File Inventory Processing", "Failure - anvil_% Record Deletion",
        "Failure - Dataset Retrieval for T Pipeline".
    """
    logging.info(f"Processing md5-added files for Dataset ID = {dataset_id}")

    # Billing profile used for every ingest below and for the T pipeline re-run
    profile_id = "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61"

    ## Retrieve dataset information
    logging.info("Retrieving necessary information from TDR.")
    src_schema_dict = {}
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        response = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION"]).to_dict()
        src_schema_dict["tables"] = response["schema"]["tables"]
        bq_project = response["access_information"]["big_query"]["project_id"]
        bq_dataset = response["access_information"]["big_query"]["dataset_name"]
    except Exception as e:
        logging.error("Error retrieving information from TDR. Exiting function. Error: {}".format(e))
        return "Failure - Pre-processing"

    ## Parse TDR schema to identify file reference fields
    table_dict = {}
    for table in src_schema_dict["tables"]:
        if table["name"] in ["file_inventory", "anvil_file"]:
            continue
        fileref_cols = [[column["name"], column["array_of"]] for column in table["columns"] if column["datatype"] == "fileref"]
        if fileref_cols:
            table_dict[table["name"]] = fileref_cols

    ## Loop through tables and re-process impacted records
    for table in table_dict.keys():
        logging.info(f"Processing updates for {table}.")
        # Retrieve relevant records from BigQuery
        col_list = [col[0] for col in table_dict[table]]
        old_cols = ", ".join(col_list)
        where_clause = " OR ".join(f"t.{column_name} IN (SELECT file_ref FROM file_list)" for column_name in col_list)
        # One load_hist join (alias t<idx>) per fileref column
        new_cols = "".join(
            f", CASE WHEN t{idx}.source_name IS NOT NULL THEN TO_JSON(STRUCT(t{idx}.source_name AS sourcePath, t{idx}.target_path AS targetPath)) END AS {column_name}"
            for idx, column_name in enumerate(col_list)
        )
        join_clause = "".join(
            f" LEFT JOIN load_hist t{idx} ON t.{column_name} = t{idx}.file_id"
            for idx, column_name in enumerate(col_list)
        )

        query = """WITH 
            file_list AS (SELECT * FROM `{project}.{dataset}.file_inventory` WHERE md5_hash IS NULL),
            load_hist AS (SELECT * FROM `{project}.{dataset}.datarepo_load_history` WHERE state = 'succeeded')
            SELECT t.* EXCEPT({old_cols}){new_cols}
            FROM `{project}.{dataset}.{table}` t {joins} WHERE {where}""".format(project=bq_project, dataset=bq_dataset, table=table, old_cols=old_cols, new_cols=new_cols, joins=join_clause, where=where_clause)
        try:
            records_list = _query_records_as_dicts(query)
        except Exception as e:
            logging.error(f"Error retrieving update records from BigQuery: {str(e)}")
            return "Failure - Table Processing"
        # Ingest updated records back to TDR dataset
        try:
            datarepo_row_ids = []
            for record in records_list:
                datarepo_row_ids.append(record.pop("datarepo_row_id", None))
                for col in col_list:
                    # The LEFT JOIN leaves a fileref NULL when it has no load-history match;
                    # json.loads(None) would raise and abort the whole table
                    if isinstance(record[col], str):
                        record[col] = json.loads(record[col])
            if records_list:
                ingest_updated_records(profile_id, dataset_id, table, records_list)
                delete_old_records(dataset_id, table, datarepo_row_ids)
        except Exception as e:
            logging.error(f"Error replacing TDR records: {str(e)}")
            return "Failure - Table Processing"

    ## Re-process file_inventory
    logging.info("Processing updates for file_inventory.")
    # Retrieve relevant records from BigQuery
    # EXCEPT(file_ref) drops the original column so the rebuilt file_ref is the only one;
    # duplicate column names would break both the BigQuery result and the to_json export
    query = """WITH 
        file_list AS (SELECT file_ref FROM `{project}.{dataset}.file_inventory` WHERE md5_hash IS NULL),
        load_hist AS (SELECT * FROM `{project}.{dataset}.datarepo_load_history` WHERE state = 'succeeded')
        SELECT t1.* EXCEPT(file_ref), CASE WHEN t2.source_name IS NOT NULL THEN TO_JSON(STRUCT(t2.source_name AS sourcePath, t2.target_path AS targetPath)) END AS file_ref
        FROM `{project}.{dataset}.file_inventory` t1
          INNER JOIN load_hist t2 ON t1.file_ref = t2.file_id
        WHERE t1.file_ref IN (SELECT file_ref FROM file_list)""".format(project=bq_project, dataset=bq_dataset)
    try:
        records_list = _query_records_as_dicts(query)
    except Exception as e:
        logging.error(f"Error retrieving update records from BigQuery: {str(e)}")
        return "Failure - File Inventory Processing"
    # Loop through records and update md5_hash from GCS metadata
    try:
        storage_client = storage.Client()
        # gs://<bucket>/<object>: the bucket is everything up to the first "/", the object is the rest
        gcs_uri_pattern = re.compile(r"gs://([^/]+)/(.+)")
        datarepo_row_ids = []
        for record in records_list:
            uri_match = gcs_uri_pattern.match(record["uri"])
            if uri_match is None:
                raise ValueError(f"Unrecognized GCS URI: {record['uri']}")
            bucket_name, object_name = uri_match.groups()
            bucket = storage_client.bucket(bucket_name, user_project="anvil-datastorage")
            blob = bucket.get_blob(object_name)
            if blob is None:
                # get_blob returns None for a missing object; fail with a readable message
                raise ValueError(f"Object not found in GCS: {record['uri']}")
            record["md5_hash"] = blob.md5_hash
            datarepo_row_ids.append(record.pop("datarepo_row_id", None))
    except Exception as e:
        logging.error(f"Error retrieving file metadata from GCS: {str(e)}")
        return "Failure - File Inventory Processing"
    # Ingest updated records back to TDR dataset
    try:
        for record in records_list:
            # Same treatment as the per-table path: turn the TO_JSON string into an object
            if isinstance(record.get("file_ref"), str):
                record["file_ref"] = json.loads(record["file_ref"])
        if records_list:
            ingest_updated_records(profile_id, dataset_id, "file_inventory", records_list)
            delete_old_records(dataset_id, "file_inventory", datarepo_row_ids)
    except Exception as e:
        logging.error(f"Error replacing TDR records: {str(e)}")
        return "Failure - File Inventory Processing"

    ## Empty anvil_% tables
    logging.info("Clearing out existing anvil_% tables")
    table_list = ["anvil_activity", "anvil_alignmentactivity", "anvil_antibody", "anvil_assayactivity", "anvil_biosample", "anvil_diagnosis", "anvil_donor", "anvil_file", "anvil_sequencingactivity", "anvil_variantcallingactivity"]
    for table in table_list:
        try:
            datarepo_row_ids = collect_all_datarepo_rows(dataset_id, table)
            if datarepo_row_ids:
                delete_old_records(dataset_id, table, datarepo_row_ids)
        except Exception as e:
            logging.error(f"Error clearing out existing anvil_% records: {str(e)}")
            return "Failure - anvil_% Record Deletion"

    ## Re-run T pipeline without validation
    params = {}
    params["ws_name"] = ws_name
    params["ws_project"] = ws_project
    params["ws_bucket"] = ws_bucket
    params["ws_bucket_name"] = ws_bucket_name
    params["profile_id"] = profile_id
    params["mapping_target"] = "anvil"
    params["skip_transforms"] = False
    params["transform_list_override"] = [] # Leave empty to run transforms for all files, otherwise populate with target table names
    params["skip_schema_extension"] = False
    params["skip_ingests"] = False
    params["ingest_list_override"] = [] # Leave empty to run ingests for all files, otherwise populate with target table names
    params["skip_file_relation_inference"] = False
    params["skip_dangling_fk_resolution"] = False
    params["skip_supplementary_file_identification"] = False
    params["skip_snapshot_creation"] = False
    params["snapshot_readers_list"] = ["azul-anvil-prod@firecloud.org"] # Include "auth-domain" to add the auth domain(s) as a reader (if one exists)
    params["skip_data_validation"] = True
    try:
        api_client = utils.refresh_tdr_api_client()
        datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
        dataset_info = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
        dataset_name = dataset_info["name"]
        phs_id = dataset_info["phs_id"]
        consent_name = dataset_info["properties"]["consent_name"]
        auth_domains = dataset_info["properties"]["auth_domains"]
        # Not passed to the pipeline, but a missing key still aborts here as it did before
        src_workspaces = dataset_info["properties"]["source_workspaces"]
    except Exception as e:
        logging.error(f"Error retrieving dataset details for T pipeline: {str(e)}")
        return "Failure - Dataset Retrieval for T Pipeline"
    # An empty dataset name skips the pipeline run but is still reported as success below
    if dataset_name:
        params["dataset_id"] = dataset_id
        params["dataset_name"] = dataset_name
        params["phs_id"] = phs_id
        params["consent_name"] = consent_name
        params["auth_domains"] = auth_domains
        utils.run_t_pipeline(params)

    # Return success message if no failures recorded
    logging.info("Function completed successfully.")
    return "Success"

# Loop through datasets and process md5 updates
dataset_id_list = [
'700303c2-fcef-48a5-9900-096bf34e2d83',
'a715c70d-da92-43ee-a851-1a27277909a2',
]
results = []
for dataset_id in dataset_id_list:
    status = update_recs_w_file_refs(dataset_id)
    results.append([dataset_id, status])
# Build the summary once, after the loop, so it also exists when dataset_id_list is empty
# (previously results_df was only assigned inside the loop)
results_df = pd.DataFrame(results, columns = ["dataset_id", "run_status"])
display(results_df)


In [None]:
# # Testing
# dataset_id = 'bc6075ac-5cfe-4613-8601-36ceb614939e'

# logging.info(f"Processing md5-added files for Dataset ID = {dataset_id}")

# ## Retrieve dataset information
# logging.info("Retrieving necessary information from TDR.")
# src_schema_dict = {}
# api_client = utils.refresh_tdr_api_client()
# datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
# try:
#     response = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION"]).to_dict()
#     src_schema_dict["tables"] = response["schema"]["tables"]
#     bq_project = response["access_information"]["big_query"]["project_id"]
#     bq_dataset = response["access_information"]["big_query"]["dataset_name"]
# except Exception as e:
#     logging.error("Error retrieving information from TDR. Exiting function. Error: {}".format(e))
#     #return "Failure - Pre-processing"

# ## Parse TDR schema to identify file reference fields
# table_dict = {}
# for table in src_schema_dict["tables"]:
#     if table["name"] in ["file_inventory", "anvil_file"]:
#         continue
#     else:
#         col_list = []
#         for column in table["columns"]:
#             if column["datatype"] == "fileref":
#                 col_list.append([column["name"], column["array_of"]])
#         if col_list:
#             table_dict[table["name"]] = col_list

# ## Loop through tables and re-process impacted records
# for table in table_dict.keys():
#     logging.info(f"Processing updates for {table}.")
#     # Retrieve relevant records from BigQuery
#     col_list = []
#     old_cols = ""
#     new_cols = ""
#     join_clause = ""
#     where_clause = ""
#     for idx, col in enumerate(table_dict[table]):
#         column_name = col[0]
#         col_list.append(column_name)
#         if idx == 0: 
#             old_cols += column_name
#             where_clause += f"t.{column_name} IN (SELECT file_ref FROM file_list)"
#         else:
#             old_cols += ", " + column_name
#             where_clause += f" OR t.{column_name} IN (SELECT file_ref FROM file_list)"
#         new_cols += f", CASE WHEN t{idx}.source_name IS NOT NULL THEN TO_JSON(STRUCT(t{idx}.source_name AS sourcePath, t{idx}.target_path AS targetPath)) END AS {column_name}"
#         join_clause += f" LEFT JOIN load_hist t{idx} ON t.{column_name} = t{idx}.file_id"

#     query = """WITH 
#         file_list AS (SELECT * FROM `{project}.{dataset}.file_inventory` WHERE md5_hash IS NULL),
#         load_hist AS (SELECT * FROM `{project}.{dataset}.datarepo_load_history` WHERE state = 'succeeded')
#         SELECT t.* EXCEPT({old_cols}){new_cols}
#         FROM `{project}.{dataset}.{table}` t {joins} WHERE {where}""".format(project=bq_project, dataset=bq_dataset, table=table, old_cols=old_cols, new_cols=new_cols, joins=join_clause, where=where_clause)
#     try:
#         client = bigquery.Client()
#         res = client.query(query).result()
#         if res.total_rows > 0:
#             logging.info(f"{res.total_rows} records to process.")
#             df = res.to_dataframe()
#             records_json = df.to_json(orient='records')
#             records_list = json.loads(records_json)
#         else:
#             logging.info("No records to process.")
#             records_list = []
#     except Exception as e:
#         logging.error(f"Error retrieving update records from BigQuery: {str(e)}")
#         break
#         #return "Failure - Table Processing"
#     # Ingest updated records back to TDR dataset
#     try:
#         datarepo_row_ids = []
#         for record in records_list:
#             datarepo_row_ids.append(record.pop("datarepo_row_id", None))
#             for col in col_list:
#                 record[col] = json.loads(record[col])
#         if records_list:
#             ingest_updated_records("e0e03e48-5b96-45ec-baa4-8cc1ebf74c61", dataset_id, table, records_list)
#             delete_old_records(dataset_id, table, datarepo_row_ids)
#     except Exception as e:
#         logging.error(f"Error replacing TDR records: {str(e)}")
#         break
#         #return "Failure - Table Processing"

# # ## Re-process file_inventory
# # logging.info(f"Processing updates for file_inventory.")
# # # Retrieve relevant records from BigQuery
# # query = """WITH 
# #     file_list AS (SELECT file_ref FROM `{project}.{dataset}.file_inventory` WHERE md5_hash IS NULL),
# #     load_hist AS (SELECT * FROM `{project}.{dataset}.datarepo_load_history` WHERE state = 'succeeded')
# #     SELECT t1.*, CASE WHEN t2.source_name IS NOT NULL THEN TO_JSON(STRUCT(t2.source_name AS sourcePath, t2.target_path AS targetPath)) END AS file_ref
# #     FROM `{project}.{dataset}.file_inventory` t1
# #       INNER JOIN load_hist t2 ON t1.file_ref = t2.file_id
# #     WHERE file_ref IN (SELECT file_ref FROM file_list)""".format(project=bq_project, dataset=bq_dataset)
# # try:
# #     client = bigquery.Client()
# #     res = client.query(query).result()
# #     if res.total_rows > 0:
# #         logging.info(f"{res.total_rows} records to process.")
# #         df = res.to_dataframe()
# #         records_json = df.to_json(orient='records')
# #         records_list = json.loads(records_json)
# #     else:
# #         logging.info("No records to process.")
# #         records_list = []
# # except Exception as e:
# #     logging.error(f"Error retrieving update records from BigQuery: {str(e)}")
# #     #return "Failure - File Inventory Processing"
# # # Loop through records and update md5_hash from GCS metadata
# # try:
# #     storage_client = storage.Client()
# #     datarepo_row_ids = []
# #     for record in records_list:
# #         bucket = re.match('gs:\/\/([a-z0-9\-]+)', record["uri"]).group(1)
# #         obj = re.match('gs:\/\/[a-z0-9\-]+\/([A-Za-z0-9\-_\/\.]+)', record["uri"]).group(1)
# #         bucket = storage_client.bucket(bucket, user_project="anvil-datastorage")
# #         blob = bucket.get_blob(obj)
# #         record["md5_hash"] = blob.md5_hash
# #         datarepo_row_ids.append(record.pop("datarepo_row_id", None))
# # except Exception as e:
# #     logging.error(f"Error retrieving file metadata from GCS: {str(e)}")
# #     #return "Failure - File Inventory Processing"
# # # Ingest updated records back to TDR dataset
# # try:
# #     if records_list:
# #         ingest_updated_records("e0e03e48-5b96-45ec-baa4-8cc1ebf74c61", dataset_id, "file_inventory", records_list)
# #         delete_old_records(dataset_id, "file_inventory", datarepo_row_ids)         
# # except Exception as e:
# #     logging.error(f"Error replacing TDR records: {str(e)}")
# #     #return "Failure - File Inventory Processing"

# # ## Empty anvil_% tables
# # logging.info("Clearing out existing anvil_% tables")
# # table_list = ["anvil_activity", "anvil_alignmentactivity", "anvil_antibody", "anvil_assayactivity", "anvil_biosample", "anvil_diagnosis", "anvil_donor", "anvil_file", "anvil_sequencingactivity", "anvil_variantcallingactivity"]
# # for table in table_list:
# #     try:
# #         datarepo_row_ids = collect_all_datarepo_rows(dataset_id, table)
# #         if datarepo_row_ids:
# #             delete_old_records(dataset_id, table, datarepo_row_ids)
# #     except Exception as e:
# #         logging.error(f"Error clearing out existing anvil_% records: {str(e)}")
# #         break
# #         #return "Failure - anvil_% Record Deletion"

# # ## Re-run T pipeline without validation
# # params = {}
# # params["ws_name"] = ws_name
# # params["ws_project"] = ws_project
# # params["ws_bucket"] = ws_bucket
# # params["ws_bucket_name"] = ws_bucket_name
# # params["profile_id"] = "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61" 
# # params["mapping_target"] = "anvil"
# # params["skip_transforms"] = False
# # params["transform_list_override"] = [] # Leave empty to run transforms for all files, otherwise populate with target table names 
# # params["skip_schema_extension"] = False
# # params["skip_ingests"] = False
# # params["ingest_list_override"] = [] # Leave empty to run ingests for all files, otherwise populate with target table names
# # params["skip_file_relation_inference"] = False
# # params["skip_dangling_fk_resolution"] = False
# # params["skip_supplementary_file_identification"] = False
# # params["skip_snapshot_creation"] = False
# # params["snapshot_readers_list"] = ["azul-anvil-prod@firecloud.org"] # Include "auth-domain" to add the auth domain(s) as a reader (if one exists)
# # params["skip_data_validation"] = True
# # try:
# #     api_client = utils.refresh_tdr_api_client()
# #     datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
# #     dataset_info = datasets_api.retrieve_dataset(id=dataset_id, include=["SCHEMA", "ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
# #     dataset_name = dataset_info["name"]
# #     phs_id = dataset_info["phs_id"]
# #     consent_name = dataset_info["properties"]["consent_name"]
# #     auth_domains = dataset_info["properties"]["auth_domains"]
# #     src_workspaces = dataset_info["properties"]["source_workspaces"]
# # except:
# #     dataset_name = ""
# #     return "Failure - Dataset Retrieval for T Pipeline"
# # if dataset_name:
# #     params["dataset_id"] = dataset_id
# #     params["dataset_name"] = dataset_name
# #     params["phs_id"] = phs_id
# #     params["consent_name"] = consent_name
# #     params["auth_domains"] = auth_domains
# #     utils.run_t_pipeline(params)

# # Return success message if no failures recorded
# logging.info("Function completed successfully.")
# #return "Success"


In [None]:
# for idx, record in enumerate(records_list):
#     if record["library_2_estimated_library_size"]:
#         print(str(idx) + " - " + str(record["library_2_estimated_library_size"]))

In [None]:
# records_list[50]

# Add new workspace files to the appropriate TDR dataset

## Script to diff file inventories between TDR and source workspaces

In [None]:
#############################################
## Functions
#############################################

def anvil_tdr_file_diff(dataset_id_list, print_queries, write_out_files):

    # Loop through and process datasets
    results = []
    df_detailed_results = pd.DataFrame()
    for dataset_id in dataset_id_list:

        # Retrieve dataset information
        logging.info(f"Processing dataset_id = {dataset_id}...")
        api_client = utils.refresh_tdr_api_client()
        datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
        try:
            logging.info("Retrieving dataset details.")
            dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
            dataset_name = dataset_details["name"]
            bq_project = dataset_details["access_information"]["big_query"]["project_id"]
            bq_dataset = dataset_details["access_information"]["big_query"]["dataset_name"]
            try:
                source_workspace = dataset_details["properties"]["source_workspaces"][0]
            except:
                source_workspace = ""
        except Exception as e:
            error_message = f"Error retrieving dataset details: {str(e)}"
            logging.error(error_message)
            results.append([dataset_id, "", 0, 0, "Error", error_message])

        # Build and execute aggregate query
        logging.info("Building and executing aggregate file diff query.")
        client = bigquery.Client()
        query = f"""WITH files_all_workspaces
                    AS
                    (
                      SELECT file_path
                      FROM `broad-dsde-prod-analytics-dev.anvil_inventory_uscentral1.ref_object_inventory_20240405`
                      WHERE file_path NOT LIKE '%/'
                    ),
                    files_source_workspace
                    AS
                    (
                      SELECT file_path
                      FROM `broad-dsde-prod-analytics-dev.anvil_inventory_uscentral1.ref_object_inventory_20240405`
                      WHERE workspace_name = '{source_workspace}'
                      AND billing_project = 'anvil-datastorage'
                      AND file_path NOT LIKE '%/'
                    ),
                    files_tdr
                    AS
                    (
                      SELECT uri AS file_path
                      FROM `{bq_project}.{bq_dataset}.file_inventory`  
                    )
                    SELECT COUNT(CASE WHEN t0.file_path IS NOT NULL THEN 1 END) AS tdr_files,
                    COUNT(CASE WHEN t0.file_path IS NOT NULL AND t1.file_path IS NULL THEN 1 END) AS tdr_files_not_in_inv,
                    COUNT(CASE WHEN t0.file_path IS NULL AND t2.file_path IS NOT NULL THEN 1 END) AS inv_files_not_in_tdr,
                    COUNT(CASE WHEN t0.file_path IS NULL AND t2.file_path IS NOT NULL AND t2.file_path NOT LIKE '%SubsetHailJointCall%' AND t2.file_path NOT LIKE '%.vds/%' THEN 1 END) AS non_vds_inv_files_not_in_tdr,
                    COUNT(CASE WHEN t2.file_path IS NOT NULL AND t2.file_path LIKE '%SubsetHailJointCall%' AND t2.file_path LIKE '%.vcf%' THEN 1 END) AS joint_call_vcf_files
                    FROM files_tdr t0 
                      LEFT JOIN files_all_workspaces t1 ON t0.file_path = t1.file_path  
                      FULL JOIN files_source_workspace t2 ON t0.file_path = t2.file_path"""
        if print_queries:
            print("Aggregate file diff query:")
            print(query)
        try:
            df = client.query(query).result().to_dataframe()
            tdr_files = df["tdr_files"].values[0]
            tdr_files_not_in_inv = df["tdr_files_not_in_inv"].values[0]
            inv_files_not_in_tdr = df["inv_files_not_in_tdr"].values[0] 
            non_vds_inv_files_not_in_tdr = df["non_vds_inv_files_not_in_tdr"].values[0]
            joint_call_vcf_files = df["joint_call_vcf_files"].values[0]
            results.append([dataset_id, dataset_name, tdr_files, tdr_files_not_in_inv, inv_files_not_in_tdr, non_vds_inv_files_not_in_tdr, joint_call_vcf_files, "Success", ""])
        except Exception as e:
            error_message = f"BigQuery error: {str(e)}"
            logging.error(error_message)
            results.append([dataset_id, dataset_name, 0, 0, 0, 0, 0, "Error", error_message]) 
        
        # Build and execute details query
        if write_out_files:
            logging.info("Building and executing detailed file diff query.")
            client = bigquery.Client()
            query = f"""WITH files_all_workspaces
                        AS
                        (
                          SELECT file_path
                          FROM `broad-dsde-prod-analytics-dev.anvil_inventory_uscentral1.ref_object_inventory_20240405`
                          WHERE file_path NOT LIKE '%/'
                        ),
                        files_source_workspace
                        AS
                        (
                          SELECT file_path
                          FROM `broad-dsde-prod-analytics-dev.anvil_inventory_uscentral1.ref_object_inventory_20240405`
                          WHERE workspace_name = '{source_workspace}'
                          AND billing_project = 'anvil-datastorage'
                          AND file_path NOT LIKE '%/'
                        ),
                        files_tdr
                        AS
                        (
                          SELECT uri AS file_path
                          FROM `{bq_project}.{bq_dataset}.file_inventory`  
                        )
                        SELECT '{dataset_id}' AS dataset_id, 'tdr_files_not_in_inv' AS metric, t0.file_path
                        FROM files_tdr t0 
                          LEFT JOIN files_all_workspaces t1 ON t0.file_path = t1.file_path  
                          FULL JOIN files_source_workspace t2 ON t0.file_path = t2.file_path
                        WHERE t0.file_path IS NOT NULL AND t1.file_path IS NULL
                        UNION ALL
                        SELECT '{dataset_id}' AS dataset_id, 'non_vds_inv_files_not_in_tdr' AS metric, t2.file_path
                        FROM files_tdr t0 
                          LEFT JOIN files_all_workspaces t1 ON t0.file_path = t1.file_path  
                          FULL JOIN files_source_workspace t2 ON t0.file_path = t2.file_path
                        WHERE t0.file_path IS NULL AND t2.file_path IS NOT NULL AND t2.file_path NOT LIKE '%SubsetHailJointCall%' AND t2.file_path NOT LIKE '%.vds/%'"""
            if print_queries:
                print("Detailed file diff query:")
                print(query)
            try:
                df = client.query(query).result().to_dataframe()
                df_detailed_results = pd.concat([df_detailed_results, df])
            except Exception as e:
                error_message = f"BigQuery error: {str(e)}"
                logging.error(error_message)

    # Write out detailed results, if specified
    if write_out_files:
        destination_dir = "ingest_pipeline/resources/file_inventory_diff/details"
        current_datetime_string = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        output_file = f"file_diffs_{current_datetime_string}.tsv"
        logging.info(f"Writing out detailed file diff results out to {ws_bucket}/{destination_dir}/{output_file}")
        df_detailed_results.to_csv(output_file, index=False, sep="\t")
        !gsutil cp $output_file $ws_bucket/$destination_dir/ 2> stdout
        !rm $output_file
    
    # Display results
    print("Aggregate Results:")
    results_df = pd.DataFrame(results, columns = ["dataset_id", "dataset_name", "tdr_files", "tdr_files_not_in_inv", "inv_files_not_in_tdr", "non_vds_inv_files_not_in_tdr", "joint_call_vcf_files", "status", "message"])
    display(results_df)
        
#############################################
## Input Parameters
#############################################

# TDR dataset IDs whose file inventories should be diffed
dataset_id_list = [
    'd48adc59-8934-41bb-9720-63e71f1933be',
    '80baf71d-28d0-4bca-81b7-49ddfadfa7a3',
]

# Set to True to print each generated BigQuery query before it runs
print_queries = False

# Set to True to also run the per-file detail query and copy the TSV to the workspace bucket
write_out_files = False

#############################################
## Execution
#############################################

anvil_tdr_file_diff(
    dataset_id_list=dataset_id_list,
    print_queries=print_queries,
    write_out_files=write_out_files,
)


## Script to identify specific files that haven't been ingested into TDR

In [None]:
#############################################
## Functions
#############################################

def identify_additional_files(dataset_id_list, file_exclusions, output_dir):
    
    # Loop through and process dataset_ids
    logging.info("Starting identify_additional_files function...")
    agg_results = []
    for dataset_id in dataset_id_list:
        result = [dataset_id, "Failure", 0, 0, 0]
        try:
            # Retrieve dataset details
            logging.info(f"Processing dataset_id {dataset_id}...")
            logging.info("Retrieving dataset details.")
            api_client = utils.refresh_tdr_api_client()
            datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
            dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
            try:
                source_workspaces = dataset_details["properties"]["source_workspaces"]
                bq_project = dataset_details["access_information"]["big_query"]["project_id"]
                bq_schema = dataset_details["access_information"]["big_query"]["dataset_name"]
            except Exception as e:
                print("Failure - Issue Retrieving Dataset Info") 
                continue

            # Use source workspace(s) to find workspace bucket(s) to look for new files
            logging.info("Determining source workspace bucket(s).")
            data_files_src_buckets = {}
            for ws in source_workspaces:
                try:
                    ws_attributes = utils.get_workspace_attributes("anvil-datastorage", ws)
                    src_bucket = ws_attributes["bucketName"] if ws_attributes.get("bucketName") else ""
                    if not src_bucket:
                        print("Failure - Issue Retrieving Source Buckets")
                        continue
                    elif src_bucket not in data_files_src_buckets:
                        data_files_src_buckets[src_bucket] = {
                            "include_dirs": [],
                            "exclude_dirs": []
                        }
                except Exception as e:
                    print("Failure - Issue Retrieving Source Buckets")
                    continue

            # Pull existing file inventory from BigQuery
            logging.info("Pulling existing file inventory records.")
            client = bigquery.Client()
            query = """SELECT uri FROM `{project}.{schema}.file_inventory`""".format(project = bq_project, schema = bq_schema)
            file_list = []
            try:
                output = client.query(query).result()
                if output.total_rows > 0:
                    for row in output:
                        file_list.append(row.uri)
            except Exception as e:
                print("Failure - Issue Retrieving Existing File Inventory Records")
                continue

            # Build file inventory from workspace bucket(s)
            logging.info("Building new file inventory.")
            ws_attributes = utils.get_workspace_attributes(ws_project, ws_name)
            params = {}
            params["data_files_src_buckets"] = data_files_src_buckets
            params["google_project"] = ws_attributes["googleProject"]
            params["file_inventory_dir"] = output_dir
            params["global_file_exclusions"] = []
            inventory, retry_count = bfi.build_inventory(params)

            # Diff files to ingest and collect summary stats
            logging.info("Diffing new and existing file inventory records.")
            full_diff_list = []
            exclude_list = []
            include_list = []
            for file in inventory:
                file_excluded = False
                if file["uri"] not in file_list:
                    full_diff_list.append(file)
                    for exclude_term in file_exclusions:
                        if exclude_term in file["uri"]:
                            exclude_list.append(file)
                            file_excluded = True
                            break
                    if not file_excluded:
                        include_list.append(file)
            new_file_cnt = len(full_diff_list)
            new_exclusion_file_cnt = len(exclude_list)
            new_non_exclusion_file_cnt = len(include_list)
            result = [dataset_id, "Success", new_file_cnt, new_exclusion_file_cnt, new_non_exclusion_file_cnt]

            # Record diff files and write out to tsv 
            if len(include_list) > 0:
                logging.info("Writing out inclusion results.")
                df_inventory = pd.DataFrame(include_list)
                destination_dir = "ingest_pipeline/resources/file_inventory_diff/output"
                output_file = f"file_inventory_{dataset_id}.tsv"
                logging.info(f"Writing inclusion results out to {ws_bucket}/{destination_dir}/{output_file}")
                df_inventory.to_csv(output_file, index=False, sep="\t")
                !gsutil cp $output_file $ws_bucket/$destination_dir/ 2> stdout
                !rm $output_file
            else:
                logging.info("No inclusion results to write out.")
        except:
            logging.info("Unspecified error.")
            
        # Write out agg_results
        agg_results.append(result)
        with open("file_diff_out.csv", "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(result)

    # Display results
    logging.info("Function 'identify_additional_files' finished successfully.")
    logging.info("\nResults:")
    df_agg_results = pd.DataFrame(agg_results, columns =["Dataset ID", "Status", "New Files", "New Files (Exclusion List)", "New Files (Non-Exclusion List)"])
    display(df_agg_results)

#############################################
## Input Parameters
#############################################

# List of TDR dataset IDs to analyze for files missing from file_inventory
dataset_id_list = [
    '8da05494-fe7a-4af5-b257-bada143ee426',
    '8e88cabc-e713-44ed-a5d2-41935c3b4eb5',
    'be8cfc23-cd19-46fb-92e1-a77ac380d7aa',
    'f9224ea2-dd31-421d-80d4-f35082ef8d68',
    '487016d8-ea02-4b20-a45f-7382139aa865',
    'eb35085f-0cbf-4829-a3ad-acaa53a250b5',
    '7577f264-8e84-440d-9346-7c4d5affda51',
    'febd8561-4769-4f3b-b7c0-ae7ff6ede2e9',
    'b8c5b185-8669-43d1-8ec7-c0f6d223d505',
    '166746e8-ce26-4fa1-a587-443ca9fc59a1',
    '49a97523-0a7a-4d5a-ae20-496f86de2032',
    '583023a1-aa12-40e2-a964-8ad50ad400ba',
    '73f7d2b4-86ec-4f7e-a1f9-37c7b023e3bf',
]

# Substrings used to classify new files as excluded: a new file whose URI contains any
# of these is counted under "New Files (Exclusion List)" and is not written to the output TSV
file_exclusions = ["SubsetHailJointCall", ".vds/"]

# Directory handed to bfi.build_inventory as its "file_inventory_dir" parameter
output_dir = "ingest_pipeline/resources/file_inventory_diff/output"

#############################################
## Execution
#############################################

# Run the analysis; a per-dataset summary is displayed and appended to file_diff_out.csv
identify_additional_files(dataset_id_list, file_exclusions, output_dir)


## Script to ingest missing workspace files into the appropriate TDR dataset

In [None]:
#############################################
## Functions
#############################################

def ingest_additional_files(dataset_id_list, file_inventory_dir, max_retries=0):
    """Ingest previously identified missing files into each dataset's file_inventory table.

    For every dataset ID the function retrieves the dataset's source workspaces, reads
    file_inventory_<dataset_id>.tsv from gs://<ws_bucket_name>/<file_inventory_dir>/,
    and submits the records as an array-format ingest into the "file_inventory" table.
    Exactly one result row is recorded per dataset and the rows are displayed at the end.

    Args:
        dataset_id_list: TDR dataset IDs to ingest into.
        file_inventory_dir: Directory (relative to the workspace bucket) holding the
            file inventory TSV files.
        max_retries: Number of times a failed ingest is retried (10 seconds apart) before
            the failure is recorded. Defaults to 0, i.e. a failed ingest is not retried.

    Returns:
        None. The per-dataset results are displayed as a DataFrame.
    """

    # Loop through and process datasets
    results = []
    for dataset_id in dataset_id_list:

        # Retrieve dataset details
        logging.info(f"Processing dataset_id {dataset_id}...")
        try:
            logging.info("Retrieving dataset details.")
            api_client = utils.refresh_tdr_api_client()
            datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
            dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION", "PROPERTIES"]).to_dict()
        except Exception as e:
            error_message = f"Issue Retrieving Dataset Info: {e}"
            logging.error(error_message)
            results.append([dataset_id, "Failure", error_message])
            continue

        # The load tag below needs the first source workspace, so stop here if there is none
        try:
            source_workspaces = dataset_details["properties"]["source_workspaces"]
        except (KeyError, TypeError):
            source_workspaces = None
        if not source_workspaces:
            error_message = "No source workspace found on dataset."
            logging.error(error_message)
            results.append([dataset_id, "Failure", error_message])
            continue

        # Read in file inventory
        logging.info("Reading in file inventory, if exists.")
        try: 
            file_inventory_name = f"file_inventory_{dataset_id}.tsv"
            inventory_file_path = "gs://" + ws_bucket_name + "/" + file_inventory_dir + "/" + file_inventory_name
            df_inv = pd.read_csv(inventory_file_path, delimiter = "\t")
            # file_ref is stored as a single-quoted dict string in the TSV; convert it back to a dict
            df_inv["file_ref"] = df_inv.apply(lambda x: json.loads(x["file_ref"].replace("\'", "\"")), axis=1)
            df_inv = df_inv.replace(np.nan, None)
            file_inventory = df_inv.to_dict(orient='records')
            logging.info("File inventory populated successfully.")
        except Exception as e:
            error_message = "File inventory not populated. Unable to populate from file: {}".format(e)
            logging.error(error_message)
            results.append([dataset_id, "Failure", error_message])
            continue

        # Build, submit, and monitor ingest request
        logging.info("Building and submitting ingest request.")
        ingest_request = {
            "table": "file_inventory",
            "profile_id": "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61" ,
            "ignore_unknown_values": True,
            "resolve_existing_files": True,
            "updateStrategy": "replace",
            "format": "array",
            "bulkMode": True,
            "load_tag": f"Ingest for {source_workspaces[0]}",
            "records": file_inventory
        }
        attempt_counter = 0
        while True:
            try:
                # Refresh the client on every attempt before submitting
                api_client = utils.refresh_tdr_api_client()
                datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
                ingest_request_result, job_id = utils.wait_for_tdr_job(datasets_api.ingest_dataset(id=dataset_id, ingest=ingest_request))
                logging.info("Ingest succeeded: {}".format(str(ingest_request_result)[0:1000]))
                results.append([dataset_id, "Success", None])
                break
            except Exception as e:
                logging.error("Error on ingest: {}".format(str(e)))
                attempt_counter += 1
                if attempt_counter <= max_retries:
                    logging.info("Retrying ingest (attempt #{})...".format(str(attempt_counter)))
                    sleep(10)
                else:
                    logging.error("Maximum number of retries exceeded. Logging error.")
                    results.append([dataset_id, "Failure", str(e)])
                    break

    # Display results
    logging.info("\nResults:")
    df_results = pd.DataFrame(results, columns =["Dataset ID", "Status", "Message"])
    display(df_results)

#############################################
## Input Parameters
#############################################

# List of TDR dataset IDs to ingest missing files into
dataset_id_list = [
    '1c2fe11d-b020-4c54-8c71-1ea91623d626',
    '902596ce-714e-49b3-8271-f3dfece52309',
    '18b1a7a4-1724-4e10-95ca-fa35164c4801',
    '63b229b5-e7c8-4fd3-bbc8-ecf344da70d4',
    '352a503b-41eb-4a84-b257-68d70e55337e',
    '737d39b8-2f99-4eac-bcda-a03996e08939',
    'b8c5b185-8669-43d1-8ec7-c0f6d223d505',
    '31e61d00-61cc-46f2-a793-8ea8dfbb0832',
    '9737abab-2d09-4912-b300-f32553bda82c',
    'c56f0a76-2b91-4860-8dff-63c9504bb0e2',
    '732eaae3-b509-4a7a-8961-09d861e55253',
    'b5d7c34a-c383-4fc7-aa4d-b6dc941cd41a',
    'bcfe7f3b-3e63-45de-9e4d-144f9fc63753',
]

# Directory, relative to the workspace bucket, that holds the
# file_inventory_<dataset_id>.tsv files to ingest
file_inventory_dir = "ingest_pipeline/resources/file_inventory_diff/output"


#############################################
## Execution
#############################################

# Run the ingests; a per-dataset status table is displayed at the end
ingest_additional_files(dataset_id_list, file_inventory_dir)


## Script to soft-delete tabular data records for files deleted at the source

In [None]:
#############################################
## Functions
#############################################

# Function to delete rows from a dataset
def delete_datarepo_rows(dataset_id, table_name, datarepo_row_ids):
    """Soft-delete the given datarepo row IDs from one table of a TDR dataset.

    Args:
        dataset_id: ID of the dataset to delete from.
        table_name: Name of the table that holds the rows.
        datarepo_row_ids: Row IDs to delete; an empty or None value is a no-op.

    Returns:
        "Success" when the deletion job completes or there was nothing to delete,
        "Failure" when submitting or waiting on the job raises.
    """
    logging.info("Attempting to delete specified rows from {} for dataset {}".format(table_name, dataset_id))

    # Guard clause: nothing to delete is treated as a success
    if not datarepo_row_ids:
        logging.info("No datarepo_row_ids specified for deletion.")
        return "Success"

    table_spec = {
        "tableName": table_name,
        "jsonArraySpec": {"rowIds": datarepo_row_ids},
    }
    deletion_request = {
        "deleteType": "soft",
        "specType": "jsonArray",
        "tables": [table_spec],
    }

    try:
        tdr_client = utils.refresh_tdr_api_client()
        tdr_datasets = data_repo_client.DatasetsApi(api_client=tdr_client)
        deletion_job = tdr_datasets.apply_dataset_data_deletion(id=dataset_id, data_deletion_request=deletion_request)
        outcome, _job_id = utils.wait_for_tdr_job(deletion_job)
        logging.info("Result: {}".format(outcome))
    except Exception as err:
        logging.info("Error: {}".format(str(err)))
        return "Failure"
    return "Success"

# Function to evaluate and potentially remove deleted files
def remove_deleted_files(dataset_id, file_uri_list):
    """Soft-delete file_inventory rows for the given URIs unless other tables still reference the files.

    The function looks up the file_inventory rows whose uri is in file_uri_list, then checks
    every fileref column of every other table (tables named "file_inventory" or containing
    "anvil_" are skipped) for references to those files. If references are found they are
    displayed and nothing is deleted; otherwise the file_inventory rows are soft-deleted
    via delete_datarepo_rows. Any failure to read dataset details or table records aborts
    the run without deleting anything.

    Args:
        dataset_id: ID of the TDR dataset to update.
        file_uri_list: URIs of the files to remove, matched against file_inventory.uri.

    Returns:
        None.
    """
    logging.info(f"Processing dataset_id = {dataset_id}...")
    if not file_uri_list:
        logging.info("No file URIs specified. Nothing to remove.")
        return

    # Retrieve dataset information
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    fileref_col_dict = {}
    key_col_dict = {}
    try:
        logging.info("Retrieving dataset details.")
        dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION", "PROPERTIES", "SCHEMA"]).to_dict()
        for table_entry in dataset_details["schema"]["tables"]:
            table_name = table_entry["name"]
            if table_name == "file_inventory" or "anvil_" in table_name:
                continue
            columns = table_entry["columns"]
            # The first column of each table is used as its key column when reporting
            if columns:
                key_col_dict[table_name] = columns[0]["name"]
            fileref_list = [column_entry["name"] for column_entry in columns if column_entry["datatype"] == "fileref"]
            if fileref_list:
                fileref_col_dict[table_name] = fileref_list
    except Exception as e:
        # Without the schema the reference check cannot run, so stop before deleting anything
        logging.error(f"Error retrieving dataset details: {str(e)}")
        return

    # Retrieving file_inventory records
    logging.info("Fetching file_inventory records associated with the files to remove.")
    # NOTE(review): URIs are interpolated as-is; a URI containing a single quote would break this filter
    filter_string = "uri in ('" + "', '".join(file_uri_list) + "')"
    try:
        inventory_records = _fetch_filtered_records(datasets_api, dataset_id, "file_inventory", filter_string)
    except RuntimeError as e:
        logging.error(f"{e}. Aborting without deleting any records.")
        return
    bad_row_ids = {record["datarepo_row_id"] for record in inventory_records}
    bad_file_refs = {record["file_ref"] for record in inventory_records if record["file_ref"] is not None}
    if not bad_row_ids:
        logging.info("No file_inventory records found for the specified files. Nothing to delete.")
        return

    # Loop through tables with filerefs and look for bad file references
    bad_records_dict = {}
    if bad_file_refs:
        ref_list_string = "('" + "', '".join(sorted(bad_file_refs)) + "')"
        for table, fileref_cols in fileref_col_dict.items():

            # Build filter string
            logging.info(f"Checking the '{table}' table for bad file references.")
            filter_string = " OR ".join(f"{field} in {ref_list_string}" for field in fileref_cols)

            # Find problematic records and record information
            try:
                table_records = _fetch_filtered_records(datasets_api, dataset_id, table, filter_string)
            except RuntimeError as e:
                # An incomplete reference check must never lead to a deletion
                logging.error(f"{e}. Aborting without deleting any records.")
                return
            bad_records = []
            for result_entry in table_records:
                # NOTE(review): assumes fileref values are scalars (hashable) -- confirm for array columns
                bad_col_list = [field for field in fileref_cols if result_entry[field] in bad_file_refs]
                if bad_col_list:
                    key_val = result_entry[key_col_dict[table]]
                    bad_records.append(f"{key_val}: " + ", ".join(bad_col_list))

            # Record results
            if bad_records:
                bad_records_dict[table] = bad_records

    # If bad records outside of file_inventory are identified, report them out, otherwise delete bad file_inventory records
    if bad_records_dict:
        logging.info("Tabular data records with references to bad files found. Please review the output and correct. Will NOT soft-delete the records from the file_inventory table.")
        tabular_data_results = [[dataset_id, key, key_col_dict[key], val] for key, val in bad_records_dict.items()]
        logging.info("Tabular data records with references to bad files:")
        tabular_results_df = pd.DataFrame(tabular_data_results, columns = ["dataset_id", "table", "key_column", "key_vals_w_affected_cols"])
        display(tabular_results_df)    
    else:
        logging.info("No tabular data records with references to bad files found. Soft-deleting bad records from file_inventory.")
        delete_datarepo_rows(dataset_id, "file_inventory", list(bad_row_ids))


def _fetch_filtered_records(datasets_api, dataset_id, table, filter_string, max_page_size=1000, max_retries=2, retry_delay=10):
    """Page through a dataset table and return every record matching filter_string.

    Each page request is retried up to max_retries times, retry_delay seconds apart.

    Raises:
        RuntimeError: if a page still cannot be retrieved after the retries.
    """
    records = []
    while True:
        payload = {
          "offset": len(records),
          "limit": max_page_size,
          "sort": "datarepo_row_id",
          "direction": "asc",
          "filter": filter_string
        }
        for attempt in range(max_retries + 1):
            try:
                record_results = datasets_api.query_dataset_data_by_id(id=dataset_id, table=table, query_data_request_model=payload).to_dict()
                break
            except Exception as e:
                if attempt < max_retries:
                    sleep(retry_delay)
                else:
                    raise RuntimeError(f"Error retrieving records from table '{table}': {e}") from e
        page = record_results.get("result")
        if not page:
            break
        records.extend(page)
        # Stop once everything the filter matched has been collected
        if len(records) >= record_results["filtered_row_count"]:
            break
    return records
    
#############################################
## Input Parameters
#############################################

# ID of the TDR dataset to update
dataset_id = "4b456e27-e78f-4ced-a6a1-887f2539ddbe"

# List of file URIs to remove from the dataset; these are matched against
# the uri column of the dataset's file_inventory table
file_uri_list = [

]

#############################################
## Execution
#############################################

# Check for remaining references and soft-delete the matching file_inventory records
remove_deleted_files(dataset_id, file_uri_list)


## Script to Examine VDS Dataset Files

In [None]:
def compare_vds_files(bucket):
    """Count a bucket's .vds files and how many of them also exist in the joint-call buckets.

    Files are compared on the object name after the ".vds" marker, the md5 hash and the
    size, using the object metadata inventory table in BigQuery.

    Args:
        bucket: Name of the workspace bucket to examine.

    Returns:
        A tuple (status, total_vds_file_count, vds_file_in_jointcall_ws_count, diff) where
        status is "Success" or "Failure"; on failure the three counts are 0.
    """
    
    # Execute query
    client = bigquery.Client()
    # Raw f-string so the regex backslashes reach BigQuery untouched instead of being
    # treated as (invalid) Python escape sequences
    query = rf"""
            WITH jointcall_files
            AS
            (
              SELECT REGEXP_EXTRACT(name, r'.*\.vds(.+)$') AS object_name, md5Hash, size
              FROM `broad-dsde-prod-analytics-dev.anvil_inventory.object_metadata_26_02_2024__17_14_55` 
              WHERE bucket IN ('fc-secure-9e3357c0-389c-41d7-94ee-56673db6b75f', 'fc-secure-7e69c896-d6c0-4a4e-8490-42cb2d4fdebf')
            ), 
            cohort_ws_files
            AS
            (
              SELECT REGEXP_EXTRACT(name, r'.*\.vds(.+)$') AS object_name, md5Hash, size
              FROM `broad-dsde-prod-analytics-dev.anvil_inventory.object_metadata_26_02_2024__17_14_55` 
              WHERE bucket = '{bucket}'
              AND name LIKE '%.vds%'
            )
            SELECT COUNT(DISTINCT c.object_name) AS total_vds_file_count, 
            COUNT(DISTINCT j.object_name) AS vds_file_in_jointcall_ws_count
            FROM cohort_ws_files c
              LEFT JOIN jointcall_files j
              ON c.object_name = j.object_name 
              AND c.md5Hash = j.md5Hash
              AND c.size = j.size"""
    try:
        df = client.query(query).result().to_dataframe()
        total_vds_file_count = df["total_vds_file_count"].values[0]
        vds_file_in_jointcall_ws_count = df["vds_file_in_jointcall_ws_count"].values[0] 
        diff = total_vds_file_count - vds_file_in_jointcall_ws_count
        return "Success", total_vds_file_count, vds_file_in_jointcall_ws_count, diff
    except Exception as e:
        # Keep the failure row, but leave a trace of what went wrong
        logging.error(f"Error examining VDS files in bucket {bucket}: {e}")
        return "Failure", 0, 0, 0

# Workspace buckets to examine; each one is passed to compare_vds_files below
bucket_list = [
    'fc-secure-f5d884c0-a24c-46e6-8c29-cad7f5b158c7',
    'fc-secure-6513d7e1-2dbb-41a2-baea-3f7fdbcbb620',
    'fc-d3e9eb24-cb19-47d8-b2c6-d85fd34b4ff1',
    'fc-0ed1ef2d-1039-4c8a-a0a9-91c3e385200a',
    'fc-282a8e0b-df88-42de-9059-2b7447d9f9c7',
    'fc-secure-5efb4966-0994-41f8-a911-1d159c9bae1b',
    'fc-2836a560-113a-4239-acab-5cce58019b73',
    'fc-bb71bb7a-fdb1-427a-9e56-eb08b6fd7955',
    'fc-secure-e9b2e26a-3f73-4f5a-862f-c5b3be68703f',
    'fc-e7051891-25c8-4776-80ed-26b1af860277',
    'fc-4f070061-0bc2-4f9a-9fe9-869a739c9817',
    'fc-c1701683-c10e-4f73-a636-f774e8b650c2',
    'fc-secure-ccca1171-d3ee-42b3-8df8-aca336279cf3',
    'fc-2d61b7df-571f-4201-a674-1107c84711df',
    'fc-5d7cf59f-e361-4073-a6ad-16d8d78cc613',
    'fc-secure-70487b95-e89c-45ec-ad0a-e5382d625c33',
    'fc-2b68ae78-57af-4c65-8020-6f5ed4ae9408',
    'fc-secure-33bdfbdb-de58-474e-8591-dad501aa1995',
    'fc-2bcebe36-5d83-486a-947a-bbb5a606701d',
    'fc-secure-1fb85b31-9a1e-46ef-a206-41040d151f94',
    'fc-secure-ead0ff8d-eee9-4299-bb54-8404ffe9fa22',
    'fc-secure-d8de1fe3-972d-480f-a8a8-2bbc251add30',
    'fc-secure-4d47049d-9a31-435d-8c97-61cffce9a83b',
    'fc-secure-31d85e96-7fa0-4c2e-a89a-fe5c70845fd7',
    'fc-secure-68ff7cc9-274c-45d6-baad-75b9c5971a9c',
    'fc-secure-fdaa7a52-520b-461b-a2d2-e31bf92e8e86',
    'fc-secure-9c348df7-4da1-428a-a785-e06db3a9f208',
    'fc-secure-0de89e54-2149-4e06-81f9-da5af48c68a3',
    'fc-secure-986229e0-ac72-420d-bf0e-aa14dea63a05',
    'fc-secure-4931149d-9e71-4865-9f41-3e4c998ffb38',
    'fc-secure-13597242-de35-44e2-b8fb-b5fa0b983501',
    'fc-secure-75f95e44-299f-4666-bed3-46dd679b12d8',
    'fc-secure-240e1629-6d73-42ab-a373-1abeec17824c',
    'fc-secure-94c90c12-376d-419a-96d9-ed37e1b1a5bb',
    'fc-secure-6c21e787-1a4b-4235-b756-9ce6096fc815',
    'fc-secure-51198b17-37ae-44b7-8513-c11c4bfe3a9d',
    'fc-secure-59794551-d924-4ad7-905b-8727646d9aad',
    'fc-secure-8ec82876-176f-4f33-ae98-0a3cae871ed4',
    'fc-secure-8a282388-3c56-48c6-99c8-ea4b52c053b9',
    'fc-secure-3e71d768-9da9-4845-9e2c-7e909db92cb7',
    'fc-secure-0fc5a889-f57e-40a1-9859-c5b1e8a196d1',
    'fc-secure-0f948ad2-2ae8-433c-9f0c-941c4c5e4a89',
    'fc-secure-678eccb8-3463-4a72-8b57-69dfc8c77002',
    'fc-secure-221b863c-a724-42f3-9f90-2081b352799c',
    'fc-secure-21cd882f-8470-4c2e-93dc-536a908bae73',
    'fc-secure-e92b8081-5e6a-440c-af83-4d428f505529',
    'fc-secure-e0034430-99a3-4dde-99d3-a2330cd90f19',
    'fc-secure-315a127f-649d-4928-b4e0-cdca7d898e05',
    'fc-secure-550ffe2e-04fd-4763-b7d2-09f0c59083e4',
    'fc-secure-84e57da6-4df9-45de-9f82-8a550887a7fa',
    'fc-secure-2b4d5d05-d951-4e51-8ece-7e851660f91a',
    'fc-secure-bfe6497b-69a1-4917-8a7b-c9bd36cb4ae4',
    'fc-secure-3b588f92-0298-4ad6-b75d-fa16de8b718d',
    'fc-secure-fe950bf8-0470-4329-b8c9-8a42d0dd619d',
    'fc-secure-59d2af1f-3dc0-407b-b7ab-05cdcfa4da8f',
    'fc-secure-01106611-a0e9-41bb-ac13-27683ab2fc19',
    'fc-secure-7a160245-84eb-4383-80ed-f41c2411e702',
    'fc-secure-32a2f8aa-4f72-43e9-9450-bbf661bde5ef',
    'fc-secure-ce2baa61-748a-4dbc-a929-f256721b59b2',
    'fc-secure-ac202043-c5ef-4fb7-8ccb-62a274c1b8ec',
    'fc-secure-652024de-0ecd-4de3-8360-c8c5bfcafd72',
    'fc-secure-7c845669-3781-4ac0-bb59-1495d68d1d85',
    'fc-secure-330f768f-83c4-4570-ae46-0626b477d2b0',
    'fc-secure-dccff364-c2ff-42df-8c8e-f979a0472c11',
    'fc-secure-17d8dbc9-d1d8-4d5d-8eb7-c1b82bef24d8',
    'fc-secure-674fbd89-9eeb-4e43-8a6f-97d6e50708e0',
    'fc-secure-9f2f0267-2df4-44e9-a6ae-dd1d3a43cca5',
    'fc-secure-6bc832d1-a35b-4676-bf68-a5772e2be044',
    'fc-secure-e4b45d7c-3fee-479f-83e9-8c85312cb8da',
    'fc-secure-516245cb-7dcc-487d-acf7-43e5fb10085f',
    'fc-secure-ab235723-ed31-4242-b5ab-23c177a0e79c',
    'fc-secure-b9906df4-3012-4c7b-a008-3c5708885971',
    'fc-secure-05e511c4-0b47-41a5-a361-99f747cbef6c',
    'fc-secure-adba6cb8-c49c-405b-af7b-9980e4a9d36a',
    'fc-secure-98a7c433-bacc-44fd-96f6-faed04dd1c96',
    'fc-secure-fd756575-ba39-4893-8b85-b6dfbb376f3b',
    'fc-secure-a473f80e-97a6-4c19-bd68-e37266efb44d',
    'fc-secure-04e82709-08e0-4335-aaef-ba55089f6fd9',
    'fc-secure-ed823158-2149-493f-80d0-ff066cb14a85',
    'fc-secure-bcc5d428-aed0-4814-aefc-f717b97d5106',
    'fc-secure-de72ef13-9b7f-44db-9428-5df489d327ce',
    'fc-secure-35c81df8-8bdf-467a-af6f-fb807185b82e',
    'fc-secure-39458ab6-c2d3-49e2-b6d5-8bb3bae9a245',
    'fc-secure-124c02b9-69b7-468c-b3d7-4a07aee74dc5',
    'fc-secure-a065288d-5bb4-441c-95e9-0ffb20a6cf40',
    'fc-secure-ee694ec4-cb3d-441d-95f7-e6d586419484',
    'fc-secure-bd923846-0b8b-4018-8706-44b2a8e213b4',
    'fc-secure-4c07a18a-8c79-4b81-acbe-91083298f1e4',
    'fc-secure-21c6905e-06c8-45f2-b6ed-ffba467f7f75',
    'fc-secure-538d85ea-c436-43f9-b001-4db614ed96bf',
    'fc-secure-d87970dd-adb0-4b99-a204-ae6fbd457d12',
    'fc-secure-5f916770-fded-4540-b4b6-49f88b8e05fc',
    'fc-secure-0aedc988-3736-496c-b7ac-20cca5b3ceb9',
    'fc-secure-d4bead53-0db1-4e25-87da-c02be5819368',
    'fc-secure-86cbdfa9-cbc0-40fb-adfa-3dd467ae1062',
    'fc-secure-d157fd3c-57ff-4640-a084-cecda832e575',
    'fc-secure-08bb70e6-9fa1-40dc-8822-41d73945c053',
    'fc-secure-6a2f53f1-6712-48a9-a7b2-3289b8df877b',
    'fc-secure-55225e12-ec4c-42e0-a5d1-986c87c6d129',
    'fc-secure-89bba08d-ef3b-47bb-9c9b-a937d7550a97',
    'fc-secure-bf34568b-1c38-4c43-8a21-59630b969553',
    'fc-secure-8a297961-e042-4d02-826f-0322b3d7fbff',
    'fc-secure-9befa92f-ef34-4fcf-8df5-d085656e26dd',
    'fc-secure-870d27c3-a758-4535-b8dd-5fc0514c5215',
    'fc-secure-980cd412-6b18-480a-b2f2-ad1543c06a91',
    'fc-secure-16e0c63c-847a-42ef-91ca-3523b3668357',
    'fc-secure-c53831f7-0431-44e5-abe6-308270690c3b',
    'fc-secure-51a26e99-63eb-442a-869d-87ecbc60c814',
    'fc-secure-e5676c90-7028-4b68-b620-c6944514d52c',
    'fc-secure-977aa72f-e9ce-4fb6-b32b-c675b4ef25d5',
    'fc-secure-d7a002ea-7e1e-45fd-8e76-456fce471f17',
    'fc-secure-6537d7f6-f29f-432b-b66e-8cf2204b7920',
    'fc-secure-b2669acd-7139-464f-af53-af7215c068aa',
    'fc-secure-cb3eeabf-f0ef-497e-9bc6-b5a27be4fec2',
    'fc-secure-2180b508-ce9d-4535-aa9f-f07d5917025c',
    'fc-secure-7e0893cd-4f31-41e4-b1d2-3e656097824a',
    'fc-secure-f8b9ce8d-efc0-4aa1-ad71-c0378d8d7194',
    'fc-secure-b2f4e185-a21a-434a-9494-d1fabaaaf7c0',
    'fc-secure-1355eb72-b00f-4796-8892-ac271b699503',
    'fc-secure-68b7e62f-132b-4818-bf64-6c38ec9152ab',
    'fc-secure-dae591de-00ad-478c-9440-88034a1b8cb9',
    'fc-secure-7e9fe869-643a-4828-a1b7-0245e34745ae',
    'fc-secure-228fd6fd-e0f7-4895-a246-3b055be27aa1',
    'fc-secure-e99706c4-48f9-4a69-baf4-70d1c5eaac5c',
    'fc-secure-d2c84e56-8f0d-420a-96a4-942e92009433',
    'fc-secure-589e3f7a-7b24-46cf-aefd-63b05155d826',
    'fc-secure-3fdbe020-6bdb-4668-bcb8-0d0df9d4ba8a',
    'fc-secure-b31156cd-4993-4f69-a8f4-9a99c2697965',
    'fc-secure-73036e74-c8b0-4e6f-9f4f-ca55b599d5d1',
    'fc-secure-3c4843c0-b83f-4ba1-9bba-9c9a599f3ffb',
    'fc-secure-91f9e579-b064-4992-8b00-c789ca48f861',
    'fc-secure-ac588f86-da2d-4a92-9f45-be2aeedd5fac',
    'fc-secure-2fa3df40-c189-41ee-b5ba-484a0b77ef77',
    'fc-secure-00737009-4e0f-454d-bb02-4b70566a0ed2',
    'fc-secure-36dfb67b-d2fc-47a1-a94c-225d72e08afd',
    'fc-secure-55efa443-810c-48c8-90bb-f07beba0e560',
    'fc-secure-43207dac-0905-4fdd-b816-a34bd2ccebdd',
    'fc-secure-fba19c6f-984e-4616-b253-6d9e6ea5cec5',
    'fc-secure-1614d6d2-d053-4de0-9b97-cc4b0762f547',
    'fc-secure-c40af798-8afc-4ab3-9b66-946955811d3b',
    'fc-secure-abc7f058-0260-4e82-a911-abfec3dcb676',
    'fc-secure-29cd113f-7eca-4526-aa52-dde1b8cb41d0',
    'fc-secure-877e6c8c-72ef-46d0-b3f3-37dd175771fe',
    'fc-secure-0eba3dae-89be-4642-8982-9a80a7428cd2',
    'fc-secure-0ca0c5e6-26ca-47ea-b509-ec4eaa058fc6',
    'fc-secure-bee7792c-ef35-478d-a9bb-c8f2054c335c',
    'fc-secure-72a949c5-0b7d-45c9-96c3-ff4d25815ed5',
]
# Compare the VDS files of every bucket and collect one result row per bucket
results = []
for bucket in bucket_list:
    logging.info(f"Examining VDS files in bucket: {bucket}")
    status, total_vds_file_count, vds_file_in_jointcall_ws_count, diff = compare_vds_files(bucket) 
    results.append([bucket, status, total_vds_file_count, vds_file_in_jointcall_ws_count, diff])

# Build the summary once, after the loop, so it also exists when bucket_list is empty
results_df = pd.DataFrame(results, columns = ["bucket", "status", "total_vds_file_count", "vds_file_in_jointcall_ws_count", "diff"])
logging.info("Results:")
display(results_df)

# Identify and resolve records missing part_of_dataset_id

## Script to identify missing values in datasets

In [None]:
#############################################
## Functions
#############################################

def check_dataset_fk_field(dataset_id_list):
    """Report which datasets have rows with a missing part_of_dataset_id.

    For each dataset ID, the BigQuery project and dataset name are looked up
    through the TDR datasets API, then the anvil_donor and anvil_biosample
    tables are each queried for a count of rows whose part_of_dataset_id is
    NULL/empty (donor) or an empty / single-empty-string array (biosample).
    Only SELECT COUNT queries are issued here; nothing is patched.

    Args:
        dataset_id_list: Iterable of dataset ID strings to check.

    Returns:
        None. The outcome is collected into a DataFrame with the columns
        "Dataset ID", "Table", "Status" and "Message" and shown with
        display(). A failed dataset lookup yields a single row with
        Table = "All"; a failed query yields a "Failure" row for that table.
    """

    # Loop through and process dataset IDs
    results = []
    # The BigQuery client is created on first use and then shared by every
    # query, instead of being rebuilt for each table of each dataset.
    client = None
    for dataset_id in dataset_id_list:

        # Retrieve dataset information (API client is refreshed per dataset)
        logging.info(f"Processing dataset_id = {dataset_id}...")
        api_client = utils.refresh_tdr_api_client()
        datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
        try:
            logging.info("Retrieving dataset details.")
            response = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION"]).to_dict()
            bq_project = response["access_information"]["big_query"]["project_id"]
            bq_dataset = response["access_information"]["big_query"]["dataset_name"]
        except Exception as e:
            # Without the BigQuery location neither table can be checked,
            # so record one failure row for the whole dataset and move on.
            error_message = f"Error retrieving dataset details: {str(e)}"
            logging.error(error_message)
            results.append([dataset_id, "All", "Failure", error_message])
            continue

        # Evaluate, table by table, whether any records would need a patch
        for table in ["anvil_donor", "anvil_biosample"]:

            logging.info(f"Checking whether patching is required for the {table} table.")
            # part_of_dataset_id is compared as a scalar string on anvil_donor
            # and as an array on anvil_biosample, hence the two query shapes.
            if table == "anvil_donor":
                query = """SELECT COUNT(*) AS null_cnt FROM `{project}.{dataset}.{src_table}` WHERE part_of_dataset_id IS NULL OR part_of_dataset_id = ''""".format(project=bq_project, dataset=bq_dataset, src_table = table)
            else:
                query = """SELECT COUNT(*) AS null_cnt FROM `{project}.{dataset}.{src_table}` WHERE ARRAY_LENGTH(part_of_dataset_id) = 0 OR (ARRAY_LENGTH(part_of_dataset_id) = 1 AND part_of_dataset_id[0] = '')""".format(project=bq_project, dataset=bq_dataset, src_table = table)

            # Kept outside the try block so a client construction error still
            # propagates to the caller rather than being recorded per table.
            if client is None:
                client = bigquery.Client()
            try:
                df = client.query(query).result().to_dataframe()
                patch_needed = df["null_cnt"].values[0] > 0
            except Exception as e:
                error_message = f"BigQuery error: {str(e)}"
                # Log query failures too, matching the dataset-lookup failure path
                logging.error(error_message)
                results.append([dataset_id, table, "Failure", error_message])
                continue

            # Record the verdict for this table (report only, no patching here)
            if patch_needed:
                results.append([dataset_id, table, "Success", "Patch Needed"])
            else:
                results.append([dataset_id, table, "Success", "No Patch Needed"])

    # Display results
    logging.info("\nResults:")
    df_results = pd.DataFrame(results, columns =["Dataset ID", "Table", "Status", "Message"])
    display(df_results)


#############################################
## Input Parameters
#############################################

# List of dataset IDs to examine for missing part_of_dataset_id values (report only; nothing is patched here)
dataset_id_list = [
    'cefc1a79-446c-40d2-b140-ba8d8b1c0712',
    '4e699ead-bbb5-460d-9b32-2b1b322c601b',
    'a36eeaf7-d6dd-4887-bdbd-e435a07ba156',
    '8de6dae2-55ff-4287-9b75-5b2a950c1f44',
    'ce6692aa-0f97-48fa-8628-b8fa3eab4726',
    '31433635-91d4-431d-8d26-bc54e84c8e8c',
    '0b06619d-39d9-4437-8c42-2e415faa634c',
    '12ffb586-5f6a-4f0a-a353-d2f34599f4cc',
    'e642bca0-52fb-4ab3-ab3a-acaab83deda7',
    'b7fb531e-25a4-427c-9679-b7bdc3d03535',
    '3615e063-f24b-47f7-87cb-430e8aca8d0c',
    'a3ea4f97-6657-4d3c-9be6-96f097f5c952',
    '9f4ac69c-0919-4ac1-98a8-976ed79ace03',
    '96461004-f4b3-4f82-a842-293b3ec46a60',
    '841b7883-9447-4ea0-ae4a-84ea0240d919',
    'd0fc3d6a-c3f4-4533-8a23-817a4e27f9be',
    '2cace5dc-f660-45d4-b689-c4c89e77697c',
    'd2272f2d-c606-4027-b8ea-0bdd6d9d6535',
    'b12fb9be-2ce0-4bfd-8503-732fabba06ab',
    '9f9fc99a-b867-49a9-a3dc-8a39efbd5fa2',
    'ce58654d-b7d3-466b-99ba-b203d527a543',
    '179eb85e-2557-4677-9cba-d763310f3df9',
    'cba804c9-0bdd-4219-a53e-98c8db6334a0',
    'd239dd7b-8d10-4960-aa91-8f8ede641e25',
    '5c6a1c4f-ccd3-48a8-ac00-e18e5ecaa0bb',
    '19e2c8ab-853a-4204-86c3-f591125fbf63',
    '7cf0d3d0-f79b-4bfe-bfc8-e4e6c33dd4c3',
    '3a72e4b8-afb4-4299-98ec-a9ba9606be06',
    'f3c89298-0dd2-40da-8627-3baea553b34a',
    '9a32e23e-840d-4ba3-8cd9-392f48b8e9d2',
    'c5d967fd-09ce-4b02-97dd-ac3abf6f79fa',
    '5069fc2c-b957-4130-adca-6eabae943867',
    '173e56f7-b813-4c41-89ff-09a824e1407f',
    '80312f74-bd56-4938-96ba-e9bed95d1f3b',
    '017445d7-d56e-4e2e-b480-b4879b51e944',
    '13b2076a-cfe1-49ec-ac61-bad1af9a52ea',
    '175dd803-02c7-4823-81d5-9e0621652ace',
    '15492baa-05ed-47bc-b50c-e587679ae51a',
    '33705ce9-b2b3-4edc-9b47-f54283e193cf',
    '92486440-3a46-44dd-b853-b300ef75b31e',
    'dd2cb8fc-42a6-482f-898e-ef6125feccb8',
    '4e99b8e1-40b9-4fb2-90a0-d85e926ef31e',
    '128332b6-5060-4ec4-b6a6-f53b54a810be',
    '06f05f58-3c83-4f5c-bddd-bed7d2d1d147',
    '51e9935f-ec18-4832-801a-6d9186537572',
    'd6291444-8c3c-470c-b28c-7cf1d5c7aad8',
    '41cb9f29-4ba6-4690-821c-cb085e6b0f2f',
    'e68d1d39-99df-4cd7-8053-1b298f03eabb',
    'b252e3ac-4a8c-48e0-9999-5ee0c9a5842d',
    '7ea006d9-1e19-4678-b2e6-d4a1ea327f74',
    '34fd3b22-ac73-47d2-8849-5877158ec072',
    'a08dc7a6-f8ce-4205-95d2-83f614c2c32f',
    '577f36fe-8154-4c82-ac87-b2a64cb68f35',
    '7ce3270e-b2f2-47f4-a288-639751b2f87f',
    '36bdd59f-4f5b-43cd-8d34-a21ef87bbf30',
    '41d12dc1-8718-4439-b409-26cc23573107',
    'c4c49fcd-0c20-4cff-841a-cb58f5689c5b',
    '6b40557c-ddc3-4e7e-8a45-1761e7fcb8b5',
    'd6518df9-fc11-46ed-9c12-b9782d3829a0',
    '9ee2a552-89f8-4a48-9c94-9fa26ebb7483',
    '425412ba-894a-4824-acb8-bf18fe4576e0',
    'f22bd762-5c45-453e-bf22-b174514abb84',
    '0ee62643-b064-42f8-9b09-5d10eacd70a3',
    '1a7f6728-5116-4f24-897a-59a7f322cfd2',
    'c37b388c-7107-43d6-bee6-4e82b40ed271',
    'bf6f1d78-6a0d-4afb-aea6-17a3c34340db',
    'a3becdde-018b-46f0-adea-d587076eef4a',
    'a9ad3a05-24fb-4e59-85b0-ee09e55a4492',
    '719f7581-21db-4aec-8c46-4a5811832710',
    '3725b660-1106-4173-9c4b-0a15926becf5',
    '318a75f4-ac50-4944-81b0-70a1323e7497',
    '75fb0984-2124-444f-881b-30a1a6f8b8f7',
    '15be288e-53e1-41cb-8d20-8ea87efb9258',
    '700303c2-fcef-48a5-9900-096bf34e2d83',
    '38fd20ce-affd-4791-9810-7f5a7fe876d0',
    '8b8185d3-ba5c-4832-af23-3ff8ca6ed016',
    '140797da-dc94-4fc2-8b0b-f2e1dec7bd43',
    'ec97fa0f-e174-40fe-a6b8-ee240bdf4318',
    '5488d7c1-5195-4ebc-b0f0-31033fa06dc9',
    '56f9888f-e623-4a1a-b2b4-46378a6cd6fe',
    'b2e7f15b-65d5-4812-abfd-b2dbc6d18850',
    '69f8d7c2-2e14-48e6-b838-7881016313fb',
    '809fa952-3178-46b7-bb82-8a476ef32e67',
    'dfb14a1f-38b2-4668-b98f-59b5b5b53ca3',
    'f553b765-1c9c-464c-a8fa-07700a1691c5',
    'b108dfd0-711d-4bc1-aab5-1b312226c8ad',
    '38eca26c-d79e-4447-99d3-1889d20ade21',
    'd5a0e24d-689a-4854-92c7-9a39f980b523',
    'ecd0606f-4fa7-4e57-b6e8-eea377e65d5b',
    '0c6bc810-5ae0-4926-a56b-2bc2fe7dbe6d',
    '9dc31133-c882-4f39-903a-a25f316bb560',
    '23a0ede0-4f97-46af-9f04-bd2805050980',
    '7593c1c2-3680-4bf5-8a65-dce5f96a3b59',
    '02661394-2886-4ef7-aff1-d53225c82025',
    'c1644d4e-06e2-4fa8-95f1-5c1da5831257',
    '267cf516-dd33-4640-a71a-78bd8f5db9d8',
    'df06ff22-6a2d-4934-aac9-c8368efbea1a',
    'dcdefb14-f6de-4c46-ac7e-842b273416bf',
    '28208cc6-50bf-4864-9a48-981632066640',
    'ccfe264d-a35e-44f8-9b2b-241a0f8327cc',
    'f177843d-47fb-46ae-83be-73c92ee85081',
    'd049d487-1a69-4358-8dad-0e6fa6c06fdd',
    'da29226b-e856-4014-8c8b-c4268d0df2cc',
    '714dcdbd-8d17-40b0-8246-0e941af8175d',
    '53ce7d12-facc-4412-a710-f535efb209a2',
    'b9842819-5fd6-40c5-9668-aae1ea44a308',
    '7c056125-3ed8-459c-b73e-edfa3f80cc27',
    'ed1215f1-787c-40f5-9d77-4b5bc2dfbb84',
    '7feeb2b1-1926-4968-b6dc-e0a1e4cf8d4a',
    'dbf5d87b-4cab-44cd-a792-1d0218aad973',
    '85dbde76-c130-40b2-8a8a-ba815ba499da',
    '1c2fe11d-b020-4c54-8c71-1ea91623d626',
    '84133066-68cd-41fa-819b-d74a3ac85862',
    '54c6fa73-9b84-4a3b-9e97-e4e43165c48b',
    '90ba1853-f845-4502-ba36-b75b9e571bc5',
    'f3c88c3c-8e1b-4af9-9467-0621404e314c',
    '332bb145-6ef1-40ef-932c-aec5bb6210d9',
    '44f83f20-d618-40b5-b2cb-3676b8fe3ad7',
    '3c2c39a9-4cc2-4f7c-89e0-054a871e2c4e',
    '2ebb722f-a3df-4ea4-b72a-813e3db0bab5',
    '1817528a-4f88-4ed6-8965-9eae0220ab27',
    'e03eb011-05f9-4491-b779-0cc2aefabff1',
    'd4bb7169-5a7d-4090-ba62-12ea799c3ade',
    '3f172982-060d-4339-a09b-6994c2c9eb16',
    '51789659-5233-4ee7-8bca-dedebfc87773',
    'a5fe75bb-d28c-42fb-aaf8-92fa37b266d2',
    '6545d602-e5b4-4dd1-8f6a-64e0a1952ddc',
    'e5c79b74-20d5-4b6f-8085-0bc788eed2ea',
    'f492567d-6db8-45c8-b44e-6b5def26c812',
    '12e54f96-00d9-4a38-921e-e0d42610b2c1',
    '845b131e-7c05-4397-ad40-23dea8e9b399',
    'bd492b71-b20e-4056-b8ae-ad8c94cfbc02',
    '9ecc231f-e3d3-4417-a98a-c4db4c638161',
    '3fb2d04a-d18b-4bdc-9372-99b992f2ae42',
    'e922a496-e686-4fa1-911d-2159ceb0f09f',
    '8fbfea50-6a71-4b19-98e9-f95e3a8594c7',
    'd911e57a-ebb8-4be8-876b-d8e5790ddce3',
    '6c9423a2-3ea7-4c3c-9b12-0cc993bc095f',
    '52e015b5-22b7-4a96-9f0a-ea3afccbfcbc',
    '325f3ee8-2adb-4092-bd78-1b5ea5b0d1d6',
    'ae50ef98-ef3d-4427-b094-83b2d90787a0',
    'e6771964-50e9-482f-9d23-18c22cd89ab8',
    '3fbacc64-4c53-4770-8cdf-a616c10ec5c7',
    'a3ae33bb-8b3a-47e5-a2d1-a49c954776b3',
    '0b0a52bb-a1a2-4638-9259-4447761c2da4',
    '0eb42259-7b44-450f-a9d7-500b2ea7179c',
    'e16adabb-88e0-4739-983a-98ac5c181842',
    '71f94dff-fbe8-4881-af1f-4987b67d5181',
    '5627cdbb-22a0-436f-a7a4-34d7ce21bb45',
    '0e65b131-fd14-4fce-908b-c5b89a71a9c1',
    'd56ae233-d6d2-483c-917e-1de0fe1cfeb7',
    'd00353de-f6f9-42d9-8a8f-f88b3d880dbf',
    '3be57453-9325-4c2e-b73a-832139b61778',
    'fcc60ac9-0d20-4a7c-97e4-e3c8d3aa8f76',
    'dc5f85d8-333b-4b68-b160-ad9856233887',
    '655e6a61-5400-4d8a-95bc-1506e026b289',
    '1f2d14d4-1bd8-46fc-9d35-1a415e5f326a',
    '64fd39fc-b32e-4b0a-8f83-4bf11b197462',
    'a77a2c65-38fe-4bf7-9ea6-0a2dc65eb21f',
    '25248cd8-2e98-4a83-9ccf-af7214fa71d6',
    '158ebecd-4596-4541-b832-a137232b7036',
    '1ccb95c3-1901-428e-b7bb-34495f41f4d2',
    '02ff1051-cd1d-4bbb-a005-21384cbff846',
    '0144b0d3-a809-46df-8c67-7ce42bdd579a',
    '35a1009d-93a2-49b1-a801-fe84d6b7a2f5',
    '50132478-c9fb-4dc5-86cd-d5dfab909393',
    '35064fc1-6c52-4005-8e99-cb0d6afd3f8c',
    '5cf859f6-990c-4b04-8609-35d5c57920f0',
    '62cfdce6-2d4d-415c-a11e-5ab60131c668',
    '2d07dd45-a263-440d-a339-9ccbab93aba8',
    'f1513955-0264-4733-bd25-3f752c61a323',
    '93e712f2-3e54-466e-aa53-57eb69c43bc0',
    '296f653a-91a8-4139-9bab-e6ae13afe99c',
    '633dc1aa-084d-43bd-9b17-bc6e57f81d48',
    '9320b3b5-3944-4bd2-913a-23b72bccd86c',
    '86ab4d3b-86ce-422b-ae6f-1ec6968a874d',
    '4124010f-7308-4831-80d7-ea14343249ab',
    '3037caeb-fa7a-4924-b399-7e4c7173b3b8',
    '146b72bc-1dcc-4e3b-bcda-d3dd25418012',
    '1d575e14-c3b1-4ead-a63b-a21c08c6a14d',
    'c5b1e333-7203-41ce-b8f7-3ef3a3bd721f',
    'bf519ea2-afe1-486a-9954-7362f10b6b60',
    '254ffffc-2bd6-4b2e-905b-a8c54c348cd0',
    'bb65d291-a673-4e4d-8a37-ab1f7401a902',
    '3a9604d7-456a-453d-a46b-40408624a07e',
    '2cbe079d-e7ab-47d8-836e-454a71440297',
    '84fad495-2756-472f-ad20-f91de6f67baf',
    '28e73469-12d4-493b-bf6f-83359c1f69c5',
    'db266afc-2f75-4b03-a3b8-c69e0ce6f713',
    'dd6866e4-8949-45bd-8910-8ce64f79e3c7',
    '12bbfa4c-c30a-4cf6-b79f-45354f842964',
    '84ac0d05-4be5-43e9-973e-ef999144d802',
    'ff8b1212-858a-4048-8f63-9464c922591a',
    'c814d754-cdc5-4b0d-8671-a39e85b2c473',
    '797b2563-5d56-4f5c-bdaf-3bfd11e8f5b3',
    '85287d84-fefe-40df-ad40-5b135ee0c07f',
    '7eeede5a-c86f-4577-9f3c-65ab618a6dee',
    'a52c04ee-cfef-46bb-9b40-6a9b292e1a7b',
    '0194eea9-d779-4957-8521-11717a378e66',
    '0d82658c-44b3-4cea-a388-3353a96a31ef',
    '77dca0d5-4d22-4415-8858-075590d25cb5',
    '1b05159b-6277-4345-9d59-f7bba5ea1d56',
    '92299ff4-c0d0-4e94-b374-75d0038cbd68',
    '60f96582-79ad-4461-9f9a-53c1bc3d17b6',
    'cc107de7-d623-464a-a875-c8b7ae5fb09d',
    'b5d7c34a-c383-4fc7-aa4d-b6dc941cd41a',
    '85baa8f8-619c-4165-9d3e-53220f645814',
    '3f278de3-f201-4344-9639-d35cd7a62adb',
    'c423b18d-12f2-43e4-97f9-993e2943270e',
    '416b8daa-9537-46db-ae7b-3f5ff5f01dc3',
    '61940344-e6c1-484e-ba10-131f43a9b13a',
    'ceee2791-0fdf-45fc-a4e8-8077916771aa',
    '5205f817-7de0-48b4-89fc-6398cf13bff7',
    '1d2f5472-ab6a-4a9b-ba53-520858cf79db',
    'f757278a-3c74-4690-bf89-5149d21ff3af',
    '5a103ab3-29c3-4d07-a0f6-4999c256cf26',
    '2a263db0-8c33-4171-840f-54bf4755a4b9',
    '9828f3fe-f676-4bf1-b600-5effa24ea9c8',
    '28849dc9-a97f-469b-b2ac-a8ff97693f02',
    '29cd0578-fb47-495a-8f48-b37325eed81a',
    'ba503d2e-48af-48bb-910a-be41790d921c',
    '472f01ad-7bc3-4fe5-9771-2695930dbc95',
    '956cd931-7077-4a08-9c75-ab8b4e5d1eb8',
    '31a42df0-29f6-4d4e-ae5c-2e13abc355f2',
    '13364604-ed08-4a61-89cd-65eb372ac8c3',
    'b724164c-712c-4615-97b7-529a108a753a',
    '22199347-9454-41e5-8912-eb38edd33a25',
    '60cadee3-9e63-4897-ac81-4fb283033648',
    'dcd4112f-09d3-43ed-8441-df9bf4c9ddc8',
    'da02c3f1-371b-4afe-9b5e-b8c584fd5907',
    'f6565f2f-4478-45ad-8c11-04dd242fc6a9',
    '275ea204-4612-4d3c-ac0d-f110f61d62ad',
    '72f73fc5-6a3a-43a0-8cce-09f4726b736c',
    'be72f1e3-b5f5-43f4-80db-6d7de93a654e',
    'b6bf4699-6f61-4c6a-9d42-ad055a0de008',
    '8abf299c-cd4e-4ce0-b5cf-4f9abe8cc891',
    '0b6eb077-2eca-4fe6-b012-26fab725b907',
    '5c659e81-e687-4710-a4fd-000ca593155d',
    '42965913-4223-484a-9b3d-abc0002d277d',
    '72e639d1-b8c5-45fd-9acd-a8e5e2b7fa0d',
    'd6823ccd-7247-4efc-8841-f53f456351ed',
    '1048a860-d5ff-4f61-95e5-851e1266d4c1',
    '8681cdcf-b775-4b56-aace-3f3e448261ef',
    'f0db3b27-c952-477d-bc33-9b96a250e168',
    '9e1a6a7d-b45e-4fd2-a1ff-df131da4c713',
    'd596ee91-481c-4eb5-9a8a-88c1e10ba9b6',
    '24470eb6-97c2-4cd4-b484-87a7d634c5b3',
    '3a781e70-cf6e-41c2-8d68-2326f16986e7',
    '048afc84-cdd2-4b39-8ea5-7351f4699761',
    '3fdcdafd-5328-418b-85f8-47b0006de468',
    '032d39fb-d278-427d-b7d2-de648a25a20c',
    '27acea14-41c9-4bf9-ad43-3ebb3ce90456',
    'ec6f49a2-176c-4564-82c5-e751baab46aa',
    '68ea655f-b4a3-43e2-95e4-f158ca2d67dd',
    '0447c960-bbfe-4e42-a95b-dd3d1d9a368e',
    'cc19d19e-6f7e-41b8-87a6-77f41d53e650',
    '16031a34-f1ba-4bde-af43-1822f1516944',
    '483d3454-54da-4243-bbeb-98cbf1d088d0',
    '6765ce2d-ebc8-4367-8855-c0f8e62cb355',
    '1b4a324a-5621-4399-85d2-f91aa03418b5',
    '09642596-d33a-4261-8bf7-eb1dbb37d572',
    '75119ed5-b8aa-4f45-bdef-e3c673bbe44c',
    '04a874df-c57b-40fc-9139-bc3a05129115',
    '2b8ad26a-e66e-4b03-a65a-5b504cecacfd',
    'a7226f10-bdba-4284-97b3-0738a5912770',
    '93b2ac60-2208-4ef8-a1c2-68a623e45807',
    'a963c15d-9c97-49e4-af95-cdee96333a76',
    'fb5d9952-ebe7-4ee6-ba00-819ed00f3593',
    '8da05494-fe7a-4af5-b257-bada143ee426',
    '8b098ab4-df02-4619-8ded-657e496695c1',
    'd48adc59-8934-41bb-9720-63e71f1933be',
    '8e88cabc-e713-44ed-a5d2-41935c3b4eb5',
    'be8cfc23-cd19-46fb-92e1-a77ac380d7aa',
    'f9224ea2-dd31-421d-80d4-f35082ef8d68',
    'e2a398ff-18c3-4258-9d75-89adb2923e88',
    'cb8ebcd0-bb5e-4a6d-bfef-5c651a1a9f6e',
    '6238f8f7-5efb-4023-8d85-ef7db9b4dad7',
    '32c09444-3d4a-44d5-af6b-07eef92189db',
    'd7686f98-05a4-45c9-af2e-3ebc524a5b2d',
    '1939b7ae-fc6b-42a8-ad5f-dc51a1682a17',
    '8ccefc59-38a5-476f-b7d3-3f98315a97f0',
    '2cda53ba-b852-47e8-8f24-59ab8e9f1d1f',
    '6e67e1e1-5c39-43da-960f-48385789c4e1',
    '92382848-f5e9-426c-b7dc-f2841ae97018',
    '4999a410-990e-484b-b4f3-d636f894a741',
    '1f534eb4-701f-4182-9895-64c5e5b52d82',
    'd01a4268-1bfe-4a2d-a2d4-e296162c406e',
    'feca4815-b44b-4b2b-8d77-75edd62ba5a6',
    '039dd3d6-0cb5-4cd1-86b3-e9579c9b5218',
    'd0ce8b95-9c3b-4f9e-8ce0-169fd89a8b20',
    '7427b2eb-a84f-413c-bfb0-7d2e36b0628f',
    '9d796a02-e2aa-4c15-b8d6-1e90cd736681',
    '28c3df75-0b08-4d5c-9feb-6e2e918572ea',
    '433e3a09-661a-46a5-96f2-dbb07bdc87f3',
    'f69c21e9-cb5f-4e72-acfe-c54b672a9f3b',
    '2ef4530a-cc36-4f32-9a1a-63a555346587',
    'e917c83d-c482-442d-81ce-869de7d20903',
    '10774229-1487-4188-b2c3-1fabcf85492a',
    '65793118-3c88-4185-9172-2354850e6056',
    'fcb03f4f-e685-4803-aadb-0e8940ff4f37',
    '46536136-08e4-4521-8e6c-67f023de020d',
    'c2f0e7cf-ac07-48f7-b5f1-497ee6c134b2',
    'b8e7fe18-9c3d-4cc0-bbc7-85b27197fc8f',
    '3abfc362-7e73-4663-9dcf-07b78b9aa2d4',
    'b60b4737-c646-4299-85a0-520890e830b7',
    '280c5d6f-39a3-4d1d-aad2-a174451cd9b2',
    '2d434f2c-6aaa-46b2-ada9-de4b887e13d3',
    'bad1fb5c-d263-48d7-8e4c-fa873a17d707',
    'e4ccd185-2b0c-445d-9c57-0dc45c8f9d7e',
    'd48db47e-acba-4377-b031-f6dfc21f3658',
    '3fd2204c-8654-4af7-832f-c186447262e0',
    'ae34e63e-13af-48b8-8b72-8137289091b3',
    'd3ed2595-b8be-40c8-b7b6-10a4997b9d2e',
    '575dc7da-58ed-407d-9e88-7b586f28bf65',
    '20ddfcd5-d456-431b-9f05-781e05d873d6',
    '15d41c35-943c-474b-afa6-e1c6d6e4be2b',
    '61803dc8-f649-43e5-ab15-d351f2cef629',
    'abe58d43-e1c7-4953-aa41-4d3b6f6cca44',
    '3ef7966a-ec1e-4dba-9d31-cdb33692e78f',
    'fa278604-7d85-4491-a30d-15c7821f8b00',
    'dd6c6688-b73a-464c-86d9-3369fdf98268',
    'b5c0bf91-9d20-41a2-9dd2-87d0ef0310f9',
    '97c636f9-0983-481f-8ff9-7b5b3ee6b10e',
    '15b153f5-ed02-4216-8f96-99743b8b4fc3',
    '747858c0-d139-4f52-9f0e-a618b880d6d6',
    '32bc49c6-7583-4613-a72f-5edb12b2a808',
    '3eb8ea77-4605-4bb7-90f9-671953abe4a2',
    '2b08cb76-061d-44c6-a00f-b43a5421df5e',
    'ab7e390a-adc5-4f9e-b317-a216a2904c93',
    'd1e6d0e4-d49e-4a16-93c6-7956b2c03414',
    '713f8676-8034-4827-bccc-cd6d95b1a4c4',
    'b00883d8-9251-435d-aefc-8a703d96d2fb',
    'eefbea02-0d65-441e-b455-35aa21d25ba3',
    'ff7e3be2-c0ac-4d97-85da-6229bf7585ac',
    '7ac92a42-e112-49c3-a8f5-8ad2c7ef5578',
    '0701aae2-8661-4eec-84e0-7c8be1c89a18',
    '7efb1905-34b4-4f1c-a8a6-8e64b3640a68',
    '11a2b088-8c1c-47d2-9c1e-455d457d2f05',
    '74608bd9-39e4-4f48-9b7c-1cd9d3c599c9',
    '7baf8e8c-de11-452d-b2e1-aad7c08cc18e',
    '23b0219d-0820-4017-b942-bda8578e90e2',
    'd7bcfc5d-e258-4bd6-a413-bb7a118e6bff',
    'a5f631ea-2b4b-43f2-9ea0-e31f2b11fa27',
    '8523489a-f57c-4993-81e4-1ed86a5c092d',
    '395da421-e6e8-4a26-ac93-eb7050a7cb1f',
    '9cb5ce25-38e8-4628-9ddf-d6aedf5efe0f',
    '615f6246-1c39-4e44-a9d4-c7133a2ae62d',
    'a647528d-925e-4c02-8825-ff54720c6ee4',
    '2c6f63b2-439e-499f-b687-b3fdd88a492e',
    '68a916af-2e0c-41bd-8535-c7eacbc2d1b7',
    '0e7f31a0-c712-4ebf-ab3a-64c37f43e52a',
    'c8b1d323-f352-482e-bf17-82075c23dcee',
    'd30f51c7-d642-4e7d-a168-967b9520a80a',
    '8d89608c-0d61-4d71-a2e3-9fbc6cda69bf',
    '71219f56-551f-4ad4-9a38-cc4aaf8a1e9a',
    '48dd6010-77dc-465b-a27c-695e29b57a5e',
    '3376a8b6-7ef6-4191-97ab-a547da0d330d',
    '21384132-1697-4e9b-b863-a6492d13285d',
    '582f5f8d-b96f-490e-b417-ba824baeb06c',
    '7e825ee6-7c03-43cc-b0a4-0d9203a30bd9',
    '2843292e-e494-4642-90e0-57e5c153f12c',
    '4ecbb7c8-0246-47f8-9654-4caca1d52565',
    '7e3ea1bd-95ba-4cad-90c8-3eec95be9cc8',
    'bbba696b-d023-4bb1-a213-c8bee31e8bae',
    '00bd45f9-beb2-4fb0-8680-bd30e392975a',
    'f85e467a-958f-4da5-a01b-8df883e69122',
    '470eee0f-2053-4d9b-9f5e-ca9661a6cc16',
    '5c1dc76d-b703-445c-9b38-cc2d00b9ab16',
    '0b25d09e-b2d9-4452-9810-1d0ef777f9d6',
    '608d793e-a78b-4872-a50c-21a9eaa60ec3',
    '74ede771-6781-4980-bfb9-5d853b7cdd6f',
    '6c47e282-5d5e-445c-b6bd-c0024946fbe0',
    '6ac178b7-a923-407f-8cd8-1733e1b2ebd5',
    'ff8ffbcf-c932-48c7-8d5e-d995d5680e21',
    '9d74b4f0-b2d4-46aa-867a-52fb6102bfdf',
    'e34f15f7-c225-4314-a638-90504bb0aa0d',
    'c1d222ab-bc0e-4e13-8379-0ee5be9e140e',
    '582187a5-ad63-4759-9162-55fa6337eb07',
    'd1e8d19a-970d-4ede-b5bc-9cab7237adec',
    'af867604-d801-41cc-9949-017eb30a0cbf',
    'c9dd3578-01db-4687-9807-4f71368941d1',
    '722e332c-fb1a-45fe-80c7-cc670f025b7f',
    '9f152896-ebf1-4756-b678-bdf739a92256',
    '478aa270-fbd4-4a45-8f63-221b4066168e',
    'e9c7ad29-2213-4648-9164-33a07bd42cdb',
    '1d140c76-a06b-42a0-bae8-b9e169ebe394',
    '5edcc3db-c676-412a-9506-600959bb81f2',
    '9f7dbe05-96b5-4b2f-9f3a-34b552e3dd21',
    'ccc524ab-d9ad-467c-a25b-9a14fb05e976',
    '9e3fb02d-dcf6-486f-a42d-89446a852057',
    '4b341ba9-49a5-43a2-9b7e-cc96beb59946',
    '15ae6390-6f6d-4fd8-9a51-ecf988676c4d',
    '3a3100bb-369e-47c1-a77c-2cacb7cf020d',
    '2c11b505-17c8-402e-8422-0239accb449d',
    'a6d7e030-e6c8-4c62-8cb5-165ef54987c4',
    'e25a8172-1e34-442c-a45d-583027a2d734',
    '0c18589c-6432-4a6c-90ce-985a47a66f39',
    'c911503c-f010-4c17-ac57-1d82e954bdc7',
    '487016d8-ea02-4b20-a45f-7382139aa865',
    '677f0bdf-6c5c-462b-8294-3666f777bbc5',
    '34da5c11-bbe8-4e55-8d89-9ef8a1c66200',
    '9a4d9d5f-72aa-4d7d-90f1-6d1181ee984c',
    'e6b15b39-daba-431f-a918-e4e43e702c30',
    'bef62e8a-5f5c-4e81-a8f8-ddeaf657b4e8',
    '128dce74-fa37-4f2f-8a80-d542edd81a11',
    '841970b7-bed0-4a75-a28a-a4cc59740a84',
    'a5f53fc8-8f9b-4e9a-af63-6f8c54d478b2',
    'f461fca1-80b2-4980-83a8-e165d49acc18',
    '37f0f1f9-83fb-49a1-9941-093c068c32d0',
    'cfb3dad7-c6d9-47c0-81b0-2133d75f5c0d',
    'c5c0893f-b254-4038-8d08-b28ef5a26b5d',
    'bbcf8529-1a04-43fc-b6cf-cb161028159d',
    '06421648-dfcb-4460-b93b-c7d6804dddbb',
    'e0b28b59-1cb5-44f4-ab8f-badf5c74f69f',
    '631deea0-2821-4d14-ad02-dc0ce4864924',
    '95788aa7-c897-4ae8-9166-4b8fc1fc5342',
    'eede320a-ed63-41d8-960d-5405a26a194f',
    '36dccf81-6932-43ae-9864-53379832d878',
    '9102024d-58c0-4bb9-aa55-12c00d98b6cd',
    '01eaf423-8cab-491a-b82e-6915dbc73594',
    '0481a135-9db1-424f-9065-a83ebd7ec995',
    'b60876c5-d825-4303-befb-ffff55b92aba',
    '49022563-1be1-4e42-a11c-01743cd5c94d',
    '64f2dbe1-6f58-493d-ab6b-c93568d828f4',
    'a9626803-72c2-4e23-968c-a090e3f22c5e',
    '095728d6-4ea1-4909-8a74-a8f3fa7f86cb',
    '8309cd89-a912-462a-90ad-f13ae0d7aa6c',
    '902596ce-714e-49b3-8271-f3dfece52309',
    'f5f29e4b-68f7-443c-b290-0827d4167fd5',
    '07c3a7f4-1e59-4dcb-a244-2fd3d084e2b0',
    '6f49717f-8f57-42d0-8548-316ecc292415',
    '7e693091-8ae4-4c40-8e66-c3b39f01b90e',
    '544f643d-b19f-4aa0-a6ec-a90e1a8681d6',
    '5137255f-0c58-4ac7-9266-bda8ab0247c2',
    '5243df74-712d-49a8-989b-528d15088e8f',
    '278a26cb-a710-4fff-928e-fc2e7084a75a',
    '58a1d168-8290-4c69-bf01-17ba3a084365',
    '9fc492f3-8d13-47ae-93e9-812c0224f1aa',
    '822d381e-cea0-45bb-8fa0-1b7194b4b64b',
    '2dc01a50-ea7b-4d9b-be57-1ffbdd98b27b',
    '5b6676dc-f46e-43a8-b87c-e431e369e53c',
    'eb35085f-0cbf-4829-a3ad-acaa53a250b5',
    '36fa2d20-622d-4cca-80b0-683672c94170',
    '44161b51-953d-4f6b-9448-5cba4a44a9d8',
    '0b90b2ea-8ca3-406a-9f69-95eddf7699ef',
    'bfb202bc-4078-4df0-82b9-9218dbc1f1a1',
    '8cc59f51-b0df-4a5d-a3c5-83ee526ff1af',
    'fda7c4b9-9f35-482b-9eff-be7f11058d94',
    '85646f4a-e424-4363-8033-1e7522e8f175',
    '8945794e-174a-49f9-a2d4-4242f9bf3833',
    '529343b4-698a-4b36-ac55-db8a6965ad3f',
    '0f949ee9-0986-42b2-af5d-0f4c8338c664',
    'cb1f06fa-b916-477d-8ab6-fb4b3f24efd3',
    '05253b3c-e8a3-4db4-8a6d-014eac7b3d94',
    '4807db90-b0f7-441d-b489-932f9b341f74',
    'c33b1f32-6021-4d1c-a4d5-fc3d501107f4',
    'aa314675-af62-41df-b5cb-3b22558e903b',
    '20741062-7d1d-44b7-bc33-39c9ad26e414',
    '69ce1be3-1815-43a4-bdd2-4696d9c8d09a',
    '76dd508c-aa80-4e54-9ac4-23b5e0545316',
    '1c6bef41-3cfa-46b2-b183-0a523e417457',
    '18716daf-4223-44a9-bba9-fc9baeef7d07',
    '475430c5-28cb-456d-9c5c-bdbfab9fafb2',
    'f0061cb3-688e-4ad4-aeb8-8614282292ec',
    '6905d8d1-da77-4f7c-86e5-3af7db2b00b4',
    '3a89c170-2939-4c12-9940-f32d96fa9e55',
    '9a06c401-da3f-41b4-b38b-238796fcae09',
    'b32d88c8-31e3-4789-a75f-e52bf1272937',
    '2a81cd6f-aa6e-436b-b4ba-68d5f713fb07',
    '5e0e8f9a-ce97-4b18-9540-3015c61e393c',
    '1c8ba244-1c7f-433a-825b-d2d34d018dcf',
    'efcdb584-7659-4780-9d6d-e6599fb0033c',
    '373ff2e8-0f63-4179-a55c-3fe0b85556aa',
    '352a503b-41eb-4a84-b257-68d70e55337e',
    '737d39b8-2f99-4eac-bcda-a03996e08939',
    '7577f264-8e84-440d-9346-7c4d5affda51',
    'febd8561-4769-4f3b-b7c0-ae7ff6ede2e9',
    'b8c5b185-8669-43d1-8ec7-c0f6d223d505',
    '31e61d00-61cc-46f2-a793-8ea8dfbb0832',
    '2355554e-8951-4b41-bcd8-32e18cddb7c9',
    'de1e7762-673f-4d44-8f45-7e693bb338b7',
    '239a484f-67c2-4ba3-a3d0-d6e4c2b27475',
    'af6c6f09-f0d2-46fe-bda0-c6fa5901c4a3',
    'c6f3bd64-ea67-488f-904f-f0bdf6320b5c',
    '166746e8-ce26-4fa1-a587-443ca9fc59a1',
    '80baf71d-28d0-4bca-81b7-49ddfadfa7a3',
    '6d18aafc-0240-499c-902e-a72a5b98ff0a',
    '263ab7c9-bd69-45dd-abb7-bbf35b9786ed',
    'e0c7877e-75d7-47d8-b5e9-5dd677d03353',
    '49a97523-0a7a-4d5a-ae20-496f86de2032',
    '583023a1-aa12-40e2-a964-8ad50ad400ba',
    'c56f0a76-2b91-4860-8dff-63c9504bb0e2',
    '17d3ffb4-e891-4ac6-a91a-fd52971c1115',
    '5203f051-7e84-4969-b4ce-eda56a859793',
    '488a38ee-f996-482d-a562-a4474f5594de',
    '680d748c-7c60-46e2-aea5-7fc557a916ea',
    '462d992a-7c13-45ac-a6da-1254fc3a9031',
    '4bc5b4eb-da91-48f7-bca0-134ed1a484a0',
    '6fd0f009-3c34-4529-9a38-c59745545490',
    '74d1e549-5ae8-4410-9428-f8f2cc85fa80',
    '868f72af-99e8-406e-9f7e-14577e6c7157',
    '73f7d2b4-86ec-4f7e-a1f9-37c7b023e3bf',
    '595b6755-e7ae-4e83-af2e-693c089aeec3',
    '4d01e12e-503e-4447-8e49-8c2b77ffb00d',
    '732eaae3-b509-4a7a-8961-09d861e55253',
    '279e5670-8a47-4992-bb10-14e6c719db97',
    'd306000b-88c1-4220-8d7e-933c0118a983',
    '703c4bc2-81bf-435a-87fa-21dc9278bad6',
    'ab76b5ca-e464-4063-b949-853f61036370',
    '672b617f-936e-440a-a735-80f94798aed1',
    '516ceb43-1378-4c02-88fc-a1d2a2258d59',
    '033fc1e1-0337-4656-bbe1-3f06fef641e9',
    'bda2bec8-a142-47ab-bfb4-83759ac2bddd',
    'e858d4f9-3385-4640-b0cb-4894e86d501c',
    '39fe0c8b-bd78-4565-9415-63eabc1d6d85',
    'da4e904f-0346-4cd3-a5c2-ba932511d98d',
    'c46c2220-da88-4f60-a0cf-eebfd0a8ff12',
    '629e31cb-dd7b-4345-abf2-fa23c6c65a09',
    'e9a57082-5a93-481a-bbd0-1acb03ac751a',
    '0faf149d-b316-4fbd-8605-a59354f0eacd',
    '1d23d3cc-5db6-4734-bfaa-507dd366d99b',
    'dd58f556-0049-49c3-9a51-d6470a2abddc',
    '53185d06-f2cc-4942-88c4-8534b559a9ff',
    '0132f320-830d-40d0-a4da-06a5d5f9e8d9',
    'bb7d6408-941a-4da6-8613-36498bc6d91b',
    'b8d11ca1-3db8-4efa-bf57-0305e004a26d',
    '9bd56ad6-080e-4d26-acca-83e4df8aa913',
    '00c11c7e-8530-4bfc-abd7-8c10f4c602d3',
    '8f6b9e20-9468-4f46-aa45-eeab9de88e53',
    '272dff18-acf3-4874-a55f-ba8fb6f80352',
    '70ac3659-06bb-4022-be55-af81d3e35b6f',
    '409b92cf-5c4d-4997-9736-ef2ea10d19e9',
    'a8636719-e26c-49b6-9a53-7d77f3d3c94b',
    'a3e81d5f-8dd6-43dd-9172-d80d212efa2d',
    'd40af129-c13f-45b2-92f0-d0e8fa5cc1c9',
    'ecd2d2f9-2b6f-4743-8d04-c9bb554a96cb',
    '9ee78822-7acd-4fab-9999-c58e9fe266ad',
]

#############################################
## Execution
#############################################

# Run the check over every dataset ID in dataset_id_list. The per-table
# outcome is displayed as a table by the function itself (it returns None).
check_dataset_fk_field(dataset_id_list)


## Script to identify missing values in snapshots

In [None]:
#############################################
## Functions
#############################################

def check_snapshot_fk_field(snapshot_id_list):
    """Report which snapshots contain missing `part_of_dataset_id` values.

    For every snapshot ID, the snapshot's BigQuery project and dataset name are
    read from the snapshot's access information, and the `anvil_donor` and
    `anvil_biosample` tables are queried for rows whose `part_of_dataset_id`
    is missing:

    * `anvil_donor`: the value is NULL or an empty string.
    * `anvil_biosample`: the array is empty, or holds a single empty string.

    This function only runs `SELECT COUNT(*)` queries; it does not modify any
    data. One result row is recorded per snapshot/table pair (or a single
    "All" row when the snapshot details cannot be retrieved), and the combined
    results are rendered with `display`.

    Args:
        snapshot_id_list: Iterable of snapshot ID strings to examine.

    Returns:
        None. The results table is displayed rather than returned.
    """
    # WHERE-clause predicate that identifies a missing part_of_dataset_id,
    # keyed by table (the field is queried as a scalar on anvil_donor and as
    # an array on anvil_biosample).
    missing_fk_predicates = {
        "anvil_donor": "part_of_dataset_id IS NULL OR part_of_dataset_id = ''",
        "anvil_biosample": (
            "ARRAY_LENGTH(part_of_dataset_id) = 0 "
            "OR (ARRAY_LENGTH(part_of_dataset_id) = 1 AND part_of_dataset_id[0] = '')"
        ),
    }

    # A single BigQuery client is reused for every query instead of being
    # re-created for each table of each snapshot.
    client = bigquery.Client()

    # Loop through and process snapshot IDs
    results = []
    for snapshot_id in snapshot_id_list:

        # Retrieve snapshot information
        logging.info(f"Processing snapshot_id = {snapshot_id}...")
        # NOTE(review): the API client is re-obtained for every snapshot,
        # presumably to keep credentials fresh on long runs -- confirm.
        api_client = utils.refresh_tdr_api_client()
        snapshots_api = data_repo_client.SnapshotsApi(api_client=api_client)
        try:
            logging.info("Retrieving snapshot details.")
            response = snapshots_api.retrieve_snapshot(id=snapshot_id, include=["ACCESS_INFORMATION"]).to_dict()
            bq_project = response["access_information"]["big_query"]["project_id"]
            bq_dataset = response["access_information"]["big_query"]["dataset_name"]
        except Exception as e:
            # Without the BigQuery location no table can be checked, so record
            # one failure row covering the whole snapshot and move on.
            error_message = f"Error retrieving snapshot details: {str(e)}"
            logging.error(error_message)
            results.append([snapshot_id, "All", "Failure", error_message])
            continue

        # Evaluate, table by table, whether any rows are missing the field
        for table, predicate in missing_fk_predicates.items():
            logging.info(f"Checking whether patching is required for the {table} table.")
            query = f"SELECT COUNT(*) AS null_cnt FROM `{bq_project}.{bq_dataset}.{table}` WHERE {predicate}"
            try:
                df = client.query(query).result().to_dataframe()
                patch_needed = df["null_cnt"].values[0] > 0
            except Exception as e:
                # Log the failure (consistent with the retrieval error path)
                # and continue with the remaining tables.
                error_message = f"BigQuery error: {str(e)}"
                logging.error(error_message)
                results.append([snapshot_id, table, "Failure", error_message])
                continue

            # Record the outcome; this function only reports, it never patches
            message = "Patch Needed" if patch_needed else "No Patch Needed"
            results.append([snapshot_id, table, "Success", message])

    # Display results
    logging.info("\nResults:")
    df_results = pd.DataFrame(results, columns=["Snapshot ID", "Table", "Status", "Message"])
    display(df_results)


#############################################
## Input Parameters
#############################################

# List of snapshot IDs to examine
snapshot_id_list = [
    'c53121e7-5464-4b08-9a79-9c3a435d5b39',
    '2429f987-345e-43f0-aae1-a4752a925066',
    'dd292226-35c4-4416-9fa7-5ee42a9b0441',
    'a37d9def-52ca-488e-9468-8e2e211fb3d5',
    '216ba0eb-9446-4538-af47-dd15c117b56c',
    '51620360-86eb-437f-b3dc-b9abd9eef4cc',
    'd771bc68-4ac4-4ed6-abcc-8269a16c7121',
    '3b5c564e-a310-4cda-b8e6-6b68f41a6f86',
    '32252585-907e-4e7d-ab50-8bc7e5eefcba',
    '8765e5c4-2b1b-4f5b-aa20-e877fb41295d',
    'a8d89992-838e-474e-86b7-b3384ce6d6a6',
    '19c8ccd5-fc2e-4c45-984e-453994dab156',
    '127fcfd9-565f-4d05-a91a-5a508ece85bd',
    'b5b89490-cc7a-44fe-8de2-f0934819c22f',
    '10d94161-9a9a-419f-8f80-b6f6b1f03f66',
    'ca17fec7-109d-4534-b969-5e0246249196',
    'e18cd59b-cb26-426b-ade1-e4342b082a6c',
    '245805dc-d7ab-4a78-bb35-18e1635e6ba5',
    '37c0a1a2-ae3e-4573-a4b4-16c2e3228e09',
    '7609860a-bee5-41c5-b5a2-032ce367f44c',
    '66544f8c-034f-487e-a923-8418eb6c4f94',
    '944c0080-d5e3-4e9a-b418-9b59e8a8dd00',
    '5ff335b6-8bb6-4205-a8ee-e28d75f4ee4c',
    '374120e5-dff0-43dc-ae22-10d4e59e505f',
    '59cb4733-11d6-4a01-adaa-590510b6b1b8',
    'eb67973a-5186-44e9-8777-00e6471a23c7',
    'ccfec783-8b2f-4ef5-9dfc-c71a2de5b966',
    '3013571a-9ecf-4758-9e73-f6877d505d1d',
    '48390abd-821d-4af1-b563-a99b38e260da',
    '936af9f9-0421-4dc4-8646-2d7463200a06',
    '8dbf53cc-4100-4de7-b5ac-7350df65cbef',
    'fbdb8c08-0d94-4db2-9dd1-1f48ed0e72b2',
    '2cc5b3db-286a-4160-a746-e4ea7bc6d4c4',
    '99a0c351-1533-4a50-bb13-4b75923080cc',
    '5fe081df-aede-4283-9ee9-5858ad8d4d85',
    'e2736891-a569-449e-8cbf-b7d0274b64d0',
    '92e0bbb7-3bde-4382-984c-55324e415685',
    'bf6c8799-c680-4dc3-abd0-03589d98cf26',
    'ae61a5ab-7a98-446e-8520-e9198b6a039c',
    '119d4480-b12f-4939-994c-40b249cb3ce4',
    '5a8c4d8a-d0f2-4717-81bc-28263e742cd7',
    '31506ce4-f1bf-44c3-9b19-f23d065dc136',
    'e1e25d3e-cad2-43ef-96c4-e741fdc8c9a6',
    '1624cf29-2dad-4f12-8146-4e8d91ec6c81',
    'c7f980b3-1dd6-4edf-9412-7c72b89ec3c2',
    '40eeef36-5305-4ca5-8f2b-cfe163de02d8',
    'b6baee06-c290-4848-9a4e-73fd765895e3',
    '7d21a199-9b42-488f-b371-eb8c04e913e3',
    'ecd6e2db-58f2-4b6b-961b-977041c29399',
    'b4f7c49a-e0a9-48e2-9981-457e75bda3fd',
    '88276cb9-cfe6-40af-bd85-52e19dcffb8c',
    '5e050276-a987-40c6-8dc2-46124ddc1a64',
    '410561ee-02b0-4dfe-b7ec-58d5b1cca4d1',
    '681d65dd-247d-4a38-a1a3-9c62551985a2',
    'c3fbbbe9-50ce-4723-a555-72f1bbef984d',
    'aa5caf41-21e6-4e84-a046-cdb9b1bd9e62',
    'b25bf30f-7ab1-4eb7-a740-dbb637f1f0e8',
    'acf0ed61-3fa3-4fc8-bee1-edacc169d36c',
    'a20e2c68-19f1-4217-8752-c822d51c6ede',
    'c110840b-4fb7-4ac6-963d-ccac0e443191',
    '6f15ad4d-2c42-4512-9c5b-e29d3dbf60e4',
    '93097bca-33d7-469c-b12e-0a2157c9eb84',
    '4f7c04bb-3ea7-4780-ad57-e7b19fe3d851',
    '995972f7-5477-4414-84cd-4de3565f0c97',
    'f7de2c35-eb9e-4727-afe2-f5756239e64b',
    '414103f1-3cec-4e80-bfd8-046e224c5988',
    '30613a35-0844-4d31-bbe3-94902f45cf96',
    '1db0f331-6207-40b9-89b8-9cbfb4fbdad2',
    '6f1efaa0-0b77-4719-8d15-0d9afc01d91a',
    'e5b2ba63-9978-4671-b939-dc115ca3c665',
    '6789dbc7-abf0-4291-a014-ccbf323e308c',
    '02e352b1-ab59-4cda-a2ba-cf9a960552b2',
    'c25614e5-c350-4aaa-8180-32151e259d87',
    'a72f9ce4-cfcc-4701-ab9d-67df41936da8',
    '602f3f08-3067-4fda-b586-1e114a03151e',
    'e1e59839-9307-4c5f-906b-036eecc6197e',
    'f14783da-5251-4800-abbd-0dbd18b2d306',
    '3a599138-282a-4dec-9a29-40bc0885321e',
    'b32c07bc-d899-4194-ba1d-a29ec70ae0b5',
    '5b4c1063-a1de-46af-a844-ce56800548d8',
    'f105f715-7bd0-4103-9c12-e097f902fc35',
    'ac852b54-1578-4812-bd62-8544617b1c00',
    '495329a6-fc6f-4088-a7d8-afd5d3148bfd',
    '30c2f5b3-80d2-4b96-846e-3774160c0417',
    '16c066f7-d2c1-4920-966c-c545e9d1d114',
    'f68344f4-4aee-4ce5-aa8f-44cd24e934e2',
    '4e711c00-75bb-4b1f-a4ee-ec8e47e2b9af',
    '0dfe1df2-8139-4fba-9afb-bd47d1a2fbf6',
    '7300865c-2f5f-412d-a40c-0eef523a1738',
    '6287a9aa-0556-4ef3-89b4-e9a16da6f71c',
    'f8e05104-0369-49f8-8469-0b49d3de1ebd',
    '9ebfd2a3-aeba-4aa4-a38f-6fbbf794c7af',
    '6babae16-eb39-4fcf-8bcb-5d4896fc2cd3',
    '269300d9-c82c-4fbc-be11-f27cc7a010bf',
    'e1560527-6e20-447c-835c-44b10fa20b79',
    '19ba7e89-3d13-4f19-a7a7-04ad93185b44',
    '02b4a9fc-4e4d-4977-be04-977ea8f88176',
    '492d02fd-2194-4c1d-a888-e665a068b35f',
    '273be8c5-6303-47a6-8b33-57f65cc88840',
    'bea23c53-b3c0-488a-a997-52ab2fe38f01',
    '74bb77d8-e245-4305-8c3f-65385d331fb9',
    '2b2d0eb2-6d1f-4072-ab28-082ec1b054e6',
    'e0d46f3a-5872-4190-ae94-8eddca9d65d0',
    '07d82c74-91a0-4eaa-87e0-a6f055d9a5c6',
    '341f76ba-c06c-4e58-a9d5-7e9f740d621d',
    '80969539-637f-4a71-bdfa-fdaf414cf8b4',
    '9a747c42-9058-4fcf-9fda-7b355e42d7b3',
    'cd3d7010-c63c-467f-b585-abe1b3da4e48',
    'd761e8f3-45e7-4d2d-99b9-462ace937e68',
    'bfadebce-def1-4e3e-97e1-0b768188db02',
    'c138efe7-9400-4ad6-b23d-287d06ab2179',
    '95dc1d22-9bff-4363-bddb-b29c266b4e28',
    '67211908-3193-4d4f-9966-a5de8548b4d0',
    'ff1d59ff-dc35-482a-933a-e9d9a1eb6a20',
    '6c6b260b-0a17-4cfa-ad5a-8cc5a75c2188',
    '363888df-5cb4-4c07-9218-06938d219c2e',
    '835c7254-7b6d-4db2-9f91-c3a5261304af',
    '461d7216-0d2c-4349-90eb-9a8e5db4d3c3',
    '4e682923-5e29-48e4-a3ff-76d86c08cf8d',
    'f8404885-5a20-4c14-a75c-5711262868fb',
    'dbd5c82f-0e81-4d4d-9f29-a34a2404fbbe',
    '6d95d827-c9b5-4296-9b90-15dc646bb00d',
    '3e4ebe7c-b5d4-4239-95da-03da7d8dedd3',
    '3cce8ea2-297c-4097-9c13-c3f1cadad921',
    '9986e29b-1b35-439d-83c9-2120679e1860',
    '12add555-dbc1-45a5-a5c4-d3d9a172759e',
    'd197888e-7be6-4bf0-b33a-0919236481b2',
    '3633eeb2-b317-4d17-9daa-4a5ca479c05b',
    'e28d0ba2-5523-4b40-b34e-0dd80653dd0a',
    'b378d487-7de4-41b1-aca9-050c6e5deef9',
    '57b52802-5caf-4611-aa0a-7371dd11d221',
    '455c8618-fe26-4424-87ae-42b1fbaeb9d9',
    '7a8e14f1-ffec-47d1-ae87-fdbb8267d427',
    '82b5cf36-94e7-408a-a4d3-db797a0ffbe9',
    'ce3b35d7-9805-401c-a36e-d118d8fa4a1e',
    '236ff37d-335c-4561-80f6-4ffa8bd88b2f',
    '0b31081f-1bce-490a-bd0a-b1aa0fd0daf6',
    'b6d3176b-525a-417a-bda0-def9611bf08a',
    '024cbb4f-c989-4ba4-b33d-a53790e3d6d7',
    '76ec3691-30f3-43cd-af8b-e73c80da90b9',
    '5a42e770-ed61-4a5b-b43c-cbf3a4744733',
    '62a4b183-9157-4320-96e6-32f79c561399',
    '1d676097-4e9e-4c5a-8ee1-63d865054897',
    '553ba443-b8cc-4d8e-9743-e384116a1236',
    '3fc2937c-dc08-400f-9458-3779de623bd0',
    'bfab39d1-1a38-4884-a139-be2809378e7b',
    '7f1126cd-3dcb-4b7d-8d32-dfad3c43e9e2',
    'a1df0d3b-4871-4371-8418-58a302719e6e',
    '99a1ace0-aa83-4d9d-9e9c-e9b6b0111ba2',
    '99f9f7aa-535c-44cb-b6f9-df8f479eb80a',
    'b5c0b07a-5653-4912-944f-80069cbf8360',
    '982af1af-7098-4274-ac33-9bb7b78d5880',
    '601f2346-7a1a-4b09-865f-764fb92e60a9',
    '4487c14a-49e0-4185-b3d8-4a40f5fa9721',
    'ee5ca91d-01d8-43a9-a571-16b1390109b1',
    '77590745-7d3d-4499-bb46-9a433d76a1cb',
    '8bbe3019-638a-4497-96b0-fc9fbd48ce6b',
    'a56397ee-005b-411d-9129-eb978f035de7',
    'e10e798c-2262-4a40-9942-c9dcfcc97aca',
    '1674e6c7-f3de-4cea-985d-a8c5b520f8e7',
    '6eda136b-5141-4cc7-a752-a757e06eb179',
    'ea14aac8-3c44-4f09-acdd-34d22a0169a2',
    'ba2414b4-2854-4801-9719-6ee91971aba3',
    '9c3e6a87-ac6b-4f3a-bd86-c2e0ce9051e9',
    'dc9ed67a-62da-48a4-89eb-61d86474659b',
    '8fa1f61c-a63f-432c-a623-dc939316d482',
    '3f5ef236-6b19-4e9f-9fb5-30dc8d9f6be6',
    'ba05015c-d9a5-40ea-9b9f-4ccb608a1233',
    '4017f0b6-5228-442c-82e6-eb449a6d9804',
    '441b79d9-6142-44d9-9aa9-05d4d03bc118',
    '30387b65-4b3c-4f9a-9f15-f57f30ff76ef',
    '0f37bf7e-230a-4482-8342-ed5e7333026d',
    'bd725fc0-94f0-48c4-952c-d4752e950b47',
    '9658265c-56f0-4fa4-bf6e-c64ea225d7a2',
    'f0d50b6b-f225-4989-8aa1-a9e9a441070d',
    'aa6b58c2-6eb3-4b4d-9e73-89cbb323ee26',
    'b0595994-56fc-45ee-bd2b-f497231dd79b',
    'feb70203-2abc-41ab-bfaa-3bf7cfa12606',
    '3fdc6bde-1142-41bf-a16b-76b68051837b',
    '08e33fe7-4757-4737-b0e0-227207f98ff5',
    'd42322cb-d761-4dad-8814-a96555164400',
    'cf30be0a-1a09-4a01-9a8a-35474a92aa71',
    '14e2cc61-7a85-400b-bdfd-729c7950592c',
    'b6f69c83-d069-417b-a6e3-b3fb5fdd21a6',
    '83cfe90a-6c9e-44ca-aae4-16ed7f78554a',
    '3877f3c9-bd2f-4f86-b97b-a5bec85f9f3c',
    'f183e8af-8728-4ad0-bac0-fb68a7eb9bef',
    'ad4ed62b-bf63-4dff-ab94-70a6432c161c',
    '83a1eee3-0395-4916-a62b-a37b24d9ca78',
    'c195ed64-842d-4525-8a1c-9083eccaafa7',
    '84103748-39eb-45f9-b4ca-e23a9e52d0cc',
    '96cfd9ec-5eb4-48f5-9284-84f349701033',
    '8f104c7f-8b5a-489e-95b4-616130405e7c',
    'b300b5ae-6ca3-4350-bc46-345173f6faba',
    'cbc80926-dd3e-4ff8-8d8f-77078f260c7e',
    'a2bb366f-029d-4bef-8da7-dce818743881',
    '7bc891a2-a634-4cf2-b41e-0b1e98fce599',
    'f2f0d8a3-6e18-436a-bc51-ff742d30d6a4',
    '2b2a1f74-9c2e-4c6c-840b-80f466d1e209',
    '9dbac1be-a33c-419c-be92-d1a5452c1292',
    '3442e75a-7452-4680-9ce5-70fa21363083',
    '79509151-b96a-483f-a6b3-eeede54467d1',
    '40d6feec-e6f7-42f1-8e74-a3404e1f9208',
    'c2f86bff-92c2-4c35-a5ec-f284bfc934b9',
    '887dd90c-6742-4283-92b2-bc9ed6bc2ae1',
    '95b4c57b-8e88-45f5-9dbb-e2575f4b2a68',
    'aff867bd-ac09-4dbc-9031-a145fcc36a5c',
    'ec292668-bc78-45f7-b601-8d452b038e6c',
    'c5da9730-1af8-4944-9dc8-273f6c845731',
    '80b8af9b-d54e-447a-9a53-e1b1c12b7e55',
    '432c6422-ceaf-4c6c-bd8d-7c90771a284f',
    'c53cc8ed-7b5f-4c7d-ba7f-c3520856c082',
    '613a8d12-7ac3-4d9d-996a-0f1bd2318b69',
    '991b8415-06bf-4527-9753-0345b32cc4b0',
    'f033bb12-7aef-4b7e-86bb-448b8e9f1c58',
    '8d055301-47e0-4384-9746-8bc1b93d9a96',
    '6f1d6a31-1997-4b59-a311-f84631ebdcbf',
    '138c04b9-bf59-45c9-89d9-630fe606074e',
    '23dbf4be-b4c1-492a-b754-941626d03c53',
    '47cf45d3-8054-4e53-b569-ecb7d47d72b7',
    '4574e2f2-832b-430d-9558-f9ea6088cbe6',
    '2aee2dfa-a819-4beb-b8a0-c07d5d577470',
    '514bbe9f-0ffc-47a6-b25e-fe01fe26b720',
    '79c20af6-5788-47ce-9651-f6a6ae084cbc',
    'a504feea-036f-4627-83c8-4cbb0e42da65',
    'f5626dd5-b0f3-4c59-b7e1-e7fce6488419',
    'a51570ab-b0f7-4f30-bdad-ef25e5a6e9a9',
    '72f7d3a3-8c70-46cb-93eb-f258a5577fd8',
    '88b16321-7f0a-44b1-8131-d4b2188d9839',
    'ed24c069-fde1-443a-8bf2-77ec8b4e86dd',
    '58226893-e7d5-4ea3-9195-d512c70dacf9',
    '12d5b6d6-0942-4759-978f-768c92b9f2dd',
    '1623f347-3bb0-40cd-a9c6-207d0278025f',
    'b9992098-e09c-44f4-b091-b290b12dfc10',
    '5dcb42e6-3702-4764-a3be-2829f704f176',
    '30092c80-3b91-4433-ae6b-8085b0a19a5b',
    'e1e5a2c3-e046-4483-ad39-909980026783',
    '5a8aef0b-e101-4b9e-8cc5-da005295e42a',
    '9345adce-2f83-4c02-8859-72ddccb22069',
    '22331a2a-42d9-47e8-a6a9-ff6fb3e71ee5',
    '15d118c9-4954-41ef-920a-bbce759e5ed0',
    '75f5452d-ceca-402e-bfc4-759c8352f4da',
    'cdcfc6ac-6c9f-4d99-a8c3-4d1e5d171261',
    'cb787d4a-8f56-4e79-a0da-2e4281e30362',
    'ae5f32ce-35fe-498a-944c-b6b647570a93',
    'e1e6f206-0466-41ae-8603-edc210f2d448',
    'bf8681db-72bb-45c2-a9ca-64d418464ae2',
    'bffe9c59-8da4-407c-ab88-f7a394d24476',
    'c4ed93d6-b3f0-4f3f-844f-07d90366b64d',
    '461c1b26-7306-4feb-b141-f83c209baf27',
    '36e807b4-3e10-41fe-a92b-21fa352648e6',
    '2b40f81a-71ca-496f-bada-e18aad990793',
    'a3665f39-21c3-4016-8bbe-e5dd96df6af0',
    '0c90a04d-719f-46cc-bfaa-8d209ff49dcd',
    'a3b32c40-6c50-49c8-8961-24abeffff0d5',
    'd72bad95-9750-4802-8d25-05f5bd943519',
    'f39bb497-1ac2-4017-b8c4-3ca223d57b61',
    '13569ac7-87e5-40d7-8acf-0574a18d1b09',
    'ccb2ab9f-a03c-40ef-a6c9-13fcac52e619',
    '824afdf1-50d9-462f-9f09-db5a1f646bd8',
    'b9314197-1618-4dd7-8441-38dfb1490389',
    '761e172c-f530-4154-b5b6-a1c52b0530e6',
    'e1c34b81-2435-4c12-87d7-3f995cfd4a0a',
    'cab35bdd-4b15-4836-8470-b922d5761602',
    '9e2f0ab6-f964-4aa8-a83c-894d716d55ce',
    'ecef49e8-2fa4-4507-ada8-2c8d9ad39417',
    '218f6eb5-a71f-4e2f-bc6d-ed6df248422a',
    '253e2b36-1674-482b-bfbd-4e0b05cdfe63',
    '3f53e841-ca9d-4b55-b390-590718533561',
    '6b129887-63b8-41a9-aa5e-0ed83755c58f',
    '01cf2450-604b-43e5-9f4e-9ec4e0bf0a61',
    '85b0b351-cd0a-4efe-95a4-e39273c42831',
    '74cb5b41-63ac-45b3-a9cc-18fcbbaccb3b',
    'dd2b61fb-d420-4a38-9cd2-8464f51d7617',
    'c0c0d1fe-9d5d-4b84-911c-e74f34b2edb6',
    'd7b2b2c6-72fd-4084-af34-a86edfe3ac47',
    'd63a63ce-24c8-413a-89c0-4bd4c82370c0',
    'c9d30b32-ae82-475e-a8bc-d88e0c489aee',
    '132e9681-5ee5-4b17-99b9-e444aa3eb658',
    '5925e258-4864-4c0b-9b17-03f25a02ef4c',
    '8aab2cf0-95fb-4b67-8da2-1c0c20be63c8',
    '376ff43a-6a26-4d95-b4c3-72c63d387349',
    '7075a479-b7a4-44fd-a140-8431e438b193',
    'feb8ba77-9077-448a-b8fd-0e1549ee70e4',
    '6bcb6897-c907-46aa-95d0-bfa417e27e45',
    '8d07bc3b-e043-49d4-b802-9f174fb98e77',
    'a9df5cdd-c041-4e74-9ee6-85d744b8fc6b',
    'a0b58558-82ff-4de2-835f-5f0af4ca29ec',
    '7d08223e-7dcf-4eac-9c67-9e4a619ff783',
    '3aec0ff3-cd28-4aa9-a47a-d9ab047285c9',
    '0c40923c-e4eb-45b8-9183-137e24eebc35',
    'fe3b687b-0462-4a02-941b-92ac61a65bb4',
    '3a15ecc8-d81d-4b11-9163-d94d63657000',
    '84ecc954-e50d-418e-aa92-94a933dc4bb0',
    'a7d82508-b20b-4190-96e6-210cd63bfbbf',
    '562943e3-55be-4df4-8c8b-8eade170f5f3',
    '660e4508-8f0d-4325-bace-40a19b2d53ea',
    'f4db72e3-3320-4bcd-8dc4-b3d864992fcc',
    '48f8e9c7-c4b6-40cc-88cb-67e999fd4947',
    '32077f3d-92ca-4d47-a6d5-2e06f1c3036f',
    '77863e88-84f0-46d8-ab38-ed8fca7d3a4e',
    '7c2974a1-7dfe-477d-bc2e-20c5a6a08643',
    '84ad1513-ac8f-45ca-98f8-e239a2eb09f8',
    '80a0e698-3d7e-43ce-a80a-2f7571d027ff',
    '6a242848-a716-4de9-ab38-3c82983810a8',
    'c48c956a-1ede-4c6e-805d-46754dc58126',
    'b116216f-8ee1-4058-b40e-0b33b0928107',
    'aeee7408-eb4e-42a9-956f-bb61759f2f55',
    'a209c033-3364-4867-8fab-36ad15c77185',
    '27e7471a-2187-4209-b912-05cb7913cb82',
    'b0d76691-39f9-4fea-99e5-b6af5ab8b51a',
    '63b4bfba-706d-422c-8764-6cf6872ea616',
    '5157b8d6-89f5-42dc-8504-ebe5d4cda170',
    'f3e644af-d04a-4bf1-8dc2-f2932e98ac89',
    '775b8b5f-9e9c-477f-a97d-a4307343b28a',
    'd85c9132-dc6d-4035-914d-58147f210411',
    '63e21508-bc22-4bb1-8eac-4712d234c282',
    '1b66db3a-1ef6-4f05-b8a3-b0765fa9407d',
    '1fda8281-dc94-4a01-b492-0cf385b51f26',
    '7fffbc86-4ac1-4fb4-8e47-13b83706a6bd',
    '61620208-9698-4605-9f9a-3d6e734f2fb4',
    'ab1a4ab5-b26c-433e-8c79-418c80cddff0',
    'b24fe2ae-a0a2-4ac7-ad2a-f810a0b88a9b',
    'bb70de9b-a893-476b-b698-b1ee228831ee',
    '20ed15ad-6c34-4858-b73e-8fcf5119c70c',
    '7c10751a-3cb4-4a42-9c7f-44ed3a524157',
    'eaf0c529-923d-4977-88dc-c6c7022c560a',
    'ad6fafdf-91f1-4865-9a20-c8c749bef3f2',
    '45f66040-a698-492b-b59f-3047cc23394b',
    'c5e72ae7-4cb0-4455-9c5f-36d1dd2f3fba',
    '8b23004c-3207-4eb9-a0e9-cbbb3f8a807d',
    'eee54af2-6806-4a40-b76d-f4e222dccd8a',
    '7433f821-ef51-4bcd-b2a8-e0b7fc96eaa1',
    '4d0f0ef9-b6c5-4fcf-9fe8-60730ceb3cea',
    '10e9ac52-d181-4ceb-a1a3-550dd2b5bb8b',
    '50516407-200c-4eca-9412-3606417f8f23',
    'e0c6dfc2-6f4b-4ee8-96cf-aaaf7344d077',
    '0b685347-bb00-4713-8d71-e140c2be626a',
    'f376d6e2-1f38-484f-901f-afa80a261f07',
    '03f91de4-db9a-4d90-81c9-010f197e382a',
    'fe54953d-b1e0-401f-b9ff-91f1125f8fbf',
    'ad3dcca6-30bc-4f4a-ac73-af7552040611',
    '189c67e2-afd6-4087-bdd7-f2960b03f021',
    '50992782-4372-43bc-8b49-8b58e0a65e47',
    'a0ff171a-5825-40df-853b-3a969f3d7700',
    'aacb6053-ea9c-44e8-bdb9-0b86b68161fa',
    '4f9520a4-19b4-4860-83f6-f1dee915abc2',
    '39d5e826-b3e6-4fd2-8c74-56a3b68f92fa',
    'd7c8a22b-8b66-4825-b3bb-ff58fc00d294',
    '81680a18-cfe4-49a5-9545-2ad4d78e217d',
    'b5b8a461-912e-4a82-80b0-48a08fc4cc21',
    'a89808ff-de6b-496c-8204-2cd120f8a40a',
    '3096ba31-3ff4-4e2c-8bfb-c2940c3900af',
    '9786fd6d-3b6e-42f7-a889-cd9d51fe0670',
    '0ab0e90b-55d6-487e-b3e0-0e9694faeaf8',
    '1bccd783-15a8-4191-b08f-b8a5556bfd52',
    'fb356cb8-55b9-4e14-94c4-7bff0cc8a08d',
    '3380d1a3-7202-41bb-9d08-0958a712225f',
    'ec365580-d587-4f7e-92ef-6e3dc77eb2ed',
    '9716da3d-4523-492d-8d1a-b91009b66713',
    'c4c8a9a9-97bd-41a4-9070-9fff6e9712e8',
    '79df9b36-7fb1-4eb6-8a19-2c4859a50b41',
    '622d78ed-c06e-48ed-ba23-acb9d9e9fb71',
    '6a29d4f2-66ec-4b1e-bd72-d6af0d53247b',
    '45cdfb71-f149-4910-89cb-446111ba741f',
    '76517186-09b3-4389-b7e6-faacf466d5a2',
    'f0c0e5de-05a9-45b0-85cc-64b71d5d983a',
    '4ca93f73-cf6c-47b9-854c-8465e79c7fc8',
    'cb5807f7-2c8c-49cc-9b42-7a4b93d6cd0c',
    '909e2c8a-8a58-4dc2-862c-7f90164bf4eb',
    '00311bcc-e88b-4172-b1bc-4c2bf1a2eca6',
    '8efb7fd2-1395-40f6-83fa-4ed459a3370e',
    '5fc28947-e4eb-4e06-87f4-179712351a0d',
    '9fafd3ab-cc80-495d-bbef-8a360df839b6',
    'da67c6ce-2b68-4302-a556-f895dc2669c3',
    'd008b876-11a2-4788-a032-5e9a3aed0635',
    '197f4b5b-3bd9-4452-8f5f-cecf79cceb46',
    '60bd03ac-7bc6-4c91-b5ee-d8b81d378531',
    '6b77df14-4a2c-4325-9d75-9741080c4c90',
    'efaea9d6-0c9c-41aa-b739-600a431e8f58',
    'b07711f6-f0a8-46ff-a574-53ca8b1c407f',
    'd5155531-bb05-4e63-88dd-a00cfba70144',
    '9412de7c-e015-4944-bff4-a6ab7dfc26b5',
    '4f01acb5-f851-4af2-976a-fbb6f2e22883',
    'e9546f48-85b9-45d0-b734-5f9b78510b09',
    '73f0ac44-748b-4b94-9e53-5b5eb3507ffa',
    '188f3bd7-e9af-4f17-b4ab-5d77c297dcb1',
    'b8a6edf5-3636-47ab-94f6-d948f2d14571',
    '5107a92c-8fea-4589-a367-1a66b16e440e',
    '7b00e1cf-e811-40af-9f01-2b6682b1c44d',
    'ff8029a8-9ed8-42a9-8cc9-1970faec129d',
    'e0c37053-3e7b-492e-ab61-4dc0689482d4',
    '4d6f1e45-d7f3-4522-a3ae-6fb3056bb237',
    'bab31567-c06d-4fc5-a95f-7146c890ea42',
    '10bda5a6-720e-44e4-9fea-f3dc7fcb430e',
    'f73959f2-8d1c-4998-bae4-85551e2ca445',
    'db1d3440-5cf3-4e67-b232-2aa8b7237eff',
    '9c55b651-7379-4221-afe8-f1fe0d8c11b1',
    '6fdea8c7-69d9-466e-9fa2-aca30722ff68',
    '6a92c922-83b3-4acb-9b9d-36eeea3a7f0b',
    '45254a52-1157-43d6-a218-e6965bf2f6d2',
    'e66e025f-e07c-4f0d-93ed-3ac609b570d5',
    'e5677706-5065-46d4-a519-d358c0b267c1',
    '8a75c087-e043-4364-adb3-9cb6d58fc3b4',
    '6a477149-a7f0-4758-8570-b288a8314fbd',
    '07b0243c-48fc-4eee-a338-c7571cc2df1a',
    'e04d1bfd-39ee-4e2d-bbc6-04c6398a6410',
    '457179a0-4bec-42c7-a4ee-a73116d16c31',
    'a13d48e4-be63-415d-9a88-6df2d8ce0c9a',
    'c930f337-5a82-48e4-b403-9615f0e4f951',
    'bbe983b9-6c26-460a-951b-28f048d148a7',
    '8a64c8f5-aa30-4cfd-aa62-d7cb4ba2b41b',
    '94f79040-68f5-4801-bf41-6f29bc0be8c6',
    '965c7172-46ff-4f12-8a8e-a17bf5bd4780',
    '5d6cc84c-f03a-485a-8f90-1b44c1fa55c9',
    '9656570b-c0c8-41af-a759-6ac3786e498c',
    'f3436925-4cd6-4409-bd6a-9f0714ff6f86',
    '47336244-8514-4842-b3fc-f9500dd12cd8',
    '0cbcaf9e-0d62-48c6-a0d8-69b9d78b50ed',
    '9272d799-00d2-48f9-b3fc-48c73b85a4f9',
    '62af84d8-f0f3-4b4a-b3a0-71262132b9e6',
    '2a1375fc-a976-4327-829f-d0d0f6155cc5',
    'bceb34d5-a22c-460c-94c1-14e3c14ad467',
    '190e0383-54b2-491e-8f5a-81167f6e1770',
    '6bcd4128-b24d-4448-8717-eb7519364147',
    '00002787-81de-47e4-a7a9-7da09bc95592',
    '9868d80a-ef8b-4dc7-bfeb-0c9dc488739b',
    'c98486d7-107c-49b7-a5d1-d36da16b3f66',
    '470dc943-2c98-4466-a6ab-0c134a4189fb',
    '6c50e533-41ab-4c09-9845-fe006b40ac3d',
    '57eb0f97-fc7b-49c3-821e-11c763ee6a94',
    '2302effc-1f0d-4618-a360-543e1892a549',
    'c13b2c4e-e5da-4384-a479-803dbbf3acc5',
    '533ba93b-506e-4547-9174-037a6b17835d',
    '768fd1b9-8785-44d3-bff3-353657ef1174',
    '310d443d-dd36-4884-843d-6d93596034a6',
    'd9f8c794-3d10-4150-8c82-31f6542777fb',
    '943f6d78-c341-4c95-8cf1-497293fa5d02',
    '918bd1b7-11e8-400d-8906-fabf3b30b7f9',
    '82703bd5-1ee1-4c9a-920c-824437b91dbf',
    'e36630d4-e842-4fcd-9ef0-b763df1ae0c1',
    '8f341ba5-a191-4899-b62e-68c50434b43a',
    'b35c33d8-19ce-4454-8a49-c5d403b3852f',
    '2e0c00b1-c743-48af-93a4-0599efb714f2',
    'fc24235e-8c3e-448b-88c1-ad607f1dea52',
    '67d9333e-874a-45ad-b550-d1489263a23c',
    '37e96cd2-487b-4969-a4f7-ea0fed69505a',
    'dddf6096-97fa-4387-9b2e-9ccc1f447eeb',
    'df6894ed-1854-49ae-9097-c3e5527b9174',
    '41aa42bf-c3ba-4ff4-8ec4-f4519edeeea4',
    '224b66ae-5714-40d3-ba64-14caca2ef232',
    '56078c29-a393-4c60-9e04-3674e02fe729',
    'cf26bb77-6a92-4e0d-9da6-b1f92087137a',
    'a35fc432-b9ba-4633-bef7-4e317ff34df5',
    'ab71d294-4ba9-44d4-8051-913b3d5ccff3',
    'c9e51094-9991-4946-b6a8-6cd19c399173',
    'ac0cdd08-47f4-4776-bd70-8bb512c6563e',
    'b9134038-96b5-434b-8456-963caac4c6db',
    'd3047f14-4202-4986-9daa-673a578eebcc',
    '57457a3b-ed1e-4d48-8585-ef8a4b053c64',
    '0b3dd699-d4d3-4295-8ec2-502e6c41d8d7',
    '2b781259-3ff4-44fa-bba1-e2b674548e6c',
    '8fda9c6e-3e5d-474a-99d2-07eeae12f768',
    '651049a2-2950-4be3-b755-6d133233a010',
    '16f19aec-a9df-4ca8-84fb-b892e9a40ea8',
    'ca358d94-47cb-4aa1-8565-9d4280f286fb',
    '6c69e870-8def-432d-8fbe-dc0da610635e',
    '53fd76c8-6745-414e-adbe-62ff72011fc5',
    'f6db6471-03c5-44b6-a463-4976d8fc6350',
    'a6c392e5-cbef-469d-a151-4f54c73b5fb3',
    '6441b9e0-ca7b-4ab4-b7e7-9c7c7041ebaa',
    'ab9e5f9a-a829-4a63-bb72-d3cdb2d02ecf',
    '2fdba9a4-6593-439a-a7fc-c3a5825c26cd',
    'ae0c27d6-c8e3-4dd5-abf5-06e5f39fc4a0',
    '18a28450-31ec-4e4a-a305-dbbdd226ae3c',
    'f7d225d9-1675-483d-a1eb-9ef750301cd4',
    'b13297c4-bb9a-4222-b069-9efcdc9d7ac3',
    '49e0dc54-7254-43c5-849e-7b1434638f73',
    'c753046a-cf9b-4813-be68-cb3b9dd9866e',
    '19cf6f7c-07ce-410a-9332-ebb4e3237a70',
    'cb5a6268-c0c8-433d-b62c-7beeeb0a6a92',
    'ad18153b-870c-491a-9d4e-df30d902a03f',
    '76ee61e9-ca73-4f0b-8c7d-26f7d0a0a383',
    '197bda16-0ad5-4085-bba8-11bdc038efc6',
    'd092a983-18a0-4e5f-8d56-d374a6bb0b90',
    '100ce437-e8b4-42ec-a5e7-49b5318f3adf',
    '15728ae8-aa66-49cf-bffb-300c50f9c88e',
    '32b50544-21d8-4a8c-af88-7a4f134001f0',
    '98527f0d-e774-48f7-bc0c-409a07f2f540',
    '1985e363-b6da-47ec-8c92-dabcd587e6b6',
    '389f685b-f727-41ad-adf5-c72365223ab6',
    '031a89ca-ed61-407b-91f6-a07092b48214',
    'bcc52739-926b-4a37-ac9e-23d60400770f',
    '79a135da-2f10-4dc7-8424-f49dad0cf24c',
    '1c4ff086-8435-4572-a6af-898b73852711',
    '0a0e5e3d-042f-4e3a-8dcd-c5ad0a7209b3',
    '162ab47c-4d0d-478f-ab46-976f73b77359',
    '37374406-0c9c-4b94-bdb5-d4f9daf3b335',
    '09027102-ca0f-44d1-94cd-3ebd6af379ec',
    '808e4748-b080-4989-89ba-003a2b8b76bf',
    '0f6db24a-05d6-46fc-9ff6-795e29d10ca5',
    'f4accfc6-d9e4-49b1-a590-6a580b4d305f',
    'f7867764-967a-4c61-a680-3c5741340bf3',
    'f885a740-5559-45ec-a05f-5f43fc6d2cd7',
    'c4990f27-a5d9-4fe0-9ab6-d579d358699d',
    '9efd748c-ad09-4765-b645-1b6ef6b5d402',
    'c48b26c3-a7aa-45ad-829c-1967ddd41be2',
    '4f51b794-aaf3-4553-9d5c-509dc1e9e8f4',
    '79502d0c-bc1c-4d51-a6de-eb0334b3b660',
    'e556da44-6d46-4cf2-8dfd-07fa62ff3bc4',
    '0565b2e4-ade1-46e7-80bf-ca647a89a8b8',
    'ceb17697-5bd1-4ada-8201-cf875be1b8dd',
    '09eb47e8-1683-485b-84ae-9cef53ca6981',
    '0c742f04-7723-49b4-8b5b-290856e508c3',
    'dfbe2ba6-7f48-4309-989b-0c65e5cb2788',
    '0af0d35e-1f9a-464d-80fe-474b5dbbd914',
    '8902fe1d-a7a9-4046-9a35-244475e113fd',
    '00d1d1e0-4b46-4af1-ba91-4f15f23d55cb',
    '66945b11-520c-4a1e-b76c-e09e36cb7a02',
    '66b2f4d2-ecea-4eee-9868-8e8c41d76efa',
    'ea08adf0-2383-41ae-a91a-88c7b8f6f42b',
    'f90f565e-0ade-4750-a308-5c8e1677b43d',
    '194c4b14-cb6a-469c-83db-d37f7ec65f29',
    '33c73ae8-f829-438d-bdb1-da0be8f3773f',
    '3d6afb8e-dbcd-4972-8281-ae546b23356c',
    '4019997c-8d8e-4e21-8caa-26458c743b24',
    '08f28ada-3fa1-41f3-a7eb-5b4ff8325145',
    '8d157b6b-8f13-4bbf-9b88-e1fbf6844749',
    'c24bf8dd-fa9c-4e4e-98ed-83a1713f3276',
    '6b21e796-e4bd-410c-990f-31698edd7275',
    '18651608-70f6-4725-8084-aa51833367a9',
    'ad73394e-f797-4a85-aaff-b69a9a1700e0',
    '8d68226a-47c7-4c25-a1a3-95dca2b6cc1a',
    'ca3fb362-a24e-4c79-a84c-b61f60542a38',
    '7cd8067f-67b2-4934-9d07-4da82109f9e4',
]

#############################################
## Execution
#############################################

# Read-only check: runs COUNT queries against each listed snapshot and displays
# a results table showing which tables have missing part_of_dataset_id values.
check_snapshot_fk_field(snapshot_id_list)


## Script to patch dataset

In [None]:
#############################################
## Functions
#############################################

def check_and_patch_dataset_fk_field(dataset_id_list, profile_id="e0e03e48-5b96-45ec-baa4-8cc1ebf74c61"):
    """Check `part_of_dataset_id` in each dataset's donor/biosample tables and patch it where empty.

    For every dataset ID, the `anvil_donor` and `anvil_biosample` BigQuery tables are
    checked for records whose `part_of_dataset_id` is NULL (donor) or a zero-length
    array (biosample). When such records exist, the whole table is re-exported with
    `part_of_dataset_id` filled from the maximum value found in that table, written
    as newline-delimited JSON, copied to the workspace bucket with `gsutil`, and
    re-ingested into the dataset using the "replace" update strategy.

    Relies on names defined by earlier notebook cells: `utils`, `data_repo_client`,
    `bigquery`, `ws_bucket`, and IPython's `display`.

    Args:
        dataset_id_list: Iterable of TDR dataset IDs to examine.
        profile_id: Billing profile ID sent with each ingest request. Defaults to the
            value this notebook previously hard-coded.

    Returns:
        None. A per-dataset/per-table results DataFrame is displayed. Failures are
        recorded as rows in that table rather than raised.
    """
    # Imported locally so this cell does not depend on an edit to the notebook's import cell.
    import subprocess

    max_ingest_attempts = 2  # one initial attempt plus one retry

    # Loop through and process dataset IDs
    results = []
    for dataset_id in dataset_id_list:

        # Retrieve dataset information
        logging.info(f"Processing dataset_id = {dataset_id}...")
        api_client = utils.refresh_tdr_api_client()
        datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
        try:
            logging.info("Retrieving dataset details.")
            response = datasets_api.retrieve_dataset(id=dataset_id, include=["ACCESS_INFORMATION"]).to_dict()
            bq_project = response["access_information"]["big_query"]["project_id"]
            bq_dataset = response["access_information"]["big_query"]["dataset_name"]
        except Exception as e:
            error_message = f"Error retrieving dataset details: {str(e)}"
            logging.error(error_message)
            results.append([dataset_id, "All", "Failure", error_message])
            continue

        # One BigQuery client serves both the check and the patch queries for this dataset
        client = bigquery.Client()

        # Evaluate whether data needs to be reprocessed for the tables in question and reprocess if so
        for table in ["anvil_donor", "anvil_biosample"]:
            table_ref = f"`{bq_project}.{bq_dataset}.{table}`"

            # Evaluate whether a patch is needed
            logging.info(f"Checking whether patching is required for the {table} table.")
            if table == "anvil_donor":
                # Donor query tests the field for NULL
                check_query = f"SELECT COUNT(*) AS null_cnt FROM {table_ref} WHERE part_of_dataset_id IS NULL"
            else:
                # Biosample query tests the field for a zero-length array
                check_query = f"SELECT COUNT(*) AS null_cnt FROM {table_ref} WHERE ARRAY_LENGTH(part_of_dataset_id) = 0"
            try:
                df = client.query(check_query).result().to_dataframe()
                patch_needed = df["null_cnt"].values[0] > 0
            except Exception as e:
                error_message = f"BigQuery error: {str(e)}"
                logging.error(error_message)
                results.append([dataset_id, table, "Failure", error_message])
                continue

            if not patch_needed:
                logging.info("No patching required!")
                results.append([dataset_id, table, "Success", "No Patch Needed"])
                continue

            # Reprocess table to populate missing values
            logging.info(f"Patching {table} table.")
            target_file = f"{table}.json"
            destination_dir = f"ingest_pipeline/output/transformed/anvil/{dataset_id}/table_data"
            if table == "anvil_donor":
                # DESC NULLS LAST ordering puts the largest non-null value first, so the
                # running MAX already equals the table-wide maximum for every row.
                patch_query = f"""SELECT * EXCEPT(part_of_dataset_id), MAX(part_of_dataset_id) OVER (ORDER BY part_of_dataset_id DESC NULLS LAST) AS part_of_dataset_id
                                FROM {table_ref}"""
            else:
                patch_query = f"""WITH dataset_id
                                AS
                                (
                                  SELECT MAX(ARRAY_TO_STRING(part_of_dataset_id, "")) AS id
                                  FROM {table_ref}
                                )
                                SELECT * EXCEPT(part_of_dataset_id), [(SELECT MAX(id) FROM dataset_id)] AS part_of_dataset_id
                                FROM {table_ref}"""
            logging.info("Creating updated table data.")
            try:
                df = client.query(patch_query).result().to_dataframe()
                records_list = json.loads(df.to_json(orient='records'))
                try:
                    # Newline-delimited JSON with no trailing newline
                    with open(target_file, 'w') as outfile:
                        outfile.write("\n".join(json.dumps(record) for record in records_list))
                    # Run gsutil through subprocess so a failed copy is detected instead of
                    # being silently ignored and followed by an ingest of a missing/stale file.
                    upload = subprocess.run(
                        ["gsutil", "cp", target_file, f"{ws_bucket}/{destination_dir}/"],
                        capture_output=True,
                        text=True,
                    )
                    if upload.returncode != 0:
                        raise RuntimeError(f"gsutil cp failed (exit code {upload.returncode}): {upload.stderr.strip()}")
                finally:
                    # Always clean up the local temp file, including on failure
                    if os.path.exists(target_file):
                        os.remove(target_file)
                logging.info(f"Successfully created new {table}.json file.")
            except Exception as e:
                error_message = f"Error creating new json file. Exiting function. Error: {str(e)}"
                logging.error(error_message)
                results.append([dataset_id, table, "Failure", error_message])
                continue

            # Ingest updated table data
            logging.info("Submitting ingest request for updated data.")
            source_full_file_path = "{}/{}/{}".format(ws_bucket, destination_dir, target_file)
            ingest_request = {
                "table": table,
                "profile_id": profile_id,
                "ignore_unknown_values": True,
                "resolve_existing_files": True,
                "updateStrategy": "replace",
                "format": "json",
                "load_tag": "Ingest for {}".format(dataset_id),
                "path": source_full_file_path
            }
            for attempt in range(1, max_ingest_attempts + 1):
                try:
                    # Refresh the client on every attempt before submitting the ingest
                    api_client = utils.refresh_tdr_api_client()
                    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
                    ingest_request_result, _ = utils.wait_for_tdr_job(datasets_api.ingest_dataset(id=dataset_id, ingest=ingest_request))
                    logging.info("Ingest succeeded: {}".format(str(ingest_request_result)[0:1000]))
                    results.append([dataset_id, table, "Success", "Records Patched"])
                    break
                except Exception as e:
                    logging.error("Error on Dataset Ingest: {}".format(str(e)))
                    if attempt < max_ingest_attempts:
                        logging.info("Retrying Dataset Ingest (attempt #{})...".format(str(attempt)))
                        sleep(10)
                    else:
                        logging.error("Maximum number of retries exceeded. Exiting function.")
                        results.append([dataset_id, table, "Failure", str(e)])

    # Display results
    logging.info("\nResults:")
    df_results = pd.DataFrame(results, columns =["Dataset ID", "Table", "Status", "Message"])
    display(df_results)


#############################################
## Input Parameters
#############################################

# List of dataset IDs to examine and patch if necessary
dataset_id_list = [
    'f9224ea2-dd31-421d-80d4-f35082ef8d68',
]

#############################################
## Execution
#############################################

# Checks every listed dataset; any table found to need a patch is re-ingested into
# that dataset with the "replace" update strategy, and a results table is displayed.
check_and_patch_dataset_fk_field(dataset_id_list)


# Patch Dataset Properties

## Patch Source Workspaces

In [None]:
# Dataset-workspace list: each entry is [dataset_id, source workspace name]
dataset_ws_list = [
    ['9a32e23e-840d-4ba3-8cd9-392f48b8e9d2', 'AnVIL_CCDG_Baylor_CVD_HemStroke_GOCHA_DS_WGS'],
    ['5069fc2c-b957-4130-adca-6eabae943867', 'AnVIL_CCDG_Baylor_CVD_HemStroke_WashU_DS_WGS'],
    ['1939b7ae-fc6b-42a8-ad5f-dc51a1682a17', 'AnVIL_CCDG_Broad_CVD_AF_Darbar_UIC_Cases_Arrays'],
    ['4e99b8e1-40b9-4fb2-90a0-d85e926ef31e', 'AnVIL_CCDG_Broad_CVD_AF_Darbar_UIC_Cases_WES'],
    ['2cda53ba-b852-47e8-8f24-59ab8e9f1d1f', 'AnVIL_CCDG_Broad_CVD_AF_Darbar_UIC_Controls_Arrays'],
    ['128332b6-5060-4ec4-b6a6-f53b54a810be', 'AnVIL_CCDG_Broad_CVD_AF_Darbar_UIC_Controls_WES'],
    ['06f05f58-3c83-4f5c-bddd-bed7d2d1d147', 'AnVIL_CCDG_Broad_CVD_AF_EAST_WES'],
    ['41cb9f29-4ba6-4690-821c-cb085e6b0f2f', 'AnVIL_CCDG_Broad_CVD_AF_Figtree_BioHeart_WES'],
    ['9d796a02-e2aa-4c15-b8d6-1e90cd736681', 'AnVIL_CCDG_Broad_CVD_AF_Natale_TCAI_Arrays'],
    ['7ea006d9-1e19-4678-b2e6-d4a1ea327f74', 'AnVIL_CCDG_Broad_CVD_AF_Natale_TCAI_WES'],
    ['433e3a09-661a-46a5-96f2-dbb07bdc87f3', 'AnVIL_CCDG_Broad_CVD_AF_Olesen_Arrays'],
    ['34fd3b22-ac73-47d2-8849-5877158ec072', 'AnVIL_CCDG_Broad_CVD_AF_Olesen_WES'],
    ['a08dc7a6-f8ce-4205-95d2-83f614c2c32f', 'AnVIL_CCDG_Broad_CVD_AF_PEGASUS_HMB'],
    ['7ce3270e-b2f2-47f4-a288-639751b2f87f', 'AnVIL_CCDG_Broad_CVD_AF_Roberts_UWO_WES'],
    ['fcb03f4f-e685-4803-aadb-0e8940ff4f37', 'AnVIL_CCDG_Broad_CVD_AF_TMDU_Cases_Arrays'],
    ['41d12dc1-8718-4439-b409-26cc23573107', 'AnVIL_CCDG_Broad_CVD_AF_TMDU_Cases_WES'],
    ['c2f0e7cf-ac07-48f7-b5f1-497ee6c134b2', 'AnVIL_CCDG_Broad_CVD_AF_TMDU_Controls_Arrays'],
    ['c4c49fcd-0c20-4cff-841a-cb58f5689c5b', 'AnVIL_CCDG_Broad_CVD_AF_TMDU_Controls_WES'],
    ['9ee2a552-89f8-4a48-9c94-9fa26ebb7483', 'AnVIL_CCDG_Broad_CVD_AFib_Duke_WGS'],
    ['425412ba-894a-4824-acb8-bf18fe4576e0', 'AnVIL_CCDG_Broad_CVD_AFib_GENAF_WGS'],
    ['f22bd762-5c45-453e-bf22-b174514abb84', 'AnVIL_CCDG_Broad_CVD_AFib_Intermountain_WGS'],
    ['0ee62643-b064-42f8-9b09-5d10eacd70a3', 'AnVIL_CCDG_Broad_CVD_AFib_JHU_WGS'],
    ['c37b388c-7107-43d6-bee6-4e82b40ed271', 'AnVIL_CCDG_Broad_CVD_AFib_MPP_WGS'],
    ['bf6f1d78-6a0d-4afb-aea6-17a3c34340db', 'AnVIL_CCDG_Broad_CVD_AFib_Penn_WGS'],
    ['719f7581-21db-4aec-8c46-4a5811832710', 'AnVIL_CCDG_Broad_CVD_EOCAD_PROMIS_WGS'],
    ['15be288e-53e1-41cb-8d20-8ea87efb9258', 'AnVIL_CCDG_Broad_MI_ATVB_DS_CVD_WES'],
    ['8b8185d3-ba5c-4832-af23-3ff8ca6ed016', 'AnVIL_CCDG_Broad_MI_UnivUtah_DS_CVD_WES'],
    ['140797da-dc94-4fc2-8b0b-f2e1dec7bd43', 'AnVIL_CCDG_Broad_NP_Autism_State-Sanders_WGS'],
    ['8de6dae2-55ff-4287-9b75-5b2a950c1f44', 'AnVIL_CCDG_Broad_NP_Epilepsy_AUSALF_HMB_IRB_GSRS_GSA-MD'],
    ['d3ed2595-b8be-40c8-b7b6-10a4997b9d2e', 'AnVIL_CCDG_Broad_NP_Epilepsy_AUSRMB_DS-EAED-MDS-NPU-IRB_GSA-MD'],
    ['61803dc8-f649-43e5-ab15-d351f2cef629', 'AnVIL_CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_MDS_NPU_GSA-MD'],
    ['abe58d43-e1c7-4953-aa41-4d3b6f6cca44', 'AnVIL_CCDG_Broad_NP_Epilepsy_AUTMUV_DS_NS_NPU_ADLT_GSA-MD'],
    ['395da421-e6e8-4a26-ac93-eb7050a7cb1f', 'AnVIL_CCDG_Broad_NP_Epilepsy_GHAKNT_GRU_GSA-MD'],
    ['615f6246-1c39-4e44-a9d4-c7133a2ae62d', 'AnVIL_CCDG_Broad_NP_Epilepsy_HKOSB_GRU_GSA-MD'],
    ['21384132-1697-4e9b-b863-a6492d13285d', 'AnVIL_CCDG_Broad_NP_Epilepsy_KENKIL_GRU_GSA-MD'],
    ['b7fb531e-25a4-427c-9679-b7bdc3d03535', 'AnVIL_CCDG_Broad_NP_Epilepsy_TWNCGM_HMB-NPU-ADULTS_WES'],
    ['608d793e-a78b-4872-a50c-21a9eaa60ec3', 'AnVIL_CCDG_Broad_NP_Epilepsy_USACCF_HMB-MDS_GSA-MD'],
    ['af867604-d801-41cc-9949-017eb30a0cbf', 'AnVIL_CCDG_Broad_NP_Epilepsy_USALCH_HMB_MDS_GSA-MD'],
    ['722e332c-fb1a-45fe-80c7-cc670f025b7f', 'AnVIL_CCDG_Broad_NP_Epilepsy_USAMGH_HMB_MDS_GSA-MD'],
    ['1d140c76-a06b-42a0-bae8-b9e169ebe394', 'AnVIL_CCDG_Broad_NP_Epilepsy_USAMON_HMB_NPU_MDS_GSA-MD'],
    ['3615e063-f24b-47f7-87cb-430e8aca8d0c', 'AnVIL_CCDG_Broad_NP_Epilepsy_USAUPN_GRU_NPU_WES'],
    ['e642bca0-52fb-4ab3-ab3a-acaab83deda7', 'AnVIL_CCDG_Broad_NP_Epilepsy_USAUPN_GRU_WES'],
    ['9ecc231f-e3d3-4417-a98a-c4db4c638161', 'AnVIL_CCDG_Broad_NP_Epilepsy_USAVANcontrols_HMB-GSO_WES'],
    ['c911503c-f010-4c17-ac57-1d82e954bdc7', 'AnVIL_CCDG_Broad_NP_Epilepsy_ZAFAGN_DS-EPI-COMO-MDS_GSA-MD'],
    ['3fb2d04a-d18b-4bdc-9372-99b992f2ae42', 'AnVIL_CCDG_Broad_NP_Epilepsy_ZAFAGN_DS-EPI-COMO-MDS_WES'],
    ['a3ae33bb-8b3a-47e5-a2d1-a49c954776b3', 'AnVIL_CCDG_NYGC_NP_Autism_HMCA_WGS'],
    ['0e65b131-fd14-4fce-908b-c5b89a71a9c1', 'AnVIL_CCDG_NYGC_NP_Autism_TASC_WGS'],
    ['d56ae233-d6d2-483c-917e-1de0fe1cfeb7', 'AnVIL_CCDG_TOPMED_Broad_CVD_EOCAD_PROMIS_WGS'],
    ['655e6a61-5400-4d8a-95bc-1506e026b289', 'AnVIL_CCDG_WashU_AI_T1D_T1DGC_WGS'],
    ['1f2d14d4-1bd8-46fc-9d35-1a415e5f326a', 'AnVIL_CCDG_WashU_CVD-NP-AI_Controls_VCControls_WGS'],
    ['64fd39fc-b32e-4b0a-8f83-4bf11b197462', 'AnVIL_CCDG_WashU_CVD_Brazil-CVD_WGS'],
    ['158ebecd-4596-4541-b832-a137232b7036', 'AnVIL_CCDG_WashU_CVD_EOCAD_BioMe_WGS'],
    ['1ccb95c3-1901-428e-b7bb-34495f41f4d2', 'AnVIL_CCDG_WashU_CVD_EOCAD_BioVu_WGS'],
    ['02ff1051-cd1d-4bbb-a005-21384cbff846', 'AnVIL_CCDG_WashU_CVD_EOCAD_Cleveland_WGS'],
    ['0144b0d3-a809-46df-8c67-7ce42bdd579a', 'AnVIL_CCDG_WashU_CVD_EOCAD_Duke_WGS'],
    ['35a1009d-93a2-49b1-a801-fe84d6b7a2f5', 'AnVIL_CCDG_WashU_CVD_EOCAD_Emerge_WGS'],
    ['50132478-c9fb-4dc5-86cd-d5dfab909393', 'AnVIL_CCDG_WashU_CVD_EOCAD_Emory_WGS'],
    ['35064fc1-6c52-4005-8e99-cb0d6afd3f8c', 'AnVIL_CCDG_WashU_CVD_EOCAD_Finland-CHD_WGS'],
    ['62cfdce6-2d4d-415c-a11e-5ab60131c668', 'AnVIL_CCDG_WashU_CVD_EOCAD_METSIM_WGS'],
    ['c5c0893f-b254-4038-8d08-b28ef5a26b5d', 'AnVIL_CMG_Broad_Brain_Engle_WGS'],
    ['b60876c5-d825-4303-befb-ffff55b92aba', 'AnVIL_CMG_Broad_Heart_Ware_WES'],
]

# Loop through each [dataset_id, workspace_name] pair and set the dataset's
# "source_workspaces" property to a single-element list holding that workspace name.
for entry in dataset_ws_list:
    # Pull dataset details
    dataset_id = entry[0]
    workspace_name = entry[1]
    logging.info(f"Processing dataset_id = {dataset_id}...")
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        logging.info("Retrieving dataset details.")
        dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["PROPERTIES"]).to_dict()
        # Treat a dataset with no properties as an empty mapping instead of failing on None
        current_properties = dict(dataset_details.get("properties") or {})
    except Exception as e:
        logging.error(f"Error retrieving dataset details: {str(e)}")
        # Skip this dataset: falling through would patch it with properties left over
        # from the previous iteration (or raise NameError on the first one).
        continue

    # Update current properties and patch dataset.
    # The property is stored as a list, so compare against the list form; comparing
    # against the bare string was always unequal and re-patched every dataset.
    desired_workspaces = [workspace_name]
    if current_properties.get("source_workspaces") != desired_workspaces:
        current_properties["source_workspaces"] = desired_workspaces
        try:
            logging.info("Patching dataset.")
            resp = datasets_api.patch_dataset(id=dataset_id, dataset_patch_request_model={"properties": current_properties})
        except Exception as e:
            logging.error("Error on Dataset Patch: {}".format(str(e)))
    else:
        logging.info("No patch needed; source_workspaces is already up to date.")



## Patch Consent Codes

In [None]:
# Dataset-consent list: each entry is [dataset_id, consent_name]
dataset_consent_list = [
    ['ff8b1212-858a-4048-8f63-9464c922591a', 'DS-RARED'],
    ['0194eea9-d779-4957-8521-11717a378e66', 'DS-ASD-MDS'],
    ['0d82658c-44b3-4cea-a388-3353a96a31ef', 'DS-ASD-MDS'],
    ['b5d7c34a-c383-4fc7-aa4d-b6dc941cd41a', 'DS-ASD-NPU-MDS'],
    ['ceee2791-0fdf-45fc-a4e8-8077916771aa', 'HMB-MDS'],
    ['2a263db0-8c33-4171-840f-54bf4755a4b9', 'DS-NDEVRD'],
    ['6c9423a2-3ea7-4c3c-9b12-0cc993bc095f', 'DS-ASD'],
    ['52e015b5-22b7-4a96-9f0a-ea3afccbfcbc', 'GRU'],
    ['0b0a52bb-a1a2-4638-9259-4447761c2da4', 'DS-AASD'],
    ['e16adabb-88e0-4739-983a-98ac5c181842', 'DS-ASD-RD-IRB'],
    ['ae50ef98-ef3d-4427-b094-83b2d90787a0', 'DS-AUT'],
    ['2d434f2c-6aaa-46b2-ada9-de4b887e13d3', 'DS-MULTIPLE_DISEASES-IRB-COL-NPU-RD'],
    ['3725b660-1106-4173-9c4b-0a15926becf5', 'HMB-MDS'],
    ['02ff1051-cd1d-4bbb-a005-21384cbff846', 'DS-CAD-IRB'],
    ['516ceb43-1378-4c02-88fc-a1d2a2258d59', 'GRU-NPU'],
    ['28849dc9-a97f-469b-b2ac-a8ff97693f02', 'DS-IBD'],
    ['c46c2220-da88-4f60-a0cf-eebfd0a8ff12', 'DS-DSDI-MDS'],
    ['29cd0578-fb47-495a-8f48-b37325eed81a', 'DS-DSDI-MDS'],
    ['0faf149d-b316-4fbd-8605-a59354f0eacd', 'DS-GR-IRB-MDS'],
    ['472f01ad-7bc3-4fe5-9771-2695930dbc95', 'DS-GR-IRB-MDS'],
    ['00c11c7e-8530-4bfc-abd7-8c10f4c602d3', 'DS-GID'],
    ['f6565f2f-4478-45ad-8c11-04dd242fc6a9', 'DS-GID'],
    ['8abf299c-cd4e-4ce0-b5cf-4f9abe8cc891', 'HMB'],
    ['d40af129-c13f-45b2-92f0-d0e8fa5cc1c9', 'DS-GID'],
    ['0b6eb077-2eca-4fe6-b012-26fab725b907', 'DS-GID'],
    ['42965913-4223-484a-9b3d-abc0002d277d', 'DS-IBD'],
    ['d6823ccd-7247-4efc-8841-f53f456351ed', 'HMB'],
    ['8b8185d3-ba5c-4832-af23-3ff8ca6ed016', 'DS-CVD-MDS'],
    ['9828f3fe-f676-4bf1-b600-5effa24ea9c8', 'GRU-NPU'],
    ['ec97fa0f-e174-40fe-a6b8-ee240bdf4318', 'DS-EPSBA-MDS-RD'],
    ['5488d7c1-5195-4ebc-b0f0-31033fa06dc9', 'DS-EPSBAID-MDS-RD'],
    ['56f9888f-e623-4a1a-b2b4-46378a6cd6fe', 'DS-EPSBACID-MDS-RD'],
    ['7593c1c2-3680-4bf5-8a65-dce5f96a3b59', 'DS-EPCOM-MDS-RD'],
    ['267cf516-dd33-4640-a71a-78bd8f5db9d8', 'DS-CARNEU-MDS'],
    ['df06ff22-6a2d-4934-aac9-c8368efbea1a', 'DS-EPASM-MDS-RD'],
    ['b9842819-5fd6-40c5-9668-aae1ea44a308', 'DS-EPI-ADULT-NPU-MDS'],
    ['7c056125-3ed8-459c-b73e-edfa3f80cc27', 'DS-NEUROLOGY-MDS'],
    ['85dbde76-c130-40b2-8a8a-ba815ba499da', 'DS-EP-MDS'],
    ['54c6fa73-9b84-4a3b-9e97-e4e43165c48b', 'DS-EPSBACID-NPU-MDS-RD'],
    ['f3c88c3c-8e1b-4af9-9467-0621404e314c', 'DS-NEUROLOGY-ADULTS-NPU'],
    ['332bb145-6ef1-40ef-932c-aec5bb6210d9', 'DS-EPI-MULTI-MDS'],
    ['3c2c39a9-4cc2-4f7c-89e0-054a871e2c4e', 'DS-NEUROLOGY-MDS'],
    ['e03eb011-05f9-4491-b779-0cc2aefabff1', 'DS-EPASM-MDS'],
    ['ccc524ab-d9ad-467c-a25b-9a14fb05e976', 'DS-EP-MDS'],
    ['4b341ba9-49a5-43a2-9b7e-cc96beb59946', 'DS-NEURO-EP-MDS'],
    ['e922a496-e686-4fa1-911d-2159ceb0f09f', 'HMB-NPU-MDS'],
    ['3fb2d04a-d18b-4bdc-9372-99b992f2ae42', 'DS-EP-MDS'],
    ['cc107de7-d623-464a-a875-c8b7ae5fb09d', 'DS-MBND-MDS'],
    ['416b8daa-9537-46db-ae7b-3f5ff5f01dc3', 'DS-ASD-NPU'],
    ['f757278a-3c74-4690-bf89-5149d21ff3af', 'DS-MHNR-NPU-MDS'],
    ['797b2563-5d56-4f5c-bdaf-3bfd11e8f5b3', 'DS-ASD-MDS'],
    ['a52c04ee-cfef-46bb-9b40-6a9b292e1a7b', 'HMB-NPU'],
    ['6d18aafc-0240-499c-902e-a72a5b98ff0a', 'DS-MSC-MDS'],
    ['dbb4df81-9115-45d1-b51d-875e0669edc4', 'DS-SZ-MDS'],
]
# Set to True to print each dataset's properties before and after the update
show_properties = False

# Loop through each [dataset_id, consent_name] pair and set the dataset's
# "consent_name" property when it differs from the listed value.
for entry in dataset_consent_list:
    # Pull dataset details
    dataset_id = entry[0]
    consent_name = entry[1]
    logging.info(f"Processing dataset_id = {dataset_id}...")
    api_client = utils.refresh_tdr_api_client()
    datasets_api = data_repo_client.DatasetsApi(api_client=api_client)
    try:
        logging.info("Retrieving dataset details.")
        dataset_details = datasets_api.retrieve_dataset(id=dataset_id, include=["PROPERTIES"]).to_dict()
        # Treat a dataset with no properties as an empty mapping instead of failing on None
        current_properties = dict(dataset_details.get("properties") or {})
    except Exception as e:
        logging.error(f"Error retrieving dataset details: {str(e)}")
        # Skip this dataset: falling through would patch it with properties left over
        # from the previous iteration (or raise NameError on the first one).
        continue

    # Update current properties and patch dataset
    if show_properties:
        print("Current Properties:")
        print(current_properties)
    # .get() so a dataset that lacks the key is patched rather than aborting the loop
    if current_properties.get("consent_name") != consent_name:
        current_properties["consent_name"] = consent_name
        if show_properties:
            print("New Properties:")
            print(current_properties)
        try:
            logging.info("Patching dataset.")
            resp = datasets_api.patch_dataset(id=dataset_id, dataset_patch_request_model={"properties": current_properties})
        except Exception as e:
            logging.error("Error on Dataset Patch: {}".format(str(e)))
    else:
        logging.info("No patch needed; consent_name is already up to date.")
