In [1]:
# Number of regions processed concurrently; passed as `n_jobs` to
# compute_ld_complete() in the run cell.
N_JOBS = 30
# Passed to hl.init(min_block_size=...) when Hail is initialised.
BLOCK_SIZE = 1024

# Destination directories for the per-region LD results: the BlockMatrix
# written by compute_ld_and_save() and its TSV export, respectively.
LD_BM_OUTPUT_DIR = 'gs://ukb_data/phenomexcan/ld/bm/'
LD_TSV_OUTPUT_DIR = 'gs://ukb_data/phenomexcan/ld/tsv/'

# Nominal size of the downsampled cohort. In this notebook it is only used to
# build SAMPLE_PREFIX ("50k"); the table that is actually read may hold a
# slightly different number of samples.
N_SAMPLE = 50000
# NOTE(review): not referenced anywhere else in this notebook -- presumably
# the seed used when the sample table was generated; confirm.
N_SAMPLE_SEED = 0
SAMPLE_PREFIX = f'{int(N_SAMPLE / 1000)}k'
# Hail table with the downsampled sample ids, read later with hl.read_table().
SAMPLES_SAMPLED_OUTPUT = f'gs://ukb_data/phenomexcan/samples/{SAMPLE_PREFIX}/samples_{SAMPLE_PREFIX}_neale_eids.ht'
display(SAMPLES_SAMPLED_OUTPUT)

'gs://ukb_data/phenomexcan/samples/50k/samples_50k_neale_eids.ht'

In [2]:
import re
import os
import time
import argparse
import subprocess
from multiprocessing.dummy import Pool as ThreadPool

from google.cloud import storage

In [3]:
import hail as hl
# Start Hail with a GCS scratch directory. BLOCK_SIZE comes from the config cell.
# NOTE(review): `min_block_size` is presumably Hail's minimum input split size
# (in MB), not the BlockMatrix block size -- the run log below reports block
# size 4096; confirm against the Hail documentation.
hl.init(tmp_dir='gs://ukb_data/tmp/', min_block_size=BLOCK_SIZE)

Running on Apache Spark version 2.4.3
SparkUI available at http://ukb-m.c.ukb-im.internal:4040
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version 0.2.22-597b3bd86135
LOGGING: writing to /home/hail/hail-20190916-1400-0.2.22-597b3bd86135.log


In [4]:
def list_blobs(bucket_name, prefix='', suffix='.*', delimiter=None):
    """List objects in a GCS bucket as ``gs://`` URLs, filtered by name.

    Parameters
    ----------
    bucket_name : str
        Name of the bucket (without the ``gs://`` scheme).
    prefix : str
        Only objects whose name starts with this prefix are listed
        (forwarded to ``Bucket.list_blobs``).
    suffix : str
        Regular expression that the *end* of the object name must match.
        The default ``'.*'`` accepts every name. Note that this is a regex,
        so an unescaped ``.`` matches any character.
    delimiter : str or None
        Forwarded to ``Bucket.list_blobs``.

    Returns
    -------
    list of str
        ``gs://<bucket_name>/<object name>`` for every matching object, in the
        order returned by the storage client.
    """
    # Anchor the pattern at the end of the name. An unanchored search would
    # accept the "suffix" anywhere in the path, e.g. '.tsv' would match every
    # object stored under a '.../tsv/' directory as well as '*.tsv.bgz' files.
    suffix_re = re.compile(f'(?:{suffix})$')

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=prefix, delimiter=delimiter)

    return [f'gs://{bucket_name}/{b.name}' for b in blobs if suffix_re.search(b.name)]

In [5]:
# Sanity check: show the first five per-region variant files found for chr1.
list_blobs('ukb_data', prefix='phenomexcan/regions_data/chr1_', suffix='.tsv', delimiter='/')[:5]

['gs://ukb_data/phenomexcan/regions_data/chr1_region0_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr1_region100_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr1_region101_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr1_region102_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr1_region103_variants.tsv']

# Function to read variants in region

In [6]:
def _read_variants_in_region(region_file):
    """Load a region's variant list as a Hail table keyed by parsed variant.

    ``region_file`` is imported with ``hl.import_table``; its ``variant``
    column is passed through ``hl.parse_variant`` and the result is stored in
    a new field ``v``, which becomes the table key.
    """
    table = hl.import_table(region_file)
    parsed_variant = hl.parse_variant(table.variant)
    return table.annotate(v=parsed_variant).key_by('v')

In [7]:
# Testing

# variants_region = _read_variants_in_region('gs://ukb_data/phenomexcan/regions_data/chr22_region1676_variants.tsv')

# display(variants_region.show(5))

# n_variants_region = variants_region.count()
# print(n_variants_region)

# Function to read genotypes

In [8]:
def _read_bgen(chr_num, variants_region, samples_to_keep=None, sample_file='gs://ukb_data/genotypes/ukb19526_imp_chr1_v3_s487395.sample'):
    """Import the genotype calls (``GT``) of one chromosome for a set of variants.

    Parameters
    ----------
    chr_num
        Chromosome number, interpolated into the BGEN path
        ``gs://ukb_data/genotypes/v3/ukb_imp_chr{chr_num}_v3.bgen``.
    variants_region
        Table whose ``v`` field lists the variants to load (as produced by
        ``_read_variants_in_region``).
    samples_to_keep
        Optional table of samples; when given, the result is restricted to
        them with ``semi_join_cols``. ``None`` keeps every sample.
    sample_file
        Sample file handed to ``hl.import_bgen``.
        NOTE(review): the default is the chr1 sample file regardless of
        ``chr_num`` -- presumably the sample list is identical across the
        chromosomes processed here; confirm before using other chromosomes.

    Returns
    -------
    The imported matrix table, optionally filtered to ``samples_to_keep``.
    """
    genotypes = hl.import_bgen(
        path=f'gs://ukb_data/genotypes/v3/ukb_imp_chr{chr_num}_v3.bgen',
        entry_fields=['GT'],  # other entry fields ('GP', 'dosage') are not loaded
        sample_file=sample_file,
        variants=variants_region.v,
    )

    if samples_to_keep is not None:
        genotypes = genotypes.semi_join_cols(samples_to_keep)

    return genotypes

In [9]:
# Testing

# bgen = _read_bgen(22, variants_region)
# display(bgen.describe())

# n_bgen = bgen.count()
# print(n_bgen)

# display(bgen.show(5))
# display(bgen.row.show(5))
# display(bgen.col.show(5))

# Compute LD

In [10]:
from concurrent.futures import ThreadPoolExecutor, as_completed

In [11]:
def _get_variants_id(variants_data):
    def _get_variantid(x):
        return f'{x["locus.contig"]}:{x["locus.position"]}:{x["alleles"][0]}:{x["alleles"][1]}'
    
    _tmp = variants_data.rows().to_pandas()
    _tmp = _tmp.assign(variantid=_tmp.apply(_get_variantid, axis=1))
    return _tmp['variantid'].tolist()

def _get_info_from_variant_file(variant_file):
    info = {}
    split = variant_file.split('/')[-1].split('_')
    info['chr'] = split[0]
    info['region'] = split[1]
    info['chr_num'] = int(split[0].split('chr')[1])
    
    return info

In [12]:
def compute_ld_and_save(variants_data, bm_out, tsv_out):
    """Compute the variant-by-variant correlation matrix and persist it twice.

    The row correlation of the alternate-allele counts in ``variants_data`` is
    reduced with ``sparsify_triangle()`` and written as a BlockMatrix to
    ``bm_out`` (overwriting any existing output). That BlockMatrix is then
    exported to ``tsv_out`` as a tab-separated file containing the strictly
    upper-triangular entries, with a header line made of the variant ids
    returned by ``_get_variants_id``.
    """
    alt_allele_counts = variants_data.GT.n_alt_alleles()
    ld_matrix = hl.row_correlation(alt_allele_counts).sparsify_triangle()
    ld_matrix.write(bm_out, overwrite=True, force_row_major=True)

    # The export reads the matrix back from bm_out, so it must run after the write.
    header_line = '\t'.join(_get_variants_id(variants_data))
    hl.linalg.BlockMatrix.export(
        path_in=bm_out,
        path_out=tsv_out,
        delimiter='\t',
        entries='strict_upper',
        parallel=None,  # alternative considered by the author: 'header_per_shard'
        add_index=False,
        header=header_line,
    )
    
def compute_ld_for_region(variant_file, samples, bm_out_dir, tsv_out_dir):
    """Run the full LD computation for a single region file.

    Reads the region's variants, loads the matching genotypes (restricted to
    ``samples``), prints the genotype matrix dimensions, and writes
    ``<chr>_<region>.bm`` under ``bm_out_dir`` and ``<chr>_<region>.tsv.bgz``
    under ``tsv_out_dir`` via ``compute_ld_and_save``.

    Returns
    -------
    bool
        Always ``True`` when the function completes; errors propagate as
        exceptions.
    """
    info = _get_info_from_variant_file(variant_file)
    chrom_label, region_label = info['chr'], info['region']

    region_variants = _read_variants_in_region(variant_file)
    genotypes = _read_bgen(
        chr_num=info['chr_num'],
        variants_region=region_variants,
        samples_to_keep=samples,
    )
    # count() is evaluated here only to log the (variants, samples) dimensions.
    print(f'Region "{region_label}" count: {genotypes.count()}')

    output_stem = f'{chrom_label}_{region_label}'
    compute_ld_and_save(
        genotypes,
        os.path.join(bm_out_dir, output_stem + '.bm'),
        os.path.join(tsv_out_dir, output_stem + '.tsv.bgz'),
    )
    return True

In [13]:
def compute_ld_complete(variant_files, samples, bm_out_dir, tsv_out_dir, n_jobs=1):
    """Compute LD for many region files concurrently using a thread pool.

    Parameters
    ----------
    variant_files
        Iterable of region variant file paths; one task is submitted per file.
    samples
        Forwarded unchanged to ``compute_ld_for_region``.
    bm_out_dir, tsv_out_dir
        Output directories forwarded to ``compute_ld_for_region``.
    n_jobs : int
        Number of worker threads. ``-1`` is translated to 10 workers.

    Notes
    -----
    A region that raises is reported with a printed message and does not stop
    the remaining regions; nothing is returned, so callers must verify the
    outputs themselves. Completion messages are printed in the order tasks
    finish, not in submission order.
    """
    worker_count = 10 if n_jobs == -1 else n_jobs

    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        # Map each future back to the file it is processing.
        pending = {
            pool.submit(compute_ld_for_region, path, samples, bm_out_dir, tsv_out_dir): path
            for path in variant_files
        }

        # Report every task as soon as it finishes.
        for finished in as_completed(pending):
            path = pending[finished]
            try:
                finished.result()
            except Exception as ex:
                print(f'Task on {path} finished with an exception: {ex}')
                continue

            region_label = _get_info_from_variant_file(path)["region"]
            print(f'Region completed: {region_label}')

# Run

In [77]:
# Chromosome processed by this run; used to select the region files and,
# afterwards, to list the exported LD files for the final check.
SELECTED_CHR = 22

In [78]:
# Collect the per-region variant files of the selected chromosome and show
# how many were found plus a short preview.
variant_files = list_blobs('ukb_data', prefix=f'phenomexcan/regions_data/chr{SELECTED_CHR}_', suffix='.tsv', delimiter='/')
display(len(variant_files))
display(variant_files[:5])

24

['gs://ukb_data/phenomexcan/regions_data/chr22_region1676_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr22_region1677_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr22_region1678_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr22_region1679_variants.tsv',
 'gs://ukb_data/phenomexcan/regions_data/chr22_region1680_variants.tsv']

In [79]:
# use this list when running for a subset of regions
# variant_files = [
#     'gs://ukb_data/phenomexcan/regions_data/chr15_region1389_variants.tsv',
#     'gs://ukb_data/phenomexcan/regions_data/chr15_region1401_variants.tsv',
#     'gs://ukb_data/phenomexcan/regions_data/chr15_region1400_variants.tsv',
#     'gs://ukb_data/phenomexcan/regions_data/chr15_region1415_variants.tsv',
# ]

In [80]:
# Read the table of downsampled sample ids (path built in the config cell)
# and print how many samples it contains; the genotypes of every region are
# restricted to these samples.
samples_downsampled = hl.read_table(SAMPLES_SAMPLED_OUTPUT)
n_samples = samples_downsampled.count()
print(n_samples)

50581


In [81]:
# Record the level of parallelism used for the run below.
print(N_JOBS)

30


In [82]:
%%time
# Compute and export LD for every region file of the selected chromosome.
# Failures are only printed by compute_ld_complete(), so the outputs are
# checked in the next cell.
compute_ld_complete(variant_files, samples_downsampled, n_jobs=N_JOBS,
                   bm_out_dir=LD_BM_OUTPUT_DIR,
                   tsv_out_dir=LD_TSV_OUTPUT_DIR)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (type not specified)

2019-09-16 18:11:08 Hail: INFO: Reading table with no type imputation

Region "region1693" count: (3885, 50581)
Region "region1696" count: (4426, 50581)
Region "region1683" count: (4402, 50581)
Region "region1688" count: (4521, 50581)
Region "region1691" count: (3021, 50581)
Region "region1676" count: (2114, 50581)
Region "region1684" count: (4951, 50581)
Region "region1687" count: (6870, 50581)
Region "region1677" count: (2383, 50581)
Region "region1694" count: (5070, 50581)
Region "region1699" count: (5284, 50581)
Region "region1689" count: (6735, 50581)
Region "region1690" count: (4852, 50581)
Region "region1697" count: (5146, 50581)
Region "region1678" count: (4962, 50581)
Region "region1685" count: (4935, 50581)
Region "region1686" count: (3790, 50581)
Region "region1698" count: (5885, 50581)
Region "region1681" count: (3898, 50581)
Region "region1695" count: (6082, 50581)
Region "region1679" count: (5144, 50581)
Region "region1692" count: (4370, 50581)
Region "region1682" count: (6068, 50581)
Region "region1680" count: (5491, 50581)


2019-09-16 18:19:45 Hail: INFO: Wrote all 13 blocks of 2383 x 50581 matrix with block size 4096.
2019-09-16 18:19:50 Hail: INFO: Wrote all 26 blocks of 4521 x 50581 matrix with block size 4096.
2019-09-16 18:19:51 Hail: INFO: Wrote all 13 blocks of 2114 x 50581 matrix with block size 4096.
2019-09-16 18:19:59 Hail: INFO: Wrote all 13 blocks of 3021 x 50581 matrix with block size 4096.
2019-09-16 18:20:14 Hail: INFO: wrote matrix with 2114 rows and 2114 columns as 1 block of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1676.bm
2019-09-16 18:20:15 Hail: INFO: wrote matrix with 2383 rows and 2383 columns as 1 block of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1677.bm
2019-09-16 18:20:35 Hail: INFO: Wrote all 26 blocks of 4426 x 50581 matrix with block size 4096.
2019-09-16 18:20:36 Hail: INFO: merging 1 files totalling 18.8M...
2019-09-16 18:20:37 Hail: INFO: Wrote all 26 blocks of 4951 x 50581 matrix with block size 4096.
2019-09-16 18:20:37 Hail: INFO: while w

Region completed: region1676


2019-09-16 18:20:40 Hail: INFO: wrote matrix with 3021 rows and 3021 columns as 1 block of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1691.bm
2019-09-16 18:20:42 Hail: INFO: merging 1 files totalling 24.1M...
2019-09-16 18:20:43 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1677.tsv.bgz
  merge time: 1.101s


Region completed: region1677


2019-09-16 18:20:47 Hail: INFO: Wrote all 26 blocks of 6735 x 50581 matrix with block size 4096.
2019-09-16 18:20:57 Hail: INFO: merging 1 files totalling 40.3M...
2019-09-16 18:20:58 Hail: INFO: Wrote all 26 blocks of 4402 x 50581 matrix with block size 4096.
2019-09-16 18:20:59 Hail: INFO: wrote matrix with 4521 rows and 4521 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1688.bm
2019-09-16 18:20:59 Hail: INFO: Wrote all 26 blocks of 4852 x 50581 matrix with block size 4096.
2019-09-16 18:21:00 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1691.tsv.bgz
  merge time: 2.488s


Region completed: region1691


2019-09-16 18:21:02 Hail: INFO: Wrote all 13 blocks of 3790 x 50581 matrix with block size 4096.
2019-09-16 18:21:13 Hail: INFO: Wrote all 13 blocks of 3885 x 50581 matrix with block size 4096.
2019-09-16 18:21:18 Hail: INFO: Wrote all 26 blocks of 5146 x 50581 matrix with block size 4096.
2019-09-16 18:21:23 Hail: INFO: Wrote all 26 blocks of 5284 x 50581 matrix with block size 4096.
2019-09-16 18:21:32 Hail: INFO: merging 2 files totalling 80.0M...
2019-09-16 18:21:35 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1688.tsv.bgz
  merge time: 2.930s


Region completed: region1688


2019-09-16 18:21:41 Hail: INFO: Wrote all 26 blocks of 4370 x 50581 matrix with block size 4096.
2019-09-16 18:21:43 Hail: INFO: Wrote all 26 blocks of 5070 x 50581 matrix with block size 4096.
2019-09-16 18:21:44 Hail: INFO: Wrote all 13 blocks of 3898 x 50581 matrix with block size 4096.
2019-09-16 18:21:44 Hail: INFO: wrote matrix with 4426 rows and 4426 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1696.bm
2019-09-16 18:21:51 Hail: INFO: Wrote all 26 blocks of 6068 x 50581 matrix with block size 4096.
2019-09-16 18:21:52 Hail: INFO: wrote matrix with 4951 rows and 4951 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1684.bm
2019-09-16 18:22:07 Hail: INFO: wrote matrix with 3790 rows and 3790 columns as 1 block of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1686.bm
2019-09-16 18:22:08 Hail: INFO: Wrote all 26 blocks of 5885 x 50581 matrix with block size 4096.
2019-09-16 18:22:08 Hail: INFO: wrote matrix 

Region completed: region1696


2019-09-16 18:22:21 Hail: INFO: Wrote all 26 blocks of 6870 x 50581 matrix with block size 4096.
2019-09-16 18:22:26 Hail: INFO: merging 2 files totalling 102.5M...
2019-09-16 18:22:28 Hail: INFO: merging 1 files totalling 53.8M...
2019-09-16 18:22:28 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1684.tsv.bgz
  merge time: 2.543s


Region completed: region1684


2019-09-16 18:22:28 Hail: INFO: wrote matrix with 5146 rows and 5146 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1697.bm
2019-09-16 18:22:29 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1686.tsv.bgz
  merge time: 1.684s


Region completed: region1686


2019-09-16 18:22:33 Hail: INFO: wrote matrix with 5284 rows and 5284 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1699.bm
2019-09-16 18:22:37 Hail: INFO: Wrote all 26 blocks of 4935 x 50581 matrix with block size 4096.
2019-09-16 18:22:43 Hail: INFO: merging 2 files totalling 86.0M...
2019-09-16 18:22:46 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1683.tsv.bgz
  merge time: 2.966s


Region completed: region1683


2019-09-16 18:22:46 Hail: INFO: wrote matrix with 3898 rows and 3898 columns as 1 block of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1681.bm
2019-09-16 18:22:49 Hail: INFO: merging 2 files totalling 99.7M...
2019-09-16 18:22:50 Hail: INFO: wrote matrix with 4370 rows and 4370 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1692.bm
2019-09-16 18:22:52 Hail: INFO: merging 1 files totalling 64.7M...
2019-09-16 18:22:52 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1690.tsv.bgz
  merge time: 3.025s


Region completed: region1690


2019-09-16 18:22:53 Hail: INFO: Wrote all 26 blocks of 4962 x 50581 matrix with block size 4096.
2019-09-16 18:22:55 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1693.tsv.bgz
  merge time: 2.418s


Region completed: region1693


2019-09-16 18:22:57 Hail: INFO: Wrote all 26 blocks of 5144 x 50581 matrix with block size 4096.
2019-09-16 18:22:57 Hail: INFO: Wrote all 26 blocks of 6082 x 50581 matrix with block size 4096.
2019-09-16 18:22:59 Hail: INFO: wrote matrix with 5070 rows and 5070 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1694.bm
2019-09-16 18:22:59 Hail: INFO: wrote matrix with 6068 rows and 6068 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1682.bm
2019-09-16 18:23:11 Hail: INFO: merging 2 files totalling 117.8M...
2019-09-16 18:23:15 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1697.tsv.bgz
  merge time: 3.820s
2019-09-16 18:23:15 Hail: INFO: merging 2 files totalling 198.4M...


Region completed: region1697


2019-09-16 18:23:16 Hail: INFO: merging 1 files totalling 62.7M...
2019-09-16 18:23:18 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1681.tsv.bgz
  merge time: 1.869s


Region completed: region1681


2019-09-16 18:23:20 Hail: INFO: wrote matrix with 5885 rows and 5885 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1698.bm
2019-09-16 18:23:20 Hail: INFO: merging 2 files totalling 78.8M...
2019-09-16 18:23:21 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1689.tsv.bgz
  merge time: 5.937s


Region completed: region1689


2019-09-16 18:23:23 Hail: INFO: Wrote all 26 blocks of 5491 x 50581 matrix with block size 4096.
2019-09-16 18:23:24 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1692.tsv.bgz
  merge time: 3.684s
2019-09-16 18:23:24 Hail: INFO: merging 2 files totalling 120.2M...


Region completed: region1692


2019-09-16 18:23:28 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1699.tsv.bgz
  merge time: 4.344s


Region completed: region1699


2019-09-16 18:23:32 Hail: INFO: wrote matrix with 6870 rows and 6870 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1687.bm
2019-09-16 18:23:43 Hail: INFO: merging 2 files totalling 115.6M...
2019-09-16 18:23:46 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1694.tsv.bgz
  merge time: 3.287s


Region completed: region1694


2019-09-16 18:23:47 Hail: INFO: wrote matrix with 4935 rows and 4935 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1685.bm
2019-09-16 18:23:52 Hail: INFO: merging 2 files totalling 159.1M...
2019-09-16 18:24:01 Hail: INFO: wrote matrix with 4962 rows and 4962 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1678.bm
2019-09-16 18:24:03 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1682.tsv.bgz
  merge time: 11.303s
2019-09-16 18:24:03 Hail: INFO: wrote matrix with 6082 rows and 6082 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1695.bm


Region completed: region1682


2019-09-16 18:24:05 Hail: INFO: wrote matrix with 5144 rows and 5144 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1679.bm
2019-09-16 18:24:09 Hail: INFO: merging 2 files totalling 154.5M...
2019-09-16 18:24:14 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1698.tsv.bgz
  merge time: 4.932s


Region completed: region1698


2019-09-16 18:24:21 Hail: INFO: merging 2 files totalling 100.8M...
2019-09-16 18:24:23 Hail: INFO: merging 2 files totalling 203.5M...
2019-09-16 18:24:24 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1685.tsv.bgz
  merge time: 3.216s


Region completed: region1685


2019-09-16 18:24:29 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1687.tsv.bgz
  merge time: 5.642s


Region completed: region1687


2019-09-16 18:24:32 Hail: INFO: merging 2 files totalling 105.4M...
2019-09-16 18:24:34 Hail: INFO: wrote matrix with 5491 rows and 5491 columns as 3 blocks of size 4096 to gs://ukb_data/phenomexcan/ld/bm/chr22_region1680.bm
2019-09-16 18:24:35 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1678.tsv.bgz
  merge time: 2.967s


Region completed: region1678


2019-09-16 18:24:48 Hail: INFO: merging 2 files totalling 114.5M...
2019-09-16 18:24:51 Hail: INFO: merging 2 files totalling 166.2M...
2019-09-16 18:24:52 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1679.tsv.bgz
  merge time: 4.163s


Region completed: region1679


2019-09-16 18:24:57 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1695.tsv.bgz
  merge time: 5.768s


Region completed: region1695


2019-09-16 18:25:17 Hail: INFO: merging 2 files totalling 128.9M...


Region completed: region1680
CPU times: user 10.3 s, sys: 348 ms, total: 10.7 s
Wall time: 14min 14s


2019-09-16 18:25:21 Hail: INFO: while writing:
    gs://ukb_data/phenomexcan/ld/tsv/chr22_region1680.tsv.bgz
  merge time: 4.663s


In [83]:
# Verify the run: list the exported TSV files (despite the variable name,
# these are the .tsv.bgz exports, not the BlockMatrix directories) and check
# that there is one per input region file.
# NOTE(review): only the count is compared, so files left over from an
# earlier run of the same chromosome would also satisfy this assertion.
bm_list = list_blobs('ukb_data', prefix=f'phenomexcan/ld/tsv/chr{SELECTED_CHR}_', suffix='.tsv.bgz', delimiter='/')
display(len(bm_list))
display(bm_list[:5])
assert len(bm_list) == len(variant_files), (len(bm_list), len(variant_files))

24

['gs://ukb_data/phenomexcan/ld/tsv/chr22_region1676.tsv.bgz',
 'gs://ukb_data/phenomexcan/ld/tsv/chr22_region1677.tsv.bgz',
 'gs://ukb_data/phenomexcan/ld/tsv/chr22_region1678.tsv.bgz',
 'gs://ukb_data/phenomexcan/ld/tsv/chr22_region1679.tsv.bgz',
 'gs://ukb_data/phenomexcan/ld/tsv/chr22_region1680.tsv.bgz']