In [1]:
import hail as hl
hl.init()

Running on Apache Spark version 2.4.1
SparkUI available at http://wm598-921.fios-router.home:4040
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version 0.2.61-3c86d3ba497a
LOGGING: writing to /Users/shifa/dev/hail_elasticsearch_pipelines/genome_sv_pipeline/hail-20210405-2000-0.2.61-3c86d3ba497a.log


In [2]:
from hail.plot import show
from pprint import pprint
hl.plot.output_notebook()

In [3]:
# hl.import_vcf('vcf/sv.vcf.gz', force=True, reference_genome='GRCh38').write('vcf/svs.mt', overwrite=True)

In [3]:
mt = hl.read_matrix_table('vcf/svs.mt')

In [4]:
mt.count()

(145568, 714)

In [5]:
import subprocess
GS_SAMPLE_PATH = 'gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project_guid}/{project_guid}_{file_ext}'
def _get_gs_samples(project_guid, file_ext, expected_header, sample_type):
    """
    Get sample metadata from files in google cloud

    :param project_guid: seqr project identifier
    :param file_ext: extension for the desired sample file
    :param expected_header: expected header to validate file
    :param sample_type: sample type (WES/WGS)
    :return: parsed data from the sample file as a list of lists
    """
    file = GS_SAMPLE_PATH.format(project_guid=project_guid, sample_type=sample_type, file_ext=file_ext)
    process = subprocess.Popen(
        'gsutil cat {}'.format(file), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    if process.wait() != 0:
        return None
    header = next(process.stdout).decode('utf-8')
    if header.strip() != expected_header:
        raise Exception('Missing header for sample file, expected "{}" but found {}'.format(
            expected_header, header))
    return [line.decode('utf-8').strip().split('\t') for line in process.stdout]


def get_sample_subset(project_guid, sample_type):
    """
    Get sample id subset for a given project

    :param project_guid: seqr project identifier
    :param sample_type: sample type (WES/WGS)
    :return: set of sample ids
    """
    subset = _get_gs_samples(project_guid, file_ext='ids.txt', sample_type=sample_type, expected_header='s')
    if not subset:
        raise Exception('No sample subset file found')
    return {row[0] for row in subset}

<br/>
Subset the samples to the project `R0332_cmg_estonia_wgs`

In [6]:
sample_subset = get_sample_subset('R0332_cmg_estonia_wgs', 'WGS')
subset = hl.literal(sample_subset)
mt1 = mt.filter_cols(subset.contains(mt['s']))
missing_samples = sample_subset - {col.s for col in mt1.key_cols_by().cols().collect()}
print('{} missing samples and the first 10 of them are: {}'.format(len(missing_samples), list(missing_samples)[:10]))

61 missing samples and the first 10 of them are: ['HK061-0159_D1', 'OUN_HK124_001_D1', 'HK060-0156_1', 'HK115-002_1', 'HK079-001_D2', 'OUN_HK126_001_D1', 'OUN_HK124_003_D1', 'HK061-0158_D1', 'HK060-0155_1', 'HK112-003_1']


In [7]:
mt1.GT[0].show()

Unnamed: 0_level_0,Unnamed: 1_level_0,'HK010_0026','HK015_0037','HK031_0079','HK031_0080','HK069-0178_1'
locus,alleles,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
locus<GRCh38>,array<str>,int32,int32,int32,int32,int32
chr1:10000,"[""N"",""<DUP>""]",0.0,0.0,0.0,1.0,0.0
chr1:10000,"[""N"",""<DUP>""]",0.0,1.0,0.0,0.0,0.0
chr1:10602,"[""N"",""<BND>""]",0.0,0.0,0.0,0.0,0.0
chr1:41950,"[""N"",""<DUP>""]",0.0,0.0,0.0,0.0,0.0
chr1:44000,"[""N"",""<DUP>""]",0.0,0.0,0.0,0.0,0.0
chr1:44250,"[""N"",""<DUP>""]",0.0,0.0,0.0,0.0,0.0
chr1:51400,"[""N"",""<DEL>""]",0.0,0.0,0.0,0.0,0.0
chr1:52600,"[""N"",""<CNV>""]",,,,,
chr1:66234,"[""N"",""<BND>""]",0.0,1.0,0.0,0.0,0.0
chr1:66350,"[""N"",""<DEL>""]",0.0,0.0,0.0,0.0,0.0


In [8]:
samples = hl.agg.filter(mt1.GT.is_non_ref(), hl.agg.collect(hl.struct(id=mt1.s, gq=mt1.GQ, num_alt=mt1.GT[0]+mt1.GT[1], cn=mt1.RD_CN)))
mt2a = mt1.annotate_rows(samples=samples)
mt2 = mt2a.filter_rows(mt2a.samples != hl.empty_array(hl.dtype('struct{id: str, gq: int32, num_alt: int32, cn: int32}')))
mt2.count()

(67275, 106)

In [9]:
mtx = mt2.filter_entries(hl.is_defined(mt2.RD_CN))
rd_cn_cnt = sorted(mtx.aggregate_entries(hl.agg.counter(mtx.RD_CN)).items())
print('{} different RD_CNs, the counter for the first 10 RD_CNs are: {}.'.format(len(rd_cn_cnt), rd_cn_cnt[:10]))
print('Total RD_CNs>9: {}, max RD_CN: {}'.format(sum(rd_cn_cnt[10:][1]), rd_cn_cnt[-1][0]))

1181 different RD_CNs, the counter for the first 10 RD_CNs are: [(0, 51131), (1, 425325), (2, 4177497), (3, 315015), (4, 79143), (5, 32265), (6, 18043), (7, 11620), (8, 7940), (9, 6123)].
Total RD_CNs>9: 3797, max RD_CN: 4066


<br/>
The distributions of the values of the RD_CNs

In [10]:
dp_hist = mt2.aggregate_entries(hl.expr.aggregators.hist(mt2.RD_CN, start=0, end=10, bins=11))
p = hl.plot.histogram(dp_hist, legend='RD_CN', title='RD_CN Histogram')
show(p)

<br/>
Statistics for differet GT values

In [11]:
mt2.aggregate_entries(hl.agg.counter(mt2.GT))

{Call(alleles=[0, 0], phased=False): 6033770,
 Call(alleles=[0, 1], phased=False): 932341,
 Call(alleles=[1, 1], phased=False): 157472,
 None: 7567}

<br/>
RD_CN distributions for different GTs:

In [16]:
cnts={}
mtx = mt2.filter_entries(hl.is_defined(mt2.RD_CN))
for alts in range(3):
    field_name = 'GT'+str(alts)+'_RD_CN'
    mtx = mtx.annotate_entries(**{field_name: hl.if_else(mtx.GT.n_alt_alleles()==alts, mtx.RD_CN, -1)})
    cnts = sorted(mtx.aggregate_entries(hl.agg.counter(mtx[field_name])).items())
    cnts[11] = ('>9', sum(cnts[11:][1]))
    print('{}: {}'.format(field_name, cnts[:12]))

for field_name in ['GT0_RD_CN', 'GT1_RD_CN', 'GT2_RD_CN']:
    mtxx = mtx.filter_entries(mtx[field_name]>=0)
    dp_hist = mtxx.aggregate_entries(hl.expr.aggregators.hist(mtxx[field_name], start=0, end=10, bins=11))
    p = hl.plot.histogram(dp_hist, legend=field_name, title=field_name+' Histogram')
    show(p)

GT0_RD_CN: [(-1, 660908), (0, 8490), (1, 223817), (2, 3953419), (3, 199083), (4, 43557), (5, 20124), (6, 12082), (7, 7929), (8, 5431), (9, 4435), ('>9', 2885)]
GT1_RD_CN: [(-1, 4624911), (0, 14630), (1, 166020), (2, 206596), (3, 108528), (4, 22531), (5, 7503), (6, 3832), (7, 2535), (8, 1809), (9, 1244), ('>9', 668)]
GT2_RD_CN: [(-1, 5050345), (0, 28011), (1, 35488), (2, 17482), (3, 7404), (4, 13055), (5, 4638), (6, 2129), (7, 1156), (8, 700), (9, 444), ('>9', 266)]


In [22]:
mtx = mt2.filter_entries(hl.is_defined(mt2.RD_CN))
max_cn = 5
for cn in range(max_cn):
    field_name = 'Counters for RD_CN={} per genotype and SV type:'.format(cn) if cn<max_cn-1 else 'Counters for RD_CN>={} per genotype and SV type'.format(cn)
    mtx = mtx.annotate_entries(**{field_name: hl.if_else(mtx.RD_CN==cn if cn<max_cn-1 else mtx.RD_CN>=cn , hl.struct(alts=mtx.GT.n_alt_alleles(), svtype=mtx.info.SVTYPE),
                                                         hl.struct(alts=-1, svtype='NA'))})
    cnts = mtx.aggregate_entries(hl.agg.counter(mtx[field_name]))
    cnts = sorted(cnts.items(), key=lambda item: item[0].alts)
    pprint((field_name, cnts[1:]))

('Counters for RD_CN=0 per genotype and SV type:',
 [(Struct(alts=0, svtype='DUP'), 3171),
  (Struct(alts=0, svtype='CPX'), 4),
  (Struct(alts=0, svtype='INS'), 16),
  (Struct(alts=0, svtype='DEL'), 5299),
  (Struct(alts=1, svtype='DUP'), 116),
  (Struct(alts=1, svtype='DEL'), 14487),
  (Struct(alts=1, svtype='INS'), 27),
  (Struct(alts=2, svtype='DUP'), 8),
  (Struct(alts=2, svtype='DEL'), 28002),
  (Struct(alts=2, svtype='CPX'), 1)])
('Counters for RD_CN=1 per genotype and SV type:',
 [(Struct(alts=0, svtype='DUP'), 86321),
  (Struct(alts=0, svtype='CPX'), 191),
  (Struct(alts=0, svtype='INS'), 402),
  (Struct(alts=0, svtype='DEL'), 136903),
  (Struct(alts=1, svtype='DUP'), 2011),
  (Struct(alts=1, svtype='DEL'), 163754),
  (Struct(alts=1, svtype='CPX'), 43),
  (Struct(alts=1, svtype='INS'), 212),
  (Struct(alts=2, svtype='DUP'), 11),
  (Struct(alts=2, svtype='DEL'), 35471),
  (Struct(alts=2, svtype='INS'), 6)])
('Counters for RD_CN=2 per genotype and SV type:',
 [(Struct(alts=0, svt

In [13]:
rows = mt1.rows()
rows.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,17000,,"[""RD""]",,,7000,"""DUP""",,,,,,,,,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[370],[2.59e-01],714.0,415.0,228.0,71.0,0.581,0.319,0.0994,772,[214],[2.77e-01],386.0,216.0,126.0,44.0,0.56,0.326,0.114,656,[156],[2.38e-01],328.0,199.0,102.0,27.0,0.607,0.311,0.0823,,
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_2""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,53500,,"[""BAF"",""RD""]",,,43500,"""DUP""",,,,,,,"[""FAM138A"",""MIR1302-2HG""]",,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[70],[4.90e-02],714.0,649.0,60.0,5.0,0.909,0.084,0.007,772,[46],[5.96e-02],386.0,344.0,38.0,4.0,0.891,0.0984,0.0104,656,[24],[3.66e-02],328.0,305.0,22.0,1.0,0.93,0.0671,0.00305,,
chr1:10602,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_1""",461.0,"{""UNRESOLVED"",""UNSTABLE_AF_PCRMINUS""}","[""manta""]","""chr12""",,,10602,10546.0,"[""SR""]",,"""+-""",-1,"""BND""","""SINGLE_ENDER_+-""",,,,,,,,,,,,,,,,,,,,False,,1428,[88],[6.16e-02],714.0,626.0,88.0,0.0,0.877,0.123,0.0,772,[51],[6.61e-02],386.0,335.0,51.0,0.0,0.868,0.132,0.0,656,[37],[5.64e-02],328.0,291.0,37.0,0.0,0.887,0.113,0.0,"""gnomAD-SV_v2.1_BND_1_1""",0.00679
chr1:41950,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_3""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,52000,,"[""BAF"",""RD""]",,,10050,"""DUP""",,,,,,,,,,,,,,,,,,,,"[""OR4F5""]",True,,1428,[28],[1.96e-02],714.0,687.0,26.0,1.0,0.962,0.0364,0.0014,772,[15],[1.94e-02],386.0,371.0,15.0,0.0,0.961,0.0389,0.0,656,[13],[1.98e-02],328.0,316.0,11.0,1.0,0.963,0.0335,0.00305,"""gnomAD-SV_v2.1_DUP_1_1""",0.069
chr1:44000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_4""",999.0,"{""UNSTABLE_AF_PCRMINUS"",""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,66000,,"[""RD""]",,,22000,"""DUP""",,,,,,,,"[""OR4F5""]",,,,,,,,,,"[""DNase""]",,,False,,1428,[96],[6.72e-02],714.0,641.0,50.0,23.0,0.898,0.07,0.0322,772,[54],[6.99e-02],386.0,345.0,28.0,13.0,0.894,0.0725,0.0337,656,[42],[6.40e-02],328.0,296.0,22.0,10.0,0.902,0.0671,0.0305,,
chr1:44250,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_5""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,116000,,"[""BAF"",""RD""]",,,71750,"""DUP""",,,,,,"[""OR4F5""]","[""AL627309.3""]",,"[""AL627309.1""]",,,,,,,,,"[""DNase""]",,,False,,1428,[82],[5.74e-02],714.0,646.0,54.0,14.0,0.905,0.0756,0.0196,772,[43],[5.57e-02],386.0,351.0,27.0,8.0,0.909,0.0699,0.0207,656,[39],[5.95e-02],328.0,295.0,27.0,6.0,0.899,0.0823,0.0183,,
chr1:51400,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,64000,,"[""RD""]",,,12600,"""DEL""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,1428,[306],[2.14e-01],714.0,443.0,236.0,35.0,0.62,0.331,0.049,772,[156],[2.02e-01],386.0,246.0,124.0,16.0,0.637,0.321,0.0415,656,[150],[2.29e-01],328.0,197.0,112.0,19.0,0.601,0.341,0.0579,,
chr1:52600,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_1""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,58000,,"[""RD""]",,,5400,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:66234,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_2""",807.0,"{""UNRESOLVED""}","[""manta""]","""chr19""",,,66234,108051.0,"[""PE""]",,"""-+""",-1,"""BND""","""SINGLE_ENDER_-+""",,,,,,,,,,,,,,,,,,,,False,,1428,[236],[1.65e-01],714.0,514.0,164.0,36.0,0.72,0.23,0.0504,772,[131],[1.70e-01],386.0,275.0,91.0,20.0,0.712,0.236,0.0518,656,[105],[1.60e-01],328.0,239.0,73.0,16.0,0.729,0.223,0.0488,,
chr1:66350,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_2""",774.0,"{""FAIL_OUTLIER_REMOVAL""}","[""manta""]","""chr1""",,,66427,,"[""SR""]",,,77,"""DEL""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[2],[1.40e-03],714.0,712.0,2.0,0.0,0.997,0.0028,0.0,772,[1],[1.30e-03],386.0,385.0,1.0,0.0,0.997,0.00259,0.0,656,[1],[1.52e-03],328.0,327.0,1.0,0.0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_4""",0.000555


In [14]:
rows.info.get('PROTEIN_CODING__COPY_GAIN').dtype == hl.dtype('array<str>')

True

In [15]:
r1 = rows.annotate(t=hl.if_else(hl.is_defined(rows.info.PROTEIN_CODING__COPY_GAIN),
                           hl.len(rows.info.PROTEIN_CODING__COPY_GAIN), 0))
r1.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 70_level_0
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF,t
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64,int32
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,17000,,"[""RD""]",,,7000,"""DUP""",,,,,,,,,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[370],[2.59e-01],714.0,415.0,228.0,71.0,0.581,0.319,0.0994,772,[214],[2.77e-01],386.0,216.0,126.0,44.0,0.56,0.326,0.114,656,[156],[2.38e-01],328.0,199.0,102.0,27.0,0.607,0.311,0.0823,,,0
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_2""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,53500,,"[""BAF"",""RD""]",,,43500,"""DUP""",,,,,,,"[""FAM138A"",""MIR1302-2HG""]",,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[70],[4.90e-02],714.0,649.0,60.0,5.0,0.909,0.084,0.007,772,[46],[5.96e-02],386.0,344.0,38.0,4.0,0.891,0.0984,0.0104,656,[24],[3.66e-02],328.0,305.0,22.0,1.0,0.93,0.0671,0.00305,,,0
chr1:10602,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_1""",461.0,"{""UNRESOLVED"",""UNSTABLE_AF_PCRMINUS""}","[""manta""]","""chr12""",,,10602,10546.0,"[""SR""]",,"""+-""",-1,"""BND""","""SINGLE_ENDER_+-""",,,,,,,,,,,,,,,,,,,,False,,1428,[88],[6.16e-02],714.0,626.0,88.0,0.0,0.877,0.123,0.0,772,[51],[6.61e-02],386.0,335.0,51.0,0.0,0.868,0.132,0.0,656,[37],[5.64e-02],328.0,291.0,37.0,0.0,0.887,0.113,0.0,"""gnomAD-SV_v2.1_BND_1_1""",0.00679,0
chr1:41950,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_3""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,52000,,"[""BAF"",""RD""]",,,10050,"""DUP""",,,,,,,,,,,,,,,,,,,,"[""OR4F5""]",True,,1428,[28],[1.96e-02],714.0,687.0,26.0,1.0,0.962,0.0364,0.0014,772,[15],[1.94e-02],386.0,371.0,15.0,0.0,0.961,0.0389,0.0,656,[13],[1.98e-02],328.0,316.0,11.0,1.0,0.963,0.0335,0.00305,"""gnomAD-SV_v2.1_DUP_1_1""",0.069,0
chr1:44000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_4""",999.0,"{""UNSTABLE_AF_PCRMINUS"",""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,66000,,"[""RD""]",,,22000,"""DUP""",,,,,,,,"[""OR4F5""]",,,,,,,,,,"[""DNase""]",,,False,,1428,[96],[6.72e-02],714.0,641.0,50.0,23.0,0.898,0.07,0.0322,772,[54],[6.99e-02],386.0,345.0,28.0,13.0,0.894,0.0725,0.0337,656,[42],[6.40e-02],328.0,296.0,22.0,10.0,0.902,0.0671,0.0305,,,0
chr1:44250,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_5""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,116000,,"[""BAF"",""RD""]",,,71750,"""DUP""",,,,,,"[""OR4F5""]","[""AL627309.3""]",,"[""AL627309.1""]",,,,,,,,,"[""DNase""]",,,False,,1428,[82],[5.74e-02],714.0,646.0,54.0,14.0,0.905,0.0756,0.0196,772,[43],[5.57e-02],386.0,351.0,27.0,8.0,0.909,0.0699,0.0207,656,[39],[5.95e-02],328.0,295.0,27.0,6.0,0.899,0.0823,0.0183,,,1
chr1:51400,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,64000,,"[""RD""]",,,12600,"""DEL""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,1428,[306],[2.14e-01],714.0,443.0,236.0,35.0,0.62,0.331,0.049,772,[156],[2.02e-01],386.0,246.0,124.0,16.0,0.637,0.321,0.0415,656,[150],[2.29e-01],328.0,197.0,112.0,19.0,0.601,0.341,0.0579,,,0
chr1:52600,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_1""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,58000,,"[""RD""]",,,5400,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,,0
chr1:66234,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_2""",807.0,"{""UNRESOLVED""}","[""manta""]","""chr19""",,,66234,108051.0,"[""PE""]",,"""-+""",-1,"""BND""","""SINGLE_ENDER_-+""",,,,,,,,,,,,,,,,,,,,False,,1428,[236],[1.65e-01],714.0,514.0,164.0,36.0,0.72,0.23,0.0504,772,[131],[1.70e-01],386.0,275.0,91.0,20.0,0.712,0.236,0.0518,656,[105],[1.60e-01],328.0,239.0,73.0,16.0,0.729,0.223,0.0488,,,0
chr1:66350,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_2""",774.0,"{""FAIL_OUTLIER_REMOVAL""}","[""manta""]","""chr1""",,,66427,,"[""SR""]",,,77,"""DEL""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[2],[1.40e-03],714.0,712.0,2.0,0.0,0.997,0.0028,0.0,772,[1],[1.30e-03],386.0,385.0,1.0,0.0,0.997,0.00259,0.0,656,[1],[1.52e-03],328.0,327.0,1.0,0.0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_4""",0.000555,0


In [16]:
rows.info.PROTEIN_CODING__COPY_GAIN.dtype

dtype('array<str>')

In [17]:
gene_cols = [gene_col for gene_col in rows.info if gene_col.startswith('PROTEIN_CODING__') and rows.info.get(gene_col).dtype==hl.dtype('array<str>')]
print(gene_cols)
r1 = rows.annotate(t=[hl.if_else(hl.is_defined(rows.info.get(gene)),
                           hl.len(rows.info.get(gene)), 0) for gene in gene_cols])
r1.aggregate(hl.agg.array_agg(lambda e: hl.agg.max(e), r1.t))

['PROTEIN_CODING__LOF', 'PROTEIN_CODING__DUP_LOF', 'PROTEIN_CODING__COPY_GAIN', 'PROTEIN_CODING__DUP_PARTIAL', 'PROTEIN_CODING__MSV_EXON_OVR', 'PROTEIN_CODING__INTRONIC', 'PROTEIN_CODING__INV_SPAN', 'PROTEIN_CODING__UTR', 'PROTEIN_CODING__NEAREST_TSS', 'PROTEIN_CODING__PROMOTER']


[1622, 3, 161, 22, 8, 84, 174, 2, 2, 8]

In [18]:
r2 = r1.filter(r1.t[0]>2)
r2.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 70_level_0
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF,t
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64,array<int32>
chr1:12799082,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_333""",305.0,{},"[""depth"",""wham""]","""chr1""",,,12930898,,"[""PE"",""RD"",""SR""]",,,131816,"""DEL""",,"[""HNRNPCL1"",""PRAMEF10"",""PRAMEF11"",""PRAMEF2"",""PRAMEF4"",""PRAMEF7""]",,,,,,,,,,,"[""LINC01784""]",,,,,"[""DNase""]","[""DNase"",""Tommerup_TADanno""]",,False,,1428,[41],[2.87e-02],714,675,37,2,0.945,0.0518,0.0028,772,[24],[3.11e-02],386,363,22,1,0.94,0.057,0.00259,656,[17],[2.59e-02],328,312,15,1,0.951,0.0457,0.00305,,,"[6,0,0,0,0,0,0,0,0,0]"
chr1:12838726,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_338""",999.0,{},"[""depth"",""manta""]","""chr1""",,,12894031,,"[""PE"",""RD"",""SR""]",,,55305,"""DEL""",,"[""HNRNPCL1"",""PRAMEF10"",""PRAMEF2"",""PRAMEF4""]",,,,,,,,,,,,,,,,"[""DNase""]","[""Tommerup_TADanno""]",,False,,1428,[37],[2.59e-02],714,677,37,0,0.948,0.0518,0.0,772,[22],[2.85e-02],386,364,22,0,0.943,0.057,0.0,656,[15],[2.29e-02],328,313,15,0,0.954,0.0457,0.0,"""gnomAD-SV_v2.1_DEL_1_1083""",0.0159,"[4,0,0,0,0,0,0,0,0,0]"
chr1:13008000,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_344""",1.0,{},"[""depth""]","""chr1""",,,13167000,,"[""RD""]",,,159000,"""DEL""",,"[""HNRNPCL2"",""HNRNPCL3"",""HNRNPCL4"",""PRAMEF25"",""PRAMEF26"",""PRAMEF27""]",,,,,,,,,,,"[""AC245056.1""]",,,,,"[""DNase""]",,,False,,1428,[4],[2.80e-03],714,711,2,1,0.996,0.0028,0.0014,772,[1],[1.30e-03],386,385,1,0,0.997,0.00259,0.0,656,[3],[4.57e-03],328,326,1,1,0.994,0.00305,0.00305,,,"[6,0,0,0,0,0,0,0,0,0]"
chr1:13074384,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_347""",999.0,"{""FAIL_OUTLIER_REMOVAL""}","[""depth""]","""chr1""",,,13151000,,"[""RD""]",,,76616,"""DEL""",,"[""HNRNPCL2"",""PRAMEF25"",""PRAMEF26""]",,,,,,,,,,,"[""AC245056.1""]",,,,,,,,False,,1428,[1],[7.00e-04],714,713,1,0,0.999,0.0014,0.0,772,[0],[0.00e+00],386,386,0,0,1.0,0.0,0.0,656,[1],[1.52e-03],328,327,1,0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_1108""",0.0162,"[3,0,0,0,0,0,0,0,0,0]"
chr1:13260192,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_352""",143.0,{},"[""depth""]","""chr1""",,,13316384,,"[""RD""]",,,56192,"""DEL""",,"[""PRAMEF33"",""PRAMEF5"",""PRAMEF8""]",,,,,,,,,,,,,,"[""PRAMEF15""]",,"[""DNase""]",,,False,,1428,[33],[2.31e-02],714,681,33,0,0.954,0.0462,0.0,772,[17],[2.20e-02],386,369,17,0,0.956,0.044,0.0,656,[16],[2.44e-02],328,312,16,0,0.951,0.0488,0.0,"""gnomAD-SV_v2.1_DEL_1_1122""",0.00207,"[3,0,0,0,0,0,0,1,0,0]"
chr1:16619297,"[""N"",""<CPX>""]","""CMG.phase1_CMG_CPX_chr1_12""",999.0,{},"[""manta""]","""chr1""","[""DEL_chr1:16619297-16725244"",""INV_chr1:16725244-16951955"",""DEL_chr1:16951955-234776440""]","""delINVdel""",234776440,,"[""PE"",""SR""]",,,218157143,"""CPX""",,"[""A3GALT2"",""ABCA4"",""ABCB10"",""ABCD3"",""ABL2"",""AC092017.3"",""AC093155.3"",""AC093423.3"",""AC098484.3"",""AC114490.2"",""AC114490.3"",""AC118549.1"",""AC118553.2"",""AC119674.2"",""AC119676.1"",""AC239799.1"",""AC243547.3"",""AC243756.1"",""ACADM"",""ACBD3"",""ACBD6"",""ACKR1"",""ACOT11"",""ACP6"",""ACTA1"",""ACTL8"",""ADAM30"",""ADAMTS4"",""ADAMTSL4"",""ADAR"",""ADCY10"",""ADGRB2"",""ADGRL2"",""ADGRL4"",""ADIPOR1"",""ADORA1"",""ADORA3"",""ADPRHL2"",""AGBL4"",""AGL"",""AGO1"",""AGO3"",""AGO4"",""AGT"",""AHCYL1"",""AHDC1"",""AK4"",""AK5"",""AKIRIN1"",""AKNAD1"",""AKR1A1"",""AKR7A2"",""AKR7A3"",""AL031281.2"",""AL033529.1"",""AL049557.1"",""AL080251.1"",""AL117348.2"",""AL136373.1"",""AL136454.1"",""AL139011.2"",""AL139142.2"",""AL162596.1"",""AL357673.1"",""AL358075.4"",""AL358472.6"",""AL358472.7"",""AL391650.1"",""AL445685.3"",""AL449266.1"",""AL451062.3"",""AL512785.2"",""AL590132.1"",""AL591806.3"",""AL672043.1"",""AL691442.1"",""AL713999.1"",""ALDH4A1"",""ALDH9A1"",""ALG14"",""ALG6"",""ALPL"",""ALX3"",""AMIGO1"",""AMPD1"",""AMPD2"",""AMY1A"",""AMY1B"",""AMY1C"",""AMY2A"",""AMY2B"",""ANGEL2"",""ANGPTL1"",""ANGPTL3"",""ANKRD13C"",""ANKRD34A"",""ANKRD35"",""ANKRD45"",""ANP32E"",""ANXA9"",""AP4B1"",""APCS"",""APH1A"",""APOA2"",""APOBEC4"",""AQP10"",""ARF1"",""ARHGAP29"",""ARHGAP30"",""ARHGEF10L"",""ARHGEF11"",""ARID1A"",""ARL8A"",""ARMH1"",""ARNT"",""ARPC5"",""ARTN"",""ARV1"",""ASAP3"",""ASB17"",""ASCL5"",""ASH1L"",""ASPM"",""ASTN1"",""ATF3"",""ATF6"",""ATG4C"",""ATP13A2"",""ATP1A1"",""ATP1A2"",""ATP1A4"",""ATP1B1"",""ATP2B4"",""ATP5IF1"",""ATP5PB"",""ATP6V0B"",""ATP6V1G3"",""ATP8B2"",""ATPAF1"",""ATXN7L2"",""AUNIP"",""AXDND1"",""B3GALT2"",""B4GALT2"",""B4GALT3"",""BARHL2"",""BATF3"",""BCAN"",""BCAR3"",""BCAS2"",""BCL10"",""BCL9"",""BEND5"",""BEST4"",""BGLAP"",""BLZF1"",""BMP8A"",""BMP8B"",""BNIPL"",""BOLA1"",""BPNT1"",""BRDT"",""BRINP2"",""BRINP3"",""BROX"",""BSDC1"",""BSND"",""BTBD8"",""BTF3L4"",""BTG2"",""C1QA"",""C1QB"",""C1QC"",""C1orf105"",""C1orf109"",""C1orf115"",""C1orf116"",""C1orf122"",""C1orf123"",""C1orf131"",""C1orf141"",""C1orf146"",""C1orf162"",""C1orf185"",""C1orf189"",""C1orf194"",""C1orf198"",""C1orf21"",""C1orf210"",""C1orf216"",""C1orf226"",""C1orf232"",""C1orf43"",""C1orf50"",""C1orf53"",""C1orf54"",""C1orf56"",""C1orf68"",""C1orf74"",""C1orf87"",""C1orf94"",""C2CD4D"",""C4BPA"",""C4BPB"",""C8A"",""C8B"",""CA14"",""CACHD1"",""CACNA1E"",""CACNA1S"",""CACYBP"",""CADM3"",""CAMK1G"",""CAMK2N1"",""CAMSAP2"",""CAP1"",""CAPN2"",""CAPN8"",""CAPN9"",""CAPZA1"",""CAPZB"",""CASQ1"",""CASQ2"",""CATSPER4"",""CC2D1B"",""CCDC163"",""CCDC17"",""CCDC18"",""CCDC181"",""CCDC185"",""CCDC24"",""CCDC28B"",""CCDC30"",""CCSAP"",""CCT3"",""CD101"",""CD160"",""CD164L2"",""CD1A"",""CD1B"",""CD1C"",""CD1D"",""CD1E"",""CD2"",""CD244"",""CD247"",""CD34"",""CD46"",""CD48"",""CD52"",""CD53"",""CD55"",""CD58"",""CD84"",""CDA"",""CDC14A"",""CDC20"",""CDC42"",""CDC42BPA"",""CDC42SE1"",""CDC7"",""CDC73"",""CDCA8"",""CDCP2"",""CDK18"",""CDKN2C"",""CELA3A"",""CELA3B"",""CELF3"",""CELSR2"",""CENPF"",""CENPL"",""CEP350"",""CEP85"",""CEPT1"",""CERS2"",""CFAP126"",""CFAP45"",""CFAP57"",""CFH"",""CFHR1"",""CFHR2"",""CFHR3"",""CFHR4"",""CFHR5"",""CGN"",""CHD1L"",""CHI3L1"",""CHI3L2"",""CHIA"",""CHRNB2"",""CHTOP"",""CIART"",""CITED4"",""CKS1B"",""CLCA1"",""CLCA2"",""CLCA4"",""CLCC1"",""CLDN19"",""CLIC4"",""CLK2"",""CLSPN"",""CMPK1"",""CNIH4"",""CNKSR1"",""CNN3"",""CNR2"",""CNTN2"",""COA6"",""COA7"",""COG2"",""COL11A1"",""COL16A1"",""COL24A1"",""COL8A2"",""COL9A2"",""COLGALT2"",""COP1"",""COPA"",""COQ8A"",""CPT2"",""CR1"",""CR1L"",""CR2"",""CRABP2"",""CRB1"",""CRCT1"",""CREB3L4"",""CREG1"",""CRNN"",""CRP"",""CRTC2"",""CRYBG2"",""CRYZ"",""CSDE1"",""CSF1"",""CSF3R"",""CSMD2"",""CSRP1"",""CTBS"",""CTH"",""CTPS1"",""CTSE"",""CTSK"",""CTSS"",""CTTNBP2NL"",""CTXND2"",""CYB561D1"",""CYB5R1"",""CYB5RL"",""CYP2J2"",""CYP4A11"",""CYP4A22"",""CYP4X1"",""CYP4Z1"",""CYR61"",""DAP3"",""DARS2"",""DBT"",""DCAF6"",""DCAF8"",""DCDC2B"",""DCLRE1B"",""DCST1"",""DCST2"",""DDAH1"",""DDOST"",""DDR2"",""DDX20"",""DDX59"",""DEDD"",""DEGS1"",""DENND1B"",""DENND2C"",""DENND2D"",""DENND4B"",""DEPDC1"",""DHCR24"",""DHDDS"",""DHX9"",""DIO1"",""DIRAS3"",""DISC1"",""DLGAP3"",""DMAP1"",""DMBX1"",""DMRTA2"",""DMRTB1"",""DNAH14"",""DNAJC6"",""DNAJC8"",""DNALI1"",""DNASE2B"",""DNM3"",""DNTTIP2"",""DOCK7"",""DPH2"",""DPM3"",""DPT"",""DPYD"",""DR1"",""DRAM2"",""DSTYK"",""DTL"",""DUSP10"",""DUSP12"",""DUSP23"",""DUSP27"",""E2F2"",""ECE1"",""ECHDC2"",""ECM1"",""EDEM3"",""EDN2"",""EFCAB14"",""EFCAB7"",""EFNA1"",""EFNA3"",""EFNA4"",""EGLN1"",""EIF2B3"",""EIF2D"",""EIF3I"",""EIF4G3"",""ELAVL4"",""ELF3"",""ELK4"",""ELOA"",""ELOVL1"",""EMC1"",""ENAH"",""ENSA"",""EPB41"",""EPHA10"",""EPHA8"",""EPHB2"",""EPHX1"",""EPHX4"",""EPRS"",""EPS15"",""EPS8L3"",""ERI3"",""ERICH3"",""ERMAP"",""ESRRG"",""ETNK2"",""ETV3"",""ETV3L"",""EVA1B"",""EVI5"",""EXO5"",""EXOC8"",""EXTL1"",""EXTL2"",""EYA3"",""F11R"",""F13B"",""F3"",""F5"",""FAAH"",""FABP3"",""FAF1"",""FAM102B"",""FAM110D"",""FAM129A"",""FAM151A"",""FAM163A"",""FAM167B"",""FAM177B"",""FAM183A"",""FAM189B"",""FAM19A3"",""FAM20B"",""FAM43B"",""FAM69A"",""FAM71A"",""FAM72B"",""FAM72C"",""FAM72D"",""FAM76A"",""FAM78B"",""FAM89A"",""FASLG"",""FBXO28"",""FCAMR"",""FCER1A"",""FCER1G"",""FCGR1A"",""FCGR1B"",""FCGR2A"",""FCGR2B"",""FCGR3A"",""FCGR3B"",""FCMR"",""FCN3"",""FCRL1"",""FCRL2"",""FCRL3"",""FCRL4"",""FCRL5"",""FCRL6"",""FCRLA"",""FCRLB"",""FDPS"",""FGGY"",""FGR"",""FHL3"",""FLAD1"",""FLG"",""FLG2"",""FLVCR1"",""FMO1"",""FMO2"",""FMO3"",""FMO4"",""FMO5"",""FMOD"",""FNBP1L"",""FNDC5"",""FNDC7"",""FOXD2"",""FOXD3"",""FOXE3"",""FOXJ3"",""FPGT-TNNI3K"",""FRRS1"",""FUBP1"",""FUCA1"",""FYB2"",""G0S2"",""GABPB2"",""GADD45A"",""GALE"",""GALNT2"",""GATAD2B"",""GBA"",""GBP1"",""GBP2"",""GBP3"",""GBP4"",""GBP5"",""GBP6"",""GBP7"",""GCLM"",""GDAP2"",""GFI1"",""GJA4"",""GJA5"",""GJA8"",""GJA9"",""GJA9-MYCBP"",""GJB3"",""GJB4"",""GJB5"",""GJC2"",""GLIS1"",""GLMN"",""GLMP"",""GLRX2"",""GLUL"",""GMEB1"",""GNAI3"",""GNAT2"",""GNG12"",""GNG5"",""GNL2"",""GNPAT"",""GOLPH3L"",""GOLT1A"",""GON4L"",""GORAB"",""GPA33"",""GPATCH2"",""GPATCH3"",""GPATCH4"",""GPBP1L1"",""GPN2"",""GPR161"",""GPR25"",""GPR3"",""GPR37L1"",""GPR52"",""GPR61"",""GPR88"",""GPR89A"",""GPSM2"",""GPX7"",""GRHL3"",""GRIK3"",""GSTM2"",""GSTM3"",""GSTM4"",""GTF2B"",""GUCA2A"",""GUCA2B"",""GUK1"",""H3F3A"",""HAO2"",""HAPLN2"",""HAX1"",""HCN3"",""HCRTR1"",""HDAC1"",""HECTD3"",""HENMT1"",""HEYL"",""HFM1"",""HHAT"",""HHIPL2"",""HHLA3"",""HIPK1"",""HIST2H2AA3"",""HIST2H2AA4"",""HIST2H2AB"",""HIST2H2AC"",""HIST2H2BE"",""HIST2H2BF"",""HIST2H3A"",""HIST2H3C"",""HIST2H3D"",""HIST2H3PS2"",""HIST2H4A"",""HIST2H4B"",""HIST3H2A"",""HIST3H2BB"",""HIST3H3"",""HIVEP3"",""HJV"",""HLX"",""HMCN1"",""HMGB4"",""HMGCL"",""HMGCS2"",""HMGN2"",""HNRNPR"",""HOOK1"",""HORMAD1"",""HP1BP3"",""HPCA"",""HPCAL4"",""HPDL"",""HRNR"",""HS2ST1"",""HSD11B1"",""HSD3B1"",""HSD3B2"",""HSPA6"",""HSPB11"",""HSPG2"",""HTR1D"",""HTR6"",""HYI"",""IARS2"",""IBA57"",""ID3"",""IER5"",""IFFO2"",""IFI16"",""IFI44"",""IFI44L"",""IFI6"",""IFNLR1"",""IGFN1"",""IGSF21"",""IGSF3"",""IGSF8"",""IGSF9"",""IKBKE"",""IL10"",""IL12RB2"",""IL19"",""IL20"",""IL22RA1"",""IL23R"",""IL24"",""IL6R"",""ILDR2"",""ILF2"",""INAVA"",""INPP5B"",""INSL5"",""INSRR"",""INTS3"",""INTS7"",""IPO13"",""IPO9"",""IPP"",""IQCC"",""IQGAP3"",""IRF2BP2"",""IRF6"",""ISG20L2"",""ITGA10"",""ITLN1"",""ITLN2"",""ITPKB"",""IVL"",""IVNS1ABP"",""JAK1"",""JMJD4"",""JTB"",""JUN"",""KANK4"",""KCNA10"",""KCNA3"",""KCND3"",""KCNH1"",""KCNJ10"",""KCNJ9"",""KCNK1"",""KCNK2"",""KCNN3"",""KCNQ4"",""KCNT2"",""KCTD3"",""KDF1"",""KDM1A"",""KDM4A"",""KDM5B"",""KHDC4"",""KIAA0040"",""KIAA0319L"",""KIAA1324"",""KIAA1522"",""KIAA1614"",""KIF14"",""KIF17"",""KIF21B"",""KIF2C"",""KIFAP3"",""KIRREL1"",""KISS1"",""KLF17"",""KLF18"",""KLHDC7A"",""KLHDC8A"",""KLHL12"",""KLHL20"",""KPNA6"",""KPRP"",""KTI12"",""KYAT3"",""L1TD1"",""LACTBL1"",""LAD1"",""LAMB3"",""LAMC1"",""LAMC2"",""LAMTOR2"",""LAMTOR5"",""LAPTM5"",""LAX1"",""LBR"",""LCE1A"",""LCE1B"",""LCE1C"",""LCE1D"",""LCE1E"",""LCE1F"",""LCE2A"",""LCE2B"",""LCE2C"",""LCE2D"",""LCE3A"",""LCE3B"",""LCE3C"",""LCE3D"",""LCE3E"",""LCE4A"",""LCE5A"",""LCE6A"",""LCK"",""LDLRAD1"",""LDLRAD2"",""LDLRAP1"",""LEFTY1"",""LEFTY2"",""LELP1"",""LEMD1"",""LENEP"",""LEPR"",""LEPROT"",""LEXM"",""LGR6"",""LHX4"",""LHX8"",""LHX9"",""LIN28A"",""LIN9"",""LINGO4"",""LIX1L"",""LMNA"",""LMO4"",""LMOD1"",""LMX1A"",""LOR"",""LPAR3"",""LPGAT1"",""LRIF1"",""LRIG2"",""LRP8"",""LRRC39"",""LRRC40"",""LRRC41"",""LRRC42"",""LRRC52"",""LRRC53"",""LRRC7"",""LRRC71"",""LRRC8B"",""LRRC8D"",""LRRIQ3"",""LRRN2"",""LURAP1"",""LUZP1"",""LY9"",""LYPLA2"",""LYPLAL1"",""LYSMD1"",""MAB21L3"",""MACF1"",""MACO1"",""MAEL"",""MAGI3"",""MAGOH"",""MAN1A2"",""MAN1C1"",""MANEAL"",""MAP10"",""MAP3K21"",""MAP3K6"",""MAP7D1"",""MAPKAPK2"",""MARC1"",""MARC2"",""MARCKSL1"",""MARK1"",""MAST2"",""MATN1"",""MCL1"",""MCOLN2"",""MCOLN3"",""MDM4"",""MEAF6"",""MECR"",""MED18"",""MED8"",""MEF2D"",""METTL11B"",""METTL18"",""MEX3A"",""MFAP2"",""MFSD14A"",""MFSD2A"",""MFSD4A"",""MGST3"",""MIA3"",""MIER1"",""MIGA1"",""MINDY1"",""MINOS1"",""MINOS1-NBL1"",""MIXL1"",""MKNK1"",""MLLT11"",""MMACHC"",""MNDA"",""MOB3C"",""MOV10"",""MPC2"",""MPL"",""MPZ"",""MPZL1"",""MR1"",""MROH7"",""MROH7-TTC4"",""MROH9"",""MRPL24"",""MRPL55"",""MRPL9"",""MRPS14"",""MRPS15"",""MRPS21"",""MRTO4"",""MSH4"",""MSTO1"",""MTF1"",""MTF2"",""MTFR1L"",""MTX1"",""MUC1"",""MUL1"",""MUTYH"",""MYBPH"",""MYBPHL"",""MYCBP"",""MYCL"",""MYOC"",""MYOCOS"",""MYOG"",""MYOM3"",""MYSM1"",""NASP"",""NAV1"",""NAXE"",""NBL1"",""NBPF10"",""NBPF11"",""NBPF12"",""NBPF14"",""NBPF15"",""NBPF19"",""NBPF20"",""NBPF26"",""NBPF3"",""NBPF4"",""NBPF6"",""NBPF9"",""NCDN"",""NCF2"",""NCMAP"",""NCSTN"",""NDC1"",""NDUFS2"",""NDUFS5"",""NECTIN4"",""NEGR1"",""NEK2"",""NEK7"",""NENF"",""NES"",""NEXN"",""NFASC"",""NFIA"",""NFYC"",""NGF"",""NHLH1"",""NHLH2"",""NIPAL3"",""NIT1"",""NKAIN1"",""NME7"",""NMNAT2"",""NOS1AP"",""NOTCH2"",""NOTCH2NLA"",""NPHS2"",""NPR1"",""NR0B2"",""NR1I3"",""NR5A2"",""NRAS"",""NRDC"",""NSL1"",""NSUN4"",""NT5C1A"",""NTNG1"",""NTPCR"",""NTRK1"",""NUAK2"",""NUCKS1"",""NUDC"",""NUDT17"",""NUDT4B"",""NUF2"",""NUP133"",""NUP210L"",""NVL"",""OAZ3"",""OBSCN"",""OCLM"",""ODF2L"",""ODR4"",""OLFM3"",""OLFML2B"",""OPRD1"",""OPTC"",""OR10J1"",""OR10J5"",""OR10K2"",""OR10R2"",""OR10T2"",""OR10X1"",""OR10Z1"",""OR6K2"",""OR6K3"",""OR6K6"",""OR6N2"",""OR6P1"",""OR6Y1"",""ORC1"",""OSBPL9"",""OSCP1"",""OTUD3"",""OTUD7B"",""OVGP1"",""OXCT2"",""P3H1"",""PABPC4"",""PADI1"",""PADI2"",""PADI3"",""PADI4"",""PADI6"",""PAFAH2"",""PALMD"",""PAPPA2"",""PAQR6"",""PAQR7"",""PARP1"",""PARS2"",""PATJ"",""PAX7"",""PBX1"",""PBXIP1"",""PCNX2"",""PCP4L1"",""PCSK9"",""PDC"",""PDE4B"",""PDE4DIP"",""PDIK1L"",""PDZK1"",""PDZK1IP1"",""PEA15"",""PEAR1"",""PEF1"",""PEX11B"",""PEX19"",""PFDN2"",""PFKFB2"",""PGBD5"",""PGLYRP3"",""PGLYRP4"",""PGM1"",""PHACTR4"",""PHGDH"",""PHLDA3"",""PHTF1"",""PI4KB"",""PIAS3"",""PIFO"",""PIGK"",""PIGM"",""PIGR"",""PIGV"",""PIK3C2B"",""PIK3R3"",""PINK1"",""PIP5K1A"",""PITHD1"",""PKLR"",""PKN2"",""PKP1"",""PLA2G2A"",""PLA2G2C"",""PLA2G2D"",""PLA2G2E"",""PLA2G2F"",""PLA2G4A"",""PLEKHA6"",""PLEKHO1"",""PLK3"",""PLPPR4"",""PLPPR5"",""PLXNA2"",""PMF1"",""PMF1-BGLAP"",""PMVK"",""PNRC2"",""PODN"",""POGK"",""POGZ"",""POLR3C"",""POMGNT1"",""POU2F1"",""POU3F1"",""PPFIA4"",""PPIAL4A"",""PPIAL4C"",""PPIAL4D"",""PPIAL4E"",""PPIAL4F"",""PPIAL4G"",""PPIE"",""PPIH"",""PPM1J"",""PPOX"",""PPP1R12B"",""PPP1R15B"",""PPP1R8"",""PPP2R5A"",""PPT1"",""PQLC2"",""PRDX1"",""PRDX6"",""PRELP"",""PRG4"",""PRKAA2"",""PRKAB2"",""PRKACB"",""PRMT6"",""PROK1"",""PROX1"",""PRPF3"",""PRPF38A"",""PRPF38B"",""PRR9"",""PRRC2C"",""PRRX1"",""PRSS38"",""PRUNE1"",""PSEN2"",""PSMA5"",""PSMB2"",""PSMB4"",""PSMD4"",""PSRC1"",""PTAFR"",""PTBP2"",""PTCH2"",""PTGER3"",""PTGFR"",""PTGFRN"",""PTGS2"",""PTP4A2"",""PTPN14"",""PTPN22"",""PTPN7"",""PTPRC"",""PTPRF"",""PTPRU"",""PUM1"",""PYCR2"",""PYGO2"",""PYHIN1"",""QSOX1"",""RAB13"",""RAB25"",""RAB29"",""RAB3B"",""RAB3GAP2"",""RAB42"",""RAB4A"",""RAB7B"",""RABGAP1L"",""RABGGTB"",""RABIF"",""RAD54L"",""RALGPS2"",""RAP1A"",""RAP1GAP"",""RASAL2"",""RASSF5"",""RAVER2"",""RBBP4"",""RBBP5"",""RBM15"",""RBMXL1"",""RC3H1"",""RCAN3"",""RCC1"",""RCC2"",""RCOR3"",""RCSD1"",""RD3"",""REG4"",""REN"",""RFX5"",""RGL1"",""RGS1"",""RGS13"",""RGS16"",""RGS18"",""RGS2"",""RGS21"",""RGS4"",""RGS5"",""RGS8"",""RGSL1"",""RHBDL2"",""RHBG"",""RHCE"",""RHD"",""RHEX"",""RHOC"",""RHOU"",""RIIAD1"",""RIMKLA"",""RIMS3"",""RIT1"",""RLF"",""RNASEL"",""RNF11"",""RNF115"",""RNF186"",""RNF187"",""RNF19B"",""RNF2"",""RNF220"",""RNPC3"",""RNPEP"",""ROR1"",""RORC"",""RPA2"",""RPAP2"",""RPE65"",""RPF1"",""RPL11"",""RPL5"",""RPRD2"",""RPS27"",""RPS6KA1"",""RPS6KC1"",""RPS8"",""RPTN"",""RRAGC"",""RRNAD1"",""RRP15"",""RSBN1"",""RSPO1"",""RSRP1"",""RTCA"",""RUNX3"",""RUSC1"",""RWDD3"",""RXFP4"",""RXRG"",""S100A1"",""S100A10"",""S100A12"",""S100A14"",""S100A16"",""S100A2"",""S100A3"",""S100A4"",""S100A5"",""S100A6"",""S100A7"",""S100A7A"",""S100A7L2"",""S100A8"",""S100A9"",""S100PBP"",""SAMD13"",""SARS"",""SASS6"",""SCAMP3"",""SCMH1"",""SCNM1"",""SCP2"",""SCYL3"",""SDC3"",""SDE2"",""SDHB"",""SDHC"",""SEC16B"",""SEC22B"",""SELE"",""SELENBP1"",""SELENOF"",""SELENON"",""SELL"",""SELP"",""SEMA4A"",""SEMA6C"",""SERBP1"",""SERINC2"",""SERPINC1"",""SERTAD4"",""SESN2"",""SETDB1"",""SETSIP"",""SF3A3"",""SF3B4"",""SFN"",""SFPQ"",""SFT2D2"",""SGIP1"",""SH2D1B"",""SH2D2A"",""SH3BGRL3"",""SH3D21"",""SH3GLB1"",""SHC1"",""SHE"",""SHISA4"",""SIKE1"",""SIPA1L2"",""SLAMF1"",""SLAMF6"",""SLAMF7"",""SLAMF8"",""SLAMF9"",""SLC16A1"",""SLC16A4"",""SLC19A2"",""SLC1A7"",""SLC22A15"",""SLC25A24"",""SLC25A44"",""SLC26A9"",""SLC27A3"",""SLC2A1"",""SLC30A1"",""SLC30A2"",""SLC30A7"",""SLC35A3"",""SLC35D1"",""SLC35F3"",""SLC39A1"",""SLC41A1"",""SLC44A3"",""SLC44A5"",""SLC45A3"",""SLC50A1"",""SLC5A9"",""SLC6A17"",""SLC6A9"",""SLC9A1"",""SLC9C2"",""SLFNL1"",""SMAP2"",""SMCP"",""SMG5"",""SMG7"",""SMIM12"",""SMPDL3B"",""SNAP47"",""SNAPIN"",""SNHG28"",""SNIP1"",""SNRNP40"",""SNRPE"",""SNX27"",""SNX7"",""SOAT1"",""SORT1"",""SOX13"",""SPAG17"",""SPATA1"",""SPATA17"",""SPATA45"",""SPATA46"",""SPATA6"",""SPHAR"",""SPOCD1"",""SPRR1A"",""SPRR1B"",""SPRR2A"",""SPRR2B"",""SPRR2D"",""SPRR2E"",""SPRR2F"",""SPRR2G"",""SPRR3"",""SPRR4"",""SPRR5"",""SPRTN"",""SPTA1"",""SRGAP2"",""SRGAP2B"",""SRGAP2C"",""SRP9"",""SRRM1"",""SRSF10"",""SRSF11"",""SRSF4"",""SSBP3"",""SSR2"",""SSX2IP"",""ST3GAL3"",""ST6GALNAC3"",""ST6GALNAC5"",""ST7L"",""STIL"",""STK40"",""STMN1"",""STUM"",""STX12"",""STX6"",""STXBP3"",""SUCO"",""SUSD4"",""SV2A"",""SVBP"",""SWT1"",""SYCP1"",""SYDE2"",""SYF2"",""SYNC"",""SYPL2"",""SYT11"",""SYT14"",""SYT2"",""SYT6"",""SYTL1"",""SZT2"",""TACSTD2"",""TADA1"",""TAF12"",""TAF13"",""TAF1A"",""TAF5L"",""TAGLN2"",""TAL1"",""TARBP1"",""TARS2"",""TAS1R2"",""TATDN3"",""TBX15"",""TBX19"",""TCEA3"",""TCHH"",""TCHHL1"",""TCTEX1D1"",""TCTEX1D4"",""TDRD10"",""TDRD5"",""TDRKH"",""TEDDM1"",""TEKT2"",""TENT5B"",""TENT5C"",""TESK2"",""TEX35"",""TEX38"",""TEX46"",""TEX50"",""TFAP2E"",""TGFB2"",""TGFBR3"",""THBS3"",""THEM4"",""THEM5"",""THEMIS2"",""THRAP3"",""TIE1"",""TIMM17A"",""TINAGL1"",""TIPRL"",""TLR5"",""TM2D1"",""TMCC2"",""TMCO1"",""TMCO4"",""TMED5"",""TMEM125"",""TMEM167B"",""TMEM183A"",""TMEM200B"",""TMEM206"",""TMEM222"",""TMEM234"",""TMEM269"",""TMEM35B"",""TMEM50A"",""TMEM53"",""TMEM54"",""TMEM56"",""TMEM56-RWDD3"",""TMEM59"",""TMEM61"",""TMEM63A"",""TMEM69"",""TMEM81"",""TMEM9"",""TMIGD3"",""TMOD4"",""TNFAIP8L2"",""TNFSF18"",""TNFSF4"",""TNN"",""TNNI1"",""TNNI3K"",""TNNT2"",""TNR"",""TOE1"",""TOMM40L"",""TOR1AIP1"",""TOR1AIP2"",""TOR3A"",""TP53BP2"",""TPM3"",""TPR"",""TRABD2B"",""TRAF3IP3"",""TRAF5"",""TRAPPC3"",""TRIM11"",""TRIM17"",""TRIM33"",""TRIM45"",""TRIM46"",""TRIM62"",""TRIM63"",""TRIM67"",""TRIT1"",""TRMT13"",""TRMT1L"",""TRNAU1AP"",""TRNP1"",""TROVE2"",""TSACC"",""TSEN15"",""TSHB"",""TSNAX"",""TSNAX-DISC1"",""TSPAN1"",""TSPAN2"",""TSSK3"",""TSTD1"",""TTC13"",""TTC22"",""TTC24"",""TTC39A"",""TTC4"",""TTF2"",""TTLL7"",""TUFT1"",""TUT4"",""TXLNA"",""TXNDC12"",""TXNIP"",""TYW3"",""UAP1"",""UBAP2L"",""UBE2Q1"",""UBE2U"",""UBL4B"",""UBQLN4"",""UBR4"",""UBXN10"",""UBXN11"",""UCHL5"",""UCK2"",""UHMK1"",""UQCRH"",""URB2"",""UROD"",""USF1"",""USH2A"",""USP1"",""USP21"",""USP24"",""USP33"",""USP48"",""UTP11"",""UTP25"",""VAMP4"",""VANGL1"",""VANGL2"",""VASH2"",""VAV3"",""VCAM1"",""VHLL"",""VPS45"",""VSIG8"",""VTCN1"",""VWA5B1"",""WARS2"",""WASF2"",""WDR26"",""WDR3"",""WDR47"",""WDR63"",""WDR77"",""WDR78"",""WDTC1"",""WLS"",""WNT2B"",""WNT3A"",""WNT4"",""WNT9A"",""XCL1"",""XCL2"",""XKR8"",""XPR1"",""YARS"",""YBX1"",""YIPF1"",""YOD1"",""YRDC"",""YTHDF2"",""YY1AP1"",""ZBED6"",""ZBTB37"",""ZBTB40"",""ZBTB41"",""ZBTB7B"",""ZBTB8A"",""ZBTB8B"",""ZBTB8OS"",""ZC3H11A"",""ZC3H12A"",""ZCCHC17"",""ZDHHC18"",""ZFP69"",""ZFYVE9"",""ZMPSTE24"",""ZMYM1"",""ZMYM4"",""ZMYND12"",""ZNF281"",""ZNF326"",""ZNF362"",""ZNF436"",""ZNF593"",""ZNF644"",""ZNF648"",""ZNF678"",""ZNF683"",""ZNF684"",""ZNF687"",""ZNF691"",""ZNF697"",""ZNHIT6"",""ZRANB2"",""ZSCAN20"",""ZYG11A"",""ZYG11B""]",,,,,,,,,,"[""ADAM15"",""AIDA"",""AIM2"",""AK2"",""AL020996.2"",""AL603832.3"",""ARHGEF2"",""AVPR1B"",""AZIN2"",""BCL2L15"",""BTBD19"",""C1orf112"",""C1orf35"",""C1orf52"",""C1orf61"",""CCDC190"",""CD5L"",""CHIT1"",""CLEC20A"",""CNIH3"",""CYP4B1"",""DAB1"",""DISP1"",""DNAJB4"",""DPH5"",""DYRK3"",""EBNA1BP2"",""FAM229A"",""FAM72A"",""FOXO6"",""FPGT"",""GIPC2"",""GPR89B"",""GSTM1"",""GSTM5"",""HDGF"",""HSD17B7"",""INKA2"",""ITGB3BP"",""KCNA2"",""KCNC4"",""KHDRBS1"",""KLHDC9"",""KNCN"",""KRTCAP2"",""LRRC8C"",""LSM10"",""METTL13"",""MRPL37"",""MTMR11"",""NPL"",""OLFML3"",""OMA1"",""OR10K1"",""OR6N1"",""PHC2"",""PIGC"",""PLA2G5"",""PLPP3"",""PM20D1"",""POLR3GL"",""PPCS"",""PRCC"",""RBM8A"",""S100A11"",""S100A13"",""S1PR1"",""SH2D5"",""SHCBP1L"",""SHISAL2A"",""SLC30A10"",""SMYD2"",""STPG1"",""STRIP1"",""TCEANC2"",""TMCO2"",""TMEM39B"",""TMEM79"",""UBE2T"",""UFC1"",""VPS72"",""ZFP69B"",""ZMYM6"",""ZSWIM5""]","[""AC091614.1"",""AC091614.2"",""AC092265.1"",""AC092783.1"",""AC092803.1"",""AC092803.2"",""AC092807.1"",""AC092807.2"",""AC092807.3"",""AC092810.3"",""AC092811.2"",""AC092813.1"",""AC092813.2"",""AC093117.1"",""AC093151.3"",""AC093152.1"",""AC093154.1"",""AC093424.1"",""AC095032.2"",""AC096541.1"",""AC096543.1"",""AC096543.2"",""AC096631.1"",""AC096637.1"",""AC096639.1"",""AC096642.1"",""AC096644.1"",""AC096644.4"",""AC097059.1"",""AC097059.2"",""AC097065.2"",""AC097066.1"",""AC098484.2"",""AC098657.2"",""AC098936.1"",""AC099062.1"",""AC099566.1"",""AC099567.1"",""AC099568.2"",""AC099670.3"",""AC099671.1"",""AC099673.1"",""AC099786.3"",""AC099791.2"",""AC099791.3"",""AC099792.1"",""AC099793.1"",""AC099794.1"",""AC099795.1"",""AC099796.1"",""AC103591.4"",""AC104169.1"",""AC104333.4"",""AC104453.1"",""AC104454.2"",""AC104457.2"",""AC104458.1"",""AC104463.1"",""AC104836.1"",""AC105275.1"",""AC105277.1"",""AC114485.1"",""AC116099.1"",""AC117945.2"",""AC119428.1"",""AC119428.2"",""AC119428.3"",""AC119674.1"",""AC133865.1"",""AC135803.1"",""AC138393.3"",""AC239798.2"",""AC239799.2"",""AC239800.2"",""AC239803.1"",""AC239803.2"",""AC239803.3"",""AC239804.1"",""AC239809.3"",""AC239859.3"",""AC241644.1"",""AC241644.2"",""AC242988.2"",""AC244021.1"",""AC244034.2"",""AC244035.2"",""AC244394.2"",""AC244453.1"",""AC245008.1"",""AC245014.1"",""AC245014.3"",""AC245100.1"",""AC245100.6"",""AC245100.7"",""AC245297.2"",""AC245297.3"",""AC245595.1"",""AC246785.3"",""AL008626.1"",""AL021068.1"",""AL021154.1"",""AL021920.1"",""AL021940.1"",""AL022100.2"",""AL022310.1"",""AL023495.1"",""AL023754.1"",""AL023755.1"",""AL031275.1"",""AL031281.3"",""AL031289.1"",""AL031430.1"",""AL031432.3"",""AL031432.5"",""AL031599.1"",""AL031985.4"",""AL033527.4"",""AL033527.5"",""AL033527.6"",""AL033530.1"",""AL035412.1"",""AL035415.1"",""AL035706.1"",""AL049825.1"",""AL050344.1"",""AL050344.2"",""AL078644.1"",""AL096803.3"",""AL109659.1"",""AL109810.2"",""AL109936.2"",""AL109936.6"",""AL109945.1"",""AL118511.2"",""AL121983.1"",""AL121983.2"",""AL121999.1"",""AL122019.1"",""AL133383.1"",""AL133516.1"",""AL136115.1"",""AL136115.2"",""AL136115.3"",""AL136234.1"",""AL136372.2"",""AL136376.1"",""AL136379.1"",""AL136382.1"",""AL136529.1"",""AL136985.1"",""AL136985.2"",""AL136985.3"",""AL136987.1"",""AL137027.1"",""AL137076.1"",""AL137793.1"",""AL137793.2"",""AL137796.1"",""AL137798.2"",""AL138899.1"",""AL138900.1"",""AL138900.2"",""AL138900.3"",""AL138927.1"",""AL139130.1"",""AL139158.2"",""AL139158.3"",""AL139220.2"",""AL139254.1"",""AL139254.2"",""AL139254.3"",""AL139286.2"",""AL139412.1"",""AL139420.1"",""AL139420.2"",""AL157402.1"",""AL157904.1"",""AL157944.1"",""AL160408.1"",""AL160408.3"",""AL160408.4"",""AL160408.5"",""AL160408.6"",""AL161636.1"",""AL161638.1"",""AL161638.2"",""AL161640.1"",""AL161734.1"",""AL161793.1"",""AL162399.1"",""AL162400.1"",""AL162400.2"",""AL162431.1"",""AL353052.1"",""AL353052.2"",""AL353072.2"",""AL353604.1"",""AL353622.2"",""AL353651.1"",""AL353651.2"",""AL353651.3"",""AL353681.1"",""AL353708.1"",""AL353708.2"",""AL353708.3"",""AL354702.2"",""AL354714.3"",""AL354872.2"",""AL354919.1"",""AL355306.2"",""AL355482.1"",""AL355482.2"",""AL355526.1"",""AL355981.1"",""AL356108.1"",""AL356272.1"",""AL356275.1"",""AL356289.1"",""AL356289.2"",""AL356310.1"",""AL356364.1"",""AL356441.1"",""AL356479.1"",""AL356488.2"",""AL357055.3"",""AL357078.1"",""AL357078.3"",""AL357500.1"",""AL357509.1"",""AL357793.1"",""AL357793.2"",""AL358072.1"",""AL358215.2"",""AL358393.1"",""AL358472.2"",""AL359081.1"",""AL359258.1"",""AL359504.2"",""AL359762.2"",""AL359834.1"",""AL359853.2"",""AL359853.3"",""AL359915.1"",""AL359962.2"",""AL359979.1"",""AL359979.2"",""AL360012.1"",""AL360013.2"",""AL360093.1"",""AL360219.1"",""AL360294.1"",""AL365181.1"",""AL365181.2"",""AL365181.4"",""AL365361.1"",""AL390036.1"",""AL390038.1"",""AL390066.1"",""AL390115.1"",""AL390243.1"",""AL390718.1"",""AL390729.1"",""AL390856.1"",""AL390957.1"",""AL391069.4"",""AL391497.1"",""AL391597.1"",""AL391645.1"",""AL392172.1"",""AL445193.2"",""AL445205.1"",""AL445218.1"",""AL445228.1"",""AL445228.2"",""AL445423.1"",""AL445426.1"",""AL445471.2"",""AL445483.1"",""AL445493.2"",""AL445648.1"",""AL445686.2"",""AL450043.1"",""AL450468.2"",""AL450990.1"",""AL451050.2"",""AL451060.1"",""AL451074.6"",""AL451081.1"",""AL451085.1"",""AL512271.1"",""AL512306.2"",""AL513283.1"",""AL513285.1"",""AL513314.1"",""AL513314.2"",""AL513327.3"",""AL513329.1"",""AL513348.1"",""AL513493.1"",""AL583804.1"",""AL583808.1"",""AL583808.2"",""AL583808.3"",""AL589765.6"",""AL589986.2"",""AL590408.1"",""AL590434.1"",""AL590644.1"",""AL590644.3"",""AL590648.3"",""AL590666.3"",""AL590683.1"",""AL591167.1"",""AL591463.1"",""AL591504.1"",""AL591888.1"",""AL591896.1"",""AL592182.3"",""AL592287.1"",""AL592295.3"",""AL592295.4"",""AL592309.2"",""AL592402.1"",""AL592431.1"",""AL592431.2"",""AL592435.1"",""AL592494.1"",""AL592494.3"",""AL596211.1"",""AL596218.1"",""AL596257.1"",""AL596275.2"",""AL603840.1"",""AL606468.1"",""AL606491.1"",""AL606519.1"",""AL645944.2"",""AL662907.3"",""AL663023.1"",""AL670729.3"",""AL683887.1"",""AL691459.1"",""AL691515.1"",""AL691515.2"",""AL691520.1"",""AL713852.1"",""AL713866.1"",""AL831711.1"",""AL844170.1"",""AL929288.1"",""AL954650.1"",""BLACAT1"",""BX005019.1"",""BX470102.2"",""C1orf137"",""C1orf140"",""C1orf143"",""C1orf220"",""CYMP-AS1"",""CYP4A22-AS1"",""ERVMER61-1"",""FALEC"",""FLVCR1-DT"",""FO393419.2"",""FOXD2-AS1"",""FP700111.1"",""IBA57-DT"",""LINC00184"",""LINC00210"",""LINC00272"",""LINC00303"",""LINC00467"",""LINC00538"",""LINC00623"",""LINC00626"",""LINC00628"",""LINC00862"",""LINC00970"",""LINC01031"",""LINC01035"",""LINC01132"",""LINC01133"",""LINC01135"",""LINC01136"",""LINC01140"",""LINC01141"",""LINC01142"",""LINC01144"",""LINC01160"",""LINC01221"",""LINC01222"",""LINC01307"",""LINC01343"",""LINC01344"",""LINC01349"",""LINC01350"",""LINC01351"",""LINC01352"",""LINC01353"",""LINC01354"",""LINC01355"",""LINC01356"",""LINC01358"",""LINC01360"",""LINC01361"",""LINC01362"",""LINC01364"",""LINC01525"",""LINC01555"",""LINC01562"",""LINC01632"",""LINC01633"",""LINC01635"",""LINC01641"",""LINC01645"",""LINC01648"",""LINC01649"",""LINC01650"",""LINC01653"",""LINC01654"",""LINC01655"",""LINC01657"",""LINC01661"",""LINC01675"",""LINC01676"",""LINC01677"",""LINC01680"",""LINC01681"",""LINC01682"",""LINC01685"",""LINC01686"",""LINC01688"",""LINC01691"",""LINC01693"",""LINC01696"",""LINC01698"",""LINC01699"",""LINC01701"",""LINC01702"",""LINC01703"",""LINC01704"",""LINC01705"",""LINC01707"",""LINC01708"",""LINC01709"",""LINC01710"",""LINC01712"",""LINC01715"",""LINC01717"",""LINC01719"",""LINC01720"",""LINC01724"",""LINC01725"",""LINC01731"",""LINC01732"",""LINC01735"",""LINC01736"",""LINC01737"",""LINC01738"",""LINC01739"",""LINC01740"",""LINC01741"",""LINC01744"",""LINC01745"",""LINC01748"",""LINC01750"",""LINC01753"",""LINC01755"",""LINC01756"",""LINC01757"",""LINC01758"",""LINC01760"",""LINC01761"",""LINC01762"",""LINC01763"",""LINC01765"",""LINC01767"",""LINC01768"",""LINC01771"",""LINC01774"",""LINC01776"",""LINC01778"",""LINC01779"",""LINC01780"",""LINC01781"",""LINC01787"",""LINC01788"",""LINC02238"",""LINC02257"",""LINC02474"",""LINC02567"",""LINC02574"",""LINC02591"",""MDS2"",""MIR137HG"",""MIR181A1HG"",""MIR29B2CHG"",""MIR4422HG"",""OVAAL"",""PACERR"",""RASAL2-AS1"",""RNU6ATAC35P"",""RTCA-AS1"",""SLC2A1-AS1"",""SNHG28"",""TMEM78"",""Z97198.1"",""Z97200.1"",""Z98257.1"",""Z98257.2""]",,,,,"[""DNase"",""Enhancer"",""HAR"",""Tommerup_TADanno""]","[""DNase""]",,False,"[""CROCC""]",1428,[1262],[8.84e-01],714,0,166,548,0.0,0.232,0.768,772,[679],[8.80e-01],386,0,93,293,0.0,0.241,0.759,656,[583],[8.89e-01],328,0,73,255,0.0,0.223,0.777,,,"[1622,0,0,0,0,84,0,0,0,1]"
chr1:50768786,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_977""",1.0,{},"[""depth"",""manta""]","""chr1""",,,51157249,,"[""PE"",""RD"",""SR""]",,,388463,"""DEL""",,"[""C1orf185"",""CDKN2C"",""FAF1""]",,,,,,,,,,,,,,,,"[""DNase"",""Enhancer""]","[""DNase"",""Tommerup_TADanno""]",,False,,1428,[2],[1.40e-03],714,712,2,0,0.997,0.0028,0.0,772,[0],[0.00e+00],386,386,0,0,1.0,0.0,0.0,656,[2],[3.05e-03],328,326,2,0,0.994,0.0061,0.0,,,"[3,0,0,0,0,0,0,0,0,0]"
chr1:145510587,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_3101""",999.0,{},"[""depth""]","""chr1""",,,145816793,,"[""RD""]",,,306206,"""DEL""",,"[""CD160"",""GPR89A"",""PDZK1"",""RNF115""]",,,,,,,,,,,,,,,,"[""DNase"",""Enhancer""]","[""Tommerup_TADanno""]",,False,,1428,[2],[1.40e-03],714,712,2,0,0.997,0.0028,0.0,772,[2],[2.59e-03],386,384,2,0,0.995,0.00518,0.0,656,[0],[0.00e+00],328,328,0,0,1.0,0.0,0.0,"""gnomAD-SV_v2.1_DEL_1_7679""",0.000599,"[4,0,0,0,0,0,0,0,0,0]"
chr1:145822587,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_3107""",999.0,{},"[""depth""]","""chr1""",,,146064587,,"[""RD""]",,,242000,"""DEL""",,"[""AC243547.3"",""ANKRD34A"",""ANKRD35"",""HJV"",""ITGA10"",""LIX1L"",""NUDT17"",""PEX11B"",""PIAS3"",""POLR3C"",""RNF115"",""TXNIP""]",,,,,,,,,,"[""POLR3GL"",""RBM8A""]","[""LINC01719""]",,,,,"[""DNase"",""Enhancer""]","[""Tommerup_TADanno""]",,False,,1428,[1],[7.00e-04],714,713,1,0,0.999,0.0014,0.0,772,[1],[1.30e-03],386,385,1,0,0.997,0.00259,0.0,656,[0],[0.00e+00],328,328,0,0,1.0,0.0,0.0,"""gnomAD-SV_v2.1_DEL_1_7663""",0.000141,"[12,0,0,0,0,2,0,0,0,0]"
chr1:204145612,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_4603""",999.0,{},"[""depth"",""manta""]","""chr1""",,,204221287,,"[""PE"",""RD"",""SR""]",,,75675,"""DEL""",,"[""ETNK2"",""GOLT1A"",""KISS1"",""REN""]",,,,,,,,,,,,,,"[""PLEKHA6""]",,"[""DNase""]","[""Tommerup_TADanno""]",,False,,1428,[2],[1.40e-03],714,712,2,0,0.997,0.0028,0.0,772,[2],[2.59e-03],386,384,2,0,0.995,0.00518,0.0,656,[0],[0.00e+00],328,328,0,0,1.0,0.0,0.0,,,"[4,0,0,0,0,0,0,1,0,0]"


In [19]:
rows.aggreate(hl.agg.count_where(rows.info.get()))

AttributeError: Table instance has no field, method, or property 'aggreate'
    Did you mean:
        Table method: 'aggregate'

In [20]:
gene_cols = [gene_col for gene_col in rows.info if gene_col.startswith('PROTEIN_CODING__')]
r1 = rows.annotate(gene_info=hl.filter(lambda x: hl.is_defined(x.genes),
                [hl.struct(genes=rows.info.get(col), predicted_consequence=col.split('__')[-1])
                 for col in gene_cols if rows.info.get(col).dtype==hl.dtype('array<str>')]))
r1.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 70_level_0
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF,gene_info
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64,"array<struct{genes: array<str>, predicted_consequence: str}>"
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,17000,,"[""RD""]",,,7000,"""DUP""",,,,,,,,,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[370],[2.59e-01],714.0,415.0,228.0,71.0,0.581,0.319,0.0994,772,[214],[2.77e-01],386.0,216.0,126.0,44.0,0.56,0.326,0.114,656,[156],[2.38e-01],328.0,199.0,102.0,27.0,0.607,0.311,0.0823,,,"[([""OR4F5""],""NEAREST_TSS"")]"
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_2""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,53500,,"[""BAF"",""RD""]",,,43500,"""DUP""",,,,,,,"[""FAM138A"",""MIR1302-2HG""]",,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[70],[4.90e-02],714.0,649.0,60.0,5.0,0.909,0.084,0.007,772,[46],[5.96e-02],386.0,344.0,38.0,4.0,0.891,0.0984,0.0104,656,[24],[3.66e-02],328.0,305.0,22.0,1.0,0.93,0.0671,0.00305,,,"[([""OR4F5""],""NEAREST_TSS"")]"
chr1:10602,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_1""",461.0,"{""UNRESOLVED"",""UNSTABLE_AF_PCRMINUS""}","[""manta""]","""chr12""",,,10602,10546.0,"[""SR""]",,"""+-""",-1,"""BND""","""SINGLE_ENDER_+-""",,,,,,,,,,,,,,,,,,,,False,,1428,[88],[6.16e-02],714.0,626.0,88.0,0.0,0.877,0.123,0.0,772,[51],[6.61e-02],386.0,335.0,51.0,0.0,0.868,0.132,0.0,656,[37],[5.64e-02],328.0,291.0,37.0,0.0,0.887,0.113,0.0,"""gnomAD-SV_v2.1_BND_1_1""",0.00679,[]
chr1:41950,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_3""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,52000,,"[""BAF"",""RD""]",,,10050,"""DUP""",,,,,,,,,,,,,,,,,,,,"[""OR4F5""]",True,,1428,[28],[1.96e-02],714.0,687.0,26.0,1.0,0.962,0.0364,0.0014,772,[15],[1.94e-02],386.0,371.0,15.0,0.0,0.961,0.0389,0.0,656,[13],[1.98e-02],328.0,316.0,11.0,1.0,0.963,0.0335,0.00305,"""gnomAD-SV_v2.1_DUP_1_1""",0.069,"[([""OR4F5""],""NEAREST_TSS"")]"
chr1:44000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_4""",999.0,"{""UNSTABLE_AF_PCRMINUS"",""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,66000,,"[""RD""]",,,22000,"""DUP""",,,,,,,,"[""OR4F5""]",,,,,,,,,,"[""DNase""]",,,False,,1428,[96],[6.72e-02],714.0,641.0,50.0,23.0,0.898,0.07,0.0322,772,[54],[6.99e-02],386.0,345.0,28.0,13.0,0.894,0.0725,0.0337,656,[42],[6.40e-02],328.0,296.0,22.0,10.0,0.902,0.0671,0.0305,,,"[([""OR4F5""],""DUP_PARTIAL"")]"
chr1:44250,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_5""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,116000,,"[""BAF"",""RD""]",,,71750,"""DUP""",,,,,,"[""OR4F5""]","[""AL627309.3""]",,"[""AL627309.1""]",,,,,,,,,"[""DNase""]",,,False,,1428,[82],[5.74e-02],714.0,646.0,54.0,14.0,0.905,0.0756,0.0196,772,[43],[5.57e-02],386.0,351.0,27.0,8.0,0.909,0.0699,0.0207,656,[39],[5.95e-02],328.0,295.0,27.0,6.0,0.899,0.0823,0.0183,,,"[([""OR4F5""],""COPY_GAIN"")]"
chr1:51400,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,64000,,"[""RD""]",,,12600,"""DEL""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,1428,[306],[2.14e-01],714.0,443.0,236.0,35.0,0.62,0.331,0.049,772,[156],[2.02e-01],386.0,246.0,124.0,16.0,0.637,0.321,0.0415,656,[150],[2.29e-01],328.0,197.0,112.0,19.0,0.601,0.341,0.0579,,,"[([""OR4F5""],""NEAREST_TSS"")]"
chr1:52600,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_1""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,58000,,"[""RD""]",,,5400,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,,"[([""OR4F5""],""NEAREST_TSS"")]"
chr1:66234,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_2""",807.0,"{""UNRESOLVED""}","[""manta""]","""chr19""",,,66234,108051.0,"[""PE""]",,"""-+""",-1,"""BND""","""SINGLE_ENDER_-+""",,,,,,,,,,,,,,,,,,,,False,,1428,[236],[1.65e-01],714.0,514.0,164.0,36.0,0.72,0.23,0.0504,772,[131],[1.70e-01],386.0,275.0,91.0,20.0,0.712,0.236,0.0518,656,[105],[1.60e-01],328.0,239.0,73.0,16.0,0.729,0.223,0.0488,,,[]
chr1:66350,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_2""",774.0,"{""FAIL_OUTLIER_REMOVAL""}","[""manta""]","""chr1""",,,66427,,"[""SR""]",,,77,"""DEL""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[2],[1.40e-03],714.0,712.0,2.0,0.0,0.997,0.0028,0.0,772,[1],[1.30e-03],386.0,385.0,1.0,0.0,0.997,0.00259,0.0,656,[1],[1.52e-03],328.0,327.0,1.0,0.0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_4""",0.000555,"[([""OR4F5""],""INTRONIC"")]"


In [21]:
rows.filter(rows.rsid=='CMG.phase1_CMG_INS_chr1_3').show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64
chr1:883440,"[""N"",""<INS:ME:ALU>""]","""CMG.phase1_CMG_INS_chr1_3""",325.0,"{""FAIL_minGQ""}","[""melt""]","""chr1""",,,883491,,"[""SR""]",,,281,"""INS""",,,,,,,,,,,,,,,,,,,,"[""SAMD11""]",True,,1428,[1],[7.00e-04],714,713,1,0,0.999,0.0014,0.0,772,[0],[0.00e+00],386,386,0,0,1.0,0.0,0.0,656,[1],[1.52e-03],328,327,1,0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_INS_1_13""",0.000323


In [23]:
rows.filter(rows.filters.contains('FAIL_minGQ')).show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64
chr1:52600,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_1""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,58000,,"[""RD""]",,,5400,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:67000,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_2""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,94000,,"[""RD""]",,,27000,"""CNV""",,,,,,,,,,"[""OR4F5""]","[""AL627309.3"",""AL627309.1""]",,,,,,,"[""DNase""]",,,False,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:132500,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_3""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,140000,,"[""BAF"",""RD""]",,,7500,"""CNV""",,,,,,,,,,,"[""AL627309.1""]",,,,,,,"[""DNase""]",,"[""SAMD11""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:181000,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_4""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,190000,,"[""BAF"",""RD""]",,,9000,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""SAMD11""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:181387,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_6""",637.0,"{""FAIL_minGQ""}","[""manta""]","""chr1""",,,181451,,"[""SR""]",,,64,"""DEL""",,,,,,,,,,,,,,,,,,,,"[""SAMD11""]",True,,1428,[5],[3.50e-03],714.0,709.0,5.0,0.0,0.993,0.007,0.0,772,[4],[5.18e-03],386.0,382.0,4.0,0.0,0.99,0.0104,0.0,656,[1],[1.52e-03],328.0,327.0,1.0,0.0,0.997,0.00305,0.0,,
chr1:181725,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_7""",624.0,"{""FAIL_minGQ""}","[""manta""]","""chr1""",,,181824,,"[""SR""]",,,99,"""DEL""",,,,,,,,,,,,,,,,,,,,"[""SAMD11""]",True,,1428,[2],[1.40e-03],714.0,712.0,2.0,0.0,0.997,0.0028,0.0,772,[0],[0.00e+00],386.0,386.0,0.0,0.0,1.0,0.0,0.0,656,[2],[3.05e-03],328.0,326.0,2.0,0.0,0.994,0.0061,0.0,,
chr1:196500,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_5""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,202500,,"[""BAF"",""RD""]",,,6000,"""CNV""",,,,,,,,,,,,,,,,,,,,"[""SAMD11""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:284000,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_6""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,289666,,"[""RD""]",,,5666,"""CNV""",,,,,,,,,,,,,,,,,,,,"[""SAMD11""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,
chr1:823278,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_13""",674.0,"{""FAIL_minGQ""}","[""wham""]","""chr1""",,,823491,,"[""SR""]",,,213,"""DEL""",,,,,,,,,,,,,,,,,,,"[""DNase""]","[""SAMD11""]",True,,1428,[5],[3.50e-03],714.0,709.0,5.0,0.0,0.993,0.007,0.0,772,[2],[2.59e-03],386.0,384.0,2.0,0.0,0.995,0.00518,0.0,656,[3],[4.57e-03],328.0,325.0,3.0,0.0,0.991,0.00915,0.0,"""gnomAD-SV_v2.1_DEL_1_42""",0.0575
chr1:863579,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_12""",461.0,"{""FAIL_minGQ""}","[""manta""]","""chr8""",,,863579,293805.0,"[""PE""]",,"""--""",-1,"""BND""","""SINGLE_ENDER_--""",,,,,,,,,,,,,,,,,,,,False,,1428,[28],[1.96e-02],714.0,686.0,28.0,0.0,0.961,0.0392,0.0,772,[17],[2.20e-02],386.0,369.0,17.0,0.0,0.956,0.044,0.0,656,[11],[1.68e-02],328.0,317.0,11.0,0.0,0.966,0.0335,0.0,,


In [24]:
rows.filter(rows.filters.contains('PASS')).show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64


In [30]:
filters = hl.array(hl.filter(lambda x: x != 'UNRESOLVED', rows.filters))
filters.dtype

dtype('array<str>')

In [49]:
mt2.rows().show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 70_level_0
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF,samples
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64,"array<struct{id: str, gq: int32, num_alt: int32, cn: int32}>"
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,17000,,"[""RD""]",,,7000,"""DUP""",,,,,,,,,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[370],[2.59e-01],714,415,228,71,0.581,0.319,0.0994,772,[214],[2.77e-01],386,216,126,44,0.56,0.326,0.114,656,[156],[2.38e-01],328,199,102,27,0.607,0.311,0.0823,,,"[(""HK010_0026"",145,1,3),(""HK015_0037"",37,1,3),(""HK031_0079"",999,1,3),(""HK031_0080"",37,2,4),(""HK069-0178_1"",104,1,3),(""HK072-001_1"",999,1,3),(""HK072-003_1"",999,1,3),(""HK073-001_1"",110,1,3),(""HK075-001_1"",999,1,3),(""HK087-001_1"",92,1,3),(""HK101-001_1"",105,2,4),(""HK102-002_1"",54,2,4),(""HK104-003_1"",1,1,3),(""HK003_0009"",28,1,3),(""HK012_0031_2"",116,1,3),(""HK022_0060"",17,2,4),(""HK024_0067"",44,1,3),(""HK047_0116"",67,2,4),(""HK053-0134_1"",999,1,3),(""HK087-003_1"",87,1,3),(""HK090-002_1"",999,1,3),(""HK101-002_1"",999,1,3),(""HK101-003_1"",89,2,4),(""HK106-003_1"",999,1,3),(""HK003_0007"",999,1,3),(""HK003_0008"",53,1,3),(""HK009_0025"",999,1,3),(""HK010_0028"",5,1,3),(""HK011_0029"",1,2,4),(""HK011_0030"",5,1,3),(""HK018_0049"",8,2,4),(""HK022_0061"",140,1,3),(""HK024_0065"",999,1,3),(""HK025-0068_3"",48,1,3),(""HK029-0076_2"",999,1,3),(""HK035_0088"",999,1,3),(""HK047_0117"",137,2,4),(""HK047_0118"",999,1,3),(""HK071-002"",146,1,3),(""HK106-001_1"",999,1,3)]"
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_2""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,53500,,"[""BAF"",""RD""]",,,43500,"""DUP""",,,,,,,"[""FAM138A"",""MIR1302-2HG""]",,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[70],[4.90e-02],714,649,60,5,0.909,0.084,0.007,772,[46],[5.96e-02],386,344,38,4,0.891,0.0984,0.0104,656,[24],[3.66e-02],328,305,22,1,0.93,0.0671,0.00305,,,"[(""HK015_0037"",999,2,4),(""HK031_0080"",104,1,3),(""HK104-003_1"",36,1,3),(""HK088-003_1"",999,1,3),(""HK102-001_1"",141,1,3),(""OUN_HK120_2_1"",999,1,3),(""HK025-0068_3"",999,1,3),(""HK029_0076"",999,1,3)]"
chr1:10602,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_1""",461.0,"{""UNRESOLVED"",""UNSTABLE_AF_PCRMINUS""}","[""manta""]","""chr12""",,,10602,10546.0,"[""SR""]",,"""+-""",-1,"""BND""","""SINGLE_ENDER_+-""",,,,,,,,,,,,,,,,,,,,False,,1428,[88],[6.16e-02],714,626,88,0,0.877,0.123,0.0,772,[51],[6.61e-02],386,335,51,0,0.868,0.132,0.0,656,[37],[5.64e-02],328,291,37,0,0.887,0.113,0.0,"""gnomAD-SV_v2.1_BND_1_1""",0.00679,"[(""HK072-001_1"",999,1,NA),(""HK102-002_1"",1,1,NA),(""HK104-003_1"",1,1,NA),(""HK047_0116"",18,1,NA),(""HK088-003_1"",523,1,NA),(""HK095-001_1"",1,1,NA),(""OUN_HK120_2_1"",1,1,NA),(""OUN_HK120_3_1"",1,1,NA),(""HK008_0022_3"",1,1,NA),(""HK010_0028"",1,1,NA),(""HK029-0076_3"",1,1,NA),(""HK044_0111"",782,1,NA),(""OUN_HK120_1_1"",1,1,NA)]"
chr1:41950,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_3""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,52000,,"[""BAF"",""RD""]",,,10050,"""DUP""",,,,,,,,,,,,,,,,,,,,"[""OR4F5""]",True,,1428,[28],[1.96e-02],714,687,26,1,0.962,0.0364,0.0014,772,[15],[1.94e-02],386,371,15,0,0.961,0.0389,0.0,656,[13],[1.98e-02],328,316,11,1,0.963,0.0335,0.00305,"""gnomAD-SV_v2.1_DUP_1_1""",0.069,"[(""HK090-003_1"",116,1,3),(""HK029-0076_2"",999,1,3),(""HK036_0091"",999,1,3),(""HK036_0093"",999,1,3)]"
chr1:44000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_4""",999.0,"{""UNSTABLE_AF_PCRMINUS"",""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,66000,,"[""RD""]",,,22000,"""DUP""",,,,,,,,"[""OR4F5""]",,,,,,,,,,"[""DNase""]",,,False,,1428,[96],[6.72e-02],714,641,50,23,0.898,0.07,0.0322,772,[54],[6.99e-02],386,345,28,13,0.894,0.0725,0.0337,656,[42],[6.40e-02],328,296,22,10,0.902,0.0671,0.0305,,,"[(""HK031_0080"",36,1,3),(""HK088-003_1"",61,1,3),(""HK003_0007"",999,1,3),(""HK003_0008"",999,1,3),(""HK029-0076_2"",999,1,3),(""HK036_0091"",92,1,3)]"
chr1:44250,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_5""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,116000,,"[""BAF"",""RD""]",,,71750,"""DUP""",,,,,,"[""OR4F5""]","[""AL627309.3""]",,"[""AL627309.1""]",,,,,,,,,"[""DNase""]",,,False,,1428,[82],[5.74e-02],714,646,54,14,0.905,0.0756,0.0196,772,[43],[5.57e-02],386,351,27,8,0.909,0.0699,0.0207,656,[39],[5.95e-02],328,295,27,6,0.899,0.0823,0.0183,,,"[(""HK088-003_1"",999,1,3),(""HK003_0007"",999,1,3),(""HK029-0076_2"",999,1,3),(""HK036_0091"",114,1,3),(""HK071-002"",999,1,3)]"
chr1:51400,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,64000,,"[""RD""]",,,12600,"""DEL""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,1428,[306],[2.14e-01],714,443,236,35,0.62,0.331,0.049,772,[156],[2.02e-01],386,246,124,16,0.637,0.321,0.0415,656,[150],[2.29e-01],328,197,112,19,0.601,0.341,0.0579,,,"[(""HK015_0037"",999,1,1),(""HK069-0178_1"",125,1,1),(""HK072-002_1"",999,2,0),(""HK075-001_1"",1,1,1),(""HK075-003_1"",156,2,0),(""HK087-001_1"",999,2,0),(""HK022_0060"",1,1,1),(""HK031_0078"",1,1,1),(""HK047_0116"",999,2,0),(""HK069-0177_1"",999,1,1),(""HK069-0179_1"",112,1,1),(""HK073-002_1"",70,1,1),(""HK075-002_1"",137,1,1),(""HK087-002_1"",999,2,0),(""HK088-002_1"",999,1,1),(""HK095-001_1"",999,1,1),(""HK095-003_1"",999,1,1),(""HK101-002_1"",999,1,1),(""HK102-003_1"",142,1,1),(""HK103-001_1"",1,1,1),(""HK103-002_1"",56,1,1),(""OUN_HK120_3_1"",1,1,1),(""HK007_0019"",19,1,1),(""HK007_0020"",1,1,1),(""HK008_0022_2"",1,1,1),(""HK008_0022"",108,1,1),(""HK009_0023"",1,1,1),(""HK009_0024"",48,1,1),(""HK009_0025"",1,1,1),(""HK010_0027"",124,1,1),(""HK010_0028"",124,1,1),(""HK018_0047"",1,1,1),(""HK028_0073"",999,2,0),(""HK044_0110"",999,1,1),(""HK047_0117"",999,2,0),(""HK047_0118"",1,1,1),(""HK051_0127"",67,1,1)]"
chr1:66234,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_2""",807.0,"{""UNRESOLVED""}","[""manta""]","""chr19""",,,66234,108051.0,"[""PE""]",,"""-+""",-1,"""BND""","""SINGLE_ENDER_-+""",,,,,,,,,,,,,,,,,,,,False,,1428,[236],[1.65e-01],714,514,164,36,0.72,0.23,0.0504,772,[131],[1.70e-01],386,275,91,20,0.712,0.236,0.0518,656,[105],[1.60e-01],328,239,73,16,0.729,0.223,0.0488,,,"[(""HK010_0026"",688,1,NA),(""HK015_0037"",76,2,NA),(""HK031_0079"",381,1,NA),(""HK031_0080"",76,1,NA),(""HK003_0009"",487,2,NA),(""HK012_0031_2"",306,1,NA),(""HK022_0060"",815,1,NA),(""HK024_0067"",90,1,NA),(""HK031_0078"",962,1,NA),(""HK047_0116"",851,2,NA),(""HK003_0007"",141,2,NA),(""HK003_0008"",425,2,NA),(""HK006_0016"",283,1,NA),(""HK006_0017"",854,1,NA),(""HK006_0018"",926,1,NA),(""HK007_0019"",854,1,NA),(""HK007_0020"",354,1,NA),(""HK007_0021"",854,1,NA),(""HK008_0022_2"",141,1,NA),(""HK008_0022_3"",354,1,NA),(""HK008_0022"",212,1,NA),(""HK009_0023"",710,2,NA),(""HK009_0024"",854,2,NA),(""HK009_0025"",283,1,NA),(""HK010_0027"",854,1,NA),(""HK010_0028"",854,1,NA),(""HK011_0029"",496,1,NA),(""HK011_0030_3"",496,1,NA),(""HK011_0030"",567,1,NA),(""HK012_0031_3"",1,1,NA),(""HK012_0031"",283,1,NA),(""HK018_0047"",212,1,NA),(""HK018_0048"",1,1,NA),(""HK018_0049"",639,2,NA),(""HK022_0059"",212,2,NA),(""HK022_0061"",71,2,NA),(""HK024_0065"",567,1,NA),(""HK024_0066"",639,2,NA),(""HK025-0068_2"",639,1,NA),(""HK025-0068_3"",141,2,NA),(""HK025_0068"",141,2,NA),(""HK028_0073"",926,1,NA),(""HK028_0074"",1,1,NA),(""HK028_0075"",639,1,NA),(""HK029-0076_2"",854,1,NA),(""HK029-0076_3"",854,1,NA),(""HK029_0076"",283,1,NA),(""HK035_0088"",782,1,NA),(""HK036_0092"",496,1,NA),(""HK036_0093"",710,1,NA),(""HK037_0094"",854,1,NA),(""HK044_0109"",141,1,NA),(""HK044_0110"",354,2,NA),(""HK044_0111"",1,1,NA),(""HK047_0117"",425,1,NA),(""HK047_0118"",496,2,NA),(""HK051_0127"",926,1,NA),(""HK051_0128"",1,2,NA),(""HK051_0129"",141,1,NA),(""HK071-001"",496,1,NA),(""HK071-002"",639,1,NA),(""HK071-004"",71,2,NA),(""OUN_HK120_1_1"",1,1,NA)]"
chr1:66350,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_2""",774.0,"{""FAIL_OUTLIER_REMOVAL""}","[""manta""]","""chr1""",,,66427,,"[""SR""]",,,77,"""DEL""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[2],[1.40e-03],714,712,2,0,0.997,0.0028,0.0,772,[1],[1.30e-03],386,385,1,0,0.997,0.00259,0.0,656,[1],[1.52e-03],328,327,1,0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_4""",0.000555,"[(""HK031_0080"",99,1,7)]"
chr1:66531,"[""N"",""<INS>""]","""CMG.phase1_CMG_INS_chr1_1""",400.0,{},"[""manta""]","""chr1""",,,66576,,"[""SR""]",,,59,"""INS""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[18],[1.26e-02],714,696,18,0,0.975,0.0252,0.0,772,[9],[1.17e-02],386,377,9,0,0.977,0.0233,0.0,656,[9],[1.37e-02],328,319,9,0,0.973,0.0274,0.0,"""gnomAD-SV_v2.1_INS_1_3""",0.000234,"[(""HK015_0037"",49,1,NA),(""HK069-0178_1"",1,1,NA),(""HK053-0134_1"",1,1,NA),(""HK006_0018"",1,1,NA),(""HK008_0022"",1,1,NA),(""HK009_0023"",1,1,NA),(""HK009_0024"",1,1,NA),(""HK018_0048"",1,1,NA)]"


In [52]:
mt2.rows().samples.filter(lambda x: x.cn>4).show()

locus,alleles,Unnamed: 2_level_0
locus<GRCh38>,array<str>,"array<struct{id: str, gq: int32, num_alt: int32, cn: int32}>"
chr1:10000,"[""N"",""<DUP>""]",[]
chr1:10000,"[""N"",""<DUP>""]",[]
chr1:10602,"[""N"",""<BND>""]",[]
chr1:41950,"[""N"",""<DUP>""]",[]
chr1:44000,"[""N"",""<DUP>""]",[]
chr1:44250,"[""N"",""<DUP>""]",[]
chr1:51400,"[""N"",""<DEL>""]",[]
chr1:66234,"[""N"",""<BND>""]",[]
chr1:66350,"[""N"",""<DEL>""]","[(""HK031_0080"",99,1,7)]"
chr1:66531,"[""N"",""<INS>""]",[]


In [44]:
alleles = mt1.GT[0] + mt1.GT[1]
alleles.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,'HK010_0026','HK015_0037','HK031_0079','HK031_0080','HK069-0178_1'
locus,alleles,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
locus<GRCh38>,array<str>,int32,int32,int32,int32,int32
chr1:10000,"[""N"",""<DUP>""]",1.0,1.0,1.0,2.0,1.0
chr1:10000,"[""N"",""<DUP>""]",0.0,2.0,0.0,1.0,0.0
chr1:10602,"[""N"",""<BND>""]",0.0,0.0,0.0,0.0,0.0
chr1:41950,"[""N"",""<DUP>""]",0.0,0.0,0.0,0.0,0.0
chr1:44000,"[""N"",""<DUP>""]",0.0,0.0,0.0,1.0,0.0
chr1:44250,"[""N"",""<DUP>""]",0.0,0.0,0.0,0.0,0.0
chr1:51400,"[""N"",""<DEL>""]",0.0,1.0,0.0,0.0,1.0
chr1:52600,"[""N"",""<CNV>""]",,,,,
chr1:66234,"[""N"",""<BND>""]",1.0,2.0,1.0,1.0,0.0
chr1:66350,"[""N"",""<DEL>""]",0.0,0.0,0.0,1.0,0.0


In [37]:
rows = mt1.rows()
rows = rows.annotate(_filters=hl.array(hl.filter(lambda x: x != 'UNRESOLVED', rows.filters)))

In [38]:
rows = rows.annotate(__filters=hl.if_else(hl.len(rows._filters)>0, rows._filters, hl.null(hl.dtype('array<str>'))))

In [39]:
rows.show()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,info,Unnamed: 70_level_0,Unnamed: 71_level_0
locus,alleles,rsid,qual,filters,ALGORITHMS,CHR2,CPX_INTERVALS,CPX_TYPE,END,END2,EVIDENCE,SOURCE,STRANDS,SVLEN,SVTYPE,UNRESOLVED_TYPE,PROTEIN_CODING__LOF,LINCRNA__LOF,PROTEIN_CODING__DUP_LOF,LINCRNA__DUP_LOF,PROTEIN_CODING__COPY_GAIN,LINCRNA__COPY_GAIN,PROTEIN_CODING__DUP_PARTIAL,LINCRNA__DUP_PARTIAL,PROTEIN_CODING__MSV_EXON_OVR,LINCRNA__MSV_EXON_OVR,PROTEIN_CODING__INTRONIC,LINCRNA__INTRONIC,PROTEIN_CODING__INV_SPAN,LINCRNA__INV_SPAN,PROTEIN_CODING__UTR,LINCRNA__UTR,NONCODING_SPAN,NONCODING_BREAKPOINT,PROTEIN_CODING__NEAREST_TSS,PROTEIN_CODING__INTERGENIC,PROTEIN_CODING__PROMOTER,AN,AC,AF,N_BI_GENOS,N_HOMREF,N_HET,N_HOMALT,FREQ_HOMREF,FREQ_HET,FREQ_HOMALT,MALE_AN,MALE_AC,MALE_AF,MALE_N_BI_GENOS,MALE_N_HOMREF,MALE_N_HET,MALE_N_HOMALT,MALE_FREQ_HOMREF,MALE_FREQ_HET,MALE_FREQ_HOMALT,FEMALE_AN,FEMALE_AC,FEMALE_AF,FEMALE_N_BI_GENOS,FEMALE_N_HOMREF,FEMALE_N_HET,FEMALE_N_HOMALT,FEMALE_FREQ_HOMREF,FEMALE_FREQ_HET,FEMALE_FREQ_HOMALT,gnomAD_V2_SVID,gnomAD_V2_AF,_filters,__filters
locus<GRCh38>,array<str>,str,float64,set<str>,array<str>,str,array<str>,str,int32,int32,array<str>,str,str,int32,str,str,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,array<str>,bool,array<str>,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,int32,array<int32>,array<float64>,int32,int32,int32,int32,float64,float64,float64,str,float64,array<str>,array<str>
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,17000,,"[""RD""]",,,7000,"""DUP""",,,,,,,,,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[370],[2.59e-01],714.0,415.0,228.0,71.0,0.581,0.319,0.0994,772,[214],[2.77e-01],386.0,216.0,126.0,44.0,0.56,0.326,0.114,656,[156],[2.38e-01],328.0,199.0,102.0,27.0,0.607,0.311,0.0823,,,"[""LOW_CALL_RATE""]","[""LOW_CALL_RATE""]"
chr1:10000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_2""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,53500,,"[""BAF"",""RD""]",,,43500,"""DUP""",,,,,,,"[""FAM138A"",""MIR1302-2HG""]",,,,,,,,,,,"[""DNase""]","[""DNase""]","[""OR4F5""]",True,,1428,[70],[4.90e-02],714.0,649.0,60.0,5.0,0.909,0.084,0.007,772,[46],[5.96e-02],386.0,344.0,38.0,4.0,0.891,0.0984,0.0104,656,[24],[3.66e-02],328.0,305.0,22.0,1.0,0.93,0.0671,0.00305,,,"[""LOW_CALL_RATE""]","[""LOW_CALL_RATE""]"
chr1:10602,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_1""",461.0,"{""UNRESOLVED"",""UNSTABLE_AF_PCRMINUS""}","[""manta""]","""chr12""",,,10602,10546.0,"[""SR""]",,"""+-""",-1,"""BND""","""SINGLE_ENDER_+-""",,,,,,,,,,,,,,,,,,,,False,,1428,[88],[6.16e-02],714.0,626.0,88.0,0.0,0.877,0.123,0.0,772,[51],[6.61e-02],386.0,335.0,51.0,0.0,0.868,0.132,0.0,656,[37],[5.64e-02],328.0,291.0,37.0,0.0,0.887,0.113,0.0,"""gnomAD-SV_v2.1_BND_1_1""",0.00679,"[""UNSTABLE_AF_PCRMINUS""]","[""UNSTABLE_AF_PCRMINUS""]"
chr1:41950,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_3""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,52000,,"[""BAF"",""RD""]",,,10050,"""DUP""",,,,,,,,,,,,,,,,,,,,"[""OR4F5""]",True,,1428,[28],[1.96e-02],714.0,687.0,26.0,1.0,0.962,0.0364,0.0014,772,[15],[1.94e-02],386.0,371.0,15.0,0.0,0.961,0.0389,0.0,656,[13],[1.98e-02],328.0,316.0,11.0,1.0,0.963,0.0335,0.00305,"""gnomAD-SV_v2.1_DUP_1_1""",0.069,"[""LOW_CALL_RATE""]","[""LOW_CALL_RATE""]"
chr1:44000,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_4""",999.0,"{""UNSTABLE_AF_PCRMINUS"",""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,66000,,"[""RD""]",,,22000,"""DUP""",,,,,,,,"[""OR4F5""]",,,,,,,,,,"[""DNase""]",,,False,,1428,[96],[6.72e-02],714.0,641.0,50.0,23.0,0.898,0.07,0.0322,772,[54],[6.99e-02],386.0,345.0,28.0,13.0,0.894,0.0725,0.0337,656,[42],[6.40e-02],328.0,296.0,22.0,10.0,0.902,0.0671,0.0305,,,"[""LOW_CALL_RATE"",""UNSTABLE_AF_PCRMINUS""]","[""LOW_CALL_RATE"",""UNSTABLE_AF_PCRMINUS""]"
chr1:44250,"[""N"",""<DUP>""]","""CMG.phase1_CMG_DUP_chr1_5""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,116000,,"[""BAF"",""RD""]",,,71750,"""DUP""",,,,,,"[""OR4F5""]","[""AL627309.3""]",,"[""AL627309.1""]",,,,,,,,,"[""DNase""]",,,False,,1428,[82],[5.74e-02],714.0,646.0,54.0,14.0,0.905,0.0756,0.0196,772,[43],[5.57e-02],386.0,351.0,27.0,8.0,0.909,0.0699,0.0207,656,[39],[5.95e-02],328.0,295.0,27.0,6.0,0.899,0.0823,0.0183,,,"[""LOW_CALL_RATE""]","[""LOW_CALL_RATE""]"
chr1:51400,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_1""",999.0,"{""LOW_CALL_RATE""}","[""depth""]","""chr1""",,,64000,,"[""RD""]",,,12600,"""DEL""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,1428,[306],[2.14e-01],714.0,443.0,236.0,35.0,0.62,0.331,0.049,772,[156],[2.02e-01],386.0,246.0,124.0,16.0,0.637,0.321,0.0415,656,[150],[2.29e-01],328.0,197.0,112.0,19.0,0.601,0.341,0.0579,,,"[""LOW_CALL_RATE""]","[""LOW_CALL_RATE""]"
chr1:52600,"[""N"",""<CNV>""]","""CMG.phase1_CMG_CNV_chr1_1""",999.0,"{""FAIL_minGQ""}","[""depth""]","""chr1""",,,58000,,"[""RD""]",,,5400,"""CNV""",,,,,,,,,,,,,,,,,,"[""DNase""]",,"[""OR4F5""]",True,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,0,[0],[0.00e+00],,,,,,,,,,"[""FAIL_minGQ""]","[""FAIL_minGQ""]"
chr1:66234,"[""N"",""<BND>""]","""CMG.phase1_CMG_BND_chr1_2""",807.0,"{""UNRESOLVED""}","[""manta""]","""chr19""",,,66234,108051.0,"[""PE""]",,"""-+""",-1,"""BND""","""SINGLE_ENDER_-+""",,,,,,,,,,,,,,,,,,,,False,,1428,[236],[1.65e-01],714.0,514.0,164.0,36.0,0.72,0.23,0.0504,772,[131],[1.70e-01],386.0,275.0,91.0,20.0,0.712,0.236,0.0518,656,[105],[1.60e-01],328.0,239.0,73.0,16.0,0.729,0.223,0.0488,,,[],
chr1:66350,"[""N"",""<DEL>""]","""CMG.phase1_CMG_DEL_chr1_2""",774.0,"{""FAIL_OUTLIER_REMOVAL""}","[""manta""]","""chr1""",,,66427,,"[""SR""]",,,77,"""DEL""",,,,,,,,,,,,"[""OR4F5""]",,,,,,,,,False,,1428,[2],[1.40e-03],714.0,712.0,2.0,0.0,0.997,0.0028,0.0,772,[1],[1.30e-03],386.0,385.0,1.0,0.0,0.997,0.00259,0.0,656,[1],[1.52e-03],328.0,327.0,1.0,0.0,0.997,0.00305,0.0,"""gnomAD-SV_v2.1_DEL_1_4""",0.000555,"[""FAIL_OUTLIER_REMOVAL""]","[""FAIL_OUTLIER_REMOVAL""]"


In [40]:
row = rows.take(1)

In [42]:
pprint(row)

[{'__filters': ['LOW_CALL_RATE'],
  '_filters': ['LOW_CALL_RATE'],
  'alleles': ['N', '<DUP>'],
  'filters': {'LOW_CALL_RATE'},
  'info': {'AC': [370],
           'AF': [0.259104],
           'ALGORITHMS': ['depth'],
           'AN': 1428,
           'CHR2': 'chr1',
           'CPX_INTERVALS': None,
           'CPX_TYPE': None,
           'END': 17000,
           'END2': None,
           'EVIDENCE': ['RD'],
           'FEMALE_AC': [156],
           'FEMALE_AF': [0.237805],
           'FEMALE_AN': 656,
           'FEMALE_FREQ_HET': 0.310976,
           'FEMALE_FREQ_HOMALT': 0.0823171,
           'FEMALE_FREQ_HOMREF': 0.606707,
           'FEMALE_N_BI_GENOS': 328,
           'FEMALE_N_HET': 102,
           'FEMALE_N_HOMALT': 27,
           'FEMALE_N_HOMREF': 199,
           'FREQ_HET': 0.319328,
           'FREQ_HOMALT': 0.0994398,
           'FREQ_HOMREF': 0.581232,
           'LINCRNA__COPY_GAIN': None,
           'LINCRNA__DUP_LOF': None,
           'LINCRNA__DUP_PARTIAL': None,
     

In [92]:
a=hl.struct(type='a',chrom='b', start=1, end=2)
a.dtype

dtype('struct{type: str, chrom: str, start: int32, end: int32}')

In [63]:
a=hl.literal([1,2,3,4,5,6])
b=[hl.if_else(a[i]%2==0, a[i], hl.null(hl.tint)) for i in range(6)]
hl.eval(b)

[None, 2, None, 4, None, 6]

In [68]:
hl.eval(hl.filter(lambda x: x%2==0, a))

[2, 4, 6]