-
Notifications
You must be signed in to change notification settings - Fork 0
/
resources.py
107 lines (71 loc) · 5.7 KB
/
resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from gnomad_qc.v2.resources import get_gnomad_data, get_gnomad_meta, pbt_phased_trios_mt_path
from chet_utils import extract_pbt_probands
import hail as hl
# Default `least_consequence` value for the path helpers in this module.
LEAST_CONSEQUENCE = '3_prime_UTR_variant'
# Default `max_freq` value for the path helpers in this module.
MAX_FREQ = 0.05
def mini_mt_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'mini_mt' stage, with an 'mt' extension.

    The path string itself is assembled by `_chets_out_path`.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set, a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='mt',
        stage='mini_mt',
        pbt=pbt,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def vp_list_ht_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'list' stage, with an 'ht' extension.

    The path string itself is assembled by `_chets_out_path`.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set, a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='list',
        pbt=pbt,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def vp_ann_ht_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'ann' stage, with an 'ht' extension.

    The path string itself is assembled by `_chets_out_path`.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set, a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='ann',
        pbt=pbt,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def full_mt_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path with an 'mt' extension and no stage label.

    An empty stage is passed to `_chets_out_path`, so no stage component appears in the file name.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set, a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='mt',
        stage='',
        pbt=pbt,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def vp_count_ht_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'counts' stage, with an 'ht' extension.

    The path string itself is assembled by `_chets_out_path`.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set, a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='counts',
        pbt=pbt,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def phased_vp_count_ht_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None,
        release: bool = False
):
    """
    Return the path for the 'phased_counts' stage, either in the release location or the working location.

    With `release` set, the path points under `gs://gnomad/release/2.1.1/ht/` and `pbt` is not used.
    Otherwise the path is assembled by `_chets_out_path` with an 'ht' extension.

    :param data_type: Data type label embedded in the file name
    :param pbt: If set (and `release` is not), a '_pbt' marker is added to the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :param release: Whether to return the release path instead of the working path
    :return: Path string
    """
    if not release:
        return _chets_out_path(
            data_type=data_type,
            extension='ht',
            stage='phased_counts',
            pbt=pbt,
            least_consequence=least_consequence,
            max_freq=max_freq,
            chrom=chrom
        )

    chrom_suffix = f'_chrom{chrom}' if chrom else ''
    return f"gs://gnomad/release/2.1.1/ht/{data_type}_phased_counts_{max_freq}_{least_consequence}_vp{chrom_suffix}.ht"
def pbt_phase_count_ht_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_phase_count' stage, with an 'ht' extension.

    :param data_type: Data type label embedded in the file name
    :param pbt: Ignored; present only so the signature matches the other path helpers
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    # `pbt` is deliberately not forwarded: the path never carries the '_pbt' marker.
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='pbt_phase_count',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def pbt_trio_mt_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_trio' stage, with an 'mt' extension.

    :param data_type: Data type label embedded in the file name
    :param pbt: Ignored; present only so the signature matches the other path helpers
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    # `pbt` is deliberately not forwarded: the path never carries the '_pbt' marker.
    return _chets_out_path(
        data_type=data_type,
        extension='mt',
        stage='pbt_trio',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def pbt_trio_et_path(
        data_type: str,
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_trio' stage, with an 'ht' extension.

    :param data_type: Data type label embedded in the file name
    :param pbt: Ignored; present only so the signature matches the other path helpers
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    # `pbt` is deliberately not forwarded: the path never carries the '_pbt' marker.
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='pbt_trio',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def pbt_comparison_full_mt_path(
        data_type: str,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_comparison' stage, with an 'mt' extension.

    The '_pbt' marker is never added to the file name.

    :param data_type: Data type label embedded in the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='mt',
        stage='pbt_comparison',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
# def pbt_comparison_vp_ann_ht_path(data_type: str, least_consequence: str = LEAST_CONSEQUENCE, max_freq: float = MAX_FREQ, chrom: str = None):
# return _chets_out_path(data_type, 'ht', 'pbt_comparison_ann', False, least_consequence, max_freq, chrom)
#
def pbt_comparison_vp_count_ht_path(
        data_type: str,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_comparison_counts' stage, with an 'ht' extension.

    The '_pbt' marker is never added to the file name.

    :param data_type: Data type label embedded in the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='pbt_comparison_counts',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def pbt_comparison_phased_vp_count_ht_path(
        data_type: str,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Return the output path for the 'pbt_comparison_phased_counts' stage, with an 'ht' extension.

    The '_pbt' marker is never added to the file name.

    :param data_type: Data type label embedded in the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added to the file name
    :return: Path string
    """
    return _chets_out_path(
        data_type=data_type,
        extension='ht',
        stage='pbt_comparison_phased_counts',
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom
    )
def get_adj_missing_mt(data_type: str, pbt: bool) -> hl.MatrixTable:
    """
    Load a MatrixTable whose entries are reduced to `GT`, `missing` and `adj`.

    The source is the MatrixTable at `pbt_phased_trios_mt_path(data_type)` when `pbt` is set,
    and `get_gnomad_data(data_type)` otherwise. In the result:

    - `GT` is kept only where the genotype is non-ref and is set to missing elsewhere
    - `missing` records whether the input `GT` was missing
    - `adj` is carried over unchanged

    With `pbt`, columns are passed through `extract_pbt_probands`, rows without any non-ref
    genotype are removed, and columns are keyed by `s` and `trio_id`.
    Without `pbt`, columns are filtered on the `high_quality` field of the sample metadata.

    :param data_type: Data type passed to the data, metadata and path loaders
    :param pbt: Whether to load the PBT-phased trios data instead of the full data
    :return: MatrixTable with `GT`, `missing` and `adj` entry fields
    """
    if pbt:
        mt = hl.read_matrix_table(pbt_phased_trios_mt_path(data_type))
    else:
        mt = get_gnomad_data(data_type).select_cols()

    mt = mt.select_rows()
    mt = mt.select_entries(
        GT=hl.or_missing(mt.GT.is_non_ref(), mt.GT),
        missing=hl.is_missing(mt.GT),
        adj=mt.adj
    ).select_cols().select_rows()

    if pbt:
        # NOTE(review): `source_trio` is read after the `select_cols()` call above, which keeps
        # only column key fields -- confirm against the schema of the PBT MatrixTable.
        mt = mt.key_cols_by('s', trio_id=mt.source_trio.id)
        mt = extract_pbt_probands(mt, data_type)
        mt = mt.filter_rows(hl.agg.any(mt.GT.is_non_ref()))
        mt = mt.key_cols_by(s=mt.s, trio_id=mt.source_trio.id)
    else:
        # Use the metadata for the requested data type. This was hard-coded to 'exomes',
        # which filtered any other data type against the wrong sample table.
        meta = get_gnomad_meta(data_type)
        mt = mt.filter_cols(meta[mt.col_key].high_quality)

    return mt
def get_revel_annotations_path(data_type: str) -> str:
    """
    Return the path of the REVEL annotations table for the given data type.

    :param data_type: Data type label embedded in the file name
    :return: Path string (the table itself is not loaded here)
    """
    # NOTE(review): the directory component is always 'exomes' and only the file name varies
    # with `data_type` -- confirm this matches the bucket layout.
    return f"gs://gnomad/annotations/hail-0.2/ht/exomes/gnomad.{data_type}.revel.ht"
def vp_per_gene_path(
        data_type: str,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        test: bool = False,
        extension: str = "ht"
):
    """
    Return the output path for the 'chet_per_gene' stage.

    The '_pbt' marker is never added to the file name.

    :param data_type: Data type label embedded in the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param test: If set, a '_chrom20_test' suffix is added to the file name
    :param extension: File extension of the returned path
    :return: Path string
    """
    chrom_label = "20_test" if test else None
    return _chets_out_path(
        data_type=data_type,
        extension=extension,
        stage="chet_per_gene",
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom_label
    )
def het_hom_per_gene_path(
        data_type: str,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        test: bool = False,
        extension: str = "ht"
):
    """
    Return the output path for the 'het_hom_per_gene' stage.

    The '_pbt' marker is never added to the file name.

    :param data_type: Data type label embedded in the file name
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param test: If set, a '_chrom20_test' suffix is added to the file name
    :param extension: File extension of the returned path
    :return: Path string
    """
    chrom_label = "20_test" if test else None
    return _chets_out_path(
        data_type=data_type,
        extension=extension,
        stage="het_hom_per_gene",
        pbt=False,
        least_consequence=least_consequence,
        max_freq=max_freq,
        chrom=chrom_label
    )
def _chets_out_path(
        data_type: str,
        extension: str,
        stage: str = '',
        pbt: bool = False,
        least_consequence: str = LEAST_CONSEQUENCE,
        max_freq: float = MAX_FREQ,
        chrom: str = None
):
    """
    Assemble the path of a compound-hets output file.

    The file name has the form
    `<data_type>[_pbt][_<stage>]_<max_freq>_<least_consequence>_vp[_chrom<chrom>].<extension>`
    and is placed under a `compound_hets` directory. The 'mini_mt' stage goes to the
    `gnomad-tmp` bucket; every other stage goes under `gnomad/projects`.

    :param data_type: Data type label that starts the file name
    :param extension: File extension of the returned path
    :param stage: Stage label; omitted from the file name when empty
    :param pbt: If set, a '_pbt' marker is added after the data type
    :param least_consequence: Consequence label embedded in the file name
    :param max_freq: Frequency value embedded in the file name
    :param chrom: If set, a '_chrom<chrom>' suffix is added before the extension
    :return: Path string
    """
    # NOTE(review): for 'mini_mt' the prefix ends in '/' and is followed by '/compound_hets',
    # so the result contains 'gs://gnomad-tmp//compound_hets/'. Kept as-is so existing paths
    # do not move -- confirm whether the double slash is intended.
    bucket_part = '-tmp/' if stage == 'mini_mt' else '/projects'
    pbt_part = '_pbt' if pbt else ''
    stage_part = f'_{stage}' if stage else ''
    chrom_part = f'_chrom{chrom}' if chrom else ''

    return (
        f'gs://gnomad{bucket_part}/compound_hets/'
        f'{data_type}{pbt_part}{stage_part}_{max_freq}_{least_consequence}_vp{chrom_part}.{extension}'
    )