## Setup

In [1]:
%run setup.ipynb

In [2]:
region_vgsc = '2L', 2358158, 2431617
phase2_ar1.load_geneset_agamp44(attributes=['ID', 'Parent'])
geneset_agamp44 = geneset_to_pandas(phase2_ar1.geneset_agamp44)
geneset_agamp44_vgsc = geneset_agamp44.query("(seqid == %r) & (start >= %s) & (end <= %s)" % region_vgsc).copy()
geneset_davies = geneset_to_pandas(allel.FeatureTable.from_gff3('davies_vgsc_model.gff3', attributes=['ID', 'Parent']))
geneset_vgsc_combined = pandas.concat([geneset_agamp44_vgsc, geneset_davies])

In [3]:
geneset_vgsc_combined.query("type == 'mRNA'")

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent
672,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RA,AGAP004707
673,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RB,AGAP004707
674,2L,VectorBase,mRNA,2358158,2431617,-1,+,-1,AGAP004707-RC,AGAP004707
0,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C1N2,AGAP004707
1,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C3N2,AGAP004707
2,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C5N2,AGAP004707
3,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C7N2,AGAP004707
4,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C8N2,AGAP004707
5,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C10N2,AGAP004707
6,2L,Davies et al. (2007),mRNA,2358158,2431617,-1,+,-1,Davies-C11N2,AGAP004707


In [4]:
def tabulate_variants(callset, snpeff, chrom, start, stop):
    variants = callset[chrom]['variants']
    ann = snpeff[chrom]['variants']['ANN']
    pos = allel.SortedIndex(variants['POS'])
    loc = pos.locate_range(start, stop)
    cols = [
        variants['CHROM'][loc],
        variants['POS'][loc],
        variants['num_alleles'][loc],
        variants['FILTER_PASS'][loc],
        variants['REF'][loc],
        variants['ALT'][loc],
        variants['AC'][loc],
        ann[loc]['Annotation'],
        ann[loc]['HGVS_p'],
    ]

    def split_alleles(row):
        for i in range(row.num_alleles - 1):
            yield row.chrom, row.pos, row.num_alleles, row.ref, row.alt[i], i, row.ac[i], row.filter_pass, (row.annotation if i == 0 else None), (row.hgvs_p if i == 0 else None)

        
    tbl = (
        etl
        .fromcolumns(cols, header=['chrom', 'pos', 'num_alleles', 'filter_pass', 'ref', 'alt', 'ac', 'annotation', 'hgvs_p'])
        .rowmapmany(split_alleles, header=['chrom', 'pos', 'num_alleles', 'ref', 'alt', 'alt_idx', 'alt_ac', 'filter_pass', 'annotation', 'hgvs_p'])
        .convert('chrom ref alt annotation hgvs_p'.split(), lambda v: str(v, 'ascii'))
    )
    
    return tbl


In [5]:
import veff

In [6]:
combined_genome = veff.Genome(fasta_path=phase2_ar1.genome_agamp3_fn,
                              gff3_path=[phase2_ar1.geneset_agamp44_fn, 'davies_vgsc_model.gff3'],
                              seqid='2L')
combined_genome

<veff.Genome at 0x7f3344004c88>

In [7]:
transcript_ids = [f.feature_id for f in combined_genome.get_children('AGAP004707')]
transcript_ids

['AGAP004707-RA',
 'AGAP004707-RB',
 'AGAP004707-RC',
 'Davies-C1N2',
 'Davies-C3N2',
 'Davies-C5N2',
 'Davies-C7N2',
 'Davies-C8N2',
 'Davies-C10N2',
 'Davies-C11N2',
 'Davies-C1N9',
 'Davies-C8N9',
 'Davies-C1N9ck']

In [8]:
tbl_davies_exons = (
    etl
    .fromdataframe(geneset_davies)
    .eq('type', 'CDS')
    .cutout('Parent', 'source', 'type', 'score', 'strand', 'phase')
    .merge(key=('start', 'end'))
    .rename('ID', 'exon')
    .movefield('seqid', 0)
)
tbl_davies_exons.displayall()

0|seqid,1|start,2|end,3|exon
2L,2358158,2358304,1
2L,2359640,2359672,2j
2L,2361989,2362144,3
2L,2381065,2381270,4
2L,2382270,2382398,5
2L,2385694,2385785,6
2L,2390129,2390341,7
2L,2390425,2390485,8
2L,2390594,2390738,9
2L,2391156,2391320,10


In [9]:
def lpop(l, default=None):
    """Pop the first item from a list if not empty."""
    try:
        return l[0]
    except IndexError:
        return default


In [10]:
tbl_variants_phase1 = tabulate_variants(phase1_ar31.callset, phase1_ar31.callset, *region_vgsc)
tbl_variants_phase1

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p
2L,2358254,2,G,A,0,1,True,missense_variant,p.Asp33Asn
2L,2358316,2,T,G,0,73,True,intron_variant,.
2L,2358328,2,T,C,0,2,True,intron_variant,.
2L,2358353,2,C,T,0,1,True,intron_variant,.
2L,2358405,2,T,A,0,1,True,intron_variant,.


In [11]:
snpeff_phase2 = h5py.File(
    os.path.join(phase2_ar1.release_dir, 'variation', 'main', 'hdf5', 'all_snpeff', 'ag1000g.phase2.ar1.snpeff.AgamP4.2.2L.h5'),
    mode='r'
)
snpeff_phase2

<HDF5 file "ag1000g.phase2.ar1.snpeff.AgamP4.2.2L.h5" (mode r)>

In [12]:
tbl_variants_phase2 = tabulate_variants(phase2_ar1.callset, snpeff_phase2, *region_vgsc)
tbl_variants_phase2

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p
2L,2358254,2,G,A,0,1,True,missense_variant,p.Asp33Asn
2L,2358309,2,A,G,0,1,True,splice_region_variant&intron_varia,.
2L,2358316,2,T,G,0,81,True,intron_variant,.
2L,2358328,2,T,C,0,8,True,intron_variant,.
2L,2358353,2,C,T,0,1,True,intron_variant,.


In [13]:
tbl_variants_all = (
    tbl_variants_phase2.outerjoin(tbl_variants_phase1, key=('chrom', 'pos', 'ref', 'alt'), rprefix='p1_')
    .cutout('p1_alt_idx', 'p1_num_alleles')
)
tbl_variants_all

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p,10|p1_alt_ac,11|p1_filter_pass,12|p1_annotation,13|p1_hgvs_p
2L,2358254,2,G,A,0,1,True,missense_variant,p.Asp33Asn,1.0,True,missense_variant,p.Asp33Asn
2L,2358309,2,A,G,0,1,True,splice_region_variant&intron_varia,.,,,,
2L,2358316,2,T,G,0,81,True,intron_variant,.,73.0,True,intron_variant,.
2L,2358328,2,T,C,0,8,True,intron_variant,.,2.0,True,intron_variant,.
2L,2358353,2,C,T,0,1,True,intron_variant,.,1.0,True,intron_variant,.


## Coding variation, all SNPs (i.e., including non-PASS) 

In [14]:
selected_effects = [
    'NON_SYNONYMOUS_CODING', 
    'SYNONYMOUS_CODING'
]


tbl_variants_all_eff = (
    tbl_variants_all
    .intervalleftjoin(
        # don't include short exon variations otherwise screws up join
        tbl_davies_exons.select('exon', lambda v: v[-1] != '-'),
        lkey='chrom', rkey='seqid', lstart='pos', rstart='start', lstop='pos', rstop='end', include_stop=True)
    .cutout(14)
    .rename({14: 'exon_start', 15: 'exon_end'})
    .addfield('eff', lambda row: [e for e in veff.get_effects(combined_genome, chrom=row.chrom, pos=row.pos, ref=row.ref, alt=row.alt) if e.effect in selected_effects])
    .addfield(transcript_ids[0].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[0]], ''))
    .addfield(transcript_ids[1].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[1]], ''))
    .addfield(transcript_ids[2].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[2]], ''))
    .addfield(transcript_ids[3].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[3]], ''))
    .addfield(transcript_ids[4].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[4]], ''))
    .addfield(transcript_ids[5].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[5]], ''))
    .addfield(transcript_ids[6].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[6]], ''))
    .addfield(transcript_ids[7].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[7]], ''))
    .addfield(transcript_ids[8].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[8]], ''))
    .addfield(transcript_ids[9].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[9]], ''))
    .addfield(transcript_ids[10].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[10]], ''))
    .addfield(transcript_ids[11].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[11]], ''))
    .addfield(transcript_ids[12].split('-')[1], lambda row: lpop([e.aa_change for e in row.eff if e.transcript_id == transcript_ids[12]], ''))
    .cutout('eff')
    .select(lambda row: any(row[k] and row[k][0] != row[k][-1] for k in [t.split('-')[1] for t in transcript_ids]))
    .cache()
)

In [15]:
tbl_variants_all_eff.display()

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p,10|p1_alt_ac,11|p1_filter_pass,12|p1_annotation,13|p1_hgvs_p,14|exon_start,15|exon_end,16|exon,17|RA,18|RB,19|RC,20|C1N2,21|C3N2,22|C5N2,23|C7N2,24|C8N2,25|C10N2,26|C11N2,27|C1N9,28|C8N9,29|C1N9ck
2L,2358254,2,G,A,0,1,True,missense_variant,p.Asp33Asn,1,True,missense_variant,p.Asp33Asn,2358158,2358304,1,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N
2L,2359670,2,G,A,0,7,False,intron_variant,.,7,False,intron_variant,.,2359640,2359672,2j,,,,,,,,E60K,,,,E60K,
2L,2362002,2,A,T,0,3,True,splice_region_variant&intron_varia,.,2,True,splice_region_variant&intron_varia,.,2361989,2362144,3,,,,D54V,D54V,D54V,D54V,D65V,D54V,D54V,D54V,D65V,D54V
2L,2362019,2,G,T,0,3,True,missense_variant,p.Gly54Cys,2,True,missense_variant,p.Gly54Cys,2361989,2362144,3,G54C,G54C,G54C,G60C,G60C,G60C,G60C,G71C,G60C,G60C,G60C,G71C,G60C
2L,2362023,2,C,T,0,1,True,missense_variant,p.Pro55Leu,1,True,missense_variant,p.Pro55Leu,2361989,2362144,3,P55L,P55L,P55L,P61L,P61L,P61L,P61L,P72L,P61L,P61L,P61L,P72L,P61L


 

In [16]:
def veff_aa_style(v):
    return 'background-color: #%s' % ('fff' if v in {None, '.', ''} else 'ddf' if v[0] == v[-1] else 'faa')


tbl_variants_all_eff.displayall(
    td_styles={
        'num_alleles': lambda v: 'background-color: #%s' % ('ff0' if v > 2 else 'fff'),
        'alt_ac': lambda v: 'background-color:rgba(0, 255, 0, %.3f)' % (min(1, v/100)),
        'filter_pass': lambda v: 'background-color: %s' % ('white' if v else 'red'),
        'hgvs_p': lambda v: 'background-color: #%s' % ('fff' if v in {None, '.'} else 'ddf' if v[2:5] == v[-3:] else 'faa'),
        'p1_filter_pass': lambda v: 'background-color: %s' % ('white' if v else 'red'),
        'p1_alt_ac': lambda v: 'background-color:' + ('rgba(0, 255, 0, %.3f)' % (min(1, v/100)) if v is not None else 'red'),
        'RA': veff_aa_style,
        'RB': veff_aa_style,
        'RC': veff_aa_style,
        'C1N2': veff_aa_style,
        'C3N2': veff_aa_style,
        'C5N2': veff_aa_style,
        'C7N2': veff_aa_style,
        'C8N2': veff_aa_style,
        'C10N2': veff_aa_style,
        'C11N2': veff_aa_style,
        'C1N9': veff_aa_style,
        'C8N9': veff_aa_style,
        'C1N9ck': veff_aa_style,
    }
)

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p,10|p1_alt_ac,11|p1_filter_pass,12|p1_annotation,13|p1_hgvs_p,14|exon_start,15|exon_end,16|exon,17|RA,18|RB,19|RC,20|C1N2,21|C3N2,22|C5N2,23|C7N2,24|C8N2,25|C10N2,26|C11N2,27|C1N9,28|C8N9,29|C1N9ck
2L,2358254,2,G,A,0,1,True,missense_variant,p.Asp33Asn,1.0,True,missense_variant,p.Asp33Asn,2358158,2358304,1,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N,D33N
2L,2359670,2,G,A,0,7,False,intron_variant,.,7.0,False,intron_variant,.,2359640,2359672,2j,,,,,,,,E60K,,,,E60K,
2L,2362002,2,A,T,0,3,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,2361989,2362144,3,,,,D54V,D54V,D54V,D54V,D65V,D54V,D54V,D54V,D65V,D54V
2L,2362019,2,G,T,0,3,True,missense_variant,p.Gly54Cys,2.0,True,missense_variant,p.Gly54Cys,2361989,2362144,3,G54C,G54C,G54C,G60C,G60C,G60C,G60C,G71C,G60C,G60C,G60C,G71C,G60C
2L,2362023,2,C,T,0,1,True,missense_variant,p.Pro55Leu,1.0,True,missense_variant,p.Pro55Leu,2361989,2362144,3,P55L,P55L,P55L,P61L,P61L,P61L,P61L,P72L,P61L,P61L,P61L,P72L,P61L
2L,2390168,2,A,G,0,2,True,missense_variant,p.Lys251Arg,2.0,True,missense_variant,p.Lys251Arg,2390129,2390341,7,K251R,K251R,K251R,K257R,K214R,K257R,K257R,K268R,K257R,K257R,K257R,K268R,K257R
2L,2390177,2,G,A,0,215,True,missense_variant,p.Arg254Lys,198.0,True,missense_variant,p.Arg254Lys,2390129,2390341,7,R254K,R254K,R254K,R260K,R217K,R260K,R260K,R271K,R260K,R260K,R260K,R271K,R260K
2L,2390305,2,A,T,0,1,True,missense_variant,p.Thr297Ser,,,,,2390129,2390341,7,T297S,T297S,T297S,T303S,T260S,T303S,T303S,T314S,T303S,T303S,T303S,T314S,T303S
2L,2390311,2,G,A,0,1,True,missense_variant,p.Glu299Lys,1.0,True,missense_variant,p.Glu299Lys,2390129,2390341,7,E299K,E299K,E299K,E305K,E262K,E305K,E305K,E316K,E305K,E305K,E305K,E316K,E305K
2L,2390448,2,G,A,0,6,True,missense_variant,p.Gly317Ser,6.0,True,missense_variant,p.Gly317Ser,2390425,2390485,8,G317S,G317S,G317S,G323S,G280S,G323S,G323S,G334S,G323S,G323S,G323S,G334S,G323S


Observations:

* One SNP (position 2359670, E60K) is observed in optional exon 2j not in vectorbase transcripts, but at low frequency so can probably ignore.
* One SNP (2362002, D54V) is observed in the extra bit of exon 3 not in vectorbase transcripts, but low frequency so can probably ignore.
* G238G in the paper figure could probably be dropped, it's a splice region variant.
* The multiallelic SNP at position 2391228, both variant alleles cause V402L and are again at appreciable frequency. Would be worth phasing these.
* The multiallelic SNP at position 2400071, both variant alleles cause M490L. Worth phasing.
* Two adjacent SNPs (2424383, 2424384, both in codon A1125 causing A1125S and A1125V) passed filters in phase 1 but fail filters in phase 2, why?
* SNPs at 2424401 and 2424720 fail filters in both phases, why?
* There are two adjacent SNPs (2429896, 2429897) in codon E1597, but we've only plotted one in Fig 5, would be worth plotting the other one too.
* There is a high frequency mutation at position 2430106 (L1667M) which fails filters. Would be worth investigating why this (and neighbouring) variants fail.
* Position 2431061 (codon A1934) has become multi-allelic in phase 2. The previously observed A1934V mutation has gone to even higher frequency. A new A1934D mutation is observed. Will need to phase this.


## Splice site variation

In [17]:
selected_effects = [
    'INTRONIC', 
    'SPLICE_CORE',
    'SPLICE_REGION',
]


def transcript_effect(transcript_id):
    def f(row):
        return lpop([(e.effect, e.intron_cds_5prime, e.intron_5prime_dist, e.intron_cds_3prime, e.intron_3prime_dist) 
                     for e in row.eff if e.transcript_id == transcript_id], '')
    return f


tbl_variants_all_eff_intronic = (
    tbl_variants_all
    .addfield('eff', lambda row: [e for e in veff.get_effects(combined_genome, chrom=row.chrom, pos=row.pos, ref=row.ref, alt=row.alt) if e.effect in selected_effects])
    .addfield('RA', transcript_effect('AGAP004707-RB'))
    .addfield('RB', transcript_effect('AGAP004707-RB'))
    .addfield('RC', transcript_effect('AGAP004707-RC'))
    .addfield('C1N2', transcript_effect('Davies-C1N2'))
    .addfield('C3N2', transcript_effect('Davies-C3N2'))
    .addfield('C5N2', transcript_effect('Davies-C5N2'))
    .addfield('C7N2', transcript_effect('Davies-C7N2'))
    .addfield('C8N2', transcript_effect('Davies-C8N2'))
    .addfield('C10N2', transcript_effect('Davies-C10N2'))
    .addfield('C11N2', transcript_effect('Davies-C11N2'))
    .addfield('C1N9', transcript_effect('Davies-C1N9'))
    .addfield('C8N9', transcript_effect('Davies-C8N9'))
    .addfield('C1N9ck', transcript_effect('Davies-C1N9ck'))
    .select(lambda row: (any([(row[k] and row[k][0].startswith('SPLICE')) for k in 'RA RB RC C1N2 C3N2 C5N2 C7N2 C8N2 C10N2 C11N2 C1N9 C8N9 C1N9ck'.split()]) or (row.annotation and 'splice' in row.annotation)))
    .cutout('eff')
    .cache()
)

In [18]:
tbl_variants_all_eff_intronic

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p,10|p1_alt_ac,11|p1_filter_pass,12|p1_annotation,13|p1_hgvs_p,14|RA,15|RB,16|RC,17|C1N2,18|C3N2,19|C5N2,20|C7N2,21|C8N2,22|C10N2,23|C11N2,24|C1N9,25|C8N9,26|C1N9ck
2L,2358309,2,A,G,0,1,True,splice_region_variant&intron_varia,.,,,,,"('SPLICE_REGION', 'AGAP004707-PB', 5, 'AGAP004707-PB', -3698)","('SPLICE_REGION', 'AGAP004707-PB', 5, 'AGAP004707-PB', -3698)","('SPLICE_REGION', 'AGAP004707-PC', 5, 'AGAP004707-PC', -3698)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '2j', -1331)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '2j', -1331)","('SPLICE_REGION', '1', 5, '3', -3680)"
2L,2362002,2,A,T,0,3,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 3698, 'AGAP004707-PB', -5)","('SPLICE_REGION', 'AGAP004707-PB', 3698, 'AGAP004707-PB', -5)","('SPLICE_REGION', 'AGAP004707-PC', 3698, 'AGAP004707-PC', -5)",,,,,,,,,,
2L,2362003,2,C,T,0,3,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 3699, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PB', 3699, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PC', 3699, 'AGAP004707-PC', -4)",,,,,,,,,,
2L,2382263,2,A,G,0,180,True,splice_region_variant&intron_varia,.,166.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 993, 'AGAP004707-PB', -7)","('SPLICE_REGION', 'AGAP004707-PB', 993, 'AGAP004707-PB', -7)","('SPLICE_REGION', 'AGAP004707-PC', 993, 'AGAP004707-PC', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('INTRONIC', '4', 993, '6', -3431)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)"
2L,2390125,2,A,C,0,1,True,splice_region_variant&intron_varia,.,,,,,"('SPLICE_REGION', 'AGAP004707-PB', 4340, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PB', 4340, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PC', 4340, 'AGAP004707-PC', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)"


In [20]:
def veff_intron_style(v):
    return 'background-color: #%s' % ('fff' if v in {None, '.', ''} else 'ddf' if v[0] == 'INTRONIC' else 'fda' if v[0] == 'SPLICE_REGION' else 'faa')


(tbl_variants_all_eff_intronic
 .displayall(
    td_styles={
        'num_alleles': lambda v: 'background-color: #%s' % ('ff0' if v and v > 2 else 'fff'),
        'alt_ac': lambda v: 'background-color:rgba(0, 255, 0, %.3f)' % (min(1, v/100)) if v else '',
        'filter_pass': lambda v: 'background-color: %s' % ('white' if v else 'red'),
        'p1_filter_pass': lambda v: 'background-color: %s' % ('white' if v else 'red'),
        'p1_alt_ac': lambda v: 'background-color:' + ('rgba(0, 255, 0, %.3f)' % (min(1, v/100)) if v is not None else 'red'),
        'RA': veff_intron_style,
        'RB': veff_intron_style,
        'RC': veff_intron_style,
        'C1N2': veff_intron_style,
        'C3N2': veff_intron_style,
        'C5N2': veff_intron_style,
        'C7N2': veff_intron_style,
        'C8N2': veff_intron_style,
        'C10N2': veff_intron_style,
        'C11N2': veff_intron_style,
        'C1N9': veff_intron_style,
        'C8N9': veff_intron_style,
        'C1N9ck': veff_intron_style,
    }                      
))

0|chrom,1|pos,2|num_alleles,3|ref,4|alt,5|alt_idx,6|alt_ac,7|filter_pass,8|annotation,9|hgvs_p,10|p1_alt_ac,11|p1_filter_pass,12|p1_annotation,13|p1_hgvs_p,14|RA,15|RB,16|RC,17|C1N2,18|C3N2,19|C5N2,20|C7N2,21|C8N2,22|C10N2,23|C11N2,24|C1N9,25|C8N9,26|C1N9ck
2L,2358309,2,A,G,0,1,True,splice_region_variant&intron_varia,.,,,,,"('SPLICE_REGION', 'AGAP004707-PB', 5, 'AGAP004707-PB', -3698)","('SPLICE_REGION', 'AGAP004707-PB', 5, 'AGAP004707-PB', -3698)","('SPLICE_REGION', 'AGAP004707-PC', 5, 'AGAP004707-PC', -3698)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '2j', -1331)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '3', -3680)","('SPLICE_REGION', '1', 5, '2j', -1331)","('SPLICE_REGION', '1', 5, '3', -3680)"
2L,2362002,2,A,T,0,3,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 3698, 'AGAP004707-PB', -5)","('SPLICE_REGION', 'AGAP004707-PB', 3698, 'AGAP004707-PB', -5)","('SPLICE_REGION', 'AGAP004707-PC', 3698, 'AGAP004707-PC', -5)",,,,,,,,,,
2L,2362003,2,C,T,0,3,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 3699, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PB', 3699, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PC', 3699, 'AGAP004707-PC', -4)",,,,,,,,,,
2L,2382263,2,A,G,0,180,True,splice_region_variant&intron_varia,.,166.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 993, 'AGAP004707-PB', -7)","('SPLICE_REGION', 'AGAP004707-PB', 993, 'AGAP004707-PB', -7)","('SPLICE_REGION', 'AGAP004707-PC', 993, 'AGAP004707-PC', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('INTRONIC', '4', 993, '6', -3431)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)","('SPLICE_REGION', '4', 993, '5', -7)"
2L,2390125,2,A,C,0,1,True,splice_region_variant&intron_varia,.,,,,,"('SPLICE_REGION', 'AGAP004707-PB', 4340, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PB', 4340, 'AGAP004707-PB', -4)","('SPLICE_REGION', 'AGAP004707-PC', 4340, 'AGAP004707-PC', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)","('SPLICE_REGION', '6', 4340, '7', -4)"
2L,2390126,2,C,T,0,2,True,splice_region_variant&intron_varia,.,2.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 4341, 'AGAP004707-PB', -3)","('SPLICE_REGION', 'AGAP004707-PB', 4341, 'AGAP004707-PB', -3)","('SPLICE_REGION', 'AGAP004707-PC', 4341, 'AGAP004707-PC', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)","('SPLICE_REGION', '6', 4341, '7', -3)"
2L,2390130,2,C,T,0,111,True,splice_region_variant&synonymous_v,p.Gly238Gly,40.0,True,splice_region_variant&synonymous_v,p.Gly238Gly,,,,,,,,,,,,,
2L,2390594,2,A,G,0,6,False,splice_region_variant&synonymous_v,p.Gly329Gly,3.0,True,splice_region_variant&synonymous_v,p.Gly329Gly,,,,,,,,,,,,,
2L,2400176,2,A,G,0,1,True,splice_region_variant&intron_varia,.,1.0,True,splice_region_variant&intron_varia,.,"('SPLICE_REGION', 'AGAP004707-PB', 3, 'AGAP004707-PB', -2271)","('SPLICE_REGION', 'AGAP004707-PB', 3, 'AGAP004707-PB', -2271)","('SPLICE_REGION', 'AGAP004707-PC', 3, 'AGAP004707-PC', -2271)","('SPLICE_REGION', '11i+', 3, '12', -1373)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '12', -1373)","('SPLICE_REGION', '11i+', 3, '14', -2910)","('SPLICE_REGION', '11i+', 3, '12', -1373)"
2L,2402508,2,A,T,0,2,False,missense_variant&splice_region_var,p.Gln545Leu,1.0,True,missense_variant&splice_region_var,p.Gln545Leu,,,,,"('INTRONIC', '11i+', 2335, '14', -578)","('INTRONIC', '11i+', 2335, '14', -578)","('INTRONIC', '11i+', 2335, '14', -578)","('INTRONIC', '11i+', 2335, '14', -578)","('INTRONIC', '11i+', 2335, '14', -578)","('INTRONIC', '11i+', 2335, '14', -578)",,"('INTRONIC', '11i+', 2335, '14', -578)",


Observations:

* There is a splice region variant at position 2417362 at very high frequency. It fails filters, why?
* There is a splice region variant at position 2425766 at high frequency near optional exon *k*, would be good to plot with haplotypes and see which haplotype(s) it's linked to.