In [5]:
import gosling as gos
import pandas as pd

# Data Manipulation Using Pandas

In [24]:
# Data Reference: TODO
df = pd.read_csv(
    'https://somatic-browser-test.s3.amazonaws.com/SVTYPE_SV_test_tumor_normal_with_panel.bedpe',
    sep='\t',
)

# Infer SV categories from the strand orientation of the two breakpoints.
#
# Reference: https://software.broadinstitute.org/software/igv/interpreting_pair_orientations
strands_to_sv = {
    '+-': 'Deletion',
    '--': 'Inversion (TtT)',
    '++': 'Inversion (HtH)',
    '-+': 'Duplication',
}

# Build the two-character strand pair (strand1 followed by strand2) ...
df['sv_type'] = df['strand1'].str.cat(df['strand2'])
# ... then translate it to a category name. The lookup is strict on purpose:
# a strand pair that is not listed above raises KeyError instead of passing through.
df['sv_type'] = df['sv_type'].map(strands_to_sv.__getitem__)

# Breakpoints on different chromosomes are labelled as translocations,
# regardless of the strand-based category assigned above.
on_different_chromosomes = df['chrom1'] != df['chrom2']
df.loc[on_different_chromosomes, 'sv_type'] = 'Translocation'

df

Unnamed: 0,chrom1,start1,end1,chrom2,start2,end2,sv_id,pe_support,strand1,strand2,svclass,svmethod,sv_type
0,chr1,1117830,1117831,chr22,20272152,20272153,bnd_0,59,-,-,TRA,Sentieon_TNscope,Translocation
1,chr1,3721049,3721050,chr1,3734333,3734334,bnd_2,56,-,+,DUP,Sentieon_TNscope,Duplication
2,chr1,9357665,9357666,chr1,9377061,9377062,bnd_4,60,+,-,DEL,Sentieon_TNscope,Deletion
3,chr1,9495570,9495571,chr1,36665594,36665595,bnd_6,46,-,+,DUP,Sentieon_TNscope,Duplication
4,chr1,22416022,22416023,chr1,22416181,22416182,bnd_8,38,+,-,DEL,Sentieon_TNscope,Deletion
...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,chrX,146248380,146248381,chrX,146248692,146248693,bnd_1074,38,+,-,DEL,Sentieon_TNscope,Deletion
581,chrX,148428176,148428177,chrX,148429716,148429717,bnd_1078,60,+,-,DEL,Sentieon_TNscope,Deletion
582,chrX,150382867,150382868,chrX,150383204,150383205,bnd_1080,37,+,-,DEL,Sentieon_TNscope,Deletion
583,chrX,151125466,151125467,chrX,151127243,151127244,bnd_1082,60,+,-,DEL,Sentieon_TNscope,Deletion


# Visualize Data

In [50]:
# Categorical palette for the SV classes: sv_types[i] is drawn with sv_colors[i].
sv_types = ['Deletion', 'Inversion (TtT)', 'Inversion (HtH)', 'Duplication', 'Translocation']
sv_colors = ['#CC7DAA', '#3275B4', '#409F7A', '#E6A01B', 'lightgrey']

# Expose the DataFrame to Gosling as a CSV data source. Each entry pairs a
# chromosome column with the position columns that belong to it
# (presumably so the positions can be placed on a genome-wide axis -- confirm in the gos docs).
data = df.gos.csv(genomicFieldsToConvert=[
    { "chromosomeField": "chrom1", "genomicFields": ["start1", "end1"] },
    { "chromosomeField": "chrom2", "genomicFields": ["start2", "end2"] }
])

# One link per SV, spanning from the start of the first breakpoint to the end
# of the second breakpoint, colored by the inferred SV category.
gos.Track(data).mark_withinLink().encode(
    x="start1:G",
    xe="end2:G",
    # Use the channel class that matches the encoding name:
    # gos.Color for `color` (it carries the legend option), gos.Stroke for `stroke`.
    color=gos.Color('sv_type:N', domain=sv_types, range=sv_colors, legend=True),
    stroke=gos.Stroke('sv_type:N', domain=sv_types, range=sv_colors)
).properties(
    # Style is intentionally left at its defaults. The author noted
    # linkStyle="elliptical" as an option to try here.
    style=gos.Style()
).view()