In [1]:
import pandas as pd

# Column names assigned to the fields obtained by splitting each BED line on
# tab, ':' and '#' (see the `delimiter` regex below).
mini_bed_columns = ['Chr','Pos','End', 'F1', 'F2', 'AligPath','PathLength','MappingStrandSamplCtg', 'Ctg1_Strain','Ctg2_Sq','Ctg3_Chrom','AproxCtgStart','AproxCtgEnd']

# Dtypes enforced at parse time. The previous mapping also carried a 'SChrom'
# entry, but no column of that name exists in `mini_bed_columns`, so it never
# applied to anything and has been dropped.
# NOTE(review): if it was meant to type 'Ctg3_Chrom' as 'string', add that key
# explicitly -- it would change how missing values in that column are represented.
mini_types = {'Chr': 'string',
              'Pos': 'int',
              'End': 'int'}

# Columns that must end up as plain integers. Empty fields are parsed as NaN
# (only '' is treated as missing, see `na_values`), so they are filled with 0
# before the cast.
mini_int_columns = ['PathLength', 'Ctg2_Sq', 'AproxCtgStart', 'AproxCtgEnd']

data = pd.read_csv(
    'sample_data/minigraph/DBA_2J_Chr19.bed',
    delimiter=r'\t|:|#',   # regex separator (raw string); requires the python engine
    names=mini_bed_columns,
    dtype=mini_types,
    header=None,
    engine='python',
    index_col=None,
    keep_default_na=False,  # do not treat strings such as 'NA' as missing ...
    na_values='',           # ... only genuinely empty fields
)

# Same fill-then-cast treatment for every integer column, in the original order.
for column in mini_int_columns:
    data[column] = data[column].fillna(0).astype(int)

S = Segment
===========

| Column | Field        | Type      | Regexp              | Description
|--------|--------------|-----------|---------------------|------------
| 1      | `RecordType` | Character | `S`                 | Record type
| 2      | `Name`       | String    | `[!-)+-<>-~][!-~]*` | Segment name
| 3      | `Sequence`   | String    | `\*\|[A-Za-z=.]+`    | Optional nucleotide sequence


L = Link
========

| Column | Field        | Type      | Regexp                   | Description
|--------|--------------|-----------|--------------------------|------------------
| 1      | `RecordType` | Character | `L`                      | Record type
| 2      | `From`       | String    | `[!-)+-<>-~][!-~]*`      | Name of segment
| 3      | `FromOrient` | String    | `+\|-`                    | Orientation of From segment
| 4      | `To`         | String    | `[!-)+-<>-~][!-~]*`      | Name of segment
| 5      | `ToOrient`   | String    | `+\|-`                    | Orientation of `To` segment
| 6      | `Overlap`    | String    | `\*\|([0-9]+[MIDNSHPX=])+`| Optional `CIGAR` string describing overlap

The Overlap field is optional and can be `*`, meaning that the CIGAR string is not specified.

| Tag   | Type | Description
|-------|------|------------
| `MQ`  | `i`  | Mapping quality
| `NM`  | `i`  | Number of mismatches/gaps
| `RC`  | `i`  | Read count
| `FC`  | `i`  | Fragment count
| `KC`  | `i`  | k-mer count
| `ID`  | `Z`  | Edge identifier



# CURRENT APPROACH:

If End - Pos > PathLength, then it's a DEL
If End - Pos < PathLength, then it's an INS


Length = PathLength - (End - Pos) => positive: INS, negative: DEL


## Sample 1:
---------------

### Validated Data
    Type: DEL H1
    Coordinates: 19:10331722-10331765
    Size: 45

### Graph Segments
```
S	s1635995	GTTTTTTGTTTGTTTGTTTGTTTGTTTTTTGTTTGTTTTTGGTTTTTTGGTTTTG	LN:i:55	SN:Z:AKR_J#1#chr19	SO:i:7131039	SR:i:2
L	s1448319	+	s1635995	+	0M	SR:i:2	L1:i:5792	L2:i:55
L	s1635995	+	s1448321	+	0M	SR:i:2	L1:i:55	L2:i:1079

S	s1448320	TTTTGTTGTTGTTGTTTTTTGTTTTTTGTTTTTCTTT	LN:i:37	SN:Z:19	SO:i:10331714	SR:i:0
L	s1448319	+	s1448320	+	0M	SR:i:0	L1:i:5792	L2:i:37
L	s1448320	+	s1448321	+	0M	SR:i:0	L1:i:37	L2:i:1079
```
### SV Bubble Data:
10331751 - 10331714 = 37

### Expected output:
18 bp INS

Formula = PathLength - (End - Pos) = 55 - 37 = 18

In [77]:
# Sample 1: select the bubble row(s) whose reference start (Pos) is 10331714.
data[data['Pos'].eq(10331714)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
1883,19,10331714,10331751,>s1448319,>s1448321,>s1635995,55,+,DBA_2J,1,chr19,7510514,7510645


## Sample 2:
---------------

### Validated Data
Type: DEL H1
Coordinates: 19:59971890-59972028
Size: 139

### Graph Segments
```

S	s1481409	TGGGTAAGTGTATGTATGTATGAGTGTGTGTTAATTTGTGTGTTGTGTGTATGTGTGAGTGTGGGCATGAGTGTGTGTATGGATGTGTGTGAGTGTATGTATGAGTGTATGTGTGTTTATATTTGTGTGATTGTATGTGCATATAAGTGAGAGTGCATGAATGTGTGTAAGTGTGTTGTGTGAGTGCATGTGTGTGTACAGAACTATCTTTGTGTGTGTAGGATATTTTCTCACAGGAACAGAAATGAAACTAAAAATAAAAACCCTCAGAGACACACAGTCTAATCTTTTTACCTACACTAGAAACACACACAACCTACTGAATCTATTTGGTGTTGTTCTTATGTACATGTGTTAGGGGTGACCACTTGTGAGAGGTAACCTATCAGGGGCTCAGTCTTGAAGACTTATTCTCCCTCTCTTGGCATGAGTACCTTGTGAGATTTCCTCTATTCACATTGGCATGCCAGCTGGTATTGTCATTGTGTGGGTCTTGTTTAGGCTGCCATATTATTGAGATCTCATAGAATTCACAAAAGGAAGATAA	LN:i:547	SN:Z:19	SO:i:59971925	SR:i:0
L	s1481407	+	s1481409	+	0M	SR:i:1	L1:i:3492	L2:i:547
L	s1481408	+	s1481409	+	0M	SR:i:0	L1:i:135	L2:i:547
L	s1481409	+	s2459456	+	0M	SR:i:15	L1:i:547	L2:i:25
L	s1481409	+	s1481410	+	0M	SR:i:0	L1:i:547	L2:i:38


S	s1481407	GTAGAACCTTGGACCATTTAAAATAAGTTCAGCATCACTGCGGTTTTAAGATCACTAAATACACCAAAAGCTATGTAAGTACATACTAGGATTTTTATAATCCAACTGTCTAAAGAACTTGAAAACAAACATAAAATAGTCTAAATTTATAAACAAAGCTATTTAAAAGGTATTTAAATGTAAAACCAAATATTAATCAAAACCTTATTAAAAACAATTTGCATTTGCTATTCCAAGACTGAAAAGATTCATGAACAGGACATAAAGACATAAAGATGAAAGATCTAAGAGACCATAACCCTTTTTCATTTTTTGACAAGTTTAAGGTTCTCTTTACAGAAACAGAATTTCAAATGTCTAATCCAAAACTAAAAAGAGAAGCAGTCTTTTGGTATGGAGACTACCCAAAAGGACACCAAATAGTGACCTATAAGTTGTTCTGCTTGATAGTCTGATCACATAGCCTTTCATTAGGCCTAAATGTCTCACAGTGGTTTTTATTGTATTTTATTTCTCTAACAAAAATGATCTTTGAGAATGTAGTTCATCCTGCAAGCTTGACCAGATCAGGGCTTTCAGCAAAAAGACCAGAGAACCACTGAGTTTCCTACTGTAGTACTGATGGGCACTCAGAGCCACTTCCACACAGGGAAGGATATCTAATTTTGAATGTGTACTTAGACTATCGTGACCAGTAAGAACGGCCTCTAGCCTGTCTTCTTCTTCCATTCTTTGTAGTATTTATGGCCACTGGCTTATTGGTTTTGCAATTGTGTCTTTGGTTGCAACCTGGCATTCCATGCTCATATGTCTTTTGTGGACCTGCAGAGTAGCTGGCAAATGTCACAGGTTACAACTGATTGCTGCTTGCTTCCCAATGGAGGAGGCTGATTGATGACTCCCAAGACAAAATATCTTGTTTTATTTTGTGTTGTCCAAAACCTAATCCTATGTTGGAGATGCTAAGAAAAAAAAATGTTCTATGACGTATTTACTGTCGTCGTAACAGACATGCTTACATCACACTGAAGCAACCGTGGCCTCCTAATGCACCCTCAGACAGTTTAAACCTGTAGTGGTTTCAGCAAAATGACCAAATCAATGCTATGAAGACATTTTATCAGTAGGAATCTGAAATGTTTTTGAAACCCTTCCCATGTAGTTTATTACAAAGGACAGACTTTTCTCAGAGATTAGTATTTACTTTTAAAAGGAATGATTTCTTAAATTATAGTTTTGAAACATTAATAACTGCCAGTGGTCCCCAACAGGCTTCTCTTTATTGAAGAGCTGGAAACTTTGAGCCCACTAGACTTCCTTGATCTTAGTCCATATGAATTGTTTAGTTTTATTTGGTGCCACAGGGACTCTTCCATACTTATCAGAAGCTTCAGTTTAATGCAGAAAAAAGAGAGGCTCTGACATGTCAATATAATACTGTTAATGTTTTCCACGTTTGCAATCCCAGGGTCAAAGTCAAACCTCTTATTGTATTAATAATAGCAAAGCAAACATAAGCTTTGTAGTTGTGAATTCATGGTTTTAAGGAATAATTTCCCTACGAGTTTATTAATTCAAGCCATTCATTGACCCACTCGGAATTCTGTTATGTGTTCAGTTGATTTATCTCATGTAGTACCTTCAACTCTGAAAGACCATGAGAATGGATACCTGGATGATGCCTTACAGTCACCAACATGGACCATTGCCTTGTCTAGAAGTTGTGTCAATTTGTAATATTGAAACTCATGTTCAGTAAATGTTAGAACCTAAAAGTGAAATCATGAAGTCCCCTTCGCTCTCTGCATCCAGAAAATGCAGATTTGGCAGTGTGGTTCAGATGCTGGGAATTACAAAGCATGGGTGCTTGTCTCTGGATTCTGTTAGCCTCTGACTCATAGACTAATAATAATCAGAATTTTAATGAGACAGTCACCAAAGAAACCCCAGACAGCCCCAGCTCAGCAAGTCTTCTTTCTTTGAGTTTAT
ATGGAACCTGTGTCTCACTCAGCACTGTCTAACAGTAAAGTTAAACCAAATGCTCCAGGGGCATTTTCTCAGCCCATGCTCTTGCTCAGGATGAAGGTGGCATTGACAATGTGTGACAATGCCTCTCTTTCTACCTGTAGGTCTTGGTGCTCTACCATCCCCCTCCCCACATCCTTTTAAACTTCAAATTTGTTCTTGATGGGTGGAAGGGGAAATTCACAGTGCAGCTAAAGATTATCTGAGCTCCATGCTCTACAGGACAAATTGGAACTAAGTGGGTGCCTCACAGAGGAACAAGCCTGTCTTCTCTGCTCTCCTATATGTGTGGCTGGGGATGCATCTCAGTTGGTACAGTTCTTGTCTAGCATGTACAAAGCCCTGGATCTATTCCCCAGCACCACCTAAACTAGCCATGGTGGTGCACAGCCCTTCGGGGGTACAGGCAGTAGGATTAGAAATTTATGGTCATCCCGGAATTTGTAGGTATATATAGTGAGTCTGAGGCCAGCCTAGTCTATATGAGACCCTGCCTCAAGAGGAAAAGTAGGGCTGTAAGGAACAAGGAAGGGAGGAGGAGGGGAAGGGAGGCAGGGAAGCAAGGAGAGAGGGAGGAATGGAGGGTTAGAGGTCGGAAGGAAGAGAGGAAGAAAGGTCAACAGGCAGGCCAGTCTAATTCTCTCTATTAGAGGTCACTCACAGACATGCACCATAAAATAGCATTATAACATTAGATTTTAATACTAGGTCCAGTTAAATCCAGATGGTGGTTTTGCATTAGTTAGCAGAGACAGGGAGGGTCTTTCCTCCTCTCCTTATTTGATAGGGAAGTTATAGGACACACAGAGACTGACATGTGTATATGTGTGAGTGTGCACACTCCATGTTTACAAGGTCACGCGCCCGACTGGAAGGACCAAGGAAATGATGTGTTTCTGTGTTTGCCGCACTGTTCTCTAGAGATATCAATAGGTACACTGCAGTTAAGTGAGTTAGGAAACACTGGGCTAACTGGAGCACACAGTGCTGCTCTCCTGAGTCACAACAGGACTCCAGAGTCCTCAGTAGTAGGCAGCTGGAGTCCATTCCGACTCTACCTGACTGAGGGCGTAGTGCTGGGCAGGTCACTCACATTTTCTACAGTTCAATTTTCTCATCTGCAAGATGAAGCCAGAGCACATCAGCTCTCACAGTAATTATGATAATCTTTAGGACAGAAACCAAATAGGTACCTTGTAGGGATGCATATATGTTTGTACAGATGGCCACTGTCATCCTTTTTAATTTAAAGCTTTTTGAGAATTTTGTATATGAGAACCACATTTCTATCATTTCCACTCCCCCTCTCCTTCTAATTCCTTCCATGTCTCCCTACCCTGTCTCAACTTACTAACCTCCTCCTCTCTCATTTTTATCATTATCTGTGTGTAAATGTGTCTGTGTGAGAGAGATGTGTATATGTGTGAGTGTGTACATGAGTGTGTGCGTGAGTGTATCAGTGTATGT	LN:i:3492	SN:Z:19	SO:i:59968298	SR:i:0
L	s1481406	+	s1481407	+	0M	SR:i:0	L1:i:1703	L2:i:3492
L	s1481407	+	s1481409	+	0M	SR:i:1	L1:i:3492	L2:i:547
L	s1481407	+	s1481408	+	0M	SR:i:0	L1:i:3492	L2:i:135
L	s2459455	+	s1481407	+	0M	SR:i:15	L1:i:6499	L2:i:3492


S	s1481408	ATGAGTGTGTATGTATGTGCACAAGTATGTGTATATGAGTGTGTAAGTGTGTGTGAGTGTATGTATGAGTGCATGTGTGTGTGAGTATGTGTGTGTATAAGTGTATGAATGTAGCAGTGTGTGAGTATACGTTTA	LN:i:135	SN:Z:19	SO:i:59971790	SR:i:0
L	s1481407	+	s1481408	+	0M	SR:i:0	L1:i:3492	L2:i:135
L	s1481408	+	s1481409	+	0M	SR:i:0	L1:i:135	L2:i:547

```
### SV Bubble Data:

AproxCtgEnd - AproxCtgStart ->  57226895 - 57226887 = 8

End - Pos -> 59971925 - 59971790 = 135

### Expected output:
135 bp DEL

## Additional comments:
For `*:0` length events, the final SV size can be either queried from the graph (the length of the missing segment) or computed as End - Pos.

In [78]:
# Sample 2: select the bubble row(s) whose reference start (Pos) is 59971790.
data[data['Pos'].eq(59971790)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
16678,19,59971790,59971925,>s1481407,>s1481409,*,0,+,DBA_2J,1,chr19,57226887,57226895


## Sample 3:
---------------

### Validated Data
Type: DEL H1
Coordinates: 19:60867244-60868908
Size: 1665

### Graph Segments
```
S	s1481894	TAGCAATGTGGAAAGTGTAAGCTGAATAAACCCTTTCCTCCCCAACTTGCTTCTTGGTCATGATGTTTTGTGCAAGAATAGAAACCCTGACTAAGACAACTGGTATCCTAAACACCCAATATAGTCACTGGACAGCGGCTAAGACTTTCTGTTGGCTTAAGCACATGTCTAAGCAGTCTTCTCTGGTGAATACCCAACAACAGAACCCTAAAGCAGACAAGGCCAACATGAAGCAGCATATGAACTATACCATCCAGAATTCTGTCATCTTTCCAGTTCCTTTCAAGTCCCTTAGAGGCCATTTCTTTGTGCAGCAATCCCTCTCCCCACAATGAAGGGTAGATTCAGCTCCAAACTTTAGTTAAAAATAAGTTAGCTTGATTTTCAAATAACTAAAAGCTCTTTAATTTTTCCTTTTCAAGCAAAGGAACCTCAGATGAGTCACCTTCAGATGTGCTTTCTTCAGTTCCTACTATGGAGGTGGGATGAGGGACCATGGAAAGGGAAATTGAACTAAATAGCTGATTCCTCTTGGCTATCTACATTTCAGAATTATTGGTGCCTAGTCTCAAAGCACACAACAGATACAAAACAACCTTACTGGTTTCAAAGACTTTTTTATGGTCTCCATTCTGTTACTCTGTAAATCATTTAGTGAAGGCCTTGAATTTTGTGAAGAAAAAACTTGCTCTGGAAGGCTCAAGATACTGGGGTCAGAGGTGGAAAATTTGAGTATAGGTGAGATAAGATTTTCTAAGAATGAAAGTGCAGGATTTTTGTTGTTGTTGTTGTTGTTGTTTTTTGTTCCTTCTTTGATCCCAACTGGGAAAAAGGACATGTCTCAGAAAGAAGGGAGAATGAGAGAGATGTTAGAGGAGAGATTACAGAGTAGCCCCTCCAAGGCAATGGGGTAGCAAGGGTTCTAACCTGCAACTATTTTTTTGGTGGTTGTGGGGGACAACAGGGTCTTCTATAGCCCAGGCTGGCCTCAAACTCACCTTGAACTCCTTCTGGATCCTCCACACCCAGTTTATGTTGTACTGGGGATTGTATCAAGGGTTCATATATGTTAGGTAAGTACTTACTAACTGAGCTGCATGCCCTGTTCCTAATGTGGGGTTTCTCTCTGTCTTTCTCTGTCTCTCTGTCTTTCTCTGTTTCTCTCTGTGTCTCTCTCTCTCTCTTTCTCTGTCTCTCTCTGTGTGTCTCTCTGTCTCTGTCTCTGTCTCTTCTCTGGCCTGATCCTTCTGTGTGACAACCTAGAACTCACTTTGCAGATCAGGCTGGCCTCAGCCTCTGCCTTCCAAGCGCTGGAATTAAAGGAGTGCACCATAATGGGCAATTTCTTAATGGAAAACACTTACAATGATTTAATATTCCTCTCCTTCATGGAGTCTCACCCCACCCATCTACATGGTAGTTTGTGACTGGTCGCCCCAGATCCTCATTCCATTCTGCATGCTGGGGTTGAGTTCTTTGGGTAATTGAATTGTCCTTCTCCGGTCTCTCTTTCCCTAATTACTGATCTGTTCCTATGGAGTCCAGGGTTCCTGCCATATTCTTGCTTTTTAGGAAAATCCAGCCCCTATAATTCAAGAACACCATGCAGTCATGTGGTCTTGGAACCCCAAATCTACAGCTTTCTGATTTCTCTGGTCTGTCATTTCATGATCTATAAAGTTACA	LN:i:1685	SN:Z:19	SO:i:60867229	SR:i:0
L	s1481893	+	s1481894	+	0M	SR:i:0	L1:i:1142	L2:i:1685
L	s1481894	+	s1481895	+	0M	SR:i:0	L1:i:1685	L2:i:797



S	s1636428	CAGCAGTATGGAAGTGTAACCTGAATAAACCCTTTCCTCCCCAACTTGCTTCTTGGTCATGATGTTTGTGCAGGAATAGAAACCCTGACTAAGACAATGACATAATAGTTCCTACCTC	LN:i:118	SN:Z:AKR_J#1#chr19	SO:i:59397716	SR:i:2
L	s1481893	+	s1636428	+	0M	SR:i:2	L1:i:1142	L2:i:118
L	s1636428	+	s1481895	+	0M	SR:i:2	L1:i:118	L2:i:797

1685 - 118 = 1567

```
### SV Bubble Data:

AproxCtgEnd - AproxCtgStart ->  58153459 - 58153250 = 209

End - Pos -> 60868914 - 60867229 = 1685

FINAL FORMULA = End - Pos - PathLength

### Expected output:
1567 bp DEL


In [79]:
# Sample 3: select the bubble row(s) whose reference start (Pos) is 60867229.
data[data['Pos'].eq(60867229)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
16912,19,60867229,60868914,>s1481893,>s1481895,>s1636428,118,+,DBA_2J,1,chr19,58153250,58153459


## Sample 4:
---------------

### Validated Data
Type: INS H6
Coordinates: 19:38036364-38036364
Size: 178

### Graph Segments
```
S	s1574900	GGGCTGGTGAGATGGCTCAGTGGGTAAGAGCACTGACTGCTCTTCCGAAGGTTCAGAGTTCAAATCCCAGCAACCACATGGTGGCTCACAACCATCCGTAAAAAGATCTGACTCCCTCTTCTGGAGTGTCTGAAGACAGCTACAGTGTACTTACATATAATAAATAAATGAATCTTTAAAAAAAAAAGAAAAGAAAATGCAATTG	LN:i:205	SN:Z:129S1_SvImJ#1#chr19	SO:i:34830030	SR:i:1
L	s1466176	+	s1574900	+	0M	SR:i:1	L1:i:1313	L2:i:205
L	s1574900	+	s1466177	+	0M	SR:i:1	L1:i:205	L2:i:89

S	s1466177	ATTGCACACGTGGGTATGCATGCATGCATGCGTGCGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCGCGTGAGT	LN:i:89	SN:Z:19	SO:i:38036376	SR:i:0
L	s1466176	+	s1466177	+	0M	SR:i:0	L1:i:1313	L2:i:89
L	s1466177	+	s1466178	+	0M	SR:i:0	L1:i:89	L2:i:128
L	s1574900	+	s1466177	+	0M	SR:i:1	L1:i:205	L2:i:89
```
### SV Bubble Data:

AproxCtgEnd - AproxCtgStart ->  35333742 - 35333444 = 298

End - Pos -> 38036465 - 38036376 = 89

### Expected output:
205 bp INS

## Additional comments:


In [80]:
# Sample 4: select the bubble row(s) whose reference start (Pos) is 38036376.
data[data['Pos'].eq(38036376)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
9954,19,38036376,38036465,>s1466176,>s1466178,>s1574900>s1466177,294,+,DBA_2J,1,chr19,35333444,35333742


## Sample 5:
---------------

### Validated Data
Type: INS H7
Coordinates: 19:23289094-23289103
Size: 373

### Graph Segments
```
REF
S	s1683058	ATACACACACACACACACACACACACACACACACACACACACACACACACTATATATATATATATATATATATATATATATATATATATATATATATATATA	LN:i:102	SN:Z:C3H_HeJ#1#chr19	SO:i:19894285	SR:i:5
L	s1456617	+	s1683058	+	0M	SR:i:5	L1:i:181	L2:i:102
L	s1683058	+	s1456619	+	0M	SR:i:5	L1:i:102	L2:i:3690

NEW PATH
S	s1683053	GAATTACCCCCTCTCTTACACGCGTTCTCGCGACCGGCCAGGAAAGACGCAACAAACCGGAATCTTCTGCGACAAAAGCTTTATTGCTTACATCTTCAGGAGCCAGAGAGCAAGAGAGCAAGAAAGCAAGAGAGCAAGAGCAAGAGAGCAAGAGAAAGAATGGCAAAACCCCGTCCCTTTTAAGGAGAATTATCCTCTGCCTAGGACGTGTCACTCCCTGATTAGCTGCAGCCCATCGGCCCAGTTGTCATCACGAGAAAGGCAGAACACGTGGCGGGAAAACTGCCCCTGCACGTGTGCAGATTATTTACTACTTAGAACACAGCTGTCAGCGCCATCTTATAATGGCAAATGTGAGGGCGGCTCCCCACA	LN:i:372	SN:Z:C3H_HeJ#1#chr19	SO:i:19866035	SR:i:5
L	s1456607	+	s1683053	+	0M	SR:i:5	L1:i:5464	L2:i:372
L	s1683053	+	s1456609	+	0M	SR:i:5	L1:i:372	L2:i:2889

```
### SV Bubble Data:

AproxCtgEnd - AproxCtgStart ->  20527374 - 20526993 = 381

End - Pos -> 23289102 - 23289093 = 9

372-102 = 270

381-9 = 372

### Expected output:
372 bp INS

## Additional comments:


In [81]:
# Sample 5: select the bubble row(s) whose reference start (Pos) is 23289093.
data[data['Pos'].eq(23289093)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
5598,19,23289093,23289102,>s1456607,>s1456609,>s1683053,372,+,DBA_2J,1,chr19,20526993,20527374


## Sample 6:
--------------

### Validated Data
None

### Graph Segments
```
REF
S	s1462593	GAGAAAAGAGACTCCCTTATTTAGTCAGGGTTTGGCACACTGCTGGCTAAGAACATGTTACTACCCTACACGGGCACACGACTTAGTCAGACGACCTTGTTAAGTTCTCAAGAGAAGGAGTCTGGGAAGAGTGACAGCCCAGGACCGACATGGCATGGAATCATGAGAATCCAAACCTAGTGGGTAGACACTCTAAGGAAAGGGTGACATTAGAAGGCTTGCTTTGTGTTTCTGGATTTTGTTTGTTTCTTCAATCCAGGGGAAAATCATTCCGTGAGGCTTCCCTGAGCAGATACCCCAGTCCATCCCTCAGCTCTTTTCCTAAGAGGATTCCTTTGGAGTCTCACAGGCCTATGAAATTCTCAGGGTACATCTCTTTGAGGTCCAGCCCTGTTCAAGTTATAAACAGCATTCTCTTTGTTGGCTCTCGGCTAAGAACTTCCTATCTCTGACTTTCCCATGGACATAACGCAAATGTATCTGATTGCTCTTTCTGCTGAGATATCTGCTCCTTAGAGCACAAGCTAAGGCTTTAAGAAGTATCTCTCTCAGGTCCCTGGACTGATTATCCAGCAGCTCAGCACTTTAGAATCCACACTGAGTTCTCACACATGAGCCTTAGCTCACACATGTAATCCCAGCATTTGAGAGGCAGGAGGATTGTCATGAGTTCAAGGCCAGTCTCGACTATACTGCACTTTCCAGGACAACTAGGATATAGTAGTCAGAAAGACCCTGTCTCAAAAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAAAAGACAATATACAACATACAAAAATCACAATTAATGGAAGTTAACTATAAGAGATCATTTCAGTATAGCAGTGGTAATTTAGAATTTATAGAAACTTTAAAACTACCACAGCAACTAAAATGTTCAAGATATTGGATCCATGCTATGATCCGAAGAACCAAACCAAAGTATATTTCATACACTTCCAAGAATGTTTAGCAATAAAAAAACCACACTAAAAATTCTGCAACTCATTAGTCATGAATTGCTTTGATCAGGGATTTCTGTAGCTATTGGTAGTTTGTACAGTTTAGATGAGAAGTGACAGGGCCTGCTTGCTCATGTGTGAGTCACTCACAGAACAGAGCCCGACATGTCCTTTGTGACTAGGCTCCTGATGATGGTCTTCTCAATATTTCTTTTTTATTTTTAAAGATTTATTGATTATATGTAAGTACACTATACCTGTCTTCAGACACTCCAGAAGAGGGCGCCAGATCTCGTTACGGATGGTTGTGAGCCTTCATGTGGTTGCTGGGATTTGAACTCAGGACCTTTGGAAGAACAGTCGGTGCTCTTAACCGCTGAGCCATCTCTCCAGCTCCCTTTTTTTTTTTTTTTTCTTCTCAGTATTTCTACATTATAACAAAAGGTAACAGTATAGGGATGGCCATCATATTCCTGTGTTTAGGACTGGATAGGAAGGGCACTCCTGGGGTAGACAAGCAAGGCAGGTGAAGCCAGCCCAGCTTGGAGGGGAGTCTTCTTGGATTCACCACAGCCCAAGGTGGGGCCCTAATCATCTGTGCTAAAACACAGCCCAAGTAGCACTGAGGTAAAAATGGCAACTGAATAATGTGAGAACAGTAAACTATAATGCAGGTCATGATGGTACCTGCTGACTGGAGAGGGCGACAAGCTCCACACACTTGGCATGGAGCAGCCATCCATGATGGCGTCTACCTGAATGGCCAGACATGGCACCATGTGAGGTTAAGATACACCCTGTGGTGTTCAGGGACAGCAGCAAGCCCACACTCCAAGTCAGGCTTTGTACGTAGGCGATCTGAACTCCAAAGCATACGGACAAGCCCACCCTCAGTCAGGCTGATAAAGGCCTGATAGTGCATCCTAGGACAAGGTCATTTCTCTCTGGACTGGCCAGACTGGCCTCTGAAAATTATATTCCTGGGGTTGAAGAA
ATAGTCTGACAGCTAACAGTATTATATGCTCTTCCAGAGGGCCTGGTTTAGTTATCAGCACCCACACTGCAGCTTACAACCATTCCTAACTCCAGTTCCAGGGTATATGACCCCTCTTCTGGTTCCTTGGGCATTGTACACATGTGATGCATATATGCAGACAAAACATTCATACACATAAGATAATAAAATAAAAAATTGTTAAATTGGTCTTCTACCTCAGGCTCCACATCCTGATTCTTTTCATTTCTATTAGCAGCTAGTTCCCTTCAGAGCCACGTGCCATTGGATTTCCACTTAACTCCACAGCTTCTCTGTGACCTATTCATTGCAGGCTTATTTTTACATTCTTTCAGCTAGCTGGTGATACTGCCCAAGCAAAAACAGCTATTCCAATTTCACAAACACAGCAAGCATTCAACTGTGAAGCCGGGATTTCTCAACCCTGTATTACTGACAGTTTGGACAGGGCAAGTCCTTGTCATGGGACCATCCTGTGCCTGTAAGATGGATAGCAGCATGCTTGGCTCCACTTATGTCCAGATGTTATATGTCCTCTAGAAGGTAACACTGGTCTCTAAGGAGAACTACAGTCAGTTGACCAAATTGCACAAAGAAAATCTGTACTAAATTCAGGAACTAGTCTGCTACCCCCTAATTCAGCACATATTCCTCTTAAAGCCAAGCCACAGCTCTTCACTGGCAAACACCAGAGGGCACATGCAGAATTTGGTGTAGGCTATCTGCATCATTCTACTAAACAAGCAACATCTTATCTAGCTTCTTGACAGTGAGCAAACTCAGTATCTTTGACTCGCAGCTCAAATGATTACTGCACTGTGCACACTGGATGTAGATTTCAAGGCAAACCCAGGACAAGCTACCTTTTGCTCAAGGAGTCCTGTGAAAGCAAAAGAGGCCACAGCAACAGCGAATCACCAAGGGTAGGGAATGACGTCTGCTACTGCTCTGGAGCACTTCTCCAGAGAATTTTCAACGTTAAATCCTTCATGTCGTCCACCTCTAATGTGATAAGTATTCTTCTATTTCTGTTTTGTGACTAAGGAAATGGAGGCCCAAGGTGTTCAATAATAAGTCATCCAAGACCTGGTTCCCTGGTCCAGAGTCCCAGTTCTTTGCTGTTAGCTGCAGTGAATCCAAGAACTACGGAAACAGCATCTTCTAGATACTCCACCTCTAGCAATCCGAGCTCCCCAGATTTATCAAATCATGTCCATCAGGATTGCTGTTTCAGTCTAACACTACTGTCCTCTCGGCCAGCTCACATATCCATAGCGCTATCCAACCCACATTTTCCCTCTTGGATAGAGAACACCCAGCACTTTAAATGGCGTACTAAGGCTGACACGCCACACCTATGTCACTGTCCTGAAATACTTGCCTGACGCCTCCATTAAGAAAGAAGTGAGGTAATGAACACAAGTTCCTTAGCAAGCACATGTGCAATACTGAGATGTTGAGGGTCACACCATGAAGCCCCGGCTGGCCCTGAGCTCCAGATGTGCTTGCTTCTGCTTTGTAAGGAGTAAGATTACAGGCATGGGCTAACACCCCAGCTCTCATCGCTGGCTTTCTATCTTTTCTTTCTATCCTCACCCATCTTTTCACAAACATATTTTACAATTCTGCTAGACAACGCCACACACTCTGGTCCAATGCCTTTAGAACTGACCATCTACCTGCTTTTGAAAACCTACTTTCTGACATCCTTCTTTCCTGTGATTGTTCTAAGATTATTCCCATGCAATTAGACCATCACACTAGCATGTTCTGTTTGTGAGTTCTTTTCTTGTGTGTATTTTTGGTTTTGGTTTTCTTTTTTGCTGTTGCTTGTTTTGGTGCAAGGAGGTGACTGAGATCCTCATGGGCGCTCTGAAAATGCTCTATCACTGAGCTACACCCTCAAGCCCTGGTATTCCCTTTCAACATCCTGGAGCATAATTGTCACTTCAGATAGCTAGGATGCAGAGATCTGAGGG
GGTGGCCAAGGCATTCTTGGCTGGAGAGGTGGGGAGTTCCAAGTAAGACAGATGGAAAAGGCGAAGAGGTGGGGTTGAGTTGGACTAGGTGCCCCAGTGGGTCTTCCAGGCAGGTGGCTTTGCTTTCCTGCATCACGGTGTGCTTGCAGAAGGCCACCTCTGCTGAGAGATGCTGCACCTTGCCCATCTTCTAACTTTAAATCTTATCCACTCCCATGTCCTTTCCCTCATTATTCCCCCTTTAAAGTTATCTGTATTGTTCTTAATTATGTATACAGGGGCATAAATGCAGATGCCCTTGGAAGCTAGGGGCACCGTATTTCCCAGGTGATGGAGTTACAGATGGTTGTAAGCTGCCTGGCATGGGTGCTGGAAACGAACTCAGGAACTCTACAAACACAGTGTGTACTCTCAGCCAATGAGTATGCATGGAGCCATCATTCTGGCCCCTACCTCATCATTTGAAAAAGCAAGTTTAACACTAAATGAAGGACCAACCTAACTTGAAAACTATTCCCACCAGGCATGTCAAACGTTGCAGGTGTGCTCTTATGAAAGGGCATTTCCTGGAAAGCCCAGGATCCAGTGTTTGGCTTCACACTTGGAAAGACCCAGTGTGTACTTCCTGAGGAGCGGGAAGCACAAATTCAGAGCTAAAGAGCTTCCGGAAAAAAAGTCAACAAAGATCAATGGCCCCGAGGCAGCCACGTGTATCAAAATACCCATCAAGAGTCTCCTGCTTGCTCAGTTAATATTTGAAAACCTCAAGCTTCCTGAGTAAATATATTGTTCTGTGAACTTTGAGACTCAGGCATGGAGGCACCAAGGACCTTTTAGATCGCTTGGAAAATCTGTTTCAAACCAAAATTATCCTGGGGATCATTTCACATAAGGAAATCTGGCACTGTTTGACAGCTATAAGAACCACAGGGGGAAATGACAATGCTTGATGGTGACTTCTAACAAAGAGAGTGGACCATCCAAATCTTTAAATACAAGGCGGGGCAATTCACCTCCCTCTGAGCCACTGAGAAACATCTCAACAGCTGAGATGAAGGATCACCCCTTGGTGTCCAGGCTTTGTAACACCAATAGATGAGATGGACCATCTTGATTTAATTATGCCTTTAGATTTTAGATGTTTAAAACTCAACAAGTTATTTTCCCATTTCCCATTCACCACCTAAGGGGGCCTTCCAACCGCCAGCGGTGTCCTTTGGGCTACCTCAGAGGCCGTTGTTCCTCAAGCCGTCCAAGTGTTACCTTTGACCCAGACACTGGATCAGTGTAAAGCAGCACAGCTGGCTTAACAATTAGCCCAGGAGCCTGCAGAAGGCGCTGGGTGCTCAGAGGCTAACTAATGTCTCCAATCATCAACAAGTAGTCTTAAATATTCCACTTAGAAGTGATTTCTTTCTAGAGAAGTCTCCTTTAGAGACCTAAACAAGGTCAGTTGCATAGAATGATACTTTACTCTCCCTCCACTGACTGAAAGCATCTGTCTATACCTTGTAAAAGATGCAGGAGTGCATAGATAATGTC	LN:i:5531	SN:Z:19	SO:i:32175046	SR:i:0
L	s1462592	+	s1462593	+	0M	SR:i:0	L1:i:29	L2:i:5531
L	s1462593	+	s1462594	+	0M	SR:i:0	L1:i:5531	L2:i:6400
L	s1462593	+	s1462595	+	0M	SR:i:1	L1:i:5531	L2:i:2194
L	s1983135	+	s1462593	+	0M	SR:i:11	L1:i:54	L2:i:5531


```
### SV Bubble Data:

AproxCtgEnd - AproxCtgStart ->  29444711 - 29444702 = 9

End - Pos -> 32186977 - 32180577 = 6400


### Expected output:
6400 bp DEL

## Additional comments:


In [5]:
# Sample 6: select the bubble row(s) whose reference start (Pos) is 32180577.
data[data['Pos'].eq(32180577)]

Unnamed: 0,Chr,Pos,End,F1,F2,AligPath,PathLength,MappingStrandSamplCtg,Ctg1_Strain,Ctg2_Sq,Ctg3_Chrom,AproxCtgStart,AproxCtgEnd
8321,19,32180577,32186977,>s1462593,>s1462595,*,0,+,DBA_2J,1,chr19,29444702,29444711
