In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st
import seaborn as sns
import sys
import os
import matplotlib.pyplot as plt
import swan_vis as swan
import yaml
from snakemake.io import expand
import cerberus
import pyranges as pr
import upsetplot

p = os.path.dirname(os.getcwd())
sys.path.append(p)

from scripts.utils import *
from scripts.vcf_utils import *
from scripts.plotting import *

In [2]:
config = load_config()
od = '../'

def proc_cfg(entry, od):
    entry = entry.replace('../../', '')
    entry = od+entry
    return entry

## We want to get the dist. of distances between novel SSs and annotated SSs.

Hopefully they are generally close


In [28]:
df = load_meta()
df = df.loc[~df['sample'].str.contains('_')]
df['lab_sample'] = df['lab_number_sample'].astype(str)+'_'+\
                      df['lab_sampleid'].astype(str)+'_'+\
                      df['cell_line_id'].astype(str)
df.columns
df = df[['cell_line_id', 'sample', 'hapmap_DNA_ID',
          'map_reads_assemblymap','lab_sample', 'population']].drop_duplicates()

temp_df = pd.read_csv('../snakemake/transcript_discovery_personal/cell_line_ids.txt', header=None, names=['cell_line_id'])

# make a 1000g cell line id col
df['cell_line_id_1000g'] = df.cell_line_id

inds = df.loc[~df.cell_line_id_1000g.isin(temp_df.cell_line_id.tolist())].index
df.loc[inds, 'cell_line_id_1000g'] = df.loc[inds, 'hapmap_DNA_ID']
len(df.index)

# limit to just those in 1000g
df = df.loc[df.cell_line_id_1000g.isin(temp_df.cell_line_id.tolist())]
assert len(df.index) == 30

# TODO bad sample that hasn't finished on espresso
# bad_samples = ['NA19328']
# df = df.loc[~df.cell_line_id_1000g.isin(bad_samples)]

hap = ['hap1', 'hap2']

meta_df = df.copy(deep=True)

In [70]:
file = proc_cfg(config['lr']['td_personal']['sqanti']['sj_summary'],od)
df = pd.read_csv(file)
df.drop('Unnamed: 0', axis=1, inplace=True)
df['detected'] = True

# 1.5 only novel sss
temp = df.loc[(df.start_site_novelty=='novel')|(df.end_site_category=='novel')]

# only keep unique novel sjs
temp[['Chromosome', 'Strand', 'Start', 'End']] = temp.sj_id.str.split('_', expand=True)
temp = temp[['Chromosome', 'Strand', 'Start', 'End', 'sj_id', 
             'start_site_novelty', 'end_site_category', 'cell_line_id']].drop_duplicates()

temp.Start = temp.Start.astype(int)
temp.End = temp.End.astype(int)
assert len(temp.loc[temp.Start>temp.End])==0

# melt to 5' and 3'
temp = temp.melt(id_vars=['Chromosome', 'Strand', 'sj_id',
                          'start_site_novelty', 'end_site_category', 'cell_line_id'],
                 value_vars=['Start', 'End'])
temp['sj_loc'] = ''


temp.loc[temp.variable=='Start', 'sj_loc'] = 'start'
temp.loc[temp.variable=='End', 'sj_loc'] = 'end'

temp['novelty_category'] = ''
temp.loc[temp.variable=='Start', 'novelty_category'] = temp.loc[temp.variable=='Start', 'start_site_novelty']
temp.loc[temp.variable=='End', 'novelty_category'] = temp.loc[temp.variable=='End', 'end_site_category']

# only get the novel ones now
temp = temp.loc[temp.novelty_category=='novel']

temp.rename({'value':'Start'}, axis=1, inplace=True)
# need to verify that this is working using like one motif or something make
# sure I don't have off-by-one errors
# verified
temp['Start'] = temp.Start-2
temp['End'] = temp.Start+1

# verified
temp.loc[temp.sj_loc=='start', 'Start'] = temp.loc[temp.sj_loc=='start', 'Start']-0
temp.loc[temp.sj_loc=='start', 'End'] = temp.loc[temp.sj_loc=='start', 'End']+0


temp.loc[temp.sj_loc=='end', 'End'] = temp.loc[temp.sj_loc=='end', 'End']+2
temp.loc[temp.sj_loc=='end', 'Start'] = temp.loc[temp.sj_loc=='end', 'Start']+2


In [73]:
nov_ss_df = temp.copy(deep=True)
nov_ss_df.head()

Unnamed: 0,Chromosome,Strand,sj_id,start_site_novelty,end_site_category,cell_line_id,variable,Start,sj_loc,novelty_category,End
0,GL000195.1,-,GL000195.1_-_49120_86517,novel,known,NA12273,Start,49118,start,novel,49119
1,chr17,-,chr17_-_1821508_1827860,novel,known,NA12273,Start,1821506,start,novel,1821507
2,chr17,-,chr17_-_1821512_1827860,novel,known,NA12273,Start,1821510,start,novel,1821511
3,chr17,-,chr17_-_1823205_1827860,novel,known,NA12273,Start,1823203,start,novel,1823204
4,chr17,-,chr17_-_1825231_1827860,novel,known,NA12273,Start,1825229,start,novel,1825230


In [62]:
# temp = pr.PyRanges(temp)
# temp.to_bed('test_novel_ss.bed')

In [78]:
# get annotated (v47) sss
gtf_file = proc_cfg(config['ref']['gtf'], od)

# read in ref. gtf and get SJs from there
temp = pr.read_gtf(gtf_file)

# get the introns
temp = temp.features.introns()
temp = temp.df

temp.Start = temp.Start.astype(int)
temp.End = temp.End.astype(int)
assert len(temp.loc[temp.Start>temp.End])==0

# melt to 5' and 3'
temp = temp.melt(id_vars=['Chromosome', 'Strand'],
                 value_vars=['Start', 'End'])
temp['sj_loc'] = ''
temp.loc[temp.variable=='Start', 'sj_loc'] = 'start'
temp.loc[temp.variable=='End', 'sj_loc'] = 'end'

temp.rename({'value':'Start'}, axis=1, inplace=True)

temp['Start'] = temp.Start-2
temp['End'] = temp.Start+1

# need to verify
temp.loc[temp.sj_loc=='start', 'Start'] = temp.loc[temp.sj_loc=='start', 'Start']+1
temp.loc[temp.sj_loc=='start', 'End'] = temp.loc[temp.sj_loc=='start', 'End']+1


temp.loc[temp.sj_loc=='end', 'End'] = temp.loc[temp.sj_loc=='end', 'End']+2
temp.loc[temp.sj_loc=='end', 'Start'] = temp.loc[temp.sj_loc=='end', 'Start']+2


temp = pr.PyRanges(temp)



In [79]:
temp.to_bed('test_v47_ss.bed')


In [85]:
nov_ss_df = pr.PyRanges(nov_ss_df)

In [98]:
# get closest entry from novel sss to known sss
df = nov_ss_df.nearest(temp, strandedness=False)
df = df.df

In [99]:
# what % of these are w/i 12 bp of an annotated one
assert len(df.loc[df.Distance<0]) == 0 # no neg. dists
n_num = len(df.loc[df.Distance<=12].index)
n = len(df.index)
print(f'{(n_num/n)*100:.2f}% of novel personalized haplotype SJs are w/i 12bp of an annotated one')

8.17% of novel personalized haplotype SJs are w/i 12bp of an annotated one


In [93]:
# plot dist. 
sns.displot(df,
            x='Distance',
            kind='kde')
            

360601

In [97]:
df.loc[df.Distance<12]

Unnamed: 0,Chromosome,Strand,sj_id,start_site_novelty,end_site_category,cell_line_id,variable,Start,sj_loc,novelty_category,End,Strand_b,variable_b,Start_b,sj_loc_b,End_b,Distance
203,GL000194.1,-,GL000194.1_-_112851_114981,known,novel,NA12778,End,114981,end,novel,114982,-,End,114985,end,114986,4
204,GL000194.1,-,GL000194.1_-_112851_114981,known,novel,HG04216,End,114981,end,novel,114982,-,End,114985,end,114986,4
205,GL000194.1,-,GL000194.1_-_112851_114981,known,novel,HG01975,End,114981,end,novel,114982,-,End,114985,end,114986,4
206,GL000194.1,-,GL000194.1_-_112851_114981,known,novel,HG03729,End,114981,end,novel,114982,-,End,114985,end,114986,4
207,GL000194.1,-,GL000194.1_-_112851_114981,known,novel,HG04217,End,114981,end,novel,114982,-,End,114985,end,114986,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360421,chrY,+,chrY_+_20575888_20579616,known,novel,NA19307,End,20579616,end,novel,20579617,+,End,20579607,end,20579608,9
360424,chrY,+,chrY_+_20579692_20582591,known,novel,HG00621,End,20582591,end,novel,20582592,+,End,20582589,end,20582590,2
360425,chrY,+,chrY_+_20579692_20582591,known,novel,HG01952,End,20582591,end,novel,20582592,+,End,20582589,end,20582590,2
360426,chrY,+,chrY_+_20579692_20582591,known,novel,NA18486,End,20582591,end,novel,20582592,+,End,20582589,end,20582590,2


## Try specifically subsetting on the SJs that are exclusively found in hap1/hap2?

In [223]:
file = proc_cfg(config['lr']['td_personal']['sqanti']['sj_summary'],od)
df = pd.read_csv(file)
df.drop('Unnamed: 0', axis=1, inplace=True)

In [224]:
# dedupe w/ sqanti genome cause no one cares
df = df.drop(['sqanti_genome', 'splice_motif', 'canonical'], axis=1)
print(len(df.index))
df = df.drop_duplicates()
print(len(df.index))

24934518
8311506


In [225]:
# # try just w/ one cell line for now
# df = df.loc[df.cell_line_id=='NA12273']
# df_back = df.copy(deep=True)

In [275]:
df = df_back.copy(deep=True)
df.head()

Unnamed: 0,sj_id,splice_motif,canonical,junction_novelty,start_site_novelty,end_site_category,cell_line_id,map_genome
0,KI270741.1_+_44470_47849,GTAG,canonical,known,known,known,NA12273,hg38
1,KI270741.1_+_48008_52058,GTAG,canonical,known,known,known,NA12273,hg38
2,KI270741.1_+_52137_130639,GTAG,canonical,known,known,known,NA12273,hg38
3,KI270741.1_+_44470_47948,GTAG,canonical,known,known,known,NA12273,hg38
4,GL000224.1_+_102592_105978,GTAG,canonical,known,known,known,NA12273,hg38


In [276]:
# TODO put me in coach
# # 1. filter for sjs w/ at least one novel ss 
# df = df.loc[(df.start_site_novelty=='novel')|(df.end_site_category=='novel')]

In [277]:
# melt to 5' and 3'
df[['Chromosome', 'Strand', 'Start', 'End']] = df.sj_id.str.split('_', expand=True)
df.tail()

Unnamed: 0,sj_id,splice_motif,canonical,junction_novelty,start_site_novelty,end_site_category,cell_line_id,map_genome,Chromosome,Strand,Start,End
8784437,chr2_-_225040714_225041288,GTAG,canonical,novel,novel,novel,NA12273,hap1,chr2,-,225040714,225041288
8784718,chr2_-_231513633_231514081,ATAG,non_canonical,known,known,known,NA12273,hap1,chr2,-,231513633,231514081
8800830,chr10_-_132324374_132331650,GTAG,canonical,novel,known,novel,NA12273,hap1,chr10,-,132324374,132331650
8800831,chr10_-_132331529_132331650,GTAG,canonical,novel,novel,novel,NA12273,hap1,chr10,-,132331529,132331650
8802477,chr16_+_29482592_29486512,GTAG,canonical,novel,novel,known,NA12273,hap1,chr16,+,29482592,29486512


In [278]:
# # TODO remove
# df['ss_1_id'] = df.sj_id.str.rsplit('_', n=1, expand=True)[0]
# df['ss_2_id'] = df.sj_id.str.rsplit('_', n=2, expand=True)[0]+'_'+df.sj_id.str.rsplit('_', n=1, expand=True)[1]

# df['n_ss_1'] = df.groupby('ss_1_id')['sj_id'].transform('nunique')
# df['n_ss_2'] = df.groupby('ss_2_id')['sj_id'].transform('nunique')
# df.loc[df.n_ss_1>1].sort_values(by='ss_1_id').head()

In [279]:
df = df.melt(id_vars=['Chromosome', 'Strand', 'map_genome',
                      'start_site_novelty', 'end_site_category', 'cell_line_id'],
                 value_vars=['Start', 'End'])
df['sj_loc'] = ''
df['detected'] = True


df.loc[df.variable=='Start', 'sj_loc'] = 'start'
df.loc[df.variable=='End', 'sj_loc'] = 'end'

df['novelty_category'] = ''
df.loc[df.variable=='Start', 'novelty_category'] = df.loc[df.variable=='Start', 'start_site_novelty']
df.loc[df.variable=='End', 'novelty_category'] = df.loc[df.variable=='End', 'end_site_category']

# only get the novel ones now
df = df.loc[df.novelty_category=='novel']

df.rename({'value':'Start'}, axis=1, inplace=True)

# create id for each ss
df['ss_id'] = 'ss_'+\
                df.sj_loc+'_'+\
                df.Chromosome+'_'+\
                df.Strand+'_'+\
                df.Start.astype(str)

df = df[['ss_id', 'cell_line_id', 'map_genome', 'detected']]

# check for sss that originated from different sjs
assert len(df.loc[df.ss_id.duplicated()]) > 0

# then dedupe and pivot
df = df.drop_duplicates()
df = df.pivot(index=['ss_id', 'cell_line_id'], 
                columns=['map_genome'],
                values=['detected'])

# flatten
df.columns = df.columns.get_level_values(1)
df.columns.name = None

# reset index to make it a flat DataFrame
df = df.reset_index()

# fill missing values with False
df = df.fillna(False)

In [None]:
df.head()

In [269]:
df.loc[df.ss_id=='ss_start_chr17_+_5426072']

Unnamed: 0,ss_id,cell_line_id,hap1,hap2,hg38
5156,ss_start_chr17_+_5426072,NA12273,True,True,True


In [262]:
df['ss_count'] = df.groupby('ss_id')['map_genome'].transform('count')
df.loc[df.ss_count>3].head()
    

Unnamed: 0,Chromosome,Strand,map_genome,start_site_novelty,end_site_category,cell_line_id,variable,Start,sj_loc,detected,novelty_category,ss_id,ss_count
430,chr17,+,hg38,novel,known,NA12273,Start,5426072,start,True,novel,ss_start_chr17_+_5426072,6
431,chr17,+,hg38,novel,known,NA12273,Start,5426072,start,True,novel,ss_start_chr17_+_5426072,6
779,chr17,-,hg38,novel,known,NA12273,Start,8174262,start,True,novel,ss_start_chr17_-_8174262,6
781,chr17,-,hg38,novel,known,NA12273,Start,8174262,start,True,novel,ss_start_chr17_-_8174262,6
981,chr17,+,hg38,novel,novel,NA12273,Start,16381116,start,True,novel,ss_start_chr17_+_16381116,12


In [204]:
# # take max. of detection for the same ss id and map genome (in case one side of a splice junction was part of a different
# # sj in some other context)
# assert len(df.loc[df.ss_id.duplicated()]) > 0
# print(df.loc[df.ss_id.duplicated(keep=False), 'ss_id'].head())
# df = df.groupby(['ss_id', 'cell_line_id']).max().reset_index()
# assert len(df.index) == len(df.drop_duplicates().index)

AssertionError: 

In [192]:
# 1. get those that are detected uniquely in hap1/2 
# (ie those not in hg38)
df = df.loc[(df.hg38==False)&
                ((df.hap1+df.hap2)>0)]

In [197]:
# now check one of these guys in the og df
df.head()
df.loc[df.ss_id.str.contains('97738675')]

Unnamed: 0,ss_id,cell_line_id,hap1,hap2,hg38


In [196]:
df_back.loc[df_back.sj_id.str.contains('97742567')]

Unnamed: 0,sj_id,splice_motif,canonical,junction_novelty,start_site_novelty,end_site_category,cell_line_id,map_genome
8361277,chr10_+_97738675_97742567,GTAG,canonical,novel,known,novel,NA12273,hap2
8625246,chr10_+_97738675_97742567,GTAG,canonical,novel,known,novel,NA12273,hap1


In [104]:
# 1.5 only novel sss
temp = df.loc[(df.start_site_novelty=='novel')|(df.end_site_category=='novel')]

In [105]:
# temp_back = temp.copy(deep=True)

In [150]:
temp = temp_back.copy(deep=True)

In [151]:
temp[['Chromosome', 'Strand', 'Start', 'End']] = temp.sj_id.str.split('_', expand=True)

temp.Start = temp.Start.astype(int)
temp.End = temp.End.astype(int)
assert len(temp.loc[temp.Start>temp.End])==0

# melt to 5' and 3'
temp = temp.melt(id_vars=['Chromosome', 'Strand', 'sj_id',
                          'start_site_novelty', 'end_site_category', 'cell_line_id',
                          'map_genome', 'sqanti_genome'],
                 value_vars=['Start', 'End'])
temp['sj_loc'] = ''
temp['detected'] = True

In [152]:
temp.loc[temp.variable=='Start', 'sj_loc'] = 'start'
temp.loc[temp.variable=='End', 'sj_loc'] = 'end'

temp['novelty_category'] = ''
temp.loc[temp.variable=='Start', 'novelty_category'] = temp.loc[temp.variable=='Start', 'start_site_novelty']
temp.loc[temp.variable=='End', 'novelty_category'] = temp.loc[temp.variable=='End', 'end_site_category']

# only get the novel ones now
temp = temp.loc[temp.novelty_category=='novel']

temp.rename({'value':'Start'}, axis=1, inplace=True)

In [153]:
# create id for each ss
temp['ss_id'] = 'ss_'+\
                temp.sj_loc+'_'+\
                temp.Chromosome+'_'+\
                temp.Strand+'_'+\
                temp.Start.astype(str)

In [155]:

temp = temp[['ss_id', 'sj_id', 'cell_line_id', 'map_genome', 'detected']]
temp = temp.drop_duplicates()
temp = temp.pivot(index=['ss_id', 'cell_line_id', 'sj_id'], 
                columns=['map_genome'],
                values=['detected'])

# flatten
temp.columns = temp.columns.get_level_values(1)
temp.columns.name = None

# reset index to make it a flat DataFrame
temp = temp.reset_index()

# fill missing values with False
temp = temp.fillna(False)


In [160]:
len(temp.loc[temp[['hap1', 'hap2', 'hg38']].sum(axis=1)>1])

8333

In [158]:
# kept in sj_id just to do some checking back in parent dataframe to make sure I'm not
# doing anything insane
temp_back.loc[(temp_back.sj_id=='KI270726.1_+_26127_26229')&(temp_back.cell_line_id=='HG02261')]

Unnamed: 0,sj_id,splice_motif,canonical,junction_novelty,start_site_novelty,end_site_category,cell_line_id,map_genome,sqanti_genome,detected
21436766,KI270726.1_+_26127_26229,GTAG,canonical,novel,novel,novel,HG02261,hap1,hg38,True
21540547,KI270726.1_+_26127_26229,GTAG,canonical,novel,novel,novel,HG02261,hap1,hap2,True
21644328,KI270726.1_+_26127_26229,GTAG,canonical,novel,novel,novel,HG02261,hap1,hap1,True


In [156]:
# 1. get those that are detected uniquely in hap1/2 
# (ie those not in hg38)
temp = temp.loc[(temp.hg38==False)&
                ((temp.hap1+temp.hap2)>0)]

In [None]:

# need to verify that this is working using like one motif or something make
# sure I don't have off-by-one errors
# verified
temp['Start'] = temp.Start-2
temp['End'] = temp.Start+1

# verified
temp.loc[temp.sj_loc=='start', 'Start'] = temp.loc[temp.sj_loc=='start', 'Start']-0
temp.loc[temp.sj_loc=='start', 'End'] = temp.loc[temp.sj_loc=='start', 'End']+0


temp.loc[temp.sj_loc=='end', 'End'] = temp.loc[temp.sj_loc=='end', 'End']+2
temp.loc[temp.sj_loc=='end', 'Start'] = temp.loc[temp.sj_loc=='end', 'Start']+2


In [102]:
file = proc_cfg(config['lr']['td_personal']['sqanti']['sj_summary'],od)
df = pd.read_csv(file)
df.drop('Unnamed: 0', axis=1, inplace=True)
df['detected'] = True

# 1.5 only novel sss
temp = df.loc[(df.start_site_novelty=='novel')|(df.end_site_category=='novel')]

# only keep unique novel sjs
temp[['Chromosome', 'Strand', 'Start', 'End']] = temp.sj_id.str.split('_', expand=True)
temp = temp[['Chromosome', 'Strand', 'Start', 'End', 'sj_id', 
             'start_site_novelty', 'end_site_category', 'cell_line_id']].drop_duplicates()

temp.Start = temp.Start.astype(int)
temp.End = temp.End.astype(int)
assert len(temp.loc[temp.Start>temp.End])==0

# melt to 5' and 3'
temp = temp.melt(id_vars=['Chromosome', 'Strand', 'sj_id',
                          'start_site_novelty', 'end_site_category', 'cell_line_id'],
                 value_vars=['Start', 'End'])
temp['sj_loc'] = ''


temp.loc[temp.variable=='Start', 'sj_loc'] = 'start'
temp.loc[temp.variable=='End', 'sj_loc'] = 'end'

temp['novelty_category'] = ''
temp.loc[temp.variable=='Start', 'novelty_category'] = temp.loc[temp.variable=='Start', 'start_site_novelty']
temp.loc[temp.variable=='End', 'novelty_category'] = temp.loc[temp.variable=='End', 'end_site_category']

# only get the novel ones now
temp = temp.loc[temp.novelty_category=='novel']

temp.rename({'value':'Start'}, axis=1, inplace=True)
# need to verify that this is working using like one motif or something make
# sure I don't have off-by-one errors
# verified
temp['Start'] = temp.Start-2
temp['End'] = temp.Start+1

# verified
temp.loc[temp.sj_loc=='start', 'Start'] = temp.loc[temp.sj_loc=='start', 'Start']-0
temp.loc[temp.sj_loc=='start', 'End'] = temp.loc[temp.sj_loc=='start', 'End']+0


temp.loc[temp.sj_loc=='end', 'End'] = temp.loc[temp.sj_loc=='end', 'End']+2
temp.loc[temp.sj_loc=='end', 'Start'] = temp.loc[temp.sj_loc=='end', 'Start']+2


Unnamed: 0,sj_id,cell_line_id,hap1,hap2,hg38,n_novel_sj
67,GL000194.1_-_53893_55445,HG03729,False,True,False,15825
68,GL000194.1_-_53893_55445,HG04217,False,True,False,15175
73,GL000194.1_-_54956_55445,HG04217,False,True,False,15175
470,KI270726.1_+_26127_26229,HG02261,True,False,False,19818
515,chr10_+_100987611_100989643,NA19307,True,True,False,11472


In [None]:
# # 1.5 only novel sss
# temp = df.loc[(df.start_site_novelty=='novel')|(df.end_site_category=='novel')]

# # only keep unique novel sjs
# temp[['Chromosome', 'Strand', 'Start', 'End']] = temp.sj_id.str.split('_', expand=True)
# temp = temp[['Chromosome', 'Strand', 'Start', 'End', 'sj_id', 
#              'start_site_novelty', 'end_site_category', 'cell_line_id']].drop_duplicates()

# temp.Start = temp.Start.astype(int)
# temp.End = temp.End.astype(int)
# assert len(temp.loc[temp.Start>temp.End])==0

# # melt to 5' and 3'
# temp = temp.melt(id_vars=['Chromosome', 'Strand', 'sj_id',
#                           'start_site_novelty', 'end_site_category', 'cell_line_id'],
#                  value_vars=['Start', 'End'])
# temp['sj_loc'] = ''


# temp.loc[temp.variable=='Start', 'sj_loc'] = 'start'
# temp.loc[temp.variable=='End', 'sj_loc'] = 'end'

# temp['novelty_category'] = ''
# temp.loc[temp.variable=='Start', 'novelty_category'] = temp.loc[temp.variable=='Start', 'start_site_novelty']
# temp.loc[temp.variable=='End', 'novelty_category'] = temp.loc[temp.variable=='End', 'end_site_category']

# # only get the novel ones now
# temp = temp.loc[temp.novelty_category=='novel']

# temp.rename({'value':'Start'}, axis=1, inplace=True)
# # need to verify that this is working using like one motif or something make
# # sure I don't have off-by-one errors
# # verified
# temp['Start'] = temp.Start-2
# temp['End'] = temp.Start+1

# # verified
# temp.loc[temp.sj_loc=='start', 'Start'] = temp.loc[temp.sj_loc=='start', 'Start']-0
# temp.loc[temp.sj_loc=='start', 'End'] = temp.loc[temp.sj_loc=='start', 'End']+0


# temp.loc[temp.sj_loc=='end', 'End'] = temp.loc[temp.sj_loc=='end', 'End']+2
# temp.loc[temp.sj_loc=='end', 'Start'] = temp.loc[temp.sj_loc=='end', 'Start']+2
