In [2]:
import os, glob
import pandas as pd
from io import StringIO
import pyBigWig
import subprocess
import pybedtools 
pybedtools.helpers.set_tempdir('/data/projects/temp')

In [3]:
enhancer_file_path = "/data/projects/Enhancer/RECOMB_2024/Enhancer_BED.tsv"

In [4]:
df_enhancer = pd.read_csv(enhancer_file_path, sep="\t")
df_enhancer

Unnamed: 0,chromosome_name,enhancer_start,enhancer_end
0,chr6,167157937,167158136
1,chr8,140206853,140207052
2,chr5,170638695,170638894
3,chr1,63043547,63043746
4,chr19,19238266,19238465
...,...,...,...
7774,chr1,204552002,204552201
7775,chr9,38725772,38725971
7776,chr7,27950006,27950205
7777,chr2,171047833,171048032


## For Core Promoter Region

In [5]:
df_CoreProm = pd.read_csv("/data/private/pdutta/DNABERT_data/Core_promoters/Core_Prom_bed.tsv", sep="\t")

In [6]:
df_CoreProm

Unnamed: 0,Chr,gene_id,transcript_id,exon_id,strand,TSS,start,end
0,chr7,ENSG00000004059.11,ENST00000000233.10,ENSE00001872691.2,+,127588411,127588366,127588455
1,chr7,ENSG00000001626.16,ENST00000003084.11,ENSE00001343851.2,+,117480025,117479980,117480069
2,chr7,ENSG00000001630.17,ENST00000003100.13,ENSE00001216550.10,-,92134477,92134433,92134522
3,chr7,ENSG00000002933.9,ENST00000004103.8,ENSE00001827596.2,+,150800769,150800724,150800813
4,chr7,ENSG00000004799.8,ENST00000005178.6,ENSE00001133344.6,-,95596516,95596472,95596561
...,...,...,...,...,...,...,...,...
236970,chr15,ENSG00000103876.14,ENST00000684569.1,ENSE00003921718.1,+,80153010,80152965,80153054
236971,chr15,ENSG00000213614.11,ENST00000684602.1,ENSE00002620879.1,-,72375989,72375945,72376034
236972,chr15,ENSG00000213614.11,ENST00000684667.1,ENSE00002584031.1,-,72375972,72375928,72376017
236973,chr15,ENSG00000104133.16,ENST00000684676.1,ENSE00003922470.1,-,44663688,44663644,44663733


In [8]:
str_io_CoreProm = StringIO()
df_CoreProm[['Chr','start','end','transcript_id','strand']].to_csv(str_io_CoreProm, sep='\t', index=False, header=False)
str_io_CoreProm.seek(0)
CoreProm_bed = pybedtools.BedTool(str_io_CoreProm.read(), from_string=True)

In [9]:
str_io_enhancer = StringIO()
df_enhancer[['chromosome_name','enhancer_start','enhancer_end']].to_csv(str_io_enhancer, sep='\t', index=False, header=False)
str_io_enhancer.seek(0)
enhancer_bed = pybedtools.BedTool(str_io_enhancer.read(), from_string=True)

In [10]:
# intersect the files
intersect_bed = CoreProm_bed.intersect(enhancer_bed, wa=True, wb=True)

In [11]:
for i, interval in enumerate(intersect_bed):
    if i < 5:
        print(interval)
    else:
        break

chr7	6004341	6004430	ENST00000365120.1	-	chr7	6004337	6004536

chr7	77545707	77545796	ENST00000418110.5	+	chr7	77545645	77545844

chr7	42113320	42113409	ENST00000423237.1	+	chr7	42113334	42113533

chr7	18949718	18949807	ENST00000452663.1	-	chr7	18949599	18949798

chr7	73543849	73543938	ENST00000481667.1	-	chr7	73543761	73543960



In [12]:
intersect_df = intersect_bed.to_dataframe()

In [14]:
intersect_df

Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd
0,chr7,6004341,6004430,ENST00000365120.1,-,chr7,6004337,6004536
1,chr7,77545707,77545796,ENST00000418110.5,+,chr7,77545645,77545844
2,chr7,42113320,42113409,ENST00000423237.1,+,chr7,42113334,42113533
3,chr7,18949718,18949807,ENST00000452663.1,-,chr7,18949599,18949798
4,chr7,73543849,73543938,ENST00000481667.1,-,chr7,73543761,73543960
...,...,...,...,...,...,...,...,...
143,chr18,10681684,10681773,ENST00000579151.1,-,chr18,10681696,10681895
144,chr15,36895258,36895347,ENST00000557992.1,-,chr15,36895234,36895433
145,chr15,44575228,44575317,ENST00000558080.1,-,chr15,44575245,44575444
146,chr15,36895243,36895332,ENST00000559972.1,-,chr15,36895234,36895433


In [15]:
intersect_df.to_csv("/data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm_Enhancer_intersect.tsv", sep='\t', index=False)
#output_df.to_csv("/data/private/pdutta/DNABERT_data/Splice_sites/Acceptor_raw_files/PART_Acceptor_Transcript_intersect_details.tsv", sep='\t', index=False)

In [16]:
chr_wise_DBSNP_files = glob.glob('/data/projects/VCF_files/DBSNP/ChrWise_Data/*.csv')

In [17]:
for dbsnp_file in chr_wise_DBSNP_files:
    # Read the csv file into a DataFrame
    print(dbsnp_file)
    df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])
    df = df.iloc[1:]
    print(df.head())
    
    # Convert the DataFrame to a BedTool
    dbsnp = pybedtools.BedTool.from_dataframe(df)
    
    # Intersect the BedTool with the transcript file
    intersected = intersect_bed.intersect(dbsnp, wa=True, wb=True)
    
    # Save the intersected BedTool to a bed file
    output_file = "/data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm/Chrwise/intersected_DBSNP_transcript_" + dbsnp_file.split('/')[-1].replace('.csv', '.tsv')
    intersected.saveas(output_file)

/data/projects/VCF_files/DBSNP/ChrWise_Data/chr7_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5   6
1  chr7  10001  10002  rs1583156258  T  1  C,
2  chr7  10002  10003  rs1583156262  A  1  G,
3  chr7  10003  10004  rs1583156265  A  1  C,
4  chr7  10004  10005  rs1583156268  C  1  A,
5  chr7  10006  10007  rs1781584841  C  1  T,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr13_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0         1         2             3  4  5   6
1  chr13  16000025  16000026  rs1175737345  T  1  A,
2  chr13  16000063  16000064  rs1957610123  G  1  A,
3  chr13  16000079  16000080  rs1957610132  G  1  T,
4  chr13  16000081  16000082  rs1957610136  A  1  C,
5  chr13  16000100  16000101  rs1957610144  T  1  C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chrY_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5     6
1  chrY  10001  10002  rs1226858834  T  1    A,
2  chrY  10002  10003   rs375039031  A  2  C,G,
3  chrY  10006  10007  rs1422184628  C  1    G,
4  chrY  10007  10008  rs1179917603  T  1    G,
5  chrY  10008  10009   rs565284081  A  2  C,G,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chrX_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5     6
1  chrX  10001  10002  rs1226858834  T  1    A,
2  chrX  10002  10003   rs375039031  A  2  C,G,
3  chrX  10006  10007  rs1422184628  C  1    G,
4  chrX  10007  10008  rs1179917603  T  1    G,
5  chrX  10008  10009   rs565284081  A  2  C,G,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr15_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0         1         2             3  4  5     6
1  chr15  17000014  17000015  rs1566750235  A  1   AA,
2  chr15  17000025  17000026  rs2044609058  T  1    G,
3  chr15  17000028  17000029  rs1464642334  G  1    T,
4  chr15  17000037  17000038  rs1427739243  T  2  A,C,
5  chr15  17000048  17000049  rs2044609081  T  1    C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr6_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5   6
1  chr6  61733  61734  rs1761792635  C  1  T,
2  chr6  61734  61735  rs1357314184  T  1  A,
3  chr6  61753  61754  rs1761792694  G  1  A,
4  chr6  61754  61755  rs1269839351  G  1  T,
5  chr6  61761  61762  rs1761792734  T  1  A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr19_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5   6
1  chr19  60061  60062  rs1555674440  G  1  C,
2  chr19  60172  60173  rs1371922052  G  1  A,
3  chr19  60222  60223  rs1187548881  A  1  G,
4  chr19  60250  60251  rs1310995734  G  1  A,
5  chr19  60318  60319  rs1244952011  C  1  T,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr1_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5       6
1  chr1  10000  10001  rs1570391677  T  2    A,C,
2  chr1  10001  10002  rs1570391692  A  1      C,
3  chr1  10002  10003  rs1570391694  A  1      C,
4  chr1  10006  10007  rs1639538116  T  2    C,G,
5  chr1  10007  10008  rs1570391698  A  3  C,G,T,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr5_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5     6
1  chr5  10017  10018  rs1658455299  C  1    A,
2  chr5  10020  10021  rs1742866267  A  1    G,
3  chr5  10021  10022  rs1742866288  C  1    A,
4  chr5  10022  10023  rs1742866315  C  2  A,G,
5  chr5  10023  10024  rs1742866334  C  1    A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr12_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5   6
1  chr12  10009  10010  rs1591589648  A  1  G,
2  chr12  10015  10016  rs1862308353  A  1  G,
3  chr12  10016  10017  rs1591589651  C  1  A,
4  chr12  10019  10020  rs1591589652  T  1  G,
5  chr12  10020  10021  rs1591589654  A  1  C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr10_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5     6
1  chr10  10022  10023  rs1243990134  C  1    A,
2  chr10  10030  10031  rs1834135687  C  2  A,G,
3  chr10  10033  10034  rs1834135707  A  1    G,
4  chr10  10035  10036  rs1834135720  C  2  A,T,
5  chr10  10040  10041  rs1834135754  C  1    A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr9_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5     6
1  chr9  10006  10007  rs1449034754  T  1    C,
2  chr9  10006  10007  rs1815666481  T  1     ,
3  chr9  10008  10009  rs1587255763  A  2  C,G,
4  chr9  10012  10013  rs1265381183  T  2  C,G,
5  chr9  10013  10014  rs1587255772  A  1    C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chrM_data.csv
      0   1   2             3  4  5   6
1  chrM   9  10   rs879233578  T  1  C,
2  chrM  15  16  rs1556422363  A  1  T,
3  chrM  25  26   rs879071945  C  1  T,
4  chrM  40  41   rs879100564  C  1  T,
5  chrM  41  42  rs1603218266  T  1  C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr8_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3  4  5   6
1  chr8  60008  60009  rs1584847528  G  1  T,
2  chr8  60021  60022  rs1800303754  A  1  C,
3  chr8  60023  60024  rs1584847530  A  1  C,
4  chr8  60024  60025  rs1584847532  C  1  A,
5  chr8  60048  60049  rs1800303798  A  1  G,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr21_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0        1        2             3  4  5   6
1  chr21  5025531  5025532  rs1879593094  G  1  C,
2  chr21  5030087  5030088  rs1455320509  C  1  T,
3  chr21  5030104  5030105  rs1173141359  C  1  A,
4  chr21  5030150  5030151  rs1601770018  T  1  G,
5  chr21  5030153  5030154  rs1461284410  T  1  C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr20_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5   6
1  chr20  60001  60002   rs555848207  G  1  C,
2  chr20  60008  60009  rs1600128456  C  1  A,
3  chr20  60009  60010  rs1340966003  G  1  A,
4  chr20  60017  60018  rs2054408755  A  1  C,
5  chr20  60021  60022  rs1178632636  G  1  A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr11_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5   6
1  chr11  60849  60850  rs1590036156  C  1  T,
2  chr11  60993  60994  rs1452162522  T  1  C,
3  chr11  61117  61118   rs867959887  A  1  C,
4  chr11  61118  61119  rs1187622598  A  1  G,
5  chr11  61154  61155   rs899119276  A  1  G,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr22_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0         1         2             3  4  5   6
1  chr22  10510050  10510051  rs2061858963  T  1  C,
2  chr22  10510054  10510055  rs2061858972  A  1  G,
3  chr22  10510076  10510077  rs1290354662  C  1  A,
4  chr22  10510090  10510091  rs2061858975  T  1  A,
5  chr22  10510104  10510105  rs1325858619  T  1  A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr18_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5     6
1  chr18  10001  10002  rs1600555225  C  1    T,
2  chr18  10002  10003  rs1905036575  C  1    T,
3  chr18  10003  10004  rs1600555228  T  1    C,
4  chr18  10004  10005   rs111219353  A  2  C,T,
5  chr18  10005  10006  rs1600555234  A  2  C,G,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr14_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0         1         2             3          4  5    6
1  chr14  16022653  16022654  rs2089043555          T  1   C,
2  chr14  16022660  16022661  rs1391007058          C  1   T,
3  chr14  16022660  16022663  rs1273995405        CCC  1  CC,
4  chr14  16022662  16022671  rs2089043582  CGTGTGTGT  1    ,
5  chr14  16022663  16022664  rs2089043587          G  1   A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr16_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5     6
1  chr16  10000  10001  rs1896969576  T  1    C,
2  chr16  10001  10002  rs1596443048  A  1    T,
3  chr16  10003  10004  rs1596443052  C  1    A,
4  chr16  10005  10006  rs1167029723  C  1    G,
5  chr16  10006  10007  rs1416464287  T  2  A,C,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr4_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3    4  5      6
1  chr4  10000  10001  rs1581341342    A  1     C,
2  chr4  10001  10001  rs1560073237  NaN  1     G,
3  chr4  10001  10002  rs1581341346    C  1     A,
4  chr4  10001  10004  rs1560073246  CCC  1  CCCC,
5  chr4  10003  10004  rs1581341351    C  1     T,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr3_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3   4  5       6
1  chr3  10008  10009  rs1339898020   A  3  C,G,T,
2  chr3  10009  10010  rs1695495101   A  1      C,
3  chr3  10010  10011  rs1695495136   C  1      A,
4  chr3  10014  10015  rs1206436362   A  1      C,
5  chr3  10014  10016  rs1695495167  AA  1    AAA,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr17_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


       0      1      2             3  4  5   6
1  chr17  60054  60055  rs2039333033  G  1  A,
2  chr17  60086  60087  rs2039333047  C  1  T,
3  chr17  60102  60103  rs1399099657  G  1  A,
4  chr17  60103  60104  rs1363626035  C  1  T,
5  chr17  60149  60150  rs2039333080  C  1  A,
/data/projects/VCF_files/DBSNP/ChrWise_Data/chr2_data.csv


  df = pd.read_csv(dbsnp_file, header=None, sep= ',', usecols=[0, 1, 2, 3, 4, 5,6])


      0      1      2             3        4  5         6
1  chr2  10006  10007  rs1572047073        C  1        A,
2  chr2  10015  10015  rs1558169263      NaN  1        G,
3  chr2  10016  10023  rs1558169385  CACCCAC  1      CAC,
4  chr2  10018  10021  rs1558169386      CCC  1       CC,
5  chr2  10019  10019  rs1558169388      NaN  2  ACA,ACG,


In [18]:
tsv_files = [f for f in os.listdir("/data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm/Chrwise") if f.endswith('.tsv') and "chrM" not in f]

In [19]:
tsv_files

['intersected_DBSNP_transcript_chr2_data.tsv',
 'intersected_DBSNP_transcript_chr13_data.tsv',
 'intersected_DBSNP_transcript_chrY_data.tsv',
 'intersected_DBSNP_transcript_chr19_data.tsv',
 'intersected_DBSNP_transcript_chr21_data.tsv',
 'intersected_DBSNP_transcript_chr7_data.tsv',
 'intersected_DBSNP_transcript_chr12_data.tsv',
 'intersected_DBSNP_transcript_chr9_data.tsv',
 'intersected_DBSNP_transcript_chr6_data.tsv',
 'intersected_DBSNP_transcript_chr3_data.tsv',
 'intersected_DBSNP_transcript_chr4_data.tsv',
 'intersected_DBSNP_transcript_chr11_data.tsv',
 'intersected_DBSNP_transcript_chr15_data.tsv',
 'intersected_DBSNP_transcript_chr22_data.tsv',
 'intersected_DBSNP_transcript_chr18_data.tsv',
 'intersected_DBSNP_transcript_chr16_data.tsv',
 'intersected_DBSNP_transcript_chr10_data.tsv',
 'intersected_DBSNP_transcript_chr1_data.tsv',
 'intersected_DBSNP_transcript_chrX_data.tsv',
 'intersected_DBSNP_transcript_chr14_data.tsv',
 'intersected_DBSNP_transcript_chr17_data.tsv',
 

In [20]:
# Read each .tsv file into a pandas DataFrame and store them in a list
list_of_dfs = []

for file in tsv_files:
    file_path = os.path.join("/data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm/Chrwise", file)
    try:
        # Attempt to read the .tsv file
        df = pd.read_csv(file_path, sep='\t', header=None)
        print(df.shape)
        list_of_dfs.append(df)
    except pd.errors.EmptyDataError:
        print(f"Warning: {file_path} is empty and will be skipped.")

(171, 15)
(106, 15)
(325, 15)
(366, 15)
(120, 15)
(238, 15)
(195, 15)
(30, 15)
(440, 15)
(604, 15)
(123, 15)
(139, 15)
(45, 15)
(128, 15)
(38, 15)
(519, 15)
(108, 15)
(286, 15)
(246, 15)
(541, 15)
(35, 15)
(366, 15)


In [21]:
# Concatenate all the DataFrames in the list into a single DataFrame
final_df = pd.concat(list_of_dfs, ignore_index=True) if list_of_dfs else pd.DataFrame()
final_df=final_df.drop(columns=[5, 8])
new_columns = ['chr_name', 'CoreProm_start', 'CoreProm_end', 'Transcript_id', 'Strand', 'Enhancer_start', 'Enhancer_end', 'Variant_start', 'Variant_end', 'RS_ID', 'Reference_Nucleotide', 'No_of_variations', 'Alternative_Nucleotides']
final_df.columns = new_columns
final_df

Unnamed: 0,chr_name,CoreProm_start,CoreProm_end,Transcript_id,Strand,Enhancer_start,Enhancer_end,Variant_start,Variant_end,RS_ID,Reference_Nucleotide,No_of_variations,Alternative_Nucleotides
0,chr2,178702476,178702565,ENST00000414766.5,-,178702385,178702584,178702478,178702479,rs878862911,G,2,"A,T,"
1,chr2,178702476,178702565,ENST00000414766.5,-,178702385,178702584,178702482,178702483,rs138493804,G,1,"A,"
2,chr2,178702476,178702565,ENST00000414766.5,-,178702385,178702584,178702483,178702484,rs1189632210,T,1,"G,"
3,chr2,178702476,178702565,ENST00000414766.5,-,178702385,178702584,178702485,178702486,rs755668668,A,1,"G,"
4,chr2,178702476,178702565,ENST00000414766.5,-,178702385,178702584,178702487,178702488,rs1052276714,A,1,"C,"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5164,chr5,45226241,45226330,ENST00000671050.1,+,45226090,45226289,45226322,45226323,rs1209221494,T,1,"G,"
5165,chr5,45226241,45226330,ENST00000671050.1,+,45226090,45226289,45226324,45226325,rs1339528998,G,1,"C,"
5166,chr5,45226241,45226330,ENST00000671050.1,+,45226090,45226289,45226325,45226326,rs1743986238,T,1,"C,"
5167,chr5,45226241,45226330,ENST00000671050.1,+,45226090,45226289,45226327,45226328,rs1743986258,C,2,"A,G,"


In [23]:
# Write the final DataFrame to a new .tsv file (only if there's data)
if not final_df.empty:
    output_file_path = "/data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm/all_data.tsv"
    final_df.to_csv(output_file_path, sep='\t', index=False)
    print(f"All .tsv files (excluding chrM) concatenated into {output_file_path}")
else:
    print("No data to save after excluding empty files and chrM.")

All .tsv files (excluding chrM) concatenated into /data/projects/Enhancer/RECOMB_2024/Intersected_data/CoreProm/all_data.tsv
