In [4]:
import pysam
from pysam import VariantFile as vcf
import operator
from math import log2
import pandas as pd
from pandas import DataFrame as dataframe
import matplotlib.pyplot as plt
import numpy as np

from scipy.spatial.distance import pdist, squareform
import scipy
import  os
import os.path
import matplotlib.colors as mcolors
from scipy import stats
import csv
import requests
import json
from xml.etree import ElementTree as ET

In [5]:
import requests
from xml.etree import ElementTree as ET

def fetch_gene_info(chromosome, start, end):
    query_xml = f"""
    <Query virtualSchemaName="default" formatter="TSV" header="0" uniqueRows="1" count="" datasetConfigVersion="0.6">
        <Dataset name="hsapiens_gene_ensembl" interface="default">
            <Filter name="chromosome_name" value="{chromosome}"/>
            <Filter name="start" value="{start}"/>
            <Filter name="end" value="{end}"/>
            <Attribute name="ensembl_gene_id"/>
            <Attribute name="external_gene_name"/>
            <Attribute name="description"/>
            <Attribute name="start_position"/>
            <Attribute name="end_position"/>            
            <Attribute name="strand"/>
        </Dataset>
    </Query>
    """

    biomart_url = "http://www.ensembl.org/biomart/martservice?query="
    response = requests.get(biomart_url + query_xml.strip())

    if response.status_code != 200:
        raise Exception(f"Error fetching data from BioMart: {response.text}")

    genes = [line.split("\t") for line in response.text.strip().split("\n")]
    print(genes)
    return genes

# chrom = "17"
# start = 45620566
# end = 46418024

# genes = fetch_gene_info(chrom, start, end)
# print("Genes in the specified region:")
# for gene in genes:
#     print(gene)
    # gene_id, gene_name, gene_start, gene_end, gene_description, gene_strand = gene
    # strand = "forward" if gene_strand == "1" else "reverse"
    # print(f"Gene ID: {gene_id}, Gene Name: {gene_name}, Start: {gene_start}, End: {gene_end}, Strand: {strand}, Description: {gene_description}")

In [6]:
split_race_region={'ACB': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr7/xan': [[56467013, 56713542]],
  'split_chr11/xaw': [[103206633, 103451621]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr21/xaa': [[10605321, 10733724]]},
  'All':{'split_chr14_xaa': [[19806333, 19976578]],
 'split_chr7_xbc': [[124810550, 125070920]],
 'split_chr1_xak': [[45762038, 46100296]],
 'split_chr1_xbk': [[188243362, 188513984]]},
 'ASW': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr10/xaj': [[38491573, 38687800], [38725237, 38832458]],
  'split_chr11/xaf': [[23272931, 23495433]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xak': [[66346187, 66752333]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'BEB': {'split_chr1/xbx': [[245939605, 246162003]],
  'split_chr2/xan': [[56405225, 56644106]],
  'split_chr3/xav': [[98046342, 98273693]],
  'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr4/xaz': [[115247002, 115485969]],
  'split_chr13/xai': [[55102733, 55443491]],
  'split_chr14/xaa': [[19806333, 19976578]]},
 'CDX': {'split_chr1/xay': [[114816228, 115116610]],
  'split_chr2/xbd': [[130125757, 130339027]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr6/xao': [[64160994, 64523871]],
  'split_chr7/xaf': [[19734421, 19953255]],
  'split_chr9/xau': [[105652970, 105876427]],
  'split_chr10/xan': [[55664607, 55894887]],
  'split_chr12/xbc': [[131390693, 131619328]],
  'split_chr13/xai': [[55102733, 55354096]],
  'split_chr17/xag': [[31068982, 31384157]],
  'split_chr18/xan': [[65996302, 66233011]]},
 'CEU': {'split_chr1/xbe': [[158399387, 158641706]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr11/xaf': [[24934206, 25144712]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr17/xaf': [[21588491, 21823237]],
  'split_chr17/xaj': [[45620566, 46418024]]},
 'CHB': {'split_chr2/xbd': [[130125757, 130339027]],
  'split_chr4/xba': [[119293026, 119587470]],
  'split_chr4/xaz': [[115247002, 115485969]],
  'split_chr6/xao': [[64202650, 64523871]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr7/xaf': [[19775051, 19953255]],
  'split_chr10/xar': [[73060579, 73337762]],
  'split_chr11/xav': [[98072700, 98330585]],
  'split_chr22/xaa': [[16175432, 16345129]]},
 'CHS': {'split_chr3/xav': [[98046342, 98245327]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr6/xao': [[64202650, 64523871]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr8/xaz': [[109058766, 109345554]],
  'split_chr14/xat': [[105509278, 105695533]]},
 'CLM': {'split_chr1/xbd': [[152162701, 152471006]],
  'split_chr3/xav': [[95059735, 95322404], [98046342, 98273693]],
  'split_chr3/xbj': [[163862679, 164224214]],
  'split_chr4/xba': [[119293026, 119587470]],
  'split_chr6/xab': [[7345120, 7558507]],
  'split_chr6/xar': [[76169005, 76460170]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr12/xan': [[61013177, 61213757]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr14/xat': [[106520496, 106674883]]},
 'ESN': {'split_chr2/xau': [[89581761, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr5/xai': [[34311870, 34562837]],
  'split_chr6/xbf': [[145443018, 145762353]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr14/xan': [[80889782, 81152442]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'FIN': {'split_chr3/xbj': [[163862679, 164147845]],
  'split_chr4/xaw': [[97892649, 98142264]],
  'split_chr4/xbh': [[151414350, 151728888]],
  'split_chr6/xat': [[85153819, 85387984]],
  'split_chr6/xbf': [[145485121, 145762353]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr8/xaz': [[109010721, 109345554]],
  'split_chr9/xav': [[112161299, 112408537]],
  'split_chr11/xbb': [[124171937, 124450010]],
  'split_chr18/xac': [[12017790, 12244103]]},
 'GBR': {'split_chr1/xak': [[45628366, 46147211]],
  'split_chr2/xan': [[56405225, 56644106]],
  'split_chr2/xas': [[78325225, 78577282]],
  'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr3/xbi': [[158122703, 158435520]],
  'split_chr4/xba': [[119243365, 119539168]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr7/xbc': [[124760998, 125070920]],
  'split_chr11/xbb': [[124171937, 124450010]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr17/xaf': [[21654230, 21823237]],
  'split_chr17/xaj': [[45587933, 46418024]]},
 'GIH': {'split_chr3/xav': [[98046342, 98276757]],
  'split_chr4/xbd': [[132580391, 132816794]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr11/xah': [[34650776, 34905225]],
  'split_chr14/xaa': [[19806333, 20015421]],
  'split_chr18/xac': [[12017790, 12244103]]},
 'GWD': {'split_chr1/xak': [[45803471, 46100296]],
  'split_chr2/xai': [[38264369, 38464894]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr21/xaa': [[10511846, 10763989]]},
 'IBS': {'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr6/xbf': [[145485121, 145762353]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr17/xaf': [[21619971, 21823237]],
  'split_chr17/xaj': [[45836662, 46418024]],
  'split_chr18/xac': [[12017790, 12244103]],
  'split_chr22/xaa': [[16131708, 16345129]]},
 'ITU': {'split_chr5/xas': [[84862293, 85266031]],
  'split_chr10/xan': [[55664607, 55894887]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xaa': [[19806333, 20047342]],
  'split_chr17/xak': [[52575391, 52889013]]},
 'JPT': {'split_chr3/xav': [[98046342, 98245327]],
  'split_chr5/xbc': [[131688028, 132052794]],
  'split_chr5/xaa': [[676694, 825167]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr13/xap': [[85638733, 85884384]],
  'split_chr14/xad': [[31681845, 31909184]],
  'split_chr22/xaa': [[16175432, 16345129]]},
 'KHV': {'split_chr2/xbq': [[192771748, 193054217]],
  'split_chr3/xav': [[98046342, 98245327]],
  'split_chr3/xbj': [[163946649, 164185047]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr14/xak': [[66298204, 66793088]]},
 'LWK': {'split_chr2/xai': [[38235897, 38464894]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xak': [[66346187, 66752333]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'MSL': {'split_chr1/xak': [[45803471, 46100296]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xay': [[113365150, 113639918]],
  'split_chr7/xaf': [[19775051, 19953255]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr11/xaf': [[23272931, 23495433]],
  'split_chr11/xaw': [[103206633, 103451621]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xat': [[105569851, 105695533]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'MXL': {'split_chr3/xav': [[98046342, 98217489]],
  'split_chr3/xaq': [[74901608, 75192541]],
  'split_chr5/xad': [[11999758, 12263260]],
  'split_chr6/xao': [[64160994, 64404669]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr11/xaf': [[23200593, 23461651]],
  'split_chr12/xad': [[14846240, 15093187]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr12/xap': [[73338587, 73586502]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr17/xaf': [[21654230, 21823237]]},
 'PEL': {'split_chr1/xak': [[45628366, 46147211]],
  'split_chr2/xbo': [[186062200, 186331668]],
  'split_chr3/xav': [[98046342, 98217489]],
  'split_chr3/xba': [[120872663, 121116904]],
  'split_chr4/xaq': [[69225334, 69589122]],
  'split_chr6/xab': [[7345120, 7695065]],
  'split_chr8/xba': [[112137591, 112486758]],
  'split_chr10/xaq': [[68153912, 68481883]],
  'split_chr11/xaf': [[23168664, 23461651]],
  'split_chr11/xam': [[58630320, 58868016]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr16/xab': [[5659954, 5828695]]},
 'PJL': {'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xac': [[28519266, 28761965]],
  'split_chr14/xaa': [[19806333, 20078222]],
  'split_chr17/xak': [[52575391, 52861780]]},
 'PUR': {'split_chr3/xav': [[98003499, 98273693]],
  'split_chr3/xbj': [[163819938, 164063747]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr12/xan': [[61013177, 61213757]],
  'split_chr12/xas': [[83689105, 83928389]],
  'split_chr17/xaf': [[21654230, 21823237]]},
 'STU': {'split_chr3/xav': [[98046342, 98273693]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr13/xai': [[55102733, 55443491]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr16/xaj': [[48512974, 48799209]],
  'split_chr19/xam': [[51801039, 52098595]]},
 'TSI': {'split_chr4/xaw': [[97892649, 98189302]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr7/xag': [[23951217, 24204307]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr9/xai': [[31957257, 32172669]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr17/xaf': [[21654230, 21823237]],
  'split_chr17/xaj': [[45836662, 46544765]]},
 'YRI': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr7/xan': [[56467013, 56713542]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr16/xao': [[72288822, 72684953]],
  'split_chr21/xaa': [[10605321, 10733724]]}}

In [7]:
def generate_gene_info(gene_list_one_record,chrnum):
    result=[]
    result.append(gene_list_one_record[0])
    result.append(gene_list_one_record[1])
    s=gene_list_one_record[3]
    e=gene_list_one_record[4]
    
    pos="chr"+chrnum+":"+str(s)+"-"+str(e)
    result.append(pos)
    result.append(gene_list_one_record[5])
    result.append(gene_list_one_record[2])
    
    return result

In [8]:
import re

def extract_numbers(input_string):
    return re.findall(r'\d+', input_string)

In [9]:
def ifgeneinside_region(complementary_region,gene_region):
    
    s=int(gene_region[0])
    e=int(gene_region[1])
     
    if (complementary_region[0]<=s) and complementary_region[1]>=e:
        return True
    else:
        return False

In [10]:

allrecords=[]

#non_mhc
for race,split_ in split_race_region.items():
    if race!="All":
        for chr,arr in split_.items():
            chr=chr.split("/")[0].split("_")[1]
            chr=extract_numbers(chr)[0]
            print(chr)
            for a in arr:
                gene_list=fetch_gene_info(chromosome=chr,start=a[0]-1,end=a[1]-1)
                if gene_list!=[['']]:
                    for gene_list_record in gene_list:
                        if ifgeneinside_region(complementary_region=a,gene_region=[gene_list_record[3],gene_list_record[4]]):
                            onerecord=[race,0]
                            onerecord+=["chr"+chr+":"+str(a[0])+"-"+str(a[1])]
                            
                            onerecord+=generate_gene_info(gene_list_one_record=gene_list_record,chrnum=chr)
                            allrecords.append(onerecord)
                        else:
                            continue
                
    else:
        for chr,arr in split_.items():
            chr=chr.split("_")[1]
            chr=extract_numbers(chr)[0]
            for a in arr:
                gene_list=fetch_gene_info(chromosome=chr,start=a[0]-1,end=a[1]-1)
                if gene_list!=[['']]:
                    for gene_list_record in gene_list:
                        if ifgeneinside_region(complementary_region=a,gene_region=[gene_list_record[3],gene_list_record[4]]):
                            onerecord=["All",0]
                            onerecord+=["chr"+chr+":"+str(a[0])+"-"+str(a[1])]
                            
                            onerecord+=generate_gene_info(gene_list_one_record=gene_list_record,chrnum=chr)
                            allrecords.append(onerecord)
                        else:
                            continue

2
[['ENSG00000251039', 'IGKV2D-40', 'immunoglobulin kappa variable 2D-40 [Source:HGNC Symbol;Acc:HGNC:5804]', '89851791', '89852493', '1']]
5
[['ENSG00000171368', 'TPPP', 'tubulin polymerization promoting protein [Source:HGNC Symbol;Acc:HGNC:24164]', '659862', '693352', '-1'], ['ENSG00000289088', '', 'novel transcript', '693493', '694831', '1'], ['ENSG00000206077', 'ZDHHC11B', 'zinc finger DHHC-type containing 11B [Source:HGNC Symbol;Acc:HGNC:32962]', '710355', '784729', '-1'], ['ENSG00000249908', 'BRD9P2', 'bromodomain containing 9 pseudogene 2 [Source:HGNC Symbol;Acc:HGNC:51446]', '767382', '768930', '-1'], ['ENSG00000285522', '', 'tubulin polymerization promoting protein (TPPP) pseudogene', '770005', '770113', '-1'], ['ENSG00000288930', '', 'novel transcript', '784825', '786286', '1'], ['ENSG00000188818', 'ZDHHC11', 'zinc finger DHHC-type containing 11 [Source:HGNC Symbol;Acc:HGNC:19158]', '795605', '858973', '-1'], ['ENSG00000247872', 'SPCS2P3', 'signal peptidase complex subunit 2 

In [11]:

mhc_race_positions_dict={'ACB': {'mhc': [[32589647, 32805573]]},
'All':{'mhc':[[32453110, 32577355], [32589647, 32716541]]},
 'ASW': {'mhc': [[29753369, 29913914],
   [29939668, 30120966],
   [32627859, 32843772]]},
 'BEB': {},
 'CDX': {'mhc': [[31295439, 31432528]]},
 'CEU': {'mhc': [[32627859, 32776791]]},
 'CHB': {},
 'CHS': {'mhc': [[32589647, 32716541]]},
 'CLM': {'mhc': [[32453110, 32577355], [32589647, 32732048]]},
 'ESN': {'mhc': [[29720403, 29896285], [32644320, 32843772]]},
 'FIN': {},
 'GBR': {'mhc': [[32453110, 32577355]]},
 'GIH': {'mhc': [[32589647, 32698571]]},
 'GWD': {'mhc': [[29720403, 29913914],
   [29939668, 30085606],
   [32627859, 32732048]]},
 'IBS': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'ITU': {'mhc': [[32423532, 32554290]]},
 'JPT': {'mhc': [[32589647, 32732048]]},
 'KHV': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'LWK': {'mhc': [[32627859, 32805573]]},
 'MSL': {'mhc': [[29720403, 29913914],
   [31317765, 31528792],
   [32554291, 32776791]]},
 'MXL': {'mhc': [[32589647, 32716541]]},
 'PEL': {'mhc': [[32473902, 32616414]]},
 'PJL': {},
 'PUR': {'mhc': [[32589647, 32683157]]},
 'STU': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'TSI': {'mhc': [[32589647, 32698571]]},
 'YRI': {'mhc': [[29720403, 29913914],
   [29939668, 30120966],
   [32589647, 32882258]]}}

In [12]:
len(allrecords)

1341

In [13]:

for race, split_ in mhc_race_positions_dict.items():
    if split_!={}:
        if race!="All":
            for chr,arr in split_.items():
                chr="6" 
                     
                for a in arr:
                    gene_list=fetch_gene_info(chromosome=chr,start=a[0]-1,end=a[1]-1)
                    if gene_list!=[['']]:
                        for gene_list_record in gene_list:
                            if ifgeneinside_region(complementary_region=a,gene_region=[gene_list_record[3],gene_list_record[4]]):
                                onerecord=[race,1]
                                onerecord+=["chr"+chr+":"+str(a[0])+"-"+str(a[1])]
                                
                                onerecord+=generate_gene_info(gene_list_one_record=gene_list_record,chrnum="6")
                                allrecords.append(onerecord)   
                            else:
                                continue                
        else:
            for chr,arr in split_.items():
                chr="6"
                for a in arr:
                    gene_list=fetch_gene_info(chromosome=chr,start=a[0]-1,end=a[1]-1)
                    if gene_list!=[['']]:
                        for gene_list_record in gene_list:
                            if ifgeneinside_region(complementary_region=a,gene_region=[gene_list_record[3],gene_list_record[4]]):
                                onerecord=[race,1]
                                onerecord+=["chr"+chr+":"+str(a[0])+"-"+str(a[1])]
                                
                                onerecord+=generate_gene_info(gene_list_one_record=gene_list_record,chrnum="6")
                                allrecords.append(onerecord)    
                            else:
                                continue

[['ENSG00000196126', 'HLA-DRB1', 'major histocompatibility complex, class II, DR beta 1 [Source:HGNC Symbol;Acc:HGNC:4948]', '32577902', '32589848', '-1'], ['ENSG00000196735', 'HLA-DQA1', 'major histocompatibility complex, class II, DQ alpha 1 [Source:HGNC Symbol;Acc:HGNC:4942]', '32628179', '32647062', '1'], ['ENSG00000179344', 'HLA-DQB1', 'major histocompatibility complex, class II, DQ beta 1 [Source:HGNC Symbol;Acc:HGNC:4944]', '32659467', '32668383', '-1'], ['ENSG00000223534', 'HLA-DQB1-AS1', 'HLA-DQB1 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:39762]', '32659880', '32660729', '1'], ['ENSG00000235040', 'MTCO3P1', 'MT-CO3 pseudogene 1 [Source:HGNC Symbol;Acc:HGNC:31342]', '32706124', '32706955', '-1'], ['ENSG00000232080', '', 'novel transcript', '32718005', '32719170', '1'], ['ENSG00000226030', 'HLA-DQB3', 'major histocompatibility complex, class II, DQ beta 3 [Source:HGNC Symbol;Acc:HGNC:4946]', '32730758', '32731695', '-1'], ['ENSG00000237541', 'HLA-DQA2', 'major histocompatibil

In [14]:
len(allrecords)

1723

In [15]:
#generatedf
def generatedf(columns,allrecords):
    dictforDF=dict()
    for i in range(len(columns)):
        midarr=[]
        for record in allrecords:           
            midarr.append(record[i])
        dictforDF[columns[i]]=midarr
    #dataframe(dictforDF).to_csv("csv2_0721_test.csv")
    return dataframe(dictforDF)


In [16]:
dfnew=generatedf(columns=["Race","isMHC","Position of disassortative mating region","Gene ID","Gene name","Position of gene","is_complement","Gene description"],allrecords=allrecords)

In [17]:
dfnew['is_complement'] = dfnew['is_complement'].apply(lambda x:  "F" if x == "1" else "R")
# dfnew.to_csv("csv2_05_11new.csv")

In [18]:
dfnew

Unnamed: 0,Race,isMHC,Position of disassortative mating region,Gene ID,Gene name,Position of gene,is_complement,Gene description
0,ACB,0,chr2:89762579-89852967,ENSG00000251039,IGKV2D-40,chr2:89851791-89852493,F,immunoglobulin kappa variable 2D-40 [Source:HG...
1,ACB,0,chr5:676694-846549,ENSG00000289088,,chr5:693493-694831,F,novel transcript
2,ACB,0,chr5:676694-846549,ENSG00000206077,ZDHHC11B,chr5:710355-784729,R,zinc finger DHHC-type containing 11B [Source:H...
3,ACB,0,chr5:676694-846549,ENSG00000249908,BRD9P2,chr5:767382-768930,R,bromodomain containing 9 pseudogene 2 [Source:...
4,ACB,0,chr5:676694-846549,ENSG00000285522,,chr5:770005-770113,R,tubulin polymerization promoting protein (TPPP...
...,...,...,...,...,...,...,...,...
1718,YRI,1,chr6:32589647-32882258,ENSG00000204264,PSMB8,chr6:32840717-32844679,R,proteasome 20S subunit beta 8 [Source:HGNC Sym...
1719,YRI,1,chr6:32589647-32882258,ENSG00000204261,PSMB8-AS1,chr6:32844078-32846500,F,PSMB8 antisense RNA 1 (head to head) [Source:H...
1720,YRI,1,chr6:32589647-32882258,ENSG00000240065,PSMB9,chr6:32844136-32859851,F,proteasome 20S subunit beta 9 [Source:HGNC Sym...
1721,YRI,1,chr6:32589647-32882258,ENSG00000168394,TAP1,chr6:32845209-32853816,R,"transporter 1, ATP binding cassette subfamily ..."


In [19]:
dfnew.to_csv("csv2_0722.csv")