In [3]:
import math
import numpy as np
import os
import pandas as pd
import random
import shutil
import sys
import gzip

def PROGRESS(msg, printit=True):
    """Write a progress message to stderr.

    The message is stripped of surrounding whitespace and terminated with a
    single newline. Nothing is written when `printit` is falsy, which lets
    callers silence some messages when not in debug mode.
    """
    if not printit:
        return
    line = msg.strip() + "\n"
    sys.stderr.write(line)
        
def MakeZScoreTable(vals):
    """Compute z-scores as the effect size divided by its standard error.

    Parameters
    ----------
    vals
        Any object indexable by the keys 'beta' and 'beta.se'. The callers in
        this file pass a two-column pandas DataFrame, in which case the
        result is the element-wise quotient, indexed like `vals`.

    Returns
    -------
    vals['beta'] / vals['beta.se'], or None when the quotient cannot be
    computed (missing key, non-indexable or non-numeric input, or an
    arithmetic error such as a scalar division by zero).

    NOTE(review): with pandas/numpy operands a zero standard error is expected
    to yield inf/NaN rather than raise, so None would not be returned for
    that case -- confirm if callers rely on it.
    """
    try:
        return vals['beta'] / vals['beta.se']
    except (KeyError, IndexError, TypeError, ValueError, ArithmeticError):
        # Only data problems mean "no z-score". Interpreter-level signals
        # (KeyboardInterrupt, SystemExit, ...) must keep propagating, which a
        # bare `except:` would have prevented.
        return None

def WriteCorrTable(indexed_genotypes):
    """Generate the variant-by-variant correlation table from genotypes.

          _1_ ... _n_
        1| 1  ... C1n
        .|    ...
        n|Cn1 ... Cnn

    Parameters
    ----------
    indexed_genotypes : pandas.DataFrame
        One row per variant (the row labels become the labels of the result)
        and one column per sample. Cells may hold the string 'None' for a
        missing genotype; everything else must be convertible to float.

    Returns
    -------
    pandas.DataFrame
        Square table, rows and columns labelled by variant, holding
        `Series.corr` of every pair of variants. A correlation that cannot be
        determined (NaN) is stored as 0.0. This also applies to the diagonal
        entry of a variant whose own correlation is undefined.

    The number of variants is printed to stdout as a progress/debug aid.
    """
    G = indexed_genotypes.transpose()
    variants = list(G.columns)
    print ('\t\t\t**', len(variants))

    # Parse every variant column once instead of once per pair. Columns are
    # taken by position so that repeated variant labels still yield a Series.
    numeric = [
        G.iloc[:, j].replace('None', np.nan).astype(float)
        for j in range(len(variants))
    ]

    CMat = []
    for X in numeric:
        COV = []
        for Y in numeric:
            r = X.corr(Y)
            # For missing LD we assume no linear correlation (undetermined
            # LD). NaN must be detected by value: `r is np.nan` is an
            # identity test and does not match NaN objects in general.
            COV.append(0.0 if pd.isna(r) else r)
        CMat.append(COV)
    return pd.DataFrame(CMat, columns=variants, index=variants)

def lookfor(x, p):
    """Find the first row after row 0 whose identifier contains `x`.

    Parameters
    ----------
    x : str
        Substring to look for in the first column (e.g. 'SNP_' or 'STR_').
    p : pandas.DataFrame
        Table whose first column holds variant identifiers and whose third
        column holds the score. Rows are scanned in their current order.

    Returns
    -------
    tuple or None
        (row position, identifier, score) of the first match at position 1
        or later, or None when there is no such row. Row 0 is deliberately
        not examined: the callers in this file only search for the variant
        class that row 0 does not belong to.
    """
    # Take the array once; asking the frame for `.values` on every iteration
    # re-derives it each time.
    rows = p.values
    for i in range(1, len(p.index)):
        row = rows[i]
        if x in row[0]:
            return i, row[0], row[2]
    return None
        
        
# ---- Run configuration ----
TISSUE='Adipose-Subcutaneous/'
CHROM = 1
DISTFROMGENE = 100000   # distance added on each side of a gene when selecting cis SNPs
DEBUG =True             # passed as `printit` to the setup PROGRESS messages

# ---- Input / output locations ----
PATH = '/storage/szfeupe/Runs/650GTEx_estr/Analysis_by_Tissue/%s'%TISSUE
EXPRFILE = PATH+'Corr_Expr.csv'
EXPRANNOTFILE = '/storage/resources/dbase/human/hg19/gencode_gene_annotations_hg19.csv'
# Chromosome names are compared as strings of the form "chrN" below.
if "chr" not in str(CHROM): CHROM="chr%s"%CHROM
STRGTFILE = '/storage/szfeupe/Runs/650GTEx_estr/Genotypes/NormalizedGenotypes.table'
SNPGTFILE = '/storage/szfeupe/Runs/650GTEx_estr/SNP_Analysis/%s.tab'%str(CHROM)
OUTFILE = '/storage/szfeupe/Runs/650GTEx_estr/Analysis_by_Tissue/%s/HH/caviar.test_ch3'%TISSUE
REG_STRs = PATH+'/Lin_Reg_Out'
REG_SNPs = PATH+'SNP_Analysis/Lin_Reg_Out'
TMPDIR = PATH+'HH/caviar_temps/'
# makedirs also creates the parent "HH" directory (which OUTFILE lives in);
# a plain mkdir fails when that parent does not exist yet.
os.makedirs(TMPDIR, exist_ok=True)
ESTRGENESFILE=None      # optional table with 'gene' and 'qvalue' columns; None disables the filter


# Load expression
PROGRESS("\nLoad expression", printit=DEBUG)
expr = pd.read_csv(EXPRFILE)
# The expression table's index is used as the list of sample IDs.
# NOTE(review): this assumes read_csv puts the sample IDs in the index of
# Corr_Expr.csv -- confirm against the file layout.
samples_to_keep = list(expr.index)
# Load annotation
PROGRESS("Load annotation", printit=DEBUG)
expr_annot = pd.read_csv(EXPRANNOTFILE)
expr_annot.index = expr_annot["probe.id"].values
# Keep only annotated genes that have an expression column, on this chromosome.
expr_annot = expr_annot.reindex(list(expr.columns))
expr_annot = expr_annot.dropna()
expr_annot = expr_annot[expr_annot["gene.chr"] == CHROM]
# Load strs Regression
PROGRESS("\nLoad strs regression", printit=DEBUG)
strs = pd.read_csv(REG_STRs, sep="\t")
strs = strs.loc[strs['chrom']==CHROM]
# Load snps regression
PROGRESS("\nLoad snps regression", printit=DEBUG)
snps = pd.read_csv(REG_SNPs, sep="\t")
snps = snps.loc[snps['chrom']==CHROM]
# Drop the stray column when present; an unconditional `del` raises KeyError
# if the file has no such column.
if 'Unnamed: 0' in snps.columns:
    del snps['Unnamed: 0']
#Load SNP genotypes
PROGRESS("Load SNPs", printit=DEBUG)
snpgt = pd.read_csv(SNPGTFILE, sep="\t",low_memory=False)
snpgt = snpgt.loc[snpgt['chrom']==CHROM]
# Load STR genotypes
PROGRESS("Load STRs", printit=DEBUG)
strgt = pd.read_csv(STRGTFILE, sep="\t")
strgt = strgt.loc[strgt['chrom']==CHROM]
# Restrict to STR samples
PROGRESS("Restrict to STRs samples", printit=DEBUG)
str_samples = samples_to_keep
expr = expr.reindex(str_samples)
# Genotype tables are re-indexed by "SNP_<start>" / "STR_<start>" so that the
# per-gene code can select rows by variant ID.
snpgt = snpgt[["chrom","start"] + str_samples]
snpgt.index = list(snpgt["start"].apply(lambda x: "SNP_%s"%int(x)))
strgt = strgt[["chrom","start"] + str_samples]
try:           #Control for inexisting STRs (Should not happen)
    strgt.index = list(strgt["start"].apply(lambda x: "STR_%s"%int(x)))
except (ValueError, TypeError):
    # int() could not convert a start position (e.g. missing value): show the
    # offending table and stop.
    print (strgt)
    sys.exit(1)
# Load eSTR results
PROGRESS("Restrict to eSTR genes only", printit=DEBUG)
if ESTRGENESFILE is not None:
    estr_genes = pd.read_csv(ESTRGENESFILE, sep="\t")
    Genes = estr_genes.loc[estr_genes['qvalue']<=0.1]['gene']  # estrs at 10%FDR
    expr_annot = expr_annot.loc[expr_annot['gene.id'].isin(list(Genes))]
#open output files
# Both handles are deliberately left open: the per-gene loop cells write to them.
Errorfile = open(TMPDIR+"/Errorfile.out", 'w')
OUT = open(OUTFILE, "w")
PROGRESS("Start output file "+OUTFILE, printit=DEBUG)
OUT.write("\t".join( ['CHROM','gene','num.strs.in.top5','top_snp','top_snp_score','top_str','top.str.score','str.rank'])+'\n')
# For each gene, get all cis-variants and the best STR

Load expression
Load annotation
Load strs regression
Load snps regression
Load SNPs
Load STRs
Restrict to STRs samples
Restrict to eSTR genes only
Start output file /storage/szfeupe/Runs/650GTEx_estr/Analysis_by_Tissue/Adipose-Subcutaneous//HH/caviar.test_ch3


81

In [50]:
count=0   # genes for which a result row was written
roo=0     # genes for which CAVIAR produced no caviar_post
for i in range(expr_annot.shape[0]):
    # NOTE(review): debugging limiter -- only the first two genes are processed.
    if i==2:
        break
    gene=expr_annot.index.values[i]
    ensgene = expr_annot["gene.id"].values[i]
    genedir=TMPDIR+"/%s"%gene
    if not os.path.exists(genedir):
        os.mkdir(genedir)
    # Clear files of earlier runs: the existence of caviar_post is what decides
    # further down whether CAVIAR succeeded.
    clear_cmd = "rm "+genedir+'/*'
    os.system(clear_cmd)
    PROGRESS("Getting data for %s"%gene, printit=DEBUG)
    start = expr_annot["gene.start"].values[i]
    end = expr_annot["gene.stop"].values[i]
# Pull out cis SNPs
    PROGRESS("Getting cis SNPs for %s"%gene)
    cis_snps = snps[(snps["str.start"] >= (start-DISTFROMGENE)) & (snps["str.start"] <= (end+DISTFROMGENE))]
    cis_snps = cis_snps.loc[cis_snps['gene']==ensgene]
    # Only SNPs that also have genotypes can enter the LD matrix.
    cis_variants = cis_snps.loc[cis_snps["str.start"].isin(list(snpgt["start"]))]
    # Keep the 100 SNPs with the smallest p.wald for this gene.
    cis_snps=cis_variants.sort_values(by="p.wald").head(n=100).copy()
    cis_snps.index = cis_snps["str.start"].apply(lambda x: "SNP_%s"%int(x))
    L=list(cis_snps.index)
# Pull out all cis STRs
    PROGRESS("Getting cis STRs for %s"%gene)
    cis_strs = strs[strs["gene"]==ensgene].sort_values("p.wald")
    if cis_strs.shape[0]==0 :
        PROGRESS("There are no STRs found for %s... Gene possibly not in LR table"%gene)
        continue
    elif cis_snps.shape[0]<=1:
        PROGRESS("There are no or not enough SNPs found for %s... Or gene not in LR table"%gene)
        continue
    else:
        cis_strs.index = cis_strs["str.start"].apply(lambda x: "STR_%s"%int(x))
        L0 = list(cis_strs.index)
    # SNP rows first, then STR rows: the same order is used for the LD matrix.
    cis_variants = pd.concat([cis_snps, cis_strs])
    print(len(L), len(L0), len(set(L0)), cis_variants.shape) #*
# Make z file data
    Ztable = MakeZScoreTable(cis_variants[['beta','beta.se']])
    if Ztable is None:
        Errorfile.write(gene+": Z score could not be calculated; beta.se is probably 0 or null\n")
        continue
    else:
        Ztable.to_csv(genedir+'/ZFILE', sep='\t',header=None)
# Make LD file
    genotypes = snpgt.loc[L]
    genotypes = pd.concat([genotypes, strgt.loc[L0] ])
    del genotypes['chrom']
    del genotypes['start']
    CorrMatrix = WriteCorrTable(genotypes)
    CorrMatrix.to_csv(genedir+'/LDFILE', sep='\t',header=None, index=None)
#Run caviar
    caviar_cmd = "CAVIAR -l %s -z %s -o %s/caviar -c 1 -f 1 > %s"%(genedir+"/LDFILE", genedir+"/ZFILE", genedir, genedir+"/log")
    os.system(caviar_cmd)

    # The two files are read back with read_csv defaults (first line taken as
    # header), so the reported shapes are for diagnostics only.
    PROGRESS("Matrix of corr was sent to file for %s \n LDFILE: %s \t ZFILE: %s \t CORR_MATRIX: %s"%(gene, str(pd.read_csv(genedir+"/LDFILE").shape), str(pd.read_csv(genedir+"/ZFILE").shape), str(CorrMatrix.shape) ) )

#Output results
    if not os.path.exists(genedir+'/caviar_post'):
        PROGRESS("CAVIAR did not run for %s"%gene)
        Errorfile.write(gene+": CAVIAR did not run.\n\tERROR: Segmentation fault (core dumped) in log file\n")
        roo=roo+1 #*
        print("%s: CAVIAR did not run\tERROR: Segmentation fault"%gene) #*
        continue
    else:
        post = pd.read_csv(genedir+'/caviar_post', sep="\t", header=None)
        # Rank variants by the third column, best first; the index is reset so
        # that label 0 is the top-ranked row.
        post = post.sort_values(post.columns[2], ascending=False)
        post = post.reset_index(drop=True)
        p = post.head(5)
        num_str = len([x for x in list(p[0].values) if "STR_" in x])
        PROGRESS("Top 5 variants CAVIAR scores for %s"%gene)
        top_id = p[0][0]
        top_score = p.values[0][2]
        top_is_str = 'STR_' in top_id
        # Best-ranked variant of the class the top hit does not belong to.
        other = lookfor('SNP_' if top_is_str else 'STR_', post)
        if other is None:
            # lookfor found nothing; unpacking its None result would raise
            # TypeError and abort the whole run.
            Errorfile.write(gene+": caviar_post has no variant of the other class below the top hit\n")
            continue
        I, other_id, other_score = other
        if top_is_str:
            topstr, topstrscore = top_id, top_score
            topsnp, topsnpscore = other_id, other_score
        else:
            topsnp, topsnpscore = top_id, top_score
            topstr, topstrscore = other_id, other_score
#Output top 5 variants by score values ... The rest can be found on caviar_post fine in gene DIR
        # NOTE(review): the last field goes under the 'str.rank' header, but when
        # the top hit is an STR, I is the position of the best SNP -- confirm
        # the intended meaning.
        fields = [CHROM, gene, str(num_str),topsnp,str(topsnpscore),str(topstr), str(topstrscore),str(I+1)]
        OUT.write("\t".join(fields)+'\n')
        count=count+1 #*
        # The running count is printed as its own tab-separated field so it
        # cannot be misread as part of the rank.
        print("\t".join(fields + [str(count)])) #*

CorrMatrix

Getting data for ENSG00000237683.5
Getting cis SNPs for ENSG00000237683.5
Getting cis STRs for ENSG00000237683.5
There are no STRs found for ENSG00000237683.5... Gene possibly not in LR table
Getting data for ENSG00000187634.6
Getting cis SNPs for ENSG00000187634.6
Getting cis STRs for ENSG00000187634.6


100 6 6 (106, 12)
** 106 (270, 106) (106, 106)
chr1	ENSG00000187634.6	0	SNP_963912	0.275014	STR_886040	0.00123895	1011


Matrix of corr was sent to file for ENSG00000187634.6 
 LDFILE: (105, 1) 	 ZFILE: (105, 1) 	 CORR_MATRIX: (106, 106)
Top 5 variants CAVIAR scores for ENSG00000187634.6


Unnamed: 0,SNP_963912,SNP_839899,SNP_839918,SNP_839912,SNP_839897,SNP_839871,SNP_839940,SNP_839934,SNP_839911,SNP_839873,...,SNP_948921,SNP_948870,SNP_856476,SNP_836924,STR_886040,STR_952495,STR_946128,STR_930889,STR_900684,STR_937563
SNP_963912,1.000000,-0.051913,-0.031858,-0.019064,-0.051989,-0.050471,-0.035926,-0.031117,-0.024820,-0.063407,...,-0.115868,-0.115868,0.040408,-0.021438,0.144559,0.019994,0.145581,0.039133,0.108416,-0.135491
SNP_839899,-0.051913,1.000000,0.952852,0.866488,0.967070,0.877776,0.883301,0.906043,0.866427,0.880210,...,0.025636,0.025636,0.241050,0.621193,-0.002185,-0.004424,0.023580,0.083566,0.131988,-0.043024
SNP_839918,-0.031858,0.952852,1.000000,0.867021,0.936254,0.843976,0.916218,0.939254,0.857449,0.859631,...,0.083588,0.083588,0.257092,0.631263,-0.024984,0.007351,-0.001603,0.070094,0.104977,-0.026815
SNP_839912,-0.019064,0.866488,0.867021,1.000000,0.836994,0.841634,0.793643,0.819436,0.998280,0.806626,...,0.016662,0.016662,0.387952,0.572624,0.081235,0.013589,0.040266,0.138687,0.242983,-0.030932
SNP_839897,-0.051989,0.967070,0.936254,0.836994,1.000000,0.839902,0.820692,0.842383,0.837024,0.824574,...,0.030101,0.030101,0.212285,0.569089,-0.025469,-0.022240,0.042757,0.072647,0.099144,-0.023602
SNP_839871,-0.050471,0.877776,0.843976,0.841634,0.839902,1.000000,0.791858,0.812583,0.841788,0.954546,...,0.042518,0.042518,0.198621,0.581267,-0.018291,0.009084,0.019232,0.058929,0.081483,-0.023718
SNP_839940,-0.035926,0.883301,0.916218,0.793643,0.820692,0.791858,1.000000,0.976387,0.789221,0.814134,...,0.081097,0.081097,0.278145,0.578450,-0.013735,0.040909,-0.001400,0.103857,0.162234,-0.054924
SNP_839934,-0.031117,0.906043,0.939254,0.819436,0.842383,0.812583,0.976387,1.000000,0.814997,0.824827,...,0.077175,0.077175,0.279521,0.598327,-0.007972,0.038315,0.003954,0.086024,0.144861,-0.044336
SNP_839911,-0.024820,0.866427,0.857449,0.998280,0.837024,0.841788,0.789221,0.814997,1.000000,0.806466,...,0.003118,0.003118,0.391171,0.567723,0.109611,0.005052,0.047211,0.143321,0.259649,-0.043105
SNP_839873,-0.063407,0.880210,0.859631,0.806626,0.824574,0.954546,0.814134,0.824827,0.806466,1.000000,...,0.035619,0.035619,0.249089,0.621001,-0.019875,0.033171,0.016938,0.088755,0.126809,-0.044853


In [54]:
# Show the z-scores with repeated variant IDs removed; for each ID only its
# first occurrence is kept (the default of Index.duplicated).
Ztable.loc[~Ztable.index.duplicated()]

str.start
SNP_963912   -3.882324
SNP_839899   -2.596841
SNP_839918   -2.581448
SNP_839912   -2.536859
SNP_839897   -2.533996
SNP_839871   -2.523541
SNP_839940   -2.514151
SNP_839934   -2.504812
SNP_839911   -2.495886
SNP_839873   -2.450074
SNP_839859   -2.431121
SNP_839872   -2.409711
SNP_839881   -2.372411
SNP_839900   -2.360138
SNP_901023   -2.327418
SNP_839941   -2.300413
SNP_842362   -2.276383
SNP_839919   -2.254493
SNP_856329   -2.247531
SNP_839933   -2.193937
SNP_839916   -2.144735
SNP_844399    2.036087
SNP_874950   -2.028236
SNP_956227    2.027145
SNP_839858   -2.022115
SNP_884091    1.950343
SNP_854777    1.891235
SNP_844323    1.861692
SNP_844459    1.848972
SNP_942705    1.823996
                ...   
SNP_858040   -1.243109
SNP_935671   -1.242353
SNP_852063    1.220024
SNP_852037    1.220024
SNP_834999   -1.215960
SNP_834928   -1.215960
SNP_856436   -1.197072
SNP_949235   -1.168489
SNP_842057   -1.167950
SNP_945111    1.163163
SNP_859685   -1.163223
SNP_909419    1.161310
S

In [53]:
# Show the distinct gene IDs present among the selected cis variants.
set(cis_variants['gene'])

{'ENSG00000187634.6'}

In [None]:
    # For each gene, get all cis-variants and the best STR
    for i in range(expr_annot.shape[0]):
        gene=expr_annot.index.values[i]
        ensgene = expr_annot["gene.id"].values[i]  #'ENSG00000215912.7'
        genedir=TMPDIR+"/%s"%gene
        if not os.path.exists(genedir):
            os.mkdir(genedir)
        # Clear files of earlier runs: the existence of caviar_post is what
        # decides further down whether CAVIAR succeeded.
        clean_cmd='rm '+genedir+'/*'
        os.system(clean_cmd)
        PROGRESS("Getting data for %s"%gene, printit=DEBUG)
        start = expr_annot["gene.start"].values[i]
        end = expr_annot["gene.stop"].values[i]
    # Pull out cis SNPs
        PROGRESS("Getting cis SNPs for %s"%gene)
        cis_snps = snps[(snps["str.start"] >= (start-DISTFROMGENE)) & (snps["str.start"] <= (end+DISTFROMGENE))]
        # Only SNPs that also have genotypes can enter the LD matrix.
        cis_snps = cis_snps.loc[cis_snps["str.start"].isin(list(snpgt["start"]))]
        cis_snps = cis_snps.loc[cis_snps['gene']==ensgene]
        # Keep the 100 SNPs with the smallest p.wald; this same selection must
        # feed both the z-score file and the LD matrix.
        cis_snps = cis_snps.sort_values(by="p.wald").head(n=100).copy()
        cis_snps.index = cis_snps["str.start"].apply(lambda x: "SNP_%s"%int(x))
        L=list(cis_snps.index)
    # Pull out cis STR
        PROGRESS("Getting most significant cis STR for %s"%gene)
        cis_strs = strs[strs["gene"]==ensgene].sort_values("p.wald")
        if cis_strs.shape[0]==0:
            PROGRESS("There are no STRs found for %s... Gene not in LR table"%gene)
            continue
        elif cis_snps.shape[0]<=1:
            PROGRESS("There are no or not enough SNPs found for %s... Or gene not in LR table"%gene)
            continue
        else:
            cis_strs.index = cis_strs["str.start"].apply(lambda x: "STR_%s"%int(x))
            L0 = list(cis_strs.index)
        # SNP rows first, then STR rows: the same order is used for the LD matrix.
        cis_variants = pd.concat([cis_snps, cis_strs])
    # Make z file data
        Ztable = MakeZScoreTable(cis_variants[['beta','beta.se']])
        if Ztable is None:
            Errorfile.write(gene+": Z score could not be calculated; beta.se is probably 0 or null\n")
            continue
        else:
            Ztable.to_csv(genedir+'/ZFILE', sep='\t',header=None)
    # Make LD file
        genotypes = snpgt.loc[L]
        genotypes = pd.concat([genotypes, strgt.loc[L0] ])
        del genotypes['chrom']
        del genotypes['start']
        CorrMatrix = WriteCorrTable(genotypes)
        CorrMatrix.to_csv(genedir+'/LDFILE', sep='\t',header=None, index=None)
        PROGRESS("Matrix of corr was sent to file for %s"%gene)
    #Run caviar
        caviar_cmd = "CAVIAR -l %s -z %s -o %s/caviar -c 1 -f 1 > %s"%(genedir+"/LDFILE", genedir+"/ZFILE", genedir, genedir+"/log")
        os.system(caviar_cmd)
    #Output results
        if not os.path.exists(genedir+'/caviar_post'):
            Errorfile.write(gene+": CAVIAR did not run.\n\tERROR: Segmentation fault (core dumped) in log file\n")
            continue
        else:
            post = pd.read_csv(genedir+'/caviar_post', sep="\t", header=None)
            # Rank variants by the third column, best first; the index is
            # reset so that label 0 is the top-ranked row.
            post = post.sort_values(post.columns[2], ascending=False)
            post = post.reset_index(drop=True)
            p = post.head(5)
            num_str = len([x for x in list(p[0].values) if "STR_" in x])
            top_id = p[0][0]
            top_score = p.values[0][2]
            top_is_str = 'STR_' in top_id
            # Best-ranked variant of the class the top hit does not belong to.
            other = lookfor('SNP_' if top_is_str else 'STR_', post)
            if other is None:
                # lookfor found nothing; unpacking its None result would raise.
                Errorfile.write(gene+": caviar_post has no variant of the other class below the top hit\n")
                continue
            I, other_id, other_score = other
            if top_is_str:
                topstr, topstrscore = top_id, top_score
                topsnp, topsnpscore = other_id, other_score
            else:
                topsnp, topsnpscore = top_id, top_score
                topstr, topstrscore = other_id, other_score
            # Eight fields, matching the eight-column header of the output file.
            OUT.write("\t".join([CHROM, gene, str(num_str), topsnp, str(topsnpscore), str(topstr), str(topstrscore), str(I+1)])+'\n')
            print("**\t".join([CHROM, gene, str(num_str), topsnp, str(topsnpscore), str(topstr), str(topstrscore), str(I)])+'\n')

    # Flush and release both report files once every gene has been handled.
    OUT.close()
    Errorfile.close()

In [22]:
# Debug check: show what lookfor() returns for the best-ranked STR in `post`.
# It is a (position, id, score) triple, or None when no row after the first
# contains 'STR_'.
# Sample of an earlier triple, as noted by the author: 1 SNP_82865 0.0104928
str_hit = lookfor('STR_', post)
print(str_hit)

None


In [None]:
#OLD LOOP
# Earlier version of the per-gene CAVIAR loop. It additionally appends the
# score of every cis STR to the file 'strs_core<CHROM>'.
for i in range(expr_annot.shape[0]):
    gene=expr_annot.index.values[i]
    ensgene = expr_annot["gene.id"].values[i]  #'ENSG00000215912.7'
    genedir=TMPDIR+"/%s"%gene
    if not os.path.exists(genedir):
        os.mkdir(genedir)
    # Clear files of earlier runs: the existence of caviar_post is what
    # decides further down whether CAVIAR succeeded.
    clear_cmd = "rm "+genedir+'/*'
    os.system(clear_cmd)
    PROGRESS("Getting data for %s"%gene, printit=DEBUG)
    start = expr_annot["gene.start"].values[i]
    end = expr_annot["gene.stop"].values[i]
# Pull out cis SNPs
    PROGRESS("Getting cis SNPs for %s"%gene)
    cis_snps = snps[(snps["str.start"] >= (start-DISTFROMGENE)) & (snps["str.start"] <= (end+DISTFROMGENE))]
    cis_snps = cis_snps.loc[cis_snps['gene']==ensgene].copy()
    cis_snps.index = cis_snps["str.start"].apply(lambda x: "SNP_%s"%int(x))
    print(cis_snps.shape, '##SNPs#')
    # Only SNPs that also have genotypes can enter the LD matrix.
    cis_variants = cis_snps.loc[cis_snps["str.start"].isin(list(snpgt["start"]))]
    # Keep the 100 SNPs with the smallest p.wald for this gene.
    cis_snps=cis_variants.sort_values(by="p.wald").head(n=100)
    L=list(cis_snps.index)
# Pull out cis STR
    PROGRESS("Getting all cis STR for %s"%gene)
    cis_strs = strs[strs["gene"]==ensgene].sort_values("p.wald")
    if cis_strs.shape[0]==0 :
        PROGRESS("There are no STRs found for %s... Gene not in LR table"%gene)
        continue
    elif cis_snps.shape[0]<=1:
        PROGRESS("There are no or not enough SNPs found for %s... Gene not in LR table"%gene)
        continue
    else:
        cis_strs.index = cis_strs["str.start"].apply(lambda x: "STR_%s"%int(x))
        L0 = list(cis_strs.index)
    # SNP rows first, then STR rows: the same order is used for the LD matrix.
    cis_variants = pd.concat([cis_snps, cis_strs])
    print(len(L), len(L0), cis_variants.shape)
# Make z file data
    Ztable = MakeZScoreTable(cis_variants[['beta','beta.se']])
    if Ztable is None:
        Errorfile.write(gene+": Z score could not be calculated; beta.se is probably 0 or null\n")
        continue
    else:
        Ztable.to_csv(genedir+'/ZFILE', sep='\t',header=None)
# Make LD file
    genotypes = snpgt.loc[L]
    genotypes = pd.concat([genotypes, strgt.loc[L0] ])
    del genotypes['chrom']
    del genotypes['start']
    CorrMatrix = WriteCorrTable(genotypes)
    CorrMatrix.to_csv(genedir+'/LDFILE', sep='\t',header=None, index=None)
    PROGRESS("Matrix of corr for %s LDFILE %s ZFILE %s "%(gene, str(CorrMatrix.shape), str(Ztable.shape)))
#Run caviar
    caviar_cmd = "CAVIAR -l %s -z %s -o %s/caviar -c 1 -f 1 > %s"%(genedir+"/LDFILE", genedir+"/ZFILE", genedir, genedir+"/log")
    os.system(caviar_cmd)
#Output results
    PROGRESS(genedir)
    if not os.path.exists(genedir+'/caviar_post'):
        Errorfile.write(gene+": CAVIAR did not run.\n\tERROR: Segmentation fault (core dumped) in log file\n")
        continue
    else:
        post = pd.read_csv(genedir+'/caviar_post', sep="\t", header=None)
        # Rank variants by the third column, best first. The index must be
        # reset: p[0][0] below is a label lookup, and without the reset label 0
        # is the first row of the file rather than the top-ranked row.
        post = post.sort_values(post.columns[2], ascending=False)
        post = post.reset_index(drop=True)
        p = post.head(5)
        print('top 5 ranked variants', p.shape)
        num_str = len([x for x in list(p[0].values) if "STR_" in x])
        top_id = p[0][0]
        top_score = p.values[0][2]
        top_is_str = 'STR_' in top_id
        # Best-ranked variant of the class the top hit does not belong to.
        other = lookfor('SNP_' if top_is_str else 'STR_', post)
        if other is None:
            # lookfor found nothing; unpacking its None result would raise.
            Errorfile.write(gene+": caviar_post has no variant of the other class below the top hit\n")
            continue
        I, other_id, other_score = other
        if top_is_str:
            topstr, topstrscore = top_id, top_score
            topsnp, topsnpscore = other_id, other_score
        else:
            topsnp, topsnpscore = top_id, top_score
            topstr, topstrscore = other_id, other_score
        OUT.write("\t".join([CHROM, gene, str(num_str),topsnp,str(topsnpscore),str(topstr), str(topstrscore),str(I+1)])+'\n')
        # Per-STR scores for this gene, appended to a chromosome-wide file.
        strsscores = post.loc[post[0].isin(cis_strs.index)][[0,2]].copy()
        strsscores['chrom']=[CHROM]*strsscores.shape[0]
        strsscores['gene']= [gene]*strsscores.shape[0]
        strsscores.columns = ['str','score', 'chrom', 'gene']
        with open('strs_core'+CHROM, 'a') as k:
            (strsscores[['chrom', 'gene','str','score']]).to_csv(k, header=False, index=False,sep='\t')
    #break

