Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cms/combine/cms_data.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// functions for handling cms component(+composite) score datastructures
// last updated: 1.02.17 vitti@broadinstitute.org
// last updated: 1.10.17 vitti@broadinstitute.org

#include <stdio.h>
#include <string.h>
Expand Down Expand Up @@ -298,7 +298,7 @@ void get_ihs_data(ihs_data* data, char filename[]) {
else if (itoken == 4) {
data->ihh1[isnp] = atof(token);
}
else if (itoken == 9) {
else if (itoken == 5) {
data->ihs_unnormed[isnp] = atof(token);
}
else if (itoken == 10) {
Expand Down
6 changes: 3 additions & 3 deletions cms/combine/write_xpehh_from_ihh.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ int main(int argc, char **argv) {
exit(0);
}

fprintf(stderr, "found sign error in calculation on 12.28.16, creating potential issues with previous calcs. if using this program, slow down and verify\n");
exit(0);
//fprintf(stderr, "found sign error in calculation on 12.28.16, creating potential issues with previous calcs. if using this program, slow down and verify\n");
// exit(0);

newLine = malloc((line_size+1) * sizeof(char));
assert(newLine != NULL);
Expand Down Expand Up @@ -73,7 +73,7 @@ int main(int argc, char **argv) {
if (pos1 == pos2){
if (ihh1 == 0){ihh1 = 1e-08;}
if (ihh2 == 0){ihh2 = 1e-08;}
xpehh = log(ihh1/ihh2);
xpehh = log(ihh1/ihh2); // times neg 1?
//fprintf(outf, chromstr);
//fprintf(outf, "_");
fprintf(outf, "%d\t%d\t%f\t%f\t%f\t%f\t%f\t%f\n", pos1, pos1, gdpos1, ihh1, p1, ihh2, p2, xpehh);
Expand Down
3 changes: 2 additions & 1 deletion cms/composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ def execute_xp_from_ihh(args):
inputtped1 = args.inIhh1
inputtped2 = args.inIhh2
outfilename = args.outfilename
cmd = "dists/write_xpehh_fromihh"

cmd = args.cmsdir + "combine/write_xpehh_from_ihh"
argstring = inputtped1 + " " + inputtped2 + " " + outfilename
cmdstring = cmd + " " + argstring
if args.printOnly:
Expand Down
9 changes: 1 addition & 8 deletions cms/dists/scores_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def calc_delihh(readfilename, writefilename):
#ancestral - derived
else:
locus, phys, freq_1, ihh_1, ihh_0, ihs_unnormed, ihs_normed, lastcol = entries
unstand_delIHH = fabs(float(ihh_1) - float(ihh_0))
unstand_delIHH = fabs(float(ihh_1) - float(ihh_0)) #WAIT WHY FABS?????????
writeline = locus + "\t" + phys + "\t" + freq_1 + "\t" + str(ihs_unnormed) + "\t" + str(unstand_delIHH) +"\t" + str(unstand_delIHH) + "\n" #6 columns for selscan norm
writefile.write(writeline)
writefile.close()
Expand Down Expand Up @@ -125,8 +125,6 @@ def norm_sel_ihs(inputScoreFile, neutNormfilename):
normfile.close()
print("wrote to: " + normfilename)
return


def norm_neut_xpehh(inputScoreFile, outfileName, runProgram = "scans.py"):
'''from func_clean.py'''
cmdStr = "python " + runProgram + " selscan_norm_xpehh " + inputScoreFile + " > " + outfileName
Expand Down Expand Up @@ -326,7 +324,6 @@ def choose_vals_from_files(filename, numCols, takeindices, comp, stripHeader = F
allpos.extend(chosenpos)
alltoreturn = [allpos, allscores]
return alltoreturn

def choose_from_reps(repscores, reppositions, repanc, mode="max"):
'''flexible function to choose for likes '''
ncomp = len(repscores)
Expand Down Expand Up @@ -369,8 +366,6 @@ def choose_from_reps(repscores, reppositions, repanc, mode="max"):
#print(scores)
#print(itemtoreturn)
return toreturn, allpositions


def calc_hist_from_scores(causal_scores, linked_scores, neut_scores, xlims, givenBins, thinToSize = False):
if thinToSize:
limiting = min(len(causal_scores), len(linked_scores), len(neut_scores))
Expand Down Expand Up @@ -425,8 +420,6 @@ def calc_hist_from_scores(causal_scores, linked_scores, neut_scores, xlims, give
# n_neut[ibin] = 1e-10

return n_causal, n_linked, n_neut, bins_causal, bins_linked, bins_neut


def write_hists_to_files(writePrefix, givenBins, n_causal, n_linked, n_neut):
assert len(givenBins) == (len(n_causal) + 1)
for status in ['causal', 'linked', 'neut']:
Expand Down
6 changes: 6 additions & 0 deletions cms/likes_from_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ def execute_run_sel_sims(args):
return
def execute_scores_from_sims(args):
''' adapted from JV scores_from_tped_vers.py. functions point to scans.py'''
""" PHASE OUT; replace with power.py functions that parallelize by REP (much more efficient)"""

#print("YOUR SHIT'S BUSTED")
#sys.exit(0)


inputFilename, outputFilename = args.inputFilename, args.outputFilename

#################
Expand Down
4 changes: 2 additions & 2 deletions cms/model/Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
## leaf Makefile for cms2.0/model
## last updated 11.4.16 vitti@broadinstitute.org
## last updated 1.25.17 vitti@broadinstitute.org

######################
## DEFINE VARIABLES ##
######################

CC = gcc
CCFLAG = -O0 -ggdb3 -lm -Wall
CCFLAG = -O0 -ggdb3 -lm -lz -Wall

##################
## DEFINE RULES ##
Expand Down
6 changes: 3 additions & 3 deletions cms/model/calc_fst_deldaf.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// last updated 09.13.16 vitti@broadinstitute.org
// last updated 09.13.16 vitti@broadinstitute.org // 1.25.17 experimenting with zlib for large data (eg 1kg tpeds)

#include <stdio.h>
#include <string.h>
Expand Down Expand Up @@ -43,7 +43,7 @@ int main(int argc, char **argv) {
fst_sum = 0;
nfst = 0;
//count alleles for pop0
get_coal_data_tped_vers(&data, inTped1, inRecomfile);
get_coal_data_tped_vers_gz(&data, inTped1, inRecomfile);
//if (data.nsample == 0) {continue;}

nall0[0] = calloc(data.nsnp, sizeof(int));
Expand All @@ -60,7 +60,7 @@ int main(int argc, char **argv) {
free_coal_data(&data);

//count alleles for pop1
get_coal_data_tped_vers(&data, inTped2, inRecomfile);
get_coal_data_tped_vers_gz(&data, inTped2, inRecomfile);
//if (data.nsample == 0) {continue;}

nall0[1] = calloc(data.nsnp, sizeof(int));
Expand Down
175 changes: 156 additions & 19 deletions cms/model/coal_data_tped_vers.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// last updated 10.20.16 vitti@broadinstitute.org
// last updated 1.25.17 vitti@broadinstitute.org

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
//#include <zlib.h>
#include <zlib.h>
#include "coal_data_tped_vers.h"

/************************/
Expand Down Expand Up @@ -97,17 +97,15 @@ void get_coal_data_tped_vers(coal_data* data, char tpedfilename[], char recomfil
//char cmd[600];
char *newLine, *token, *running;
int isamp, isnp, itoken, iRecom;
double genrate;
FILE *inf=NULL;
double genrate;
FILE *inf=NULL;

/**************************
INITIALIZATION, ALLOCATION,
COUNTING nsample nsnp nrecom
**************************/

**************************/
newLine = malloc((line_size+1) * sizeof(char));
assert(newLine != NULL);

data->nsample = 0;
data->nsnp = 0;
data->nRecom = 0;
Expand All @@ -124,20 +122,16 @@ void get_coal_data_tped_vers(coal_data* data, char tpedfilename[], char recomfil

//fprintf(stderr, "Getting information from file: %s\n", tpedfilename);

//handle zipped?

// Count number of SNPs in tped
inf = fopen(tpedfilename, "r");

//handle zipped
//sprintf(cmd, "gunzip -c %s", tpedfilename);
//inf = popen(cmd, "r");

if (inf == NULL) {fprintf(stderr, "Missing TPED file: %s\n", tpedfilename);}
assert (inf != NULL);
while (fgets(newLine, line_size, inf) != NULL) {
assert(strlen(newLine) < line_size);
data->nsnp++;
}

//get number of samples from last line
int ik = 0;
char *pch=strchr(newLine,' ');
Expand All @@ -146,13 +140,12 @@ void get_coal_data_tped_vers(coal_data* data, char tpedfilename[], char recomfil
pch=strchr(pch+1,' ');
}
data->nsample = ik-3;

fclose(inf);
fprintf(stderr, "nSNP: %d\n", data->nsnp);
fprintf(stderr, "nSample: %d\n", data->nsample);

// Count number of lines in recombination file
inf = fopen(recomfilename, "r"); assert(inf != NULL);
inf = fopen(recomfilename, "r"); assert(inf != NULL);
fgets(newLine, line_size, inf); //header
if (inf == NULL) {fprintf(stderr, "Missing recombination file: %s\n", recomfilename);}
while (fgets(newLine, line_size, inf) != NULL) {
Expand Down Expand Up @@ -192,8 +185,6 @@ void get_coal_data_tped_vers(coal_data* data, char tpedfilename[], char recomfil
GET DATA FROM TPED
*******************/
//handle zipped
//sprintf(cmd, "gunzip -c %s", tpedfilename);
///inf = popen(cmd, "r");
inf = fopen(tpedfilename, "r");
if (inf == NULL) {fprintf(stderr, "Missing TPED file: %s\n", tpedfilename);}
assert(inf != NULL);
Expand Down Expand Up @@ -242,13 +233,159 @@ void get_coal_data_tped_vers(coal_data* data, char tpedfilename[], char recomfil
data->genPos[iRecom] = (genrate / 1000000.); //per Mb rate
iRecom++;
break;
}
}
} // end else if
} // end for running
} // end while fgets
fclose(inf);
free(newLine);
} // end function

void get_coal_data_tped_vers_gz(coal_data* data, char tpedfilename[], char recomfilename[]) {
const int line_size = 999999999; // upper limit
const int numRecomLines = 500000;
//char cmd[600];
char *newLine, *token, *running;
int isamp, isnp, itoken, iRecom;
double genrate;
//FILE *inf=NULL;
gzFile *inf=NULL;

/**************************
INITIALIZATION, ALLOCATION,
COUNTING nsample nsnp nrecom
**************************/
newLine = malloc((line_size+1) * sizeof(char));
assert(newLine != NULL);
data->nsample = 0;
data->nsnp = 0;
data->nRecom = 0;
data->samp_id = NULL;
data->pos = NULL;
data->snp_id = NULL;
data->chrom = NULL;
data->genotype = NULL;
data->anc_base = NULL;
data->nallele = NULL;
data->genPos = NULL;
data->physPos = NULL;
data->genloc = NULL;

fprintf(stderr, "Getting information from file: %s\n", tpedfilename);
// Count number of SNPs in tped
inf = gzopen(tpedfilename, "rb");
if (inf == NULL) {fprintf(stderr, "Missing TPED file: %s\n", tpedfilename);}
assert (inf != NULL);
while (gzgets(newLine, line_size, inf) != NULL) {
assert(strlen(newLine) < line_size);
data->nsnp++;
}
//get number of samples from last line
int ik = 0;
char *pch=strchr(newLine,' ');
while (pch!=NULL) {
ik++;
pch=strchr(pch+1,' ');
}
data->nsample = ik-3;
gzclose(inf);
fprintf(stderr, "nSNP: %d\n", data->nsnp);
fprintf(stderr, "nSample: %d\n", data->nsample);

// Count number of lines in recombination file
inf = fopen(recomfilename, "r"); assert(inf != NULL);
fgets(newLine, line_size, inf); //header
if (inf == NULL) {fprintf(stderr, "Missing recombination file: %s\n", recomfilename);}
while (fgets(newLine, line_size, inf) != NULL) {
assert(strlen(newLine) < line_size);
data->nRecom++;
}
fclose(inf);

// Allocate memory; initialize
data->samp_id = malloc(data->nsample * sizeof(char*)); assert(data->samp_id != NULL);
data->genotype = malloc(data->nsample * sizeof(int*)); assert(data->genotype != NULL);
for (isamp = 0; isamp < data->nsample; isamp++) {
data->samp_id[isamp] = malloc(64 * sizeof(char)); assert(data->samp_id[isamp] != NULL);
data->genotype[isamp] = malloc(data->nsnp * sizeof(int)); assert(data->genotype[isamp] !=NULL);
}
data->snp_id = malloc(data->nsnp * sizeof(char*)); assert(data->snp_id != NULL);
for (isnp = 0; isnp < data->nsnp; isnp++) {
data->snp_id[isnp] = malloc(64 * sizeof(char)); assert(data->snp_id[isnp] != NULL);
}
data->pos = malloc(data->nsnp * sizeof(long int)); assert(data->pos != NULL);
data->chrom = malloc(data->nsnp * sizeof(int)); assert(data->chrom != NULL);
data->anc_base = malloc(data->nsnp * sizeof(int)); assert(data->anc_base != NULL);
data->snp_base[0] = malloc(data->nsnp * sizeof(char*)); assert(data->snp_base[0] != NULL);
data->snp_base[1] = malloc(data->nsnp * sizeof(char*)); assert(data->snp_base[1] != NULL);
for (isnp = 0; isnp < data->nsnp; isnp++) {
data->snp_base[0][isnp] = malloc(3 * sizeof(char)); assert(data->snp_base[0][isnp] != NULL);
data->snp_base[1][isnp] = malloc(3 * sizeof(char)); assert(data->snp_base[1][isnp] != NULL);
}
data->genloc = malloc(data->nsnp * sizeof(double)); assert(data->genloc != NULL);
data->snp_base[2] = calloc(data->nsnp, sizeof(char*)); // STRIKE?
data->snp_base[3] = calloc(data->nsnp, sizeof(char*)); // STRIKE?
data->nallele = malloc(data->nsnp * sizeof(int)); assert(data->nallele != NULL);
data->genPos = malloc(numRecomLines * sizeof(double)); assert(data->genPos !=NULL);
data->physPos = malloc(numRecomLines * sizeof(int)); assert(data->physPos != NULL);

/*******************
GET DATA FROM TPED
*******************/
//handle zipped
inf = gzopen(tpedfilename, "rb");
if (inf == NULL) {fprintf(stderr, "Missing TPED file: %s\n", tpedfilename);}
assert(inf != NULL);

isnp = 0;
while (gzgets(newLine, line_size, inf) != NULL) {
for (running = newLine, itoken = 0; (token = strsep(&running, " \t")) != NULL; itoken++) {
if (itoken == 0) {
//data->chrom[isnp] = chromosome;
data->nallele[isnp] = 2; //only biallelics in tped
}
else if (itoken == 1) {
strcpy(data->snp_id[isnp], token);
}
else if (itoken == 2){
data->genloc[isnp] = atof(token);
}

else if (itoken == 3) {
data->pos[isnp] = atoi(token);
}
else if (itoken >= 4) {
isamp = itoken - 4; //RIGHT?
data->genotype[isamp][isnp] = atoi(token);
}
} // END for running=newLine
isnp++;
} //END while(fgets(newLine))
gzclose(inf);

/**************************
GET DATA FROM RECOMB FILE
**************************/
inf = fopen(recomfilename, "r");
if (inf == NULL) {fprintf(stderr, "Missing recombination file: %s\n", recomfilename);}
assert(inf != NULL);
fgets(newLine, line_size, inf); //header
iRecom = 0;
while (fgets(newLine, line_size, inf) != NULL) {
for (running = newLine, itoken = 0; (token = strsep(&running, "\t")) != NULL; itoken++) {
if (itoken == 1) {
data->physPos[iRecom] = atoi(token);
}
else if (itoken == 2) {
genrate = atof(token);
data->genPos[iRecom] = (genrate / 1000000.); //per Mb rate
iRecom++;
break;
} // end else if
} // end for running
} // end while fgets
fclose(inf);
free(newLine);
} // end function

void free_coal_data(coal_data* data) {
int isamp, isnp;
Expand Down
Loading