Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 20 additions & 73 deletions cms/likes_from_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def full_parser_likes_from_model():
likes_from_scores_parser = subparsers.add_parser('likes_from_scores', help='Collate scores from simulated data in order to generate component test probability distributions.')
if True:
likes_from_scores_parser.add_argument('neutFile', action='store', help='file with scores for neutral scenarios (normalized if necessary)')
likes_from_scores_parser.add_argument('selFile_firstBin', action='store', help='file with scores for selected scenarios (normalized if necessary) for first selscenario bin; will infer other binfiles from this and freqRange/nBins')
likes_from_scores_parser.add_argument('selFile', action='store', help='file with scores for selected scenarios (normalized if necessary)')
likes_from_scores_parser.add_argument('selPos', action='store', type=int, help='position of causal variant', default=500000)
likes_from_scores_parser.add_argument('outPrefix', action='store', help='save file as')
likes_from_scores_parser.add_argument('--thinToSize', action='store_true', help='subsample from simulated SNPs (since nSel << nLinked < nNeut)')
Expand Down Expand Up @@ -278,84 +278,31 @@ def execute_likes_from_scores(args):
expectedlen_neut, indices_neut = get_indices('nsl', "neut")
expectedlen_sel, indices_sel = get_indices('nsl', "sel")
histBins,scoreRange,yLims = get_hist_bins('nsl', numLikesBins)
"""
As previously written, this routine takes in a single (concatenated) selscore and and a single neutscore file , then separates out each SNP according to ITS OWN MAF.
I don't think this is right. I think we want to concat all reps for a given selscenario, and let the selSNP's MAF determine which bin SNPs(/reps) get sorted into.
i.e., I think this previous approach will divvy up SNPs from a single replicate into multiple separate probability density functions, and I don't think that's legit (same haplotype!)
This seems a rehashing of my alternate CMS scoring scheme confusion from like, 2013

val_array = load_vals_from_files(args.neutFile, expectedlen_neut, indices_neut, stripHeader)
neut_positions, neut_score_final, neut_anc_freq = val_array[0], val_array[1], val_array[2]
val_array = load_vals_from_files(args.selFile, expectedlen_sel, indices_sel, stripHeader)
sel_positions, sel_score_final, sel_anc_freq = val_array[0], val_array[1], val_array[2]

#################
## SORT BY MAF ## 10.04.16 need to revisit this
#################

neut_der_freq = [1. - float(x) for x in neut_anc_freq]
sel_der_freq = [1. - float(x) for x in sel_anc_freq]

for ibin in range(len(bin_starts)):
causal_indices = [i for i, x in enumerate(sel_positions) if x == args.selPos and sel_der_freq[i]>=bin_starts[ibin] and sel_der_freq[i]<bin_ends[ibin]]
linked_indices = [i for i, x in enumerate(sel_positions) if x != args.selPos and sel_der_freq[i]>=bin_starts[ibin] and sel_der_freq[i]<bin_ends[ibin]]
neutral_indices = [i for i, x in enumerate(neut_positions) if neut_der_freq[i]>=bin_starts[ibin] and neut_der_freq[i]<bin_ends[ibin]]
print("loaded " +str(len(causal_indices)) + " causal variants, " + str(len(linked_indices)) + " linked variants, and " + str(len(neutral_indices)) + " neutral variants." )

causal_positions = [sel_positions[variant] for variant in causal_indices]
causal_score_final = [sel_score_final[variant] for variant in causal_indices]

linked_positions = [sel_positions[variant] for variant in linked_indices]
linked_score_final = [sel_score_final[variant] for variant in linked_indices]

neutral_positions = [neut_positions[variant] for variant in neutral_indices]
neutral_score_final = [neut_score_final[variant] for variant in neutral_indices]

for datatype in datatypes:
key = (bin_medians_str[ibin], datatype) #BINS!
data[key] = eval(datatype)
else:
print("error")

#################
## BIN SCORES ##
#################
for ibin in range(len(bin_starts)):
xlims = scoreRange
causal_scores, linked_scores, neut_scores = data[(bin_medians_str[ibin], 'causal_score_final')], data[(bin_medians_str[ibin], 'linked_score_final')], data[(bin_medians_str[ibin], 'neutral_score_final')]
n_causal, n_linked, n_neut, bin_causal, bins_linked, bins_neut = calc_hist_from_scores(causal_scores, linked_scores, neut_scores, xlims, int(numLikesBins), args.thinToSize)
write_hists_to_files(args.outPrefix +"_" + bin_medians_str[ibin], histBins, n_causal, n_linked, n_neut)
print("wrote to " + args.outPrefix +"_" + bin_medians_str[ibin])
"""
binfilenamelast = ""
xlims = scoreRange
val_array = load_vals_from_files(args.neutFile, expectedlen_neut, indices_neut, stripHeader)
neut_positions, neut_score_final, neut_anc_freq = val_array[0], val_array[1], val_array[2]

firstbinfilename = args.selFile_firstBin
binfile_entries = firstbinfilename.split(str(bin_medians_str[0]))
for bin_median in bin_medians_str:
binfilename = bin_median.join(binfile_entries)#binfile_entries.join(bin_median)

if binfilename == binfilenamelast:
print("error iterating over selfreq bins; make sure that your scores are sorted into directories according to final sel DAF")

val_array = load_vals_from_files(binfilename, expectedlen_sel, indices_sel, stripHeader)
sel_positions, sel_score_final, sel_anc_freq = val_array[0], val_array[1], val_array[2]
val_array = load_vals_from_files(args.selFile, expectedlen_sel, indices_sel, stripHeader)
sel_positions, sel_score_final, sel_anc_freq = val_array[0], val_array[1], val_array[2]

causal_indices = [i for i, x in enumerate(sel_positions) if x == args.selPos]
linked_indices = [i for i, x in enumerate(sel_positions) if x != args.selPos]
causal_indices = [i for i, x in enumerate(sel_positions) if x == args.selPos]
linked_indices = [i for i, x in enumerate(sel_positions) if x != args.selPos]

print("loaded " +str(len(causal_indices)) + " causal variants, " + str(len(linked_indices)) + " linked variants from:" + binfilename)
print("loaded " + str(len(neut_positions)) + " neutral variants from : " + args.neutFile)
print(" and "+str(len(causal_indices)) + " causal variants, and " + str(len(linked_indices)) + " linked variants from: " + args.selFile)

causal_positions = [sel_positions[variant] for variant in causal_indices]
causal_score_final = [sel_score_final[variant] for variant in causal_indices]
causal_positions = [sel_positions[variant] for variant in causal_indices]
causal_score_final = [sel_score_final[variant] for variant in causal_indices]

linked_positions = [sel_positions[variant] for variant in linked_indices]
linked_score_final = [sel_score_final[variant] for variant in linked_indices]
linked_positions = [sel_positions[variant] for variant in linked_indices]
linked_score_final = [sel_score_final[variant] for variant in linked_indices]

n_causal, n_linked, n_neut, bin_causal, bins_linked, bins_neut = calc_hist_from_scores(causal_score_final, linked_score_final, neut_score_final, xlims, int(numLikesBins), args.thinToSize)
write_hists_to_files(args.outPrefix +"_" + bin_median, histBins, n_causal, n_linked, n_neut)
print("wrote to " + args.outPrefix +"_" + bin_median)
binfilenamelast = binfilename
n_causal, n_linked, n_neut, bin_causal, bins_linked, bins_neut = calc_hist_from_scores(causal_score_final, linked_score_final, neut_score_final, xlims, int(numLikesBins), args.thinToSize)
write_hists_to_files(args.outPrefix, histBins, n_causal, n_linked, n_neut)
print("wrote to " + args.outPrefix)
return
def execute_visualize_likes(args):
likesfilenames = args.likesFiles
Expand Down Expand Up @@ -404,10 +351,10 @@ def execute_visualize_likes(args):

#print(pops)
#print(models)
#print('loaded likes for ' + str(len(scores)) + " scores...")
#print('loaded likes for ' + str(len(models)) + " models...")
#print('loaded likes for ' + str(len(pops)) + " pops...")
#print('loaded likes for ' + str(len(dists)) + " dists...")
print('loaded likes for ' + str(len(scores)) + " scores...")
print('loaded likes for ' + str(len(models)) + " models...")
print('loaded likes for ' + str(len(pops)) + " pops...")
print('loaded likes for ' + str(len(dists)) + " dists...")

colorDict = {'causal':'red', 'linked':'green', 'neut':'blue'}

Expand Down