
Commit

Remove remaining out-of-scope in_args references
biologyguy committed Jan 9, 2017
1 parent cff0604 commit dff65ae
Showing 1 changed file with 34 additions and 30 deletions.
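The change is mechanical but worth spelling out: helper functions that previously reached into the module-level `in_args` argparse namespace (or used the inconsistent `out_dir` spelling) now receive the values they need, such as `outdir` and `taxa_separator`, as explicit parameters. Below is a minimal sketch of the pattern, assuming hypothetical `write_scores_*` helpers that are not part of rdmcl.py:

import argparse
import os

import pandas as pd


# Before: the helper silently depends on the module-level `in_args` namespace,
# so it cannot be imported, tested, or handed to a worker process unless
# argparse has already populated that global.
def write_scores_old(sim_scores, name):
    sim_scores.to_csv("%s/sim_scores/%s.scores" % (in_args.outdir, name),
                      header=None, index=False, sep="\t")


# After: the output directory is passed in explicitly, so the function is
# self-contained and safe to call from multiprocessing workers.
def write_scores_new(sim_scores, name, outdir):
    sim_scores.to_csv(os.path.join(outdir, "sim_scores", "%s.scores" % name),
                      header=None, index=False, sep="\t")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("outdir", nargs="?", default=".")
    in_args = parser.parse_args()
    # Only the top-level script touches in_args; everything else is handed outdir.
    scores = pd.DataFrame([["seq1", "seq2", 0.87]])
    os.makedirs(os.path.join(in_args.outdir, "sim_scores"), exist_ok=True)
    write_scores_new(scores, "group_0", in_args.outdir)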
64 changes: 34 additions & 30 deletions rdmcl/rdmcl.py
@@ -440,8 +440,8 @@ def score_cluster(cluster, sql_broker):
 
 
 def mc_psi_pred(seq_obj, args):
-    out_dir = args[0]
-    if os.path.isfile("{0}{1}psi_pred{1}{2}.ss2".format(out_dir, os.sep, seq_obj.id)):
+    outdir = args[0]
+    if os.path.isfile("{0}{1}psi_pred{1}{2}.ss2".format(outdir, os.sep, seq_obj.id)):
         return
     temp_dir = br.TempDir()
     pwd = os.getcwd()
@@ -458,13 +458,13 @@ def mc_psi_pred(seq_obj, args):
 
     Popen(command, shell=True).wait()
     os.chdir(pwd)
-    shutil.move("%s%s%s.ss2" % (temp_dir.path, os.sep, seq_obj.id), "{0}{1}psi_pred{1}".format(out_dir, os.sep))
+    shutil.move("%s%s%s.ss2" % (temp_dir.path, os.sep, seq_obj.id), "{0}{1}psi_pred{1}".format(outdir, os.sep))
     return
 
 
 def mcmcmc_mcl(args, params):
     inflation, gq = args
-    external_tmp_dir, min_score, seqbuddy, parent_cluster, taxa_separator, sql_broker, progress = params
+    external_tmp_dir, min_score, seqbuddy, parent_cluster, taxa_separator, sql_broker, outdir, progress = params
     mcl_tmp_dir = br.TempDir()
 
     mcl_output = Popen("mcl %s/input.csv --abc -te 2 -tf 'gq(%s)' -I %s -o %s/output.groups" %
@@ -490,7 +490,7 @@ def mcmcmc_mcl(args, params):
         sb_copy = Sb.make_copy(seqbuddy)
         sb_copy = Sb.pull_recs(sb_copy, "|".join(["^%s$" % rec_id for rec_id in cluster]))
         alb_obj = generate_msa(sb_copy, sql_broker)
-        sim_scores = create_all_by_all_scores(alb_obj, sql_broker=sql_broker, quiet=True)
+        sim_scores = create_all_by_all_scores(alb_obj, outdir, sql_broker=sql_broker, quiet=True)
 
         cluster = Cluster(cluster, sim_scores, parent=parent_cluster, taxa_separator=taxa_separator)
         clusters[indx] = cluster
@@ -506,7 +506,7 @@ def mcmcmc_mcl(args, params):
     return score
 
 
-def orthogroup_caller(master_cluster, cluster_list, seqbuddy, sql_broker, progress,
+def orthogroup_caller(master_cluster, cluster_list, seqbuddy, sql_broker, progress, outdir,
                       steps=1000, quiet=True, taxa_separator="-"):
     """
     Run MCMCMC on MCL to find the best orthogroups
@@ -516,18 +516,19 @@ def orthogroup_caller(master_cluster, cluster_list, seqbuddy, sql_broker, progre
     :param seqbuddy: The sequences that are included in the master sequence_ids
     :param sql_broker: Multithread SQL broker that can be queried
     :param progress: Progress class
+    :param outdir: where are files being written to?
     :param steps: How many MCMCMC iterations to run TODO: calculate this on the fly
     :param quiet: Suppress StdErr
     :param taxa_separator: The string that separates taxon names from gene names
     :return: list of sequence_ids objects
     """
     def save_cluster():
         cluster_list.append(master_cluster)
-        if not os.path.isdir("%s/mcmcmc/%s" % (in_args.outdir, master_cluster.name())):
-            temp_dir.save("%s/mcmcmc/%s" % (in_args.outdir, master_cluster.name()))
+        if not os.path.isdir("%s/mcmcmc/%s" % (outdir, master_cluster.name())):
+            temp_dir.save("%s/mcmcmc/%s" % (outdir, master_cluster.name()))
         alignment = generate_msa(seqbuddy, sql_broker)
-        alignment.write("%s/alignments/%s.aln" % (in_args.outdir, master_cluster.name()))
-        master_cluster.sim_scores.to_csv("%s/sim_scores/%s.scores" % (in_args.outdir, master_cluster.name()),
+        alignment.write("%s/alignments/%s.aln" % (outdir, master_cluster.name()))
+        master_cluster.sim_scores.to_csv("%s/sim_scores/%s.scores" % (outdir, master_cluster.name()),
                                          header=None, index=False, sep="\t")
         update = len(master_cluster.seq_ids) if not master_cluster.subgroup_counter else 0
         progress.update("placed", update)
@@ -553,7 +554,8 @@ def save_cluster():
     try:
         with open("%s/max.txt" % temp_dir.path, "w") as ofile:
             ofile.write("-1000000000")
-        mcmcmc_params = ["%s" % temp_dir.path, False, seqbuddy, master_cluster, taxa_separator, sql_broker, progress]
+        mcmcmc_params = ["%s" % temp_dir.path, False, seqbuddy, master_cluster,
+                         taxa_separator, sql_broker, outdir, progress]
         mcmcmc_factory = mcmcmc.MCMCMC([inflation_var, gq_var], mcmcmc_mcl, steps=steps, sample_rate=1,
                                        params=mcmcmc_params,
                                        quiet=quiet, outfile="%s/mcmcmc_out.csv" % temp_dir.path)
@@ -567,8 +569,8 @@ def save_cluster():
         for chain in mcmcmc_factory.chains:
             worst_score = chain.raw_min if chain.raw_min < worst_score else worst_score
 
-        mcmcmc_factory.reset_params(["%s" % temp_dir.path, worst_score, seqbuddy,
-                                     master_cluster, taxa_separator, sql_broker, progress])
+        mcmcmc_factory.reset_params(["%s" % temp_dir.path, worst_score, seqbuddy, master_cluster,
+                                     taxa_separator, sql_broker, outdir, progress])
         mcmcmc_factory.run()
         mcmcmc_output = pd.read_csv("%s/mcmcmc_out.csv" % temp_dir.path, "\t")
 
@@ -610,15 +612,16 @@ def save_cluster():
 
         # Recursion... Reassign cluster_list, as all clusters are returned at the end of a call to orthogroup_caller
         cluster_list = orthogroup_caller(sub_cluster, cluster_list, seqbuddy=seqbuddy_copy, sql_broker=sql_broker,
-                                         progress=progress, steps=steps, quiet=quiet, taxa_separator=taxa_separator)
+                                         progress=progress, outdir=outdir, steps=steps, quiet=quiet,
+                                         taxa_separator=taxa_separator)
 
     save_cluster()
     return cluster_list
 
 
 class Progress(object):
-    def __init__(self, out_dir):
-        self.outdir = out_dir
+    def __init__(self, outdir):
+        self.outdir = outdir
         with open("%s/.progress" % self.outdir, "w") as progress_file:
             _progress = {"mcl_runs": 0, "placed": 0, "total": len(group_0_cluster)}
             json.dump(_progress, progress_file)
@@ -855,10 +858,11 @@ def generate_msa(seqbuddy, sql_broker=None):
     return alignment
 
 
-def create_all_by_all_scores(alignment, sql_broker=None, quiet=False):
+def create_all_by_all_scores(alignment, outdir, sql_broker=None, quiet=False):
     """
     Generate a multiple sequence alignment and pull out all-by-all similarity graph
     :param alignment: AlignBuddy object
+    :param outdir: Where are files being written to?
     :param sql_broker: Multithread SQL broker that can be queried
     :param quiet: Supress multicore output
     :return:
@@ -883,7 +887,7 @@ def create_all_by_all_scores(alignment, sql_broker=None, quiet=False):
     # Need to specify what columns the PsiPred files map to now that there are gaps.
     psi_pred_files = OrderedDict()
     for rec in alignment.records_iter():
-        ss_file = pd.read_csv("%s/psi_pred/%s.ss2" % (in_args.outdir, rec.id), comment="#",
+        ss_file = pd.read_csv("%s/psi_pred/%s.ss2" % (outdir, rec.id), comment="#",
                               header=None, delim_whitespace=True)
         ss_file.columns = ["indx", "aa", "ss", "coil_prob", "helix_prob", "sheet_prob"]
         ss_counter = 0
@@ -937,16 +941,16 @@ def create_all_by_all_scores(alignment, sql_broker=None, quiet=False):
     return sim_scores
 
 
-def check_sequences(seqbuddy):
-    logging.warning("Checking that the format of all sequence ids matches 'taxa%sgene'" % in_args.taxa_separator)
+def check_sequences(seqbuddy, taxa_separator):
+    logging.warning("Checking that the format of all sequence ids matches 'taxa%sgene'" % taxa_separator)
     failures = []
     for rec in seqbuddy.records:
-        rec_id = rec.id.split(in_args.taxa_separator)
+        rec_id = rec.id.split(taxa_separator)
         if len(rec_id) != 2:
             failures.append(rec.id)
     if failures:
         logging.error("Malformed sequence id(s): '%s'\nThe taxa separator character is currently set to '%s',\n"
-                      " which can be changed with the '-ts' flag" % (", ".join(failures), in_args.taxa_separator))
+                      " which can be changed with the '-ts' flag" % (", ".join(failures), taxa_separator))
         sys.exit()
     else:
         logging.warning(" %s sequences PASSED" % len(seqbuddy))
@@ -1016,7 +1020,7 @@ def check_sequences(seqbuddy):
                                        "graph TEXT", "cluster_score TEXT"])
     broker.start_broker()
     sequences = Sb.SeqBuddy(in_args.sequences)
-    check_sequences(sequences)
+    check_sequences(sequences, in_args.taxa_separator)
     seq_ids_hash = helpers.md5_hash("".join(sorted([rec.id for rec in sequences.records])))
 
     # Check if job has been run already
@@ -1025,12 +1029,12 @@ def check_sequences(seqbuddy):
                         " All cached resources will be reused.")
 
     # Make sure all the necessary directories are present and emptied of old run files
-    for outdir in ["%s%s" % (in_args.outdir, x) for x in ["", "/alignments", "/mcmcmc", "/sim_scores", "/psi_pred"]]:
-        if not os.path.isdir(outdir):
-            logging.info("mkdir %s" % outdir)
-            os.makedirs(outdir)
+    for _path in ["%s%s" % (in_args.outdir, x) for x in ["", "/alignments", "/mcmcmc", "/sim_scores", "/psi_pred"]]:
+        if not os.path.isdir(_path):
+            logging.info("mkdir %s" % _path)
+            os.makedirs(_path)
         # Delete old 'group' files/directories
-        root, dirs, files = next(os.walk(outdir))
+        root, dirs, files = next(os.walk(_path))
         for _file in files:
             if "group" in _file:
                 os.remove("%s/%s" % (root, _file))
@@ -1100,7 +1104,7 @@ def check_sequences(seqbuddy):
     else:
         logging.warning("Generating initial all-by-all similarity graph")
         logging.info(" written to: %s/sim_scores/complete_all_by_all.scores" % in_args.outdir)
-        scores_data = create_all_by_all_scores(alignbuddy, sql_broker=broker)
+        scores_data = create_all_by_all_scores(alignbuddy, in_args.outdir, sql_broker=broker)
         scores_data.to_csv("%s/sim_scores/complete_all_by_all.scores" % in_args.outdir,
                            header=None, index=False, sep="\t")
         broker.query("UPDATE data_table SET graph='{0}' "
@@ -1135,7 +1139,7 @@ def check_sequences(seqbuddy):
     run_time = br.RunTime(prefix=progress_tracker.__str__, _sleep=0.3, final_clear=True)
     run_time.start()
     final_clusters = orthogroup_caller(group_0_cluster, final_clusters, seqbuddy=sequences, sql_broker=broker,
-                                       progress=progress_tracker, steps=in_args.mcmcmc_steps, quiet=True,
+                                       progress=progress_tracker, outdir=in_args.outdir, steps=in_args.mcmcmc_steps, quiet=True,
                                        taxa_separator=in_args.taxa_separator)
     run_time.end()
