Skip to content

Commit

Permalink
minor cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
agladstein committed Feb 20, 2018
1 parent ea0cf8e commit 3ed0cb4
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 41 deletions.
3 changes: 1 addition & 2 deletions alleles_generator/seqInfo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from bitarray import bitarray

from main_tools.my_random import MY_RANDOM as random
from main_tools.housekeeping import debugPrint


Expand All @@ -23,7 +22,7 @@ def __repr__(self):
return self.name


def create_sequences(processedData, args):
def create_sequences(processedData):
'''
Parameters: args is a dictionary that maps the SNP file to
array_template
Expand Down
6 changes: 3 additions & 3 deletions ascertainment/pseudo_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def find2(a, x):
elif d1==d2:
return i

def add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc, nb_array_snps):
def add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc):
''''
This function is called 199 times when the program is
run.
Expand Down Expand Up @@ -149,7 +149,7 @@ def pseudo_array(asc_panel, daf, pos, snps):
diff = int(len(snps) - len(pos_asc))
for m in range(1, diff + 1):
pos_asc2 = []
pos_asc2 = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc, nb_array_snps)
pos_asc2 = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc)
pos_asc = pos_asc2
nbss_asc = len(pos_asc)
if nbss_asc == len(snps):
Expand Down Expand Up @@ -266,7 +266,7 @@ def pseudo_array_bits(asc_panel_bits, daf, pos, snps):
diff = int(len(snps) - len(pos_asc))
for m in range(1, diff + 1):
pos_asc2 = []
pos_asc2 = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc, nb_array_snps)
pos_asc2 = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_asc)
pos_asc = pos_asc2
nbss_asc = len(pos_asc)
if nbss_asc == len(snps):
Expand Down
31 changes: 16 additions & 15 deletions main_tools/write_files.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import os

def create_sim_directories(path_name):
'''
Parameter: pathname, which is currently output_dir
Returns: dir_list (['output_dir/sim_data', 'output_dir/germline_out', 'output_dir/results'])
'''
"""
:param path_name: which is currently output_dir
:return: dir_list (['output_dir/sim_data', 'output_dir/germline_out', 'output_dir/results'])
"""
sim_data_dir = str(path_name)+'/sim_data'
germline_out_dir = str(path_name)+'/germline_out'
sim_results_dir = str(path_name)+'/results'
Expand All @@ -22,24 +22,25 @@ def create_sim_directories(path_name):


def write_sim_results_file(dir, job, param_dict, res_list, header):
'''
Parameters:
dir = output_dir/results
job = 1
param_dict = {'A': '44499.7180488', 'daf': '0.0264139586625', 'B': '40008.4616861', 'AB_t': '2546.95287896', 'AN': '10000.0', 'AN_t': '2113.43905612'}
res_list = [4372, 1724, 590, -0.40890634648504526, 165, 27, 11, 57.40526315789474, 0.970678370998841, 158, 4, 1, 49.15632065775952, 2.3175661604382545, 0.03793034448167276]
header = ['SegS_D1_CGI', 'Sing_D1_CGI', 'Dupl_D1_CGI', 'TajD_D1_CGI', 'SegS_D1_ASC', 'Sing_D1_ASC', 'Dupl_D1_ASC', 'Pi_D1_ASC', 'TajD_D1_ASC', 'SegS_S1_ASC', 'Sing_S1_ASC', 'Dupl_S1_ASC', 'Pi_S1_ASC', 'TajD_S1_ASC', 'FST_D1_S1_ASC']
'''
"""
:param dir: output_dir/results
:param job: 1
:param param_dict: {'A': '44499.7180488', 'daf': '0.0264139586625', 'B': '40008.4616861', 'AB_t': '2546.95287896', 'AN': '10000.0', 'AN_t': '2113.43905612'}
:param res_list: [4372, 1724, 590, -0.40890634648504526, 165, 27, 11, 57.40526315789474, 0.970678370998841, 158, 4, 1, 49.15632065775952, 2.3175661604382545, 0.03793034448167276]
:param header: ['SegS_D1_CGI', 'Sing_D1_CGI', 'Dupl_D1_CGI', 'TajD_D1_CGI', 'SegS_D1_ASC', 'Sing_D1_ASC', 'Dupl_D1_ASC', 'Pi_D1_ASC', 'TajD_D1_ASC', 'SegS_S1_ASC', 'Sing_S1_ASC', 'Dupl_S1_ASC', 'Pi_S1_ASC', 'TajD_S1_ASC', 'FST_D1_S1_ASC']
:return:
"""

result = '{}/results_{}.txt'.format(dir, job)
out_file = open(result, 'w')

#lines write_sims_file
params = []
vals = []
for param, val in param_dict.items():
params.append(param)
vals.append(str(val))
#header from write_results_file
header = '\t'.join(header)+'\n'
out = '\t'.join([str(r) for r in res_list]) + '\n'

Expand Down
2 changes: 1 addition & 1 deletion real_data_ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def main(args):
using_pseudo_array = False

### Create a list of Sequence class instances. These will contain the bulk of all sequence-based data
sequences = create_sequences(processedData, args)
sequences = create_sequences(processedData)
names = [seq.name for seq in sequences]

n_d = sum([1 for seq in sequences if seq.type == 'discovery'])
Expand Down
2 changes: 1 addition & 1 deletion simprily.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def main(args):


### Create a list of Sequence class instances. These will contain the bulk of all sequence-based data
sequences = create_sequences(processedData, args)
sequences = create_sequences(processedData)
names = [seq.name for seq in sequences]

n_d = sum([1 for seq in sequences if seq.type == 'discovery'])
Expand Down
18 changes: 6 additions & 12 deletions simulation/run_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@


def run_macs(macs_args, sequences):
'''
Parameters: sequences and macs_args
macs_args:
['./bin/macs', '166.0', '1000000', '-I', '2', '26', '140',
"""
:param macs_args: ['./bin/macs', '166.0', '1000000', '-I', '2', '26', '140',
'-t', '0.00444997180488', '-s', '1231', '-r', '0.00177998872195',
'-h', '1e5', '-n', '1', '1.0', '-n', '2', '0.899072251249', '-en',
'0.0118708617304', '1', '0.224720524949', '-ej', '0.0143090794261',
'2', '1', '-R', 'genetic_map_b37/genetic_map_GRCh37_chr1.txt.macshs']
sequences: [A, B], which is a sequence type
Returns: sequences, which is a list of two instance types stored as
:param sequences: [A, B], which is a sequence type
:return: sequences, which is a list of two instance types stored as
[A, B]
position: list of floats cast as strings, length: 10752
the floaty strings increase from '0.000178136752' to ' 0.99995896'
'''
"""

debugPrint(2,"running macs simulation:")
position = []
null = open(os.devnull, 'w')
Expand All @@ -42,8 +39,5 @@ def run_macs(macs_args, sequences):
# debugPrint(3,line)
else:
break
# print("THIS IS SEQUENCES zero: " + str(sequences[0].__dict__))
# print("THIS IS SEQUENCES one: " + str(sequences[1].__dict__))
# print("THIS IS position: " + str(position))
# debugPrint(2,"Finished macs simulation")
return [sequences,position]
1 change: 0 additions & 1 deletion summary_statistics/germline_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from subprocess import Popen

def run_germline(ped_name, map_name, out_name, min_m):
print("THIS IS THE ")
print( 'Running Germline on ' + ped_name + ' ' + map_name)

bash_command = 'bash ./bin/phasing_pipeline/gline.sh ./bin/germline-1-5-1/germline {0} {1} {2} "-bits 10 -min_m {3}"'.format(
Expand Down
12 changes: 6 additions & 6 deletions unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,23 +274,23 @@ def test_add_snps(self):
pos_asc = [3000, 3000, 3000]
nbss_acs = 198
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [3000, 3000, 3000])

avail_sites = [1.0, 2.0]
nb_avail_sites = 1000
pos_asc = [12, 12, 12]
nbss_acs = 23
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [12, 12 ,12])

avail_sites = [21313211242134]
nb_avail_sites = 3000
pos_asc = [-1, -1, 3000]
nbss_acs = 23
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [-1, -1, 3000])

# this one is defined
Expand All @@ -299,7 +299,7 @@ def test_add_snps(self):
pos_asc = [12, -1, 2]
nbss_acs = 23
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [12, -1, 2, 3])

# defined, even though nbss_acs is negative and a float
Expand All @@ -308,7 +308,7 @@ def test_add_snps(self):
pos_asc = [12, -1, 2]
nbss_acs = -1
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [12, -1, 2, 3])

# not defined
Expand All @@ -317,7 +317,7 @@ def test_add_snps(self):
pos_asc = [12, 8000001, 2]
nbss_acs = -1
nb_array_snps = 200
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs, nb_array_snps)
check = add_snps(avail_sites, nb_avail_sites, pos_asc, nbss_acs)
self.assertEquals(check, [12, 8000001, 2])

'''
Expand Down

0 comments on commit 3ed0cb4

Please sign in to comment.