-
Notifications
You must be signed in to change notification settings - Fork 9
/
MakeBootstraps.py
145 lines (125 loc) · 5.46 KB
/
MakeBootstraps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
'''
@author: jonathanfriedman
Script for making simulated datasets used to get pseudo p-values.
'''
import os
import numpy as np
import pandas as pd
from sparcc.io_methods import read_txt, write_txt
from typing import Union
def permute_w_replacement(frame: Union[pd.DataFrame, np.ndarray], axis=0):
    '''
    Shuffle the values of a 2-D frame along both of its axes.

    Every 1-D slice along the first axis is shuffled independently with
    ``np.random.permutation``; the slices of that result along the other
    axis are then shuffled independently as well.
    Despite the function name, values are rearranged, not resampled:
    the output holds exactly the same values as the input.

    NOTE(review): because both axes are shuffled, values end up moving
    between columns as well as between rows - confirm this is intended.

    Parameters
    ----------
    frame : DataFrame or Numpy Array
        Frame to permute. A DataFrame is reduced to its ``.values``,
        so row/column labels are not carried over.
    axis : {0, 1}
        - 0 - Shuffle along axis 0 first, then along axis 1.
        - 1 - Shuffle along axis 1 first, then along axis 0.

    Returns
    -------
    Numpy Array
        Shuffled copy with the same shape as the input (new instance).

    Raises
    ------
    ValueError
        If axis is neither 0 nor 1.
    '''
    # Previously an unsupported axis fell through and returned None,
    # which only surfaced later as an unrelated error in the caller.
    if axis not in (0, 1):
        raise ValueError('axis must be 0 or 1, got %r' % (axis,))
    if isinstance(frame, pd.DataFrame):
        frame = frame.values
    first_axis, second_axis = (0, 1) if axis == 0 else (1, 0)
    shuffled = np.apply_along_axis(np.random.permutation, first_axis, frame)
    return np.apply_along_axis(np.random.permutation, second_axis, shuffled)
def make_bootstraps(counts, nperm, perm_template, outpath='./', iprint=0):
    '''
    Make n simulated datasets used to get pseudo p-values.
    Each simulated dataset is produced by calling
    permute_w_replacement(counts, axis=1) and is written out with
    write_txt to its own file.

    Parameters
    ----------
    counts : DataFrame or Numpy Array
        Counts data to be shuffled; handed unchanged to
        permute_w_replacement on every iteration.
    nperm : int
        Number of permutations to produce.
    perm_template : str
        Template for the permuted data file names.
        The iteration number is indicated with a "#".
        The name is resolved relative to outpath; any sub-directory it
        contains is created if missing.
        For example: 'permuted/counts.permuted_#.txt'
    outpath : str (default './')
        The path to which permuted data will be written.
        Created if it does not exist. An empty string means the cwd.
    iprint : int (default = 0)
        The interval at which iteration number is printed out.
        If iprint<=0 no printouts are made.
    '''
    # exist_ok avoids the check-then-create race of exists() + makedirs();
    # an empty outpath means "current directory", so nothing to create.
    if outpath:
        os.makedirs(outpath, exist_ok=True)
    for i in range(nperm):
        if iprint > 0 and i % iprint == 0:
            print(i)
        counts_perm = permute_w_replacement(counts, axis=1)
        filename = perm_template.replace('#', '%d' % i)
        # Join instead of concatenating so an outpath without a trailing
        # separator still names a directory. Leading separators are
        # stripped so the file always lands under outpath, exactly as
        # plain concatenation did.
        outfile = os.path.join(outpath, filename.lstrip('/\\'))
        parent = os.path.dirname(outfile)
        if parent:
            # The template may carry its own sub-directory.
            os.makedirs(parent, exist_ok=True)
        write_txt(counts_perm, outfile, index=True)
def main(counts_file, nperm, perm_template, outpath='./'):
    '''
    Read a counts file and write nperm simulated datasets from it.

    The counts are loaded with read_txt (first column as index) and
    handed to make_bootstraps, which writes one file per permutation.

    Parameters
    ----------
    counts_file : str
        Path of the counts file to read.
    nperm : int
        Number of simulated datasets to produce.
    perm_template : str or None
        Template for the output file names ("#" marks the iteration
        number). If None, counts_file + '.permuted_#.csv' is used.
    outpath : str (default './')
        The path to which permuted data will be written.

    Raises
    ------
    ValueError
        If the counts file yields no rows, even after the comma retry.
    '''
    if perm_template is None:
        perm_template = counts_file + '.permuted_#.csv'
    ## read counts data
    counts = read_txt(counts_file, index_col=0)
    if counts.shape[0] == 0:
        # Nothing was parsed; retry treating the file as comma-separated.
        # Any error raised by this second read propagates unchanged.
        print('A problem has occurred with the file, it will be resolved.')
        counts = read_txt(counts_file, sep=',', index_col=0)
    # Explicit raise rather than assert, so the check survives python -O.
    if counts.shape[0] == 0:
        raise ValueError('No rows could be read from counts file %r' % (counts_file,))
    ## make permutated data
    print("Permutations")
    make_bootstraps(counts, nperm, perm_template, outpath=outpath)
if __name__ == '__main__':
    ## parse input arguments
    from optparse import OptionParser

    usage = ('Make n simulated datasets used to get pseudo p-values.\n'
             'Simulated datasets are generated by assigning each OTU in each sample an abundance that is randomly drawn (w. replacement) from the abundances of the OTU in all samples.\n'
             'Simulated datasets are either written out as txt files. \n'
             '\n'
             'Usage: python MakeBootstraps.py counts_file [options]\n'
             'Example: python MakeBootstraps.py example/fake_data.txt -n 5 -t permutation_#.txt -p example/pvals/')
    parser = OptionParser(usage)
    parser.add_option("-n", dest="n", default=100, type='int',
                      help="Number of simulated datasets to create (100 default).")
    # The default suffix named here must match the one main() applies
    # when no template is given ('.permuted_#.csv').
    parser.add_option("-t", "--template", dest="perm_template", default=None, type='str',
                      help="The template for the permuted data file names.\n"
                           "Should not include the path, which is specified using the -p option.\n"
                           'The iteration number is indicated with a "#".\n'
                           "For example: 'permuted/counts.permuted_#.txt'.\n"
                           "If not provided a '.permuted_#.csv' suffix will be added to the counts file name.\n")
    parser.add_option("-p", "--path", dest="outpath", default='./', type='str',
                      help="The path to which permuted data will be written.\n"
                           "If not provided files will be written to the cwd.\n")
    (options, args) = parser.parse_args()
    # Report a missing counts file as a usage error instead of letting
    # args[0] fail with a bare IndexError.
    if not args:
        parser.error('a counts_file argument is required')
    counts_file = args[0]
    main(counts_file, options.n, options.perm_template, options.outpath)