In [1]:
import numpy as np
import time

In [3]:
# Simulation settings shared by every cell below.
n_nodes = 268  # number of nodes tested per cohort

# Per-node significance threshold: p < 0.05, uncorrected.
p1 = 0.05

# Alternative threshold (p < 0.05, Bonferroni corrected); uncomment to use:
# p1 = np.sqrt(0.05/n_nodes)
# print("p1={}".format(p1))

n_iter = 10 ** 6  # number of simulated experiments

In [4]:
print("Slow version")

# Baseline Monte Carlo estimate: one Python-level loop iteration per simulated
# experiment. Reads n_nodes, p1 and n_iter from the parameters cell.
# perf_counter() is the clock intended for measuring elapsed intervals.
starttime = time.perf_counter()
false_positives = np.empty(n_iter)

# The loop variable is deliberately not named `iter`: at notebook top level it
# stays bound after the loop and would shadow the builtin iter() in later cells.
for i_sim in range(n_iter):
    # Every node is independently flagged significant with probability p1.
    # NOTE: the draws are already i.i.d., so the permutation does not change the
    # distribution of the result; it is kept because this cell is the baseline.
    result1 = np.random.permutation(np.random.rand(n_nodes) < p1)  # randomly create result for cohort 1
    result2 = np.random.permutation(np.random.rand(n_nodes) < p1)  # randomly create result for cohort 2

    # count how many nodes were significant in both cohorts
    false_positives[i_sim] = np.count_nonzero(result1 & result2)

print("Mean # of nodes significant in both cohorts: {}".format(np.mean(false_positives)))
print("Median # of nodes significant in both cohorts: {}".format(np.median(false_positives)))
print("Max # of nodes significant in both cohorts: {}".format(np.max(false_positives)))
for signifnodes in range(1, 11):
    # Fraction of simulated experiments with at least `signifnodes` overlapping nodes.
    print("Probability of at least {} node(s) significant in both cohorts: {}".format(
        signifnodes, np.mean(false_positives >= signifnodes)))

print("Execute time: {}seconds".format(time.perf_counter() - starttime))


Slow version
Mean # of nodes significant in both cohorts: 0.669198
Median # of nodes significant in both cohorts: 0.0
Max # of nodes significant in both cohorts: 7.0
Probability of at least 1 node(s) significant in both cohorts: 0.488365
Probability of at least 2 node(s) significant in both cohorts: 0.144808
Probability of at least 3 node(s) significant in both cohorts: 0.030275
Probability of at least 4 node(s) significant in both cohorts: 0.005022
Probability of at least 5 node(s) significant in both cohorts: 0.000648
Probability of at least 6 node(s) significant in both cohorts: 7.4e-05
Probability of at least 7 node(s) significant in both cohorts: 6e-06
Probability of at least 8 node(s) significant in both cohorts: 0.0
Probability of at least 9 node(s) significant in both cohorts: 0.0
Probability of at least 10 node(s) significant in both cohorts: 0.0
Execute time: 128.71206188201904seconds


In [4]:
print("For test retest, setting p1 to p1^2 and only randomizing once")

# Same estimate as the two-cohort simulation, but with a single draw per node:
# two independent tests at level p1 are both significant with probability p1**2.
# Reads n_nodes, p1 and n_iter from the parameters cell.
# perf_counter() is the clock intended for measuring elapsed intervals.
starttime = time.perf_counter()
false_positives = np.empty(n_iter)

# The loop variable is deliberately not named `iter`: at notebook top level it
# stays bound after the loop and would shadow the builtin iter() in later cells.
for i_sim in range(n_iter):
    # randomly create result: True where a node is significant in both cohorts
    result1 = np.random.permutation(np.random.rand(n_nodes) < (p1**2))

    # count how many nodes were significant in both cohorts
    false_positives[i_sim] = np.count_nonzero(result1)

print("Mean # of nodes significant in both cohorts: {}".format(np.mean(false_positives)))
print("Median # of nodes significant in both cohorts: {}".format(np.median(false_positives)))
print("Max # of nodes significant in both cohorts: {}".format(np.max(false_positives)))
for signifnodes in range(1, 11):
    # Fraction of simulated experiments with at least `signifnodes` overlapping nodes.
    print("Probability of at least {} node(s) significant in both cohorts: {}".format(
        signifnodes, np.mean(false_positives >= signifnodes)))

print("Execute time: {}seconds".format(time.perf_counter() - starttime))

For test retest, setting p1 to p1^2 and only randomizing once
Mean # of nodes significant in both cohorts: 0.6698357
Median # of nodes significant in both cohorts: 0.0
Max # of nodes significant in both cohorts: 8.0
Probability of at least 1 node(s) significant in both cohorts: 0.4884633
Probability of at least 2 node(s) significant in both cohorts: 0.145372
Probability of at least 3 node(s) significant in both cohorts: 0.0304176
Probability of at least 4 node(s) significant in both cohorts: 0.0048756
Probability of at least 5 node(s) significant in both cohorts: 0.0006306
Probability of at least 6 node(s) significant in both cohorts: 6.84e-05
Probability of at least 7 node(s) significant in both cohorts: 7.6e-06
Probability of at least 8 node(s) significant in both cohorts: 6e-07
Probability of at least 9 node(s) significant in both cohorts: 0.0
Probability of at least 10 node(s) significant in both cohorts: 0.0
Execute time: 152.57229590415955seconds


In [5]:
print("For test retest, setting p1 to p1^2 and only randomizing once and running without a for loop")

# Vectorised form of the single-draw simulation: all experiments are drawn at
# once instead of looping in Python. Reads n_nodes, p1 and n_iter from the
# parameters cell.
# perf_counter() is the clock intended for measuring elapsed intervals.
starttime = time.perf_counter()

# One row per simulated experiment, one column per node; an entry is True when
# that node is significant in both cohorts (probability p1**2).
# No np.random.permutation here: on a 2-D array it only reorders whole rows,
# which cannot change any row sum, and it copies the entire array.
# NOTE: np.random.rand materialises n_iter * n_nodes float64 values, so memory
# use grows with n_iter.
rand_tests = np.random.rand(n_iter, n_nodes) < (p1**2)

# Number of jointly significant nodes in each simulated experiment.
false_positives = rand_tests.sum(axis=1)

print("Mean # of nodes significant in both cohorts: {}".format(np.mean(false_positives)))
print("Median # of nodes significant in both cohorts: {}".format(np.median(false_positives)))
print("Max # of nodes significant in both cohorts: {}".format(np.max(false_positives)))
for signifnodes in range(1, 11):
    # Fraction of simulated experiments with at least `signifnodes` overlapping nodes.
    print("Probability of at least {} node(s) significant in both cohorts: {}".format(
        signifnodes, np.mean(false_positives >= signifnodes)))

print("Execute time: {}seconds".format(time.perf_counter() - starttime))

For test retest, setting p1 to p1^2 and only randomizing once and running without a for loop
Mean # of nodes significant in both cohorts: 0.6700504
Median # of nodes significant in both cohorts: 0.0
Max # of nodes significant in both cohorts: 8
Probably of at least 1 node significant in both cohorts: 0.488891
Probably of at least 2 node significant in both cohorts: 0.1451964
Probably of at least 3 node significant in both cohorts: 0.0303647
Probably of at least 4 node significant in both cohorts: 0.0048781
Probably of at least 5 node significant in both cohorts: 0.0006427
Probably of at least 6 node significant in both cohorts: 7.04e-05
Probably of at least 7 node significant in both cohorts: 6.5e-06
Probably of at least 8 node significant in both cohorts: 6e-07
Probably of at least 9 node significant in both cohorts: 0.0
Probably of at least 10 node significant in both cohorts: 0.0
Execute time: 123.56277227401733seconds
