-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #149 from CenzhuoYao/powerlaw2
MRG: Powerlaw Generalized Likelihood Ratio Test
- Loading branch information
Showing
2 changed files
with
252 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
from __future__ import print_function, absolute_import, division | ||
|
||
import numpy as np | ||
from mousestyles import data | ||
|
||
|
||
def random_powerlaw(n, a, seed=-1): | ||
""" | ||
Random generate points of truncated power law. | ||
Description | ||
----------- | ||
The method we generate is to inverse Cumulative Density Function | ||
of truncated powerlaw function, and put random number draw from | ||
Unif[0,1]. The theory behind it is F^{-1}(U)~F. | ||
Parameters | ||
---------- | ||
n : int | ||
number of points | ||
a : int>1 | ||
power law parameter alpha | ||
Returns | ||
------- | ||
points : a vector of float number | ||
n points have the target distribution. | ||
Examples | ||
-------- | ||
>>> random_powerlaw(4,2) | ||
array([ 1.18097435, 1.04584078, 1.4650779 , 36.03967524]) | ||
""" | ||
if seed != -1: | ||
np.random.seed(seed) | ||
y = np.random.sample(n) | ||
return((1 - y)**(-1 / (a - 1))) | ||
|
||
|
||
def random_exp(n, l, seed=-1): | ||
""" | ||
Random generate points of truncated exponential. | ||
Description | ||
----------- | ||
The method we generate is to use the memorylessness property | ||
of exponential distribution. As the survival function of | ||
exponential distribution is always the same, for truncated | ||
exponential distribution, it is just the same to draw from | ||
regular exponential distribtion and shift the truncated | ||
value. | ||
Parameters | ||
---------- | ||
n : int | ||
number of points | ||
l : int | ||
exponential parameter lambda | ||
Returns | ||
------- | ||
points : a vector of float number | ||
n points have the target distribution. | ||
Examples | ||
-------- | ||
>>> random_exp(4,2) | ||
array([ 1.07592496, 1.19789646, 1.19759663, 1.03993227]) | ||
""" | ||
if seed != -1: | ||
np.random.seed(seed) | ||
y = np.random.exponential(1.0 / l, n) | ||
return(y + 1) | ||
|
||
|
||
def hypo_powerLaw_null(strain, mouse, day, law_est=0, seed=-1): | ||
""" | ||
Return the outcome from GLRT with null hypothesis law distribution. | ||
Description | ||
----------- | ||
This function used the Generalized Likelihood Ratio Test to test the | ||
goodness of fit: in other words, which distribution is more likely. | ||
In this function, we choose the powerLaw distributin to be the null | ||
and exponential distribution to be the alternative. We derived the | ||
test statistics by theory and pluged in MLE as our estimation of | ||
best parameters. | ||
After we calculated the paramters, we need to find the rejection | ||
region, critical value or pvalue. To get a more general test, we | ||
want to use pvalue, instead of critical value under certain | ||
significance level. | ||
To find the p-value, we use simulation methods, and all random | ||
numbers are drawn from previous functions. Therefore, although | ||
p value should be a constant given data, it is not a constant in | ||
our function, if we did not set the seed. | ||
In general, in this function, if the p value is too small, then we | ||
will reject the null, and we say powerlaw is not a better fit | ||
compared to exponential distribution. | ||
Parameters | ||
---------- | ||
strain : int | ||
the strain number of the mouse | ||
mouse : int | ||
the mouse number in its strain | ||
day : int | ||
the day number | ||
law_est: double (optional) | ||
the estimated parameter in law distribution | ||
Returns | ||
------- | ||
p_value: | ||
the probablity under null reject. | ||
Examples | ||
-------- | ||
>>> hypo_law_null (0, 0, 0) | ||
0.0070000000000000001 | ||
""" | ||
if seed != -1: | ||
np.random.seed(seed) | ||
df = data.load_movement(strain, mouse, day) | ||
xcood = df["x"] | ||
ycood = df["y"] | ||
distance_vector = np.sqrt(np.diff(xcood)**2 + np.diff(ycood)**2) | ||
msk = distance_vector > 1 | ||
cut_dist = distance_vector[msk] | ||
if law_est == 0: | ||
law_est = 1 + len(cut_dist) * 1 / \ | ||
(np.sum(np.log(cut_dist / np.min(cut_dist)))) | ||
n = len(cut_dist) | ||
log_cut = np.log(cut_dist) | ||
sum_cut = np.sum(log_cut) | ||
test_stat = n * (np.log(sum_cut - n) - np.log(sum_cut)) - law_est * sum_cut | ||
sample_stat = [] | ||
for i in range(1000): | ||
sample = random_powerlaw(len(cut_dist), law_est) | ||
sum_sam = np.sum(sample) | ||
log_sam = np.log(sample) | ||
sum_log_sam = np.log(np.sum(log_sam)) | ||
tmp = n*(np.log(sum_sam-n)-sum_log_sam) - law_est*np.sum(log_sam) | ||
sample_stat.append(tmp) | ||
# critical_value = ss.mstats.mquantiles(sample_stat, prob = 0.05)[0] | ||
p_value = np.sum(sample_stat > test_stat) / len(sample_stat) | ||
return (p_value) | ||
|
||
|
||
def hypo_exp_null(strain, mouse, day, law_est=0, exp_est=0, seed=-1): | ||
""" | ||
Return the outcome from GLRT with null hypothesis law distribution. | ||
Description | ||
----------- | ||
This function also used the Generalized Likelihood Ratio Test to test | ||
goodness of fit: in other words, which distribution is more likely. | ||
In this function, we choose the exponential distributin to be the null | ||
and powerlaw distribution to be the alternative. We derived the | ||
test statistics by theory and pluged in MLE as our estimation of | ||
best parameters. | ||
After we calculated the paramters, we need to find the rejection | ||
region, critical value or pvalue. To get a more general test, we | ||
want to use pvalue, instead of critical value under certain | ||
significance level. | ||
To find the p-value, we use simulation methods, and all random | ||
numbers are drawn from previous functions. Therefore, although | ||
p value should be a constant given data, it is not a constant in | ||
our function, if we did not set the seed. | ||
In general, in this function, if the p value is too small, then we | ||
will reject the null, and we say exponential is not a better fit | ||
compared to exponential distribution. | ||
Parameters | ||
---------- | ||
strain : int | ||
the strain number of the mouse | ||
mouse : int | ||
the mouse number in its strain | ||
day : int | ||
the day number | ||
law_est: double (optional) | ||
the estimated parameter in law distribution | ||
exp_est: double (optional) | ||
the estimated parameter in exponential distribution | ||
Returns | ||
------- | ||
p_value: | ||
the probablity under null reject. | ||
Examples | ||
-------- | ||
>>> hypo_exp_null (0, 0, 0) | ||
1.0 | ||
""" | ||
if seed != -1: | ||
np.random.seed(seed) | ||
df = data.load_movement(strain, mouse, day) | ||
xcood = df["x"] | ||
ycood = df["y"] | ||
distance_vector = np.sqrt(np.diff(xcood)**2 + np.diff(ycood)**2) | ||
msk = distance_vector > 1 | ||
cut_dist = distance_vector[msk] | ||
if law_est == 0: | ||
law_est = 1 + len(cut_dist) * 1 / \ | ||
(np.sum(np.log(cut_dist / np.min(cut_dist)))) | ||
if exp_est == 0: | ||
exp_est = len(cut_dist) / (np.sum(cut_dist) - len(cut_dist)) | ||
n = len(cut_dist) | ||
sum_cut = np.sum(cut_dist) | ||
log_cut = np.log(cut_dist) | ||
sum_log_cut = np.sum(log_cut) | ||
test_stat = n * (np.log(sum_cut - n) - np.log(sum_log_cut)) - \ | ||
law_est * sum_log_cut | ||
sample_stat = [] | ||
for i in range(1000): | ||
sample = random_exp(len(cut_dist), exp_est) | ||
sum_sam = np.sum(sample) | ||
log_sam = np.log(sample) | ||
sum_log_sam = np.sum(log_sam) | ||
tmp = n * (np.log(sum_sam - n) - np.log(sum_log_sam)) - \ | ||
law_est * sum_log_sam | ||
sample_stat.append(tmp) | ||
# critical_value = ss.mstats.mquantiles(sample_stat, prob = 0.95)[0] | ||
p_value = np.sum(sample_stat <= test_stat) / len(sample_stat) | ||
return (p_value) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from mousestyles.GLRT_distribution import (random_powerlaw, random_exp, | ||
hypo_powerLaw_null, hypo_exp_null) | ||
|
||
|
||
def test_random_powerlaw(): | ||
assert (abs(random_powerlaw(1, 3, 0)[0] - 1.48875061) <= 1e-7) | ||
|
||
|
||
def test_random_exp(): | ||
assert (abs(random_exp(1, 2, 0)[0] - 1.39793725) <= 1e-7) | ||
|
||
|
||
def test_hypo_powerLaw_null(): | ||
assert abs(hypo_powerLaw_null(0, 0, 0, 0) - 0) <= 1e-7 | ||
|
||
|
||
def test_hypo_exp_null(): | ||
assert abs(hypo_exp_null(0, 0, 0, seed=0) - 1.0) <= 1e-7 |