diff --git a/ibdw/mcmc.py b/ibdw/mcmc.py deleted file mode 100644 index 46ab984..0000000 --- a/ibdw/mcmc.py +++ /dev/null @@ -1,67 +0,0 @@ -import numpy as np -from model import * -from pylab import csv2rec -import pymc as pm -import os - -fname = '../ibd_loc_all_030509.csv' -# landmass = 'Africa' -# -# landmasses = {'Eurasia': ['Europe','Asia','Oceania'], -# 'Africa': ['Africa'], -# 'America': ['America']} - -data = csv2rec(fname) - -s_obs_gen = data['as'] -a_obs_gen = data['as'] + data.n*2. - -s_obs_af = data['hbs_gf']*.01 * data['n'] -a_obs_af = (1.-data['hbs_gf']*.01) * data['n'] - -s_obs = np.choose(s_obs_gen.mask, [s_obs_gen.data, s_obs_af.data]) -a_obs = np.choose(a_obs_gen.mask, [a_obs_gen.data, a_obs_af.data]) - -where_interpretable = np.where(1-(s_obs_af.mask & s_obs_gen.mask)) - -s_obs = s_obs[where_interpretable].astype('int') -a_obs = a_obs[where_interpretable].astype('int') - - -fdata = data[where_interpretable] -locs = [] -from_ind = [] -locs = [(float(fdata[0].long), float(fdata[0].lat))] -from_ind = [0] -for i in xrange(1,len(fdata)): - row = fdata[i] - - # If repeat location, add observation - loc = (float(row.long), float(row.lat)) - if loc in locs: - from_ind.append(locs.index(loc)) - - # Otherwise, new obs - else: - locs.append(loc) - from_ind.append(max(from_ind)+1) -from_ind = np.array(from_ind) -to_ind = [np.where(from_ind == i)[0] for i in xrange(max(from_ind)+1)] - -lon = np.array(locs)[:,0] -lat = np.array(locs)[:,1] - - - -M=pm.MCMC(make_model(s_obs,s_obs+a_obs,lon,lat,from_ind,{}), db='hdf5', dbname=os.path.basename(fname)+'_matern.hdf5', complevel=1) -M.db._h5file.createGroup('/','metadata') -M.db._h5file.createArray('/metadata','logp_mesh',M.logp_mesh) -M.use_step_method(pm.AdaptiveMetropolis, list(M.stochastics -set([M.f, M.eps_p_f])), verbose=0, delay=50000) -for s in M.stochastics | M.deterministics | M.potentials: - s.verbose = 0 -M.use_step_method(FieldStepper, M.f, 1./M.V, M.V, M.C_eval, M.M_eval, M.logp_mesh, M.eps_p_f, to_ind, jump_tau = False) -M.isample(500000,0,100, verbose=0) -# M.isample(10000,0,10) -# from pylab import * -# plot(M.trace('f')[:]) -# M.use_step_method(FieldStepper, M.f, 1./M.V, M.V, M.C_eval, M.M_eval, M.logp_mesh, M.eps_p_f, ti, jump_tau=False) \ No newline at end of file diff --git a/ibdw/model.py b/ibdw/model.py index d4cfd99..a10ccf8 100644 --- a/ibdw/model.py +++ b/ibdw/model.py @@ -7,15 +7,54 @@ import numpy as np import pymc as pm import gc -from map_utils import basic_spatial_submodel, invlogit +from map_utils import * -__all__ = ['make_model','postproc','f_name','x_name','nugget_name','f_has_nugget','metadata_keys'] +__all__ = ['make_model','postproc','f_name','x_name','nugget_name','f_has_nugget','metadata_keys','step_method_orders'] + +def ibd_spatial_submodel(): + """ + A small function that creates the mean and covariance object + of the random field. + """ + + # Anisotropy parameters. + inc = pm.CircVonMises('inc', 0, 0) + sqrt_ecc = pm.Uniform('sqrt_ecc', 0, .95) + ecc = pm.Lambda('ecc', lambda s=sqrt_ecc: s**2) + + # The partial sill. + amp = pm.Exponential('amp', .1, value=1.) + + # The range parameter. Units are RADIANS. + # 1 radian = the radius of the earth, about 6378.1 km + scale_shift = pm.Exponential('scale_shift', 1./.08, value=.08) + scale = pm.Lambda('scale',lambda s=scale_shift: s+.01) -def make_model(s_obs,a_obs,lon,lat,from_ind,covariate_values): + # This parameter controls the degree of differentiability of the field. + diff_degree = pm.Uniform('diff_degree', .01, 3) + + # The nugget variance. + V = pm.Exponential('V', .1, value=1.) + + # Create the covariance & its evaluation at the data locations. + @pm.deterministic(trace=True) + def C(amp=amp, scale=scale, inc=inc, ecc=ecc, diff_degree=diff_degree): + return pm.gp.FullRankCovariance(pm.gp.cov_funs.matern.aniso_geo_rad, amp=amp, scale=scale, inc=inc, ecc=ecc, diff_degree=diff_degree) + + return locals() + +def make_model(pos,neg,lon,lat,covariate_values,cpus=1): """ + This function is required by the generic MBG code. """ + + # Non-unique data locations + data_mesh = combine_spatial_inputs(lon, lat) - locs = [(lon[0], lat[0]))] + s_hat = (pos+1.)/(pos+neg+2.) + + # Uniquify the data locations. + locs = [(lon[0], lat[0])] fi = [0] for i in xrange(1,len(lon)): @@ -33,19 +72,31 @@ def make_model(s_obs,a_obs,lon,lat,from_ind,covariate_values): lon = np.array(locs)[:,0] lat = np.array(locs)[:,1] - - # V = pm.Exponential('V',.1,value=1.) - V = pm.OneOverX('V',value=1.) + # Unique data locations + logp_mesh = combine_spatial_inputs(lon,lat) + + # Create the mean & its evaluation at the data locations. + M, M_eval = trivial_means(logp_mesh) init_OK = False while not init_OK: try: # Space-time component - sp_sub = basic_spatial_submodel(lon, lat, covariate_values) + sp_sub = ibd_spatial_submodel() + covariate_dict, C_eval = cd_and_C_eval(covariate_values, sp_sub['C'], logp_mesh) # The field evaluated at the uniquified data locations - f = pm.MvNormalCov('f', sp_sub['M_eval'], sp_sub['C_eval']) + f = pm.MvNormalCov('f', M_eval, C_eval) + + # The field plus the nugget + eps_p_f = pm.Normal('eps_p_f', f[fi], 1./sp_sub['V'], value=pm.logit(s_hat)) + + # The allele frequency + s = pm.InvLogit('s',eps_p_f) + + # The observed allele frequencies + data = pm.Binomial('data', pos+neg, s, value=pos, observed=True) init_OK = True except pm.ZeroProbability, msg: @@ -53,12 +104,6 @@ def make_model(s_obs,a_obs,lon,lat,from_ind,covariate_values): init_OK = False gc.collect() - # The field plus the nugget - eps_p_f = pm.Normal('eps_p_f', f[from_ind], 1./V) - - s = pm.InvLogit('s',eps_p_f) - - data = pm.Binomial('data', s_obs + a_obs, s, value=s_obs, observed=True) out = locals() out.pop('sp_sub') @@ -77,7 +122,6 @@ def make_model(s_obs,a_obs,lon,lat,from_ind,covariate_values): x_name = 'data_mesh' f_has_nugget = True nugget_name = 'V' -metadata_keys = ['data_mesh','from_ind','to_ind'] - - -postproc = invlogit \ No newline at end of file +metadata_keys = ['data_mesh','fi','ti'] +postproc = invlogit +step_method_orders = {'f':(FieldStepper, )} \ No newline at end of file diff --git a/ibdw/predict.py b/ibdw/predict.py deleted file mode 100644 index 3232121..0000000 --- a/ibdw/predict.py +++ /dev/null @@ -1,45 +0,0 @@ -from map_utils import * -import tables as tb -import numpy as np - -import sys -print sys.argv - -# map -d ibd-world -k 10kbuf5kres.asc -f ibd_loc_all_030509.csv_matern.hdf5 -b 1000 -r 20 -t 400 -i 5000 - -mask_name = '10kbuf5kres.asc' -x, unmasked = asc_to_locs(mask_name,thin=20, bufsize=3) -hf = tb.openFile('ibd_loc_all_030509.csv_matern.hdf5') -ch = hf.root.chain0 -meta = hf.root.metadata - -n_bins = 1000 -bins = np.linspace(0,1,n_bins) -def binfn(arr,n_bins=n_bins): - return np.array(arr*n_bins,dtype=int) - -q = [.05,.25,.5,.75,.95] - -hsr = histogram_reduce(bins, binfn) -hsf = histogram_finalize(bins, q, hsr) - -def finalize(prod, n): - mean = prod[mean_reduce] / n - var = prod[var_reduce] / n - mean**2 - std = np.sqrt(var) - std_to_mean = std/mean - out = {'mean': mean, 'var': var, 'std': std, 'std-to-mean':std_to_mean} - out.update(hsf(prod, n)) - return out - -# hdf5_to_samps(chain, metadata, x, burn, thin, total, fns, f_label, pred_cv_dict=None, nugget_label=None, postproc=None, finalize=None) -products = hdf5_to_samps(ch,meta,x,1000,400,5000,[mean_reduce, var_reduce, hsr], 'f', None, 'V', invlogit, finalize) -# products = hdf5_to_samps(ch,meta,x,1000,400,5000,[mean_reduce, var_reduce],None, invlogit, finalize) - -# mean_surf = vec_to_asc(products['mean'],mask_name,'ihd-mean.asc',unmasked) -# std_surf = vec_to_asc(products['std'],mask_name,'ihd-std.asc',unmasked) -# std_mean_surf = vec_to_asc(products['std-to-mean'],mask_name,'ihd-std-to-mean.asc',unmasked) - -for k,v in products.iteritems(): - print k - q=vec_to_asc(v,mask_name,'ihd-%s.asc'%k,unmasked) \ No newline at end of file diff --git a/setup.py b/setup.py index e69de29..3eb5b54 100644 --- a/setup.py +++ b/setup.py @@ -0,0 +1,14 @@ +# Author: Anand Patil +# Date: 6 Feb 2009 +# License: Creative Commons BY-NC-SA +#################################### + +from setuptools import setup +from numpy.distutils.misc_util import Configuration +import os +config = Configuration('ibdw',parent_package=None,top_path=None) + +config.packages = ["ibdw"] +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(**(config.todict())) \ No newline at end of file