Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag

'bioperl-ext-release-1-5-0-rc1'.

svn path=/bioperl-ext/tags/bioperl-ext-release-1-5-0-rc1/; revision=661
  • Loading branch information...
commit 8c861791408fea9471cfb27ad56e12cddbe72245 1 parent 8695d5b
nobody authored
View
8 Bio/Ext/Align/Align.xs
@@ -3514,14 +3514,6 @@ end2(obj)
OUTPUT:
RETVAL
-int
-score(obj)
- dpAlign_AlignOutput * obj
- CODE:
- RETVAL = obj->score;
- OUTPUT:
- RETVAL
-
void
DESTROY(obj)
dpAlign_AlignOutput * obj
View
2  Bio/Ext/Align/Makefile.PL
@@ -3,7 +3,7 @@ use ExtUtils::MakeMaker;
# the contents of the Makefile that is written.
WriteMakefile(
'NAME' => 'Bio::Ext::Align',
- 'VERSION' => '1.5.1',
+ 'VERSION' => '0.1',
'LIBS' => ['-lm'], # e.g., '-lm'
'DEFINE' => '-DPOSIX -DNOERROR', # e.g., '-DHAVE_SOMETHING'
'INC' => '-I./libs', # e.g., '-I/usr/include/other'
View
1  Bio/Ext/Align/libs/dpalign.h
@@ -29,7 +29,6 @@ typedef struct _dpAlign_alnoutput {
char * aln2; /* aligned subsequence of sequence 2 with space '-' inserted */
int start2; /* start point of aligned subsequence 2 */
int end2; /* end point of aligned subsequence 2 */
- int score; /* score of this alignment */
} dpAlign_AlignOutput;
typedef struct _dpAlign_SequenceProfile {
View
1  Bio/Ext/Align/libs/linspc.c
@@ -651,7 +651,6 @@ traceback(sw_AlignStruct * as)
if (ao == NULL)
dpAlign_fatal("Can't allocate memory for AlignOutput!\n");
- ao->score = as->score;
/* insert gaps to sequence 1 */
k = 0;
for (i = 0; i <= aln1_len; ++i) {
View
49 Bio/Ext/HMM/HMM.pm
@@ -1,49 +0,0 @@
-
-package Bio::Ext::HMM;
-
-use vars qw($AUTOLOAD @ISA @EXPORT_OK $dl_debug);
-use Exporter;
-use Carp;
-use strict;
-
-use DynaLoader;
-
-
-@ISA = qw(Exporter DynaLoader);
-# Items to export into callers namespace by default. Note: do not export
-# names by default without a very good reason. Use EXPORT_OK instead.
-# Do not simply export all your public functions/methods/constants.
-
-sub AUTOLOAD {
- # This AUTOLOAD is used to 'autoload' constants from the constant()
- # XS function. If a constant is not found then control is passed
- # to the AUTOLOAD in AutoLoader.
-
- my $constname;
- ($constname = $AUTOLOAD) =~ s/.*:://;
- my $val = constant($constname, @_ ? $_[0] : 0);
- if ($! != 0) {
- if ($! =~ /Invalid/) {
- $AutoLoader::AUTOLOAD = $AUTOLOAD;
- goto &AutoLoader::AUTOLOAD;
- }
- else {
- croak "Your vendor has not defined Test macro $constname";
- }
- }
- eval "sub $AUTOLOAD { $val }";
- goto &$AUTOLOAD;
-}
-
-BEGIN {
- $dl_debug = 40;
-}
-
-bootstrap Bio::Ext::HMM; # hopefully has the correct things...
-
-# Preloaded methods go here.
-
-# Autoload methods go after __END__, and are processed by the autosplit program.
-
-1;
-__END__
View
207 Bio/Ext/HMM/HMM.xs
@@ -1,207 +0,0 @@
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-#ifdef __cplusplus
-}
-#endif
-
-#include "hmm.h"
-
-MODULE = Bio::Ext::HMM PACKAGE = Bio::Ext::HMM
-
-void
-HMM_statistical_training(class, hmm, obs, hs)
- char * class
- HMM * hmm
- SV * obs
- SV * hs
- PPCODE:
- AV * obs_av = (AV *) SvRV(obs);
- AV * hs_av = (AV *) SvRV(hs);
- int i;
- int avlen = av_len(obs_av);
- char ** obs_ar = (char **) malloc(avlen*sizeof(char *));
- char ** hs_ar = (char **) malloc(avlen*sizeof(char *));
- int * obs_len = (int *) malloc(avlen*sizeof(int));
- if (obs_ar == NULL || hs_ar == NULL)
- croak("Can't allocate memory for observation and/or state arrays!\n");
- if (obs_len == NULL)
- croak("Can't allocate memory for observation length array!\n");
- for (i = 0; i < avlen; ++i) {
- obs_ar[i] = (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na);
- obs_len[i] = strlen(obs_ar[i]);
- obs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (obs_ar[i] == NULL)
- croak("Can't allocate memory for observation array!\n");
- strcpy(obs_ar[i], (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na));
- hs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (hs_ar[i] == NULL)
- croak("Can't allocate memory for state array!\n");
- strcpy(hs_ar[i], (char *) SvPV(*av_fetch(hs_av, i, 0), PL_na));
- omap(hmm, obs_ar[i], obs_len[i]);
- smap(hmm, hs_ar[i], obs_len[i]);
- }
- state_est(hmm, obs_ar, hs_ar, obs_len, avlen);
- for (i = 0; i < avlen; ++i) {
- free(obs_ar[i]);
- free(hs_ar[i]);
- }
- free(obs_ar);
- free(hs_ar);
- free(obs_len);
-
-double
-HMM_likelihood(class, hmm, seq)
- char * class
- HMM * hmm
- char * seq
- CODE:
- int T = strlen(seq);
- char obs[T+1];
- strcpy(obs, seq);
- omap(hmm, obs, T);
- RETVAL = Palpha(hmm, obs, T);
- OUTPUT:
- RETVAL
-
-void
-HMM_baum_welch_training(class, hmm, obs)
- char * class
- HMM * hmm
- SV * obs
- PPCODE:
- AV * obs_av = (AV *) SvRV(obs);
- int i;
- int avlen = av_len(obs_av);
- char ** obs_ar = (char **) malloc(avlen*sizeof(char *));
- int * obs_len = (int *) malloc(avlen*sizeof(int));
- if (obs_ar == NULL)
- croak("Can't allocate memory for observation arrays!\n");
- if (obs_len == NULL)
- croak("Can't allocate memory for observation length array!\n");
- for (i = 0; i < avlen; ++i) {
- obs_ar[i] = (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na);
- obs_len[i] = strlen(obs_ar[i]);
- obs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (obs_ar[i] == NULL)
- croak("Can't allocate memory for observation array!\n");
- strcpy(obs_ar[i], (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na));
- omap(hmm, obs_ar[i], obs_len[i]);
- }
- baum_welch(hmm, obs_ar, obs_len, avlen);
- for (i = 0; i < avlen; ++i)
- free(obs_ar[i]);
- free(obs_ar);
-
-SV *
-HMM_viterbi(class, hmm, seq)
- char * class
- HMM * hmm
- char * seq
- PPCODE:
- SV * sv;
- int T = strlen(seq);
- char * hss = (char *) malloc(T*sizeof(char));
- char obs[T+1];
- if (hss == NULL)
- croak("Can't allocate memory for hidden state sequence!\n");
- strcpy(obs, seq);
- omap(hmm, obs, T);
- viterbi(hmm, hss, obs, T);
- sv = newSVpv(hss, strlen(hss));
- free(hss);
- PUSHs(sv_2mortal(sv));
-
-
-MODULE = Bio::Ext::HMM PACKAGE = Bio::Ext::HMM::HMM
-
-HMM *
-new(class, symbols, states)
- char * class
- char * symbols
- char * states
- PPCODE:
- HMM * out;
- out = HMM_new(symbols, states);
- ST(0) = sv_newmortal();
- sv_setref_pv(ST(0), class, (void *) out);
- XSRETURN(1);
-
-double
-get_init_entry(class, hmm, state)
- char * class
- HMM * hmm
- char * state
- CODE:
- RETVAL = HMM_get_init_entry(hmm, state);
- OUTPUT:
- RETVAL
-
-void
-set_init_entry(class, hmm, state, val)
- char * class
- HMM * hmm
- char * state
- double val
- CODE:
- HMM_set_init_entry(hmm, state, val);
-
-double
-get_a_entry(class, hmm, state1, state2)
- char * class
- HMM * hmm
- char * state1
- char * state2
- CODE:
- RETVAL = HMM_get_a_entry(hmm, state1, state2);
- OUTPUT:
- RETVAL
-
-void
-set_a_entry(class, hmm, state1, state2, val)
- char * class
- HMM * hmm
- char * state1
- char * state2
- double val
- CODE:
- HMM_set_a_entry(hmm, state1, state2, val);
-
-double
-get_e_entry(class, hmm, state, symbol)
- char * class
- HMM * hmm
- char * state
- char * symbol
- CODE:
- RETVAL = HMM_get_e_entry(hmm, state, symbol);
- OUTPUT:
- RETVAL
-
-void
-set_e_entry(class, hmm, state, symbol, val)
- char * class
- HMM * hmm
- char * state
- char * symbol
- double val
- CODE:
- HMM_set_e_entry(hmm, state, symbol, val);
-
-void
-DESTROY(obj)
- HMM * obj
- PPCODE:
- int i;
- free(obj->init);
- for (i = 0; i < obj->N; ++i)
- free(obj->a_mat[i]);
- free(obj->a_mat);
- for (i = 0; i < obj->N; ++i)
- free(obj->e_mat[i]);
- free(obj->e_mat);
-
View
13 Bio/Ext/HMM/Makefile.PL
@@ -1,13 +0,0 @@
-use ExtUtils::MakeMaker;
-# See lib/ExtUtils/MakeMaker.pm for details of how to influence
-# the contents of the Makefile that is written.
-WriteMakefile(
- 'NAME' => 'Bio::Ext::HMM',
- 'VERSION' => '0.1',
- 'LIBS' => ['-lm'], # e.g., '-lm'
- 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING'
- 'INC' => '', # e.g., '-I/usr/include/other'
- 'OBJECT' => 'HMM.o hmmlib.o',
- 'clean' => { 'FILES' => '*.o' }
-);
-
View
36 Bio/Ext/HMM/hmm.h
@@ -1,36 +0,0 @@
-#ifndef _HMM_H_
-#define _HMM_H_
-
-typedef struct HMM
-{
- int M;
- int N;
- char * symbols;
- unsigned char omap[256];
- char * states;
- unsigned char smap[256];
- double * init;
- double ** a_mat;
- double ** e_mat;
-} HMM;
-
-#define UPPER_TOL 0.000001
-#define LOWER_TOL 1e-100
-
-HMM * init_HMM(char *, char *, double *, double **, double **);
-HMM * HMM_new(char *, char *);
-double HMM_get_init_entry(HMM *, char *);
-void HMM_set_init_entry(HMM *, char *, double);
-double HMM_get_a_entry(HMM *, char *, char *);
-void HMM_set_a_entry(HMM *, char *, char *, double);
-double HMM_get_e_entry(HMM *, char *, char *);
-void HMM_set_e_entry(HMM *, char *, char *, double);
-void omap(HMM *, char *, int);
-void smap(HMM *, char *, int);
-double Palpha(HMM *, char *, int);
-double Pbeta(HMM *, char *, int);
-void viterbi(HMM *, char *, char *, int);
-void state_est(HMM *, char **, char **, int *, int);
-void baum_welch(HMM *, char **, int *, int);
-
-#endif
View
779 Bio/Ext/HMM/hmmlib.c
@@ -1,779 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <sys/types.h>
-#include "hmm.h"
-
-static unsigned int hmm_seed;
-static void alphaT(HMM *, double *, char *, int);
-static void beta1(HMM *, double *, char *, int);
-static double * xi(HMM *);
-static void alpha_mat(HMM *, double **, char *, int);
-static void beta_mat(HMM *, double **, char *, int);
-static void HMM_fatal(char *);
-static double sumLogProbs(double *, int);
-static double sumLogProb(double, double);
-
-/*
- sumLogProbs adds up the log probabilities stored in the array
- logprobs of size sz by dividing every items with the largest
- item in the array. This prevents underflow such that calculations
- for long sequence are allowed. The function returns the log of the
- sum of log probabilities.
- log(a_0+a_1+...+a_n) = log(a_max)+log(a_0/a_max+a_1/a_max+...+a_n/a_max)
- */
-static double
-sumLogProbs(double * logprobs, int sz)
-{
- double max = 0.0;
- double p = 0.0;
- int i;
- for (i = 0; i < sz; ++i)
- if (i == 0 || logprobs[i] > max)
- max = logprobs[i];
- if (max == log(0))
- return max;
- for (i = 0; i < sz; ++i)
- p += exp(logprobs[i] - max);
- return max + log(p);
-}
-
-/*
- sumLogProb is similar to sumLogProbs except that it is adding up
- only two log probabilities p1 and p2. It returns the log of the sum
- of p1 and p2.
- */
-static double
-sumLogProb(double p1, double p2)
-{
- if (p1 == log(0) && p2 == log(0))
- return p1;
- if (p1 > p2)
- return p1 + log(1 + exp(p2 - p1));
- else
- return p2 + log(1 + exp(p1 - p2));
-}
-
-/*
- HMM_fatal prints the string s to STDERR and then exit
- */
-static void
-HMM_fatal(char * s)
-{
- fprintf(stderr, s);
- exit(-1);
-}
-
-/*
- HMM_new initializes an HMM object with string of all possible
- symbols and string of all possible states. The other parameters
- like initial probabilities (HMM->init), state transition matrix
- (HMM->a_mat) and emission matrix (HMM->e_mat) are initialized
- randomly such that it is ready for training.
- */
-HMM *
-HMM_new(char * symbols, char * states)
-{
- HMM * hmm;
- double sum, random;
- int i, j;
-
- hmm_seed = time(NULL);
- hmm = (HMM *) malloc(sizeof(HMM));
- if (hmm == NULL)
- HMM_fatal("Can't allocate memory for HMM, die!\n");
- hmm->symbols = symbols;
- hmm->states = states;
- hmm->M = strlen(symbols);
- hmm->N = strlen(states);
- for (i = 0; i < hmm->M; ++i)
- hmm->omap[symbols[i]] = i;
- for (i = 0; i < hmm->N; ++i)
- hmm->smap[states[i]] = i;
- hmm->init = (double *) malloc(hmm->N*sizeof(double));
- if (hmm->init == NULL)
- HMM_fatal("Can't allocate memory for init array of HMM, die!\n");
-/* initialize the initial state array */
- sum = 0.0;
- for (j = 0; j < hmm->N; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->init[j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->N; ++j)
- hmm->init[j] /= sum;
- hmm->e_mat = (double **) malloc(hmm->N*sizeof(double *));
- hmm->a_mat = (double **) malloc(hmm->N*sizeof(double *));
- if (hmm->a_mat == NULL)
- HMM_fatal("Can't allocate memory for transition matrix of HMM, die!\n");
- for (i = 0; i < hmm->N; ++i) {
- hmm->a_mat[i] = (double *) malloc(hmm->N*sizeof(double));
- if (hmm->a_mat[i] == NULL)
- HMM_fatal("Can't allocate memory for transition matrix of HMM, die!\n");
-
-/* randomize the transition matrix */
- sum = 0.0;
- for (j = 0; j < hmm->N; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->a_mat[i][j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->N; ++j)
- hmm->a_mat[i][j] /= sum;
- }
- hmm->e_mat = (double **) malloc(hmm->N*sizeof(double *));
- if (hmm->e_mat == NULL)
- HMM_fatal("Can't allocate memory for emission matrix of HMM, die!\n");
- for (i = 0; i < hmm->N; ++i) {
- hmm->e_mat[i] = (double *) malloc(hmm->M*sizeof(double));
- if (hmm->e_mat[i] == NULL)
- HMM_fatal("Can't allocate memory for emission matrix of HMM, die!\n");
-
-/* randomize the emission matrix */
- sum = 0.0;
- for (j = 0; j < hmm->M; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->e_mat[i][j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->M; ++j)
- hmm->e_mat[i][j] /= sum;
- }
- return hmm;
-}
-
-/*
- init_HMM is similar to HMM_new except that it allows you to initialize
- the HMM with your own initial probabilities (init), your own state
- transition matrix (a_mat) and your own emission matrix (e_mat).
- */
-HMM *
-init_HMM(char * symbols, char * states, double * init, double ** a_mat, double ** e_mat)
-{
- HMM * hmm;
- int i;
-
- hmm = (HMM *) calloc(1, sizeof(HMM));
- if (hmm == NULL) {
- fprintf(stderr, "Can't allocate memory for HMM, die!\n");
- exit(-1);
- }
- hmm->symbols = symbols;
- hmm->states = states;
- hmm->M = strlen(symbols);
- hmm->N = strlen(states);
- for (i = 0; i < hmm->M; ++i)
- hmm->omap[symbols[i]] = i;
- for (i = 0; i < hmm->N; ++i)
- hmm->smap[states[i]] = i;
- hmm->init = init;
- hmm->a_mat = a_mat;
- hmm->e_mat = e_mat;
- return hmm;
-}
-
-/*
- HMM_get_init_entry returns the value of a cell in the initial
- probability array based on the state supplied.
- HMM_get_init_entry is written such that XS can access the C array.
- */
-double
-HMM_get_init_entry(HMM * hmm, char * state)
-{
- return hmm->init[hmm->smap[state[0]]];
-}
-
-/*
- HMM_set_init_entry sets the value of a cell in the initial
- probability array based on the val supplied.
- HMM_set_init_entry is written such that XS can modify the C array.
- */
-void
-HMM_set_init_entry(HMM * hmm, char * state, double val)
-{
- hmm->init[hmm->smap[state[0]]] = val;
-}
-
-/*
- HMM_get_a_entry returns the value of a cell in the state transition
- matrix based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_get_a_entry is written such that XS can access the C matrix.
- */
-double
-HMM_get_a_entry(HMM * hmm, char * state1, char * state2)
-{
- return hmm->a_mat[hmm->smap[state1[0]]][hmm->smap[state2[0]]];
-}
-
-/*
- HMM_set_a_entry sets the value of a cell in the state transition
- matrix to val based on the from-state (state1) and the to-state
- (state2) supplied.
- HMM_set_a_entry is written such that XS can modify the C matrix.
- */
-void
-HMM_set_a_entry(HMM * hmm, char * state1, char * state2, double val)
-{
- hmm->a_mat[hmm->smap[state1[0]]][hmm->smap[state2[0]]] = val;
-}
-
-/*
- HMM_get_e_entry returns the value of a cell in the emission
- matrix based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_get_e_entry is written such that XS can access the C matrix.
- */
-double
-HMM_get_e_entry(HMM * hmm, char * state, char * symbol)
-{
- return hmm->e_mat[hmm->smap[state[0]]][hmm->omap[symbol[0]]];
-}
-
-/*
- HMM_set_e_entry sets the value of a cell in the emission matrix
- to val based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_set_e_entry is written such that XS can modify the C matrix.
- */
-void
-HMM_set_e_entry(HMM * hmm, char * state, char * symbol, double val)
-{
- hmm->e_mat[hmm->smap[state[0]]][hmm->omap[symbol[0]]] = val;
-}
-
-/*
- omap takes a symbol character and return its index in the state
- transition matrix.
- */
-void
-omap(HMM * hmm, char * c, int L)
-{
- int i;
-
- for (i = 0; i < L; ++i)
- c[i] = hmm->omap[c[i]];
-}
-
-/*
- smap takes a state character and return its index in the emission
- matrix.
- */
-void
-smap(HMM * hmm, char * c, int L)
-{
- int i;
-
- for (i = 0; i < L; ++i)
- c[i] = hmm->smap[c[i]];
-}
-
-/*
- alpha function for HMM. It computes the alpha function value
- at the time T which is the length of the observation sequence
- obs for every state. Note that alpha_vec needs to be malloc'd
- before we supply it to alphaT.
- Note that the return vector is in logarithmic form.
- */
-static void
-alphaT(HMM * hmm, double * alpha_vec, char * obs, int T)
-{
- int N = hmm->N;
- double alpha_vec_prev[N];
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i)
- alpha_vec[i] = log(init[i]) + log(e_mat[i][obs[0]]);
- if (T == 1)
- return;
- if (T > 1) {
- for (i = 1; i < T; ++i) {
- for (j = 0; j < N; ++j) {
- alpha_vec_prev[j] = alpha_vec[j];
- alpha_vec[j] = log(0);
- }
-
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- alpha_vec[j] = sumLogProb(alpha_vec[j], alpha_vec_prev[k] + log(a_mat[k][j]));
- alpha_vec[j] = alpha_vec[j] + log(e_mat[j][obs[i]]);
- }
- }
- }
- return;
-}
-
-/*
- Palpha computes the probability of an observation sequence
- using the alpha function.
- */
-double
-Palpha(HMM * hmm, char * obs, int T)
-{
- double P = 0.0;
- int i;
- double * alpha_vec;
-
- alpha_vec = (double *) calloc(hmm->N, sizeof(double));
- if (alpha_vec == NULL) {
- fprintf(stderr, "Can't allocate memory for alpha vector, die!\n");
- exit(-1);
- }
- alphaT(hmm, alpha_vec, obs, T);
- P = sumLogProbs(alpha_vec, hmm->N);
- free(alpha_vec);
- return P;
-}
-
-
-/*
- beta function for HMM. It computes the beta function value
- at the time 1 for every state. Note that beta_vec should
- be allocated memory thru malloc before we use it.
- Note that the return vector is in logarithmic form.
- */
-static void
-beta1(HMM * hmm, double * beta_vec, char * obs, int T)
-{
- int N = hmm->N;
- double beta_vec_next[N];
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i)
- beta_vec[i] = 0;
- if (T == 1)
- return;
- if (T > 1) {
- for (i = T - 1; i >= 1; --i) {
- for (j = 0; j < N; ++j) {
- beta_vec_next[j] = beta_vec[j];
- beta_vec[j] = log(0);
- }
-
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- beta_vec[j] = sumLogProb(beta_vec[j], beta_vec_next[k] + log(a_mat[j][k]) + log(e_mat[k][obs[i]]));
- }
- }
- }
- return;
-}
-
-/*
- Pbeta computes the probability of an observation sequence
- using the beta function.
- */
-double
-Pbeta(HMM * hmm, char * obs, int T)
-{
- double P = log(0);
- int i;
- double * beta_vec;
-
- beta_vec = (double *) calloc(hmm->N, sizeof(double));
- if (beta_vec == NULL) {
- fprintf(stderr, "Can't allocate memory for beta vector, die!\n");
- exit(-1);
- }
- beta1(hmm, beta_vec, obs, T);
- for (i = 0; i < hmm->N; ++i) {
- P = sumLogProb(P, log(hmm->init[i]) + log(hmm->e_mat[i][obs[0]]) + beta_vec[i]);
- }
- free(beta_vec);
- return P;
-}
-
-/*
- alpha_mat computes the values for the alpha function in the TxN space
- where T is the length of the observation sequence and N is the number
- of states. Note that the alpha matrix has to be malloc'd before we
- supply it to alpha_mat. The computed values will be stored in
- alpha.
- */
-static void
-alpha_mat(HMM * hmm, double ** alpha, char * obs, int T)
-{
- int i, j, t;
- int N = hmm->N;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- double * init = hmm->init;
-
- for (i = 0; i < N; ++i)
- alpha[i][0] = log(init[i]) + log(e_mat[i][obs[0]]);
- for (t = 1; t < T; ++t) {
- for (i = 0; i < N; ++i)
- alpha[i][t] = log(0);
- for (i = 0; i < N; ++i) {
- for (j = 0; j < N; ++j)
- alpha[i][t] = sumLogProb(alpha[i][t], alpha[j][t-1] + log(a_mat[j][i]));
- alpha[i][t] += log(e_mat[i][obs[t]]);
- }
- }
-}
-
-/*
- beta_mat computes the values for the alpha function in the TxN space
- where T is the length of the observation sequence and N is the number
- of states. Note that the beta matrix has to be malloc'd before we
- supply it to beta_mat. The computed values will be stored in
- beta.
- */
-static void
-beta_mat(HMM * hmm, double ** beta, char * obs, int T)
-{
- int i, j, t;
- int N = hmm->N;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- double * init = hmm->init;
-
- for (i = 0; i < N; ++i)
- beta[i][T-1] = 0;
- for (t = T-2; t >= 0; --t) {
- for (i = 0; i < N; ++i)
- beta[i][t] = log(0);
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- beta[i][t] = sumLogProb(beta[i][t], beta[j][t+1] + log(a_mat[i][j]) + log(e_mat[j][obs[t+1]]));
- }
-}
-
-/*
- baum_welch takes an array of observation sequence strings (obs) and
- the number of strings in the array (L) to estimate the parameters
- of an HMM using a special case of Expectation Maximization called
- Baum-Welch algorithm. baum_welch doesn't return anything but upon
- its completion, it will set the init array, a_mat matrix, e_mat
- matrix with parameters that maximize the chance of seeing the
- observation sequences you supplied.
- */
-void
-baum_welch(HMM * hmm, char ** obs, int * T, int L)
-{
- int i, j, l, t;
- int maxL = -1; /* length of longest training sequence */
- double P[L];
- double newP[L];
- double S, newS, diff;
- int N = hmm->N, M = hmm->M;
- double ** alpha;
- double ** beta;
- double A[N][N];
- double E[N][M];
- double init[N];
- double a_mat[N][N];
- double e_mat[N][M];
-
- for (l = 0; l < L; ++l) {
- P[l] = Palpha(hmm, obs[l], T[l]);
- if (maxL == -1)
- maxL = T[l];
- else if (T[l] > maxL)
- maxL = T[l];
- }
-
- alpha = (double **) malloc(hmm->N*sizeof(double *));
- if (alpha == NULL) {
- fprintf(stderr, "Can't allocate memory for alpha matrix!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
- alpha[i] = (double *) malloc(maxL*sizeof(double));
- if (alpha[i] == NULL) {
- fprintf(stderr, "Can't allocate memory for rows of alpha matrix!\n");
- exit(-1);
- }
- }
- beta = (double **) malloc(hmm->N*sizeof(double *));
- if (beta == NULL) {
- fprintf(stderr, "Can't allocate memory for beta matrix!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
- beta[i] = (double *) malloc(maxL*sizeof(double));
- if (beta[i] == NULL) {
- fprintf(stderr, "Can't allocate memory for rows of beta matrix!\n");
- exit(-1);
- }
- }
-
- do { /* do...while loop */
-
-/* initialize the matrices */
- for (i = 0; i < N; ++i)
- init[i] = 0.0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- A[i][j] = 0.0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j)
- E[i][j] = 0.0;
-
-/* use all training sequences to train */
- for (l = 0; l < L; ++l) {
- alpha_mat(hmm, alpha, obs[l], T[l]);
- beta_mat(hmm, beta, obs[l], T[l]);
-
-/* estimate new initial state probability */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- init[i] += exp(alpha[i][0] + log(hmm->a_mat[i][j]) + log(hmm->e_mat[j][obs[l][1]]) + beta[j][1] - P[l]);
-
-/* estimate A matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- for (t = 0; t < T[l]-1; ++t)
- A[i][j] += exp(alpha[i][t] + log(hmm->a_mat[i][j]) + log(hmm->e_mat[j][obs[l][t+1]]) + beta[j][t+1] - P[l]);
-
-/* Estimate E matrix */
- for (i = 0; i < N; ++i)
- for (t = 0; t < T[l]; ++t)
- E[i][obs[l][t]] += exp(alpha[i][t] + beta[i][t] - P[l]);
- }
-
-/* Estimate init */
- for (i = 0; i < N; ++i)
- hmm->init[i] = init[i] / (double) L;
-
-/* Estimate a_mat */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- a_mat[i][j] = hmm->a_mat[i][j];
-
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- hmm->a_mat[i][j] = 0.0;
- for (t = 0; t < N; ++t)
- hmm->a_mat[i][j] += A[i][t];
- hmm->a_mat[i][j] = A[i][j] / hmm->a_mat[i][j];
- }
-
-/* Estimate e_mat */
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j)
- e_mat[i][j] = hmm->e_mat[i][j];
-
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- hmm->e_mat[i][j] = 0.0;
- for (t = 0; t < M; ++t)
- hmm->e_mat[i][j] += E[i][t];
- hmm->e_mat[i][j] = E[i][j] / hmm->e_mat[i][j];
- }
-
-/*
-// print parameter estimates for debugging
- for (i = 0; i < N; ++i)
- printf("%.2f\t", hmm->init[i]);
- printf("\n");
- for (i = 0; i < N; ++i) {
- for (j = 0; j < N; ++j)
- printf("%.2f\t", hmm->a_mat[i][j]);
- printf("\n");
- }
- for (i = 0; i < N; ++i) {
- for (j = 0; j < M; ++j)
- printf("%.2f\t", hmm->e_mat[i][j]);
- printf("\n");
- }
-*/
-
-/* find new P */
- S = 0.0;
- newS = 0.0;
- for (l = 0; l < L; ++l) {
- S += P[l];
- newP[l] = Palpha(hmm, obs[l], T[l]);
- newS += newP[l];
- P[l] = newP[l];
- }
- diff = newS - S;
-//printf("%g(%g)\t%g(%g)\t%g\n", newS, log(newS), S, log(S), diff);
- if (diff < 0 && fabs(diff) >= LOWER_TOL) {
- fprintf(stderr, "S should be monotonic increasing!\n");
- exit(-1);
- }
- }
- while (diff >= UPPER_TOL);
-
- for (i = 0; i < N; ++i) {
- free(alpha[i]);
- free(beta[i]);
- }
- free(alpha);
- free(beta);
-}
-
-/*
- viterbi algorithm takes a observation sequence (obs) and the HMM model
- (hmm) and then returns the hidden state sequence (hss) that maximizes
- the probability of seeing obs. The hss argument supplied should be
- allocated to the same amount of memory as obs.
- */
-void
-viterbi(HMM * hmm, char * hss, char * obs, int T)
-{
- double delta_vec[hmm->N];
- double delta_vec_prev[hmm->N];
- int N = hmm->N;
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
- double max;
- int max_i = -1;
- int phi_vec[T];
- int phi_mat[T][N];
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
-// delta_vec[i] = init[i]*e_mat[i][obs[0] - '1'];
-// delta_vec[i] = i == 0 ? log(0.5) : -HUGE_VAL;
- delta_vec[i] = log(init[i]);
- if (max_i < 0) {
- max_i = i;
- max = delta_vec[i];
- }
- else if (delta_vec[i] > max) {
- max_i = i;
- max = delta_vec[i];
- }
- }
- if (T == 1) {
- phi_vec[0] = max_i;
- hss[0] = hmm->states[phi_vec[0]];
- hss[1] = '\0';
- return;
- }
- if (T > 1) {
- for (i = 0; i < T; ++i) {
- for (j = 0; j < N; ++j) {
- delta_vec_prev[j] = delta_vec[j];
- delta_vec[j] = 0;
- }
- for (j = 0; j < N; ++j) {
- max_i = -1;
- for (k = 0; k < N; ++k) {
- double val = delta_vec_prev[k] + log(a_mat[k][j]);
- if (max_i < 0) {
- max_i = k;
- max = val;
- }
- else if (val > max) {
- max_i = k;
- max = val;
- }
- }
- delta_vec[j] = max + log(e_mat[j][obs[i]]);
- phi_mat[i][j] = max_i;
- }
- }
- }
- max_i = -1;
- for (i = 0; i < N; ++i) {
- if (max_i < 0) {
- max_i = i;
- max = delta_vec[i];
- }
- else if (delta_vec[i] > max) {
- max_i = i;
- max = delta_vec[i];
- }
- }
- phi_vec[T-1] = max_i;
-// traceback
- for (i = T-2; i >= 0; --i)
- phi_vec[i] = phi_mat[i+1][phi_vec[i+1]];
- for (i = 0; i < T; ++i)
- hss[i] = hmm->states[phi_vec[i]];
- hss[T] = '\0';
- return;
-}
-
-/*
- state_est takes an array of observation sequence strings (obs),
- an array of the corresponding hidden state sequence strings (sta)
- and the number of strings (L) to find the HMM parameters that
- maximize the chance of seeing the supplied observation sequences
- and their corresponding hidden state sequences. At the completion
- of this function, init, a_mat and e_mat will be set to the
- proper values.
- */
-void
-state_est(HMM * hmm, char ** obs, char ** sta, int * T, int L)
-{
- int N = hmm->N, M = hmm->M;
- int i, j, k, l;
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int pi[N];
- int A[N][N];
- int E[N][M];
-
-/* initialize the matrices */
- for (i = 0; i < N; ++i)
- pi[i] = 0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- A[i][j] = 0;
- a_mat[i][j] = 0.0;
- }
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- E[i][j] = 0;
- e_mat[i][j] = 0.0;
- }
-
-/* count occurrences */
- for (l = 0; l < L; ++l) {
- ++pi[sta[l][0]];
-
- for (i = 0; i < T[l] - 1; ++i)
- ++A[sta[l][i]][sta[l][i+1]];
- for (i = 0; i < T[l]; ++i)
- ++E[sta[l][i]][obs[l][i]];
- }
-
-/* Estimate initial state probability */
- for (i = 0; i < N; ++i)
- init[i] = (double) pi[i] / (double) L;
-
-/* Estimate state transition probability matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- a_mat[i][j] += A[i][k];
- a_mat[i][j] = A[i][j] / a_mat[i][j];
- }
-
-/* Estimate Symbol Emission matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- for (k = 0; k < M; ++k)
- e_mat[i][j] += E[i][k];
- e_mat[i][j] = E[i][j] / e_mat[i][j];
- }
-}
View
102 Bio/Ext/HMM/test.pl
@@ -1,102 +0,0 @@
-## Test framework for HMM XS stuff
-## $Id$
-
-use Bio::Matrix::Scoring;
-use Bio::Tools::HMM;
-
-$hmm = new Bio::Tools::HMM('-symbols' => "123456", '-states' => "FL");
-
-$seq1 = "315116246446644245311321631164152133625144543631656626566666";
-$obs1 = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLL";
-$seq1 .= "651166453132651245636664631636663162326455236266666625151631";
-$obs1 .= "LLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLFFFLLLLLLLLLLLLLLFFFFFFFFF";
-$seq1 .= "222555441666566563564324364131513465146353411126414626253356";
-$obs1 .= "FFFFFFFFLLLLLLLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq1 .= "366163666466232534413661661163252562462255265252265435353336";
-$obs1 .= "LLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq1 .= "233121625364414432335163243633665562466662632666612355245242";
-$obs1 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLLLLFFFFFFFFFFF";
-
-$seq2 = "544552213525245666363632432522253566166546666666533666543261";
-$obs2 = "FFFFFFFFFFFFLLLLLLLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLFFFFFF";
-$seq2 .= "363546253252546524422555242223224344432423341365415551632161";
-$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq2 .= "144212242323456563652263346116214136666156616666566421456123";
-$obs2 .= "FFFFFFLLLFFFFFFFFFFFFFFFFFFFFFFFFLFLLLLLLLLLLLLLLLLFFFFFFFFF";
-$seq2 .= "346313546514332164351242356166641344615135266642261112465663";
-$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-
-@seqs = ($seq1, $seq2);
-
-printf "Baum-Welch Training\n";
-printf "===================\n";
-
-
-$hmm->baum_welch_training(\@seqs);
-
-printf "Initial Probability Array:\n";
-$init = $hmm->init_prob;
-foreach $s (@{$init}) {
- printf "%g\t", $s;
-}
-printf "\n";
-
-printf "Transition Probability Matrix:\n";
-$matrix = $hmm->transition_prob;
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Emission Probability Matrix:\n";
-$matrix = $hmm->emission_prob;
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "\n";
-printf "Log Probability of sequence 1: %g\n", $hmm->likelihood($seq1);
-printf "Log Probability of sequence 2: %g\n", $hmm->likelihood($seq2);
-printf "\n";
-printf "Statistical Training\n";
-printf "====================\n";
-
-@obs = ($obs1, $obs2);
-$hmm->statistical_training(\@seqs, \@obs);
-
-printf "Initial Probability Array:\n";
-$init = $hmm->init_prob;
-$hmm->init_prob($init);
-foreach $s (@{$init}) {
- printf "%g\t", $s;
-}
-printf "\n";
-
-printf "Transition Probability Matrix:\n";
-$matrix = $hmm->transition_prob;
-$hmm->transition_prob($matrix);
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Emission Probability Matrix:\n";
-$matrix = $hmm->emission_prob;
-$hmm->emission_prob($matrix);
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Vitebi Algorithm:\n";
-$obs3 = $hmm->viterbi($seq1);
-printf "%s\n", $obs3;
View
10 Bio/Ext/HMM/typemap
@@ -1,10 +0,0 @@
-TYPEMAP
-HMM * T_HMM
-
-INPUT
-T_HMM
- $var = ($type) (SvROK($arg) == 0 ? ($type) NULL : ($type) SvIV((SV*)SvRV($arg)))
-
-OUTPUT
-T_HMM
- sv_setref_pv($arg, "Bio::Ext::HMM::HMM", (void*) $var);
View
7 Bio/Ext/Makefile.PL
@@ -0,0 +1,7 @@
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Bio::Ext',
+ 'VERSION' => '0.07.0'
+);
View
2  Bio/SeqIO/staden/Makefile.PL
@@ -69,7 +69,7 @@ sub MY::post_initialize {
$iolibinc = ExtUtils::MakeMaker::prompt("Please tell us where your Staden io_lib \"Read.h\" header is installed: ", $prompt);
}
- `perl -pi -e 's{(LIBS\\s*=>\\s*")([^"]+)}{\$1-L$ioliblib -lread -lz};' read.pm`;
+ `perl -pi -e 's{(LIBS\\s*=>\\s*")([^"]+)}{\$1-L$ioliblib -lread};' read.pm`;
`perl -pi -e 's{(INC\\s*=>\\s*")([^"]+)}{\$1-I$iolibinc};' read.pm`;
return '';
View
8 Bio/SeqIO/staden/read.pm
@@ -78,15 +78,15 @@ sub BEGIN {
use Inline (C => 'DATA',
VERSION => '0.01',
NAME => 'Bio::SeqIO::staden::read',
- LIBS => "-L/usr/local/lib -lread -lz", # leave these as double quotes - necessary for Makefile.PL function
- INC => "-I/usr/local/include/io_lib", # leave these as double quotes - necessary for Makefile.PL function
+ LIBS => "-L/usr/local/lib -lread", # leave these as double quotes - necessary for Makefile.PL function
+ INC => "-I/usr/local/include/io_lib" # leave these as double quotes - necessary for Makefile.PL function
);
} or Bio::Root::Root::throw( -class => 'Bio::Root::SystemException',
-text => "No Inline::C (or maybe io-lib?) support available",
);
}
-$VERSION = 1.5.1;
+$VERSION = 0.01;
my %formats = ( scf => 1,
abi => 2,
@@ -196,7 +196,7 @@ int staden_write_trace(SV *self, FILE *fh, int format,
read->leftCutoff = 0;
read->rightCutoff = len + 1;
- qualarr = (AV *) SvRV(qual);
+ qualarr = SvRV(qual);
n = av_len(qualarr) + 1;
for (i = 0 ; i < n && i < len ; i++) {
val = *(av_fetch(qualarr, i, 0));
View
3  Makefile.PL
@@ -3,8 +3,7 @@ use ExtUtils::MakeMaker;
# the contents of the Makefile that is written.
WriteMakefile(
'NAME' => 'Bio',
- 'DISTNAME' => 'bioperl-ext',
- 'VERSION' => '1.5.1-RC1',
+ 'VERSION' => '0.08.0',
'DIR' => [ qw( Bio/Ext/Align Bio/SeqIO/staden )],
'dist' => { COMPRESS => 'gzip -9f',
SUFFIX => '.gz',
View
5 README
@@ -1,6 +1,6 @@
o Version
- This is Bioperl-ext version 1.5.1 from CVS HEAD
+ This is Bioperl-ext version 1.3 from CVS HEAD
o Summary
@@ -71,8 +71,7 @@ o Notes for Bio::SeqIO::staden::read
install the "config.h" file for "os.h" to find all the information it
needs. Finally, you may also need to edit "os.h" to #include
<config.h> instead of #include "config.h" if you continue to get
- undefined symbol errors during compilation. On some OSes you may in fact
- have to do the OPPOSITE, meaning change <config.h> to "config.h".
+ undefined symbol errors during compilation.
The bioperl-ext make process will prompt you for the LIB and INCLUDE
locations (usually /usr/local/lib and usr/local/include/io_lib,
Please sign in to comment.
Something went wrong with that request. Please try again.