Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

This commit was manufactured by cvs2svn to create tag

'bioperl-ext-release-1-4-0'.

svn path=/bioperl-ext/tags/bioperl-ext-release-1-4-0/; revision=660
  • Loading branch information...
commit d43d34c7cf84dcabcfcfb6608e930abde53a923d 1 parent 8695d5b
nobody authored
View
143 Bio/Ext/Align/Align.xs
@@ -3323,9 +3323,6 @@ Align_DNA_Sequences(seq1, seq2, match, mismatch, gap, ext, alg)
case 1:
RETVAL = dpAlign_Local_DNA_MillerMyers(seq1, seq2, match, mismatch, gap, ext);
break;
- case 2:
- RETVAL = dpAlign_Global_DNA_MillerMyers(seq1, seq2, match, mismatch, gap, ext);
- break;
default:
RETVAL = dpAlign_Local_DNA_MillerMyers(seq1, seq2, match, mismatch, gap, ext);
break;
@@ -3334,136 +3331,15 @@ Align_DNA_Sequences(seq1, seq2, match, mismatch, gap, ext, alg)
RETVAL
dpAlign_AlignOutput *
-Align_Protein_Sequences(seq1, seq2, matrix, alg)
+Align_Protein_Sequences(seq1, seq2, matrix)
char * seq1
char * seq2
- dpAlign_ScoringMatrix * matrix
- int alg
+ char * matrix
CODE:
- switch (alg) {
- case 1:
- RETVAL = dpAlign_Local_Protein_MillerMyers(seq1, seq2, matrix);
- break;
- case 2:
- RETVAL = dpAlign_Global_Protein_MillerMyers(seq1, seq2, matrix);
- break;
- default:
RETVAL = dpAlign_Local_Protein_MillerMyers(seq1, seq2, matrix);
- break;
- }
- OUTPUT:
- RETVAL
-
-int
-Score_DNA_Sequences(sp, seq2)
- dpAlign_SequenceProfile * sp
- char * seq2
- CODE:
- RETVAL = dpAlign_Local_DNA_PhilGreen(sp, seq2);
- ST(0) = sv_newmortal();
- sv_setiv(ST(0), (IV)RETVAL);
- XSRETURN(1);
-
-int
-Score_Protein_Sequences(sp, seq2)
- dpAlign_SequenceProfile * sp
- char * seq2
- CODE:
- RETVAL = dpAlign_Local_Protein_PhilGreen(sp, seq2);
- ST(0) = sv_newmortal();
- sv_setiv(ST(0), (IV)RETVAL);
- XSRETURN(1);
-
-MODULE = Bio::Ext::Align PACKAGE = Bio::Ext::Align::ScoringMatrix
-
-dpAlign_ScoringMatrix *
-new(class, alphabet, gap, ext)
- char * class
- char * alphabet
- int gap
- int ext
- PPCODE:
- dpAlign_ScoringMatrix * out;
- out = new_dpAlign_ScoringMatrix(alphabet, gap, ext);
- ST(0) = sv_newmortal();
- sv_setref_pv(ST(0), class, (void *) out);
- XSRETURN(1);
-
-void
-set_entry(class, matrix, row, col, val)
- char * class
- dpAlign_ScoringMatrix * matrix
- char * row
- char * col
- int val
- PPCODE:
- set_dpAlign_ScoringMatrix(matrix, row, col, val);
- XSRETURN(1);
-
-void
-DESTROY(obj)
- dpAlign_ScoringMatrix * obj
- PPCODE:
- int i;
- for (i = 0; i < obj->sz; ++i)
- free(obj->s[i]);
- free(obj->s);
-
-MODULE = Bio::Ext::Align PACKAGE = Bio::Ext::Align::SequenceProfile
-
-dpAlign_SequenceProfile *
-dna_new(class, seq1, match, mismatch, gap, ext)
- char * class
- char * seq1
- int match
- int mismatch
- int gap
- int ext
- PPCODE:
- dpAlign_SequenceProfile * out;
- out = dpAlign_DNA_Profile(seq1, match, mismatch, gap, ext);
- ST(0) = sv_newmortal();
- sv_setref_pv(ST(0), class, (void *) out);
- XSRETURN(1);
-
-dpAlign_SequenceProfile *
-protein_new(class, seq1, matrix)
- char * class
- char * seq1
- dpAlign_ScoringMatrix * matrix
- PPCODE:
- dpAlign_SequenceProfile * out;
- out = dpAlign_Protein_Profile(seq1, matrix);
- ST(0) = sv_newmortal();
- sv_setref_pv(ST(0), class, (void *) out);
- XSRETURN(1);
-
-char *
-alphabet(obj)
- dpAlign_SequenceProfile * obj
- CODE:
- switch (obj->type) {
- case 1:
- RETVAL = "dna";
- break;
- case 2:
- RETVAL = "protein";
- break;
- case 3:
- RETVAL = "rna";
- break;
- default:
- RETVAL = "unknown";
- }
OUTPUT:
RETVAL
-void
-DESTROY(obj)
- dpAlign_SequenceProfile * obj
- CODE:
- free(obj->waa);
-
MODULE = Bio::Ext::Align PACKAGE = Bio::Ext::Align::AlignOutput
char *
@@ -3514,18 +3390,3 @@ end2(obj)
OUTPUT:
RETVAL
-int
-score(obj)
- dpAlign_AlignOutput * obj
- CODE:
- RETVAL = obj->score;
- OUTPUT:
- RETVAL
-
-void
-DESTROY(obj)
- dpAlign_AlignOutput * obj
- CODE:
- free(obj->aln1);
- free(obj->aln2);
-
View
6 Bio/Ext/Align/Makefile.PL
@@ -3,7 +3,7 @@ use ExtUtils::MakeMaker;
# the contents of the Makefile that is written.
WriteMakefile(
'NAME' => 'Bio::Ext::Align',
- 'VERSION' => '1.5.1',
+ 'VERSION' => '0.1',
'LIBS' => ['-lm'], # e.g., '-lm'
'DEFINE' => '-DPOSIX -DNOERROR', # e.g., '-DHAVE_SOMETHING'
'INC' => '-I./libs', # e.g., '-I/usr/include/other'
@@ -14,8 +14,8 @@ WriteMakefile(
sub MY::postamble{
'
$(MYEXTLIB):
- DEFINE=\'$(DEFINE)\'; CC=\'$(PERLMAINCC)\'; CFLAGS=\'$(CCFLAGS)\'; export DEFINE INC CC CFLAGS; \
- cd libs && $(MAKE) CC=\'$(PERLMAINCC)\' CFLAGS=\'$(CCFLAGS) $(DEFINE)\' DEFINE=\'$(DEFINE)\' libsw$(LIB_EXT) -e
+ DEFINE=\'$(DEFINE)\'; CC=\'$(PERLMAINCC)\'; export DEFINE INC CC; \
+ cd libs && $(MAKE) CC=$(CC) libsw$(LIB_EXT) -e
';
}
View
30 Bio/Ext/Align/blosum45.mat
@@ -1,30 +0,0 @@
-# Matrix made by matblas from blosum45.iij
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 45
-# Entropy = 0.3795, Expected = -0.2789
- A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 0
-R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 0 -1
-N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 4 0 -1
-D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 5 1 -1
-C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -3 -2
-Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 4 -1
-E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 4 -1
-G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
-H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 0 -1
-I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 -3 -1
-L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 -2 -1
-K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 1 -1
-M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 -1 -1
-F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 -3 -1
-P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -1 -1
-S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 0 0
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 0
-W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2
-Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 -2 -1
-V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 -3 -1
-B -1 -1 4 5 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 4 2 -1
-Z -1 0 0 1 -3 4 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 2 4 -1
-X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -2 -1 -1 -1 -1 -1
-
View
29 Bio/Ext/Align/blosum50.mat
@@ -1,29 +0,0 @@
-# Matrix made by matblas from blosum50.iij
-# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 50
-# Entropy = 0.4808, Expected = -0.3573
- A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1
-R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1
-N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1
-D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1
-C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2
-Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1
-E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1
-G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2
-H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1
-I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1
-L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1
-K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1
-M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1
-F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2
-P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2
-S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0
-W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3
-Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1
-V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1
-B -2 -1 4 5 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1
-Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1
-X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1
View
30 Bio/Ext/Align/blosum62.mat
@@ -1,30 +0,0 @@
-# Matrix made by matblas from blosum62.iij
-# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
-# Blocks Database = /data/blocks_5.0/blocks.dat
-# Cluster Percentage: >= 62
-# Entropy = 0.6979, Expected = -0.5209
- A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0
-R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1
-N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1
-D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1
-C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2
-Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1
-E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1
-G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1
-H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1
-I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1
-L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1
-K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1
-M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1
-F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1
-P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2
-S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0
-T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0
-W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2
-Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1
-V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1
-B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1
-Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1
-X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1
-
View
107 Bio/Ext/Align/libs/dpalign.c
@@ -189,110 +189,3 @@ align(unsigned char * A, unsigned char * B, int M, int N, int ** s, int g, int e
}
return midc;
}
-
-/*
- pgreen runs Phil Green's algorithm to calculate alignment score between
- two sequences. The argument waa is a sequence profile derived from
- the query sequence and the scoring matrix. The second argument M is
- the length of query sequence. B is the sequence to compare to. N is
- the length of the second sequence. gap is the gap opening cost. ext
- is the gap extension cost. ss is a preallocated array of struct swstr
- with a size of M.
-
- Most of the code in this function is copied from the do_work function
- in dropgsw.c inside the FASTA distribution.
- */
-int
-pgreen(int * waa, int M, unsigned char * B, int N, int gap, int ext, struct swstr * ss)
-{
- int score;
- int e, f, h, p, i, temp;
- struct swstr * ssj;
-
- int * pwaa;
-
- gap = gap + ext;
- ext = -ext;
- ss[M].H = -1;
- ss[M].E = 1;
- score = 0;
- for (h = 0; h < M; ++h) {
- ss[h].H = ss[h].E = 0;
- }
-
- for (i = 0; i < N; ++i) {
- pwaa = waa + B[i]*M;
- ssj = ss;
-
- e = f = h = p = 0;
- zero_f:
- while (1) {
- h = p + *pwaa++;
- p = ssj->H;
- if ((e = ssj->E) > 0) {
- if (p == -1) goto next_row;
- if (h < e) h = e;
- else if (h > gap) {
- e += ext;
- goto transition;
- }
- e += ext;
- ssj->E = (e > 0) ? e : 0;
- ssj++->H = h;
- }
- else {
- if (h > 0) {
- if (h > gap) {
- e = 0;
- goto transition;
- }
- ssj++->H = h;
- }
- else ssj++->H = 0;
- }
- }
-
- transition:
- if (score < h) score = h;
-
- temp = h - gap;
- if (f < temp) f = temp;
- if (e < temp) e = temp;
- ssj->E = (e > 0) ? e : 0;
- ssj++->H = h;
- e = 0;
-
- do {
- h = p + *pwaa++;
- p = ssj->H;
-
- if (h < f) h = f;
- f += ext;
-
- if ((e = ssj->E) > 0) {
- if (p == -1) goto next_row;
- if (h < e) h = e;
- else if (h > gap) {
- e += ext;
- goto transition;
- }
- e += ext;
- ssj->E = (e > 0) ? e : 0;
- ssj++->H = h;
- }
- else {
- if (h > gap) {
- e = 0;
- goto transition;
- }
- ssj++->H = h;
- }
- } while (f > 0);
- goto zero_f;
- next_row:
- ;
- }
-
- free(waa);
- return score;
-}
View
41 Bio/Ext/Align/libs/dpalign.h
@@ -23,44 +23,17 @@ struct swstr {
#define prot_encode(c) (c == 'A' ? 0x00 : c == 'R' ? 0x01 : c == 'N' ? 0x02 : c == 'D' ? 0x03 : c == 'C' ? 0x04 : c == 'Q' ? 0x05 : c == 'E' ? 0x06 : c == 'G' ? 0x07 : c == 'H' ? 0x08 : c == 'I' ? 0x09 : c == 'L' ? 0x0a : c == 'K' ? 0x0b : c == 'M' ? 0x0c : c == 'F' ? 0x0d : c == 'P' ? 0x0e : c == 'S' ? 0x0f : c == 'T' ? 0x10 : c == 'W' ? 0x11 : c == 'Y' ? 0x12 : c == 'V' ? 0x13 : c == 'B' ? 0x14 : c == 'Z' ? 0x15 : c == 'X' ? 0x16 : c == '*' ? 0x17 : -1)
typedef struct _dpAlign_alnoutput {
- char * aln1; /* aligned subsequence of sequence 1 with space '-' inserted */
- int start1; /* start point of aligned subsequence 1 */
- int end1; /* end point of aligned subsequence 1 */
- char * aln2; /* aligned subsequence of sequence 2 with space '-' inserted */
- int start2; /* start point of aligned subsequence 2 */
- int end2; /* end point of aligned subsequence 2 */
- int score; /* score of this alignment */
+ char * aln1; /* aligned subsequence of sequence 1 with space '-' inserted */
+ int start1; /* start point of aligned subsequence 1 */
+ int end1; /* end point of aligned subsequence 1 */
+ char * aln2; /* aligned subsequence of sequence 2 with space '-' inserted */
+ int start2; /* start point of aligned subsequence 2 */
+ int end2; /* end point of aligned subsequence 2 */
} dpAlign_AlignOutput;
-typedef struct _dpAlign_SequenceProfile {
- int * waa; /* sequence profile */
- int gap;
- int ext;
- int len; /* length of sequence */
- int type; /* 1 is DNA, 2 is Protein, 3 is RNA */
- int a[256]; /* alphabet array that maps a character in the alphabet to an integer index that indexes the columns and rows in the scoring matrix */
-} dpAlign_SequenceProfile;
-
-typedef struct _dpAlign_ScoringMatrix {
- int ** s; /* scoring matrix pointer */
- int a[256]; /* alphabet array that maps a character in the alphabet to an integer index that indexes the columns and rows in the scoring matrix */
- int gap; /* gap opening penalty for this matrix */
- int ext; /* gap extension penalty for this matrix */
- int sz; /* size of alphabet */
-} dpAlign_ScoringMatrix;
-
dpAlign_AlignOutput * dpAlign_Local_DNA_MillerMyers(char *, char *, int, int, int, int);
-dpAlign_AlignOutput * dpAlign_Global_DNA_MillerMyers(char *, char *, int, int, int, int);
-dpAlign_AlignOutput * dpAlign_Local_Protein_MillerMyers(char *, char *, dpAlign_ScoringMatrix *);
-dpAlign_AlignOutput * dpAlign_Global_Protein_MillerMyers(char *, char *, dpAlign_ScoringMatrix *);
-dpAlign_SequenceProfile * dpAlign_Protein_Profile(char *, dpAlign_ScoringMatrix *);
-int dpAlign_Local_Protein_PhilGreen(dpAlign_SequenceProfile *, char *);
-dpAlign_SequenceProfile * dpAlign_DNA_Profile(char *, int, int, int, int);
-int dpAlign_Local_DNA_PhilGreen(dpAlign_SequenceProfile *, char *);
-dpAlign_ScoringMatrix * new_dpAlign_ScoringMatrix(char *, int, int);
-void set_dpAlign_ScoringMatrix(dpAlign_ScoringMatrix *, char *, char *, int);
+dpAlign_AlignOutput * dpAlign_Local_Protein_MillerMyers(char *, char *, char *);
dpAlign_AlignOutput * dpAlign_Local_DNA_Green(char *, char *, int, int, int, int);
void dpAlign_fatal(char *);
int align(unsigned char *, unsigned char *, int, int, int **, int, int, struct swstr *, struct swstr *, int *, int *);
-int pgreen(int *, int, unsigned char *, int, int, int, struct swstr *);
#endif
View
503 Bio/Ext/Align/libs/linspc.c
@@ -60,7 +60,54 @@ dpAlign_Local_DNA_MillerMyers(char * seq1, char * seq2, int match, int mismatch,
dpAlign_fatal("Cannot allocate memory for scoring matrix col!\n");
for (j = 0; j < 17; ++j) {
if (i == 16 || j == 16) s[i][j] = mismatch; /* X mismatches all */
+// else if (i == 15 || j == 15) s[i][j] = match; /* N matches all */
else if (i == j) s[i][j] = match;
+/*
+// A matches A,R,M,W,B,N,V
+ else if ((i == 0 && j != 1 && j != 2 && j != 3 && j != 4 && j != 6 && j != 9 && j != 10 && j != 14) ||
+ (j == 0 && i != 1 && i != 2 && i != 3 && i != 4 && i != 6 && i != 9 && i != 10 && i != 14)) s[i][j] = match;
+// C matches C,Y,M,S,B,M,V
+ else if ((i == 1 && j != 0 && j != 2 && j != 3 && j != 4 && j != 5 && j != 8 && j != 10 && j != 11) ||
+ (j == 1 && i != 0 && i != 2 && i != 3 && i != 4 && i != 5 && i != 8 && i != 10 && i != 11)) s[i][j] = match;
+// G matches G,R,K,S,B,D,V
+ else if ((i == 2 && j != 0 && j != 1 && j != 3 && j != 4 && j != 5 && j != 9 && j != 10 && j != 12) ||
+ (j == 2 && i != 0 && i != 1 && i != 3 && i != 4 && i != 5 && i != 9 && i != 10 && i != 12)) s[i][j] = match;
+// T,U matches T,U,Y,K,W,B,D,H
+ else if (((i == 3 || i == 4) && j != 0 && j != 1 && j != 2 && j != 5 && j != 7 && j != 9 && j != 13) ||
+ ((j == 3 || j == 4) && i != 0 && i != 1 && i != 2 && i != 5 && i != 7 && i != 9 && i != 13)) s[i][j] = match;
+// R matches all but C,T,U,Y
+ else if ((i == 5 && j != 1 && j != 3 && j != 4 && j != 6) ||
+ (j == 5 && i != 1 && i != 3 && i != 4 && i != 6))
+ s[i][j] = match;
+// Y matches all but A,G,R
+ else if ((i == 6 && j != 0 && j != 2 && j != 5) ||
+ (j == 6 && i != 0 && i != 2 && i != 5))
+ s[i][j] = match;
+// M matches all but G,T,U,K
+ else if ((i == 7 && j != 2 && j != 3 && j != 4 && j != 10) ||
+ (j == 7 && i != 2 && i != 3 && i != 4 && i != 10))
+ s[i][j] = match;
+// W matches all but C,G,S
+ else if ((i == 8 && j != 1 && j != 2 && j != 9) ||
+ (j == 8 && i != 1 && i != 2 && i != 9))
+ s[i][j] = match;
+// S matches all but A,T,U,W
+ else if ((i == 9 && j != 0 && j != 3 && j != 4 && j != 8) ||
+ (j == 9 && i != 0 && i != 3 && i != 4 && i != 8))
+ s[i][j] = match;
+// K matches all but A,C,M
+ else if ((i == 10 && j != 0 && j != 1 && j != 7) ||
+ (j == 10 && i != 0 && i != 1 && i != 7))
+ s[i][j] = match;
+ else if ((i == 11 && j != 1) || // D matches all but C
+ (j == 11 && i != 1)) s[i][j] = match;
+ else if ((i == 12 && j != 2) || // H matches all but G
+ (j == 12 && i != 2)) s[i][j] = match;
+ else if ((i == 13 && j != 3 && j != 4) || // V matches all but T,U
+ (j == 13 && i != 3 && i != 4)) s[i][j] = match;
+ else if ((i == 14 && j != 0) || // B matches all but A
+ (j == 14 && i != 0)) s[i][j] = match;
+*/
else s[i][j] = mismatch;
}
}
@@ -82,318 +129,11 @@ dpAlign_Local_DNA_MillerMyers(char * seq1, char * seq2, int match, int mismatch,
score */
find_ends(as);
- if (as->score <= 0)
- return NULL;
-
/* align the subsequences bounded by the end points */
as->score = align(as->s1 + as->start1 - 1, as->s2 + as->start2 - 1, as->end1 - as->start1 + 1, as->end2 - as->start2 + 1, as->s, as->gap, as->ext, as->FF, as->RR, as->spc1, as->spc2);
-/*
- for (i = 0; i < 17; ++i) {
- free(as->s[i]);
- }
- free(as->s);
-*/
- return traceback(as);
-}
-
-/*
- dpAlign_Global_DNA_MillerMyers implements the Miller-Myers algorithm
- to align DNA sequences as defined in their 1988 paper. It takes two
- character string sequences seq1 and seq2, together with the scoring
- parameters for match, mismatch, gap opening and gap extension as
- arguments. At the end, it returns the dpAlign_AlignOutput data
- structure which can be translated into a Bio::SimpleAlign object
- pretty easily.
- */
-dpAlign_AlignOutput *
-dpAlign_Global_DNA_MillerMyers(char * seq1, char * seq2, int match, int mismatch, int gap, int ext)
-{
- sw_AlignStruct * as;
- int ** s;
- int i, j;
-
- if (seq1 == NULL)
- dpAlign_fatal("Sequence 1 is a NULL pointer!\n");
- if (seq2 == NULL)
- dpAlign_fatal("Sequence 2 is a NULL pointer!\n");
-
-/* initialize DNA scoring matrix */
- s = (int **) malloc(17*sizeof(int *));
- if (s == NULL)
- dpAlign_fatal("Cannot allocate memory for scoring matrix row!\n");
- for (i = 0; i < 17; ++i) {
- s[i] = (int *) malloc(17*sizeof(int));
- if (s[i] == NULL)
- dpAlign_fatal("Cannot allocate memory for scoring matrix col!\n");
- for (j = 0; j < 17; ++j) {
- if (i == 16 || j == 16) s[i][j] = mismatch; /* X mismatches all */
- else if (i == j) s[i][j] = match;
- else s[i][j] = mismatch;
- }
- }
-
-/* initialize the alignment data structure */
- as = init_AlignStruct(seq1, seq2, s, gap, ext);
-
-/* uppercase the sequence and then encode it */
- for (i = 0; i < as->len1; ++i) {
- if (as->seq1[i] >= 'a' && as->seq1[i] <= 'z') as->seq1[i] -= 0x20;
- as->s1[i] = dna_encode(as->seq1[i]);
- }
- for (i = 0; i < as->len2; ++i) {
- if (as->seq2[i] >= 'a' && as->seq2[i] <= 'z') as->seq2[i] -= 0x20;
- as->s2[i] = dna_encode(as->seq2[i]);
- }
-
-/* initialize the spaces arrays */
- as->spc1 = (int *) calloc(as->len1 + 1, sizeof(int));
- if (as->spc1 == NULL)
- dpAlign_fatal("Can't allocate memory for spaces array for seq 1!\n");
- as->spc2 = (int *) calloc(as->len2 + 1, sizeof(int));
- if (as->spc2 == NULL)
- dpAlign_fatal("Can't allocate memory for spaces array for seq 2!\n");
-/* align the subsequences bounded by the end points */
- as->score = align(as->s1, as->s2, as->len1, as->len2, as->s, as->gap, as->ext, as->FF, as->RR, as->spc1, as->spc2);
-/*
- for (i = 0; i < 17; ++i) {
- free(as->s[i]);
- }
- free(as->s);
-*/
-/* set start and end for global alignment */
- as->start1 = 1;
- as->end1 = as->len1;
- as->start2 = 1;
- as->end2 = as->len2;
return traceback(as);
}
-/*
- new_dpAlign_ScoringMatrix instantiate a dpAlign_ScoringMatrix object
- with a alphabet string, gap opening penalty and gap extension penalty.
- The dpAlign_ScoringMatrix object created will have alphabet mapping
- array, gap penalities initialized. Memory will be allocated for the
- scoring matrix s and initialized to zeros.
- */
-dpAlign_ScoringMatrix *
-new_dpAlign_ScoringMatrix(char * alphabet, int gap, int ext)
-{
- int i;
- dpAlign_ScoringMatrix * matrix;
-
- matrix = (dpAlign_ScoringMatrix *) malloc(sizeof(dpAlign_ScoringMatrix));
- if (matrix == NULL)
- dpAlign_fatal("Can't allocate memory for dpAlign_ScoringMatrix!\n");
- matrix->sz = strlen(alphabet);
- matrix->s = (int **) malloc(matrix->sz*sizeof(int *));
- if (matrix->s == NULL)
- dpAlign_fatal("Can't allocate memory for dpAlign_ScoringMatrix's s!\n");
- for (i = 0; i < matrix->sz; ++i) {
- matrix->s[i] = (int *) calloc(matrix->sz, sizeof(int));
- if (matrix->s[i] == NULL)
- dpAlign_fatal("Can't allocate memory for dpAlign_ScoringMatrix's s!\n");
- }
- matrix->gap = gap;
- matrix->ext = ext;
- for (i = 0; i < matrix->sz; ++i)
- matrix->a[alphabet[i]] = i;
- return matrix;
-}
-
-/*
- set_dpAlign_ScoringMatrix initilizes the scoring matrix s of a
- dpAlign_ScoringMatrix object. It only intializes one cell. Therefore
- to use this function, you need to wrap it with two loops over the
- alphabet set to initialize all the fields.
- */
-void
-set_dpAlign_ScoringMatrix(dpAlign_ScoringMatrix * matrix, char * row, char * col, int val)
-{
- matrix->s[matrix->a[row[0]]][matrix->a[col[0]]] = val;
-}
-
-/*
- dpAlign_Protein_Profile creates a sequence profile from a protein
- sequence.
- */
-dpAlign_SequenceProfile *
-dpAlign_Protein_Profile(char * seq1, dpAlign_ScoringMatrix * matrix)
-{
- int i, j;
- unsigned char * s1;
- int gap = 7;
- int ext = 1;
- int sz = 24;
- int * smp; /* scoring matrix pointer */
- int * pwaa;
- dpAlign_SequenceProfile * sp;
-
- if (seq1 == NULL)
- dpAlign_fatal("Sequence 1 is a NULL pointer!\n");
- sp = (dpAlign_SequenceProfile *) malloc(sizeof(dpAlign_SequenceProfile));
- if (sp == NULL)
- dpAlign_fatal("Can't allocate memory for Sequence Profile!\n");
- sp->len = strlen(seq1);
- s1 = (unsigned char *) malloc(sp->len*sizeof(unsigned char));
- if (s1 == NULL)
- dpAlign_fatal("Cannot allocate memory for encoded sequence 1!\n");
- if (matrix == NULL) {
- sp->a['A'] = 0x00; sp->a['R'] = 0x01; sp->a['N'] = 0x02; sp->a['D'] = 0x03;
- sp->a['C'] = 0x04; sp->a['Q'] = 0x05; sp->a['E'] = 0x06; sp->a['G'] = 0x07;
- sp->a['H'] = 0x08; sp->a['I'] = 0x09; sp->a['L'] = 0x0a; sp->a['K'] = 0x0b;
- sp->a['M'] = 0x0c; sp->a['F'] = 0x0d; sp->a['P'] = 0x0e; sp->a['S'] = 0x0f;
- sp->a['T'] = 0x10; sp->a['W'] = 0x11; sp->a['Y'] = 0x12; sp->a['V'] = 0x13;
- sp->a['B'] = 0x14; sp->a['Z'] = 0x15; sp->a['X'] = 0x16; sp->a['*'] = 0x17;
- }
- else {
- gap = matrix->gap;
- ext = matrix->ext;
- sz = matrix->sz;
- for (i = 0; i < 256; ++i)
- sp->a[i] = matrix->a[i];
- }
- for (i = 0; i < sp->len; ++i) {
- if (seq1[i] >= 'a' && seq1[i] <= 'z') seq1[i] -= 0x20;
- s1[i] = sp->a[seq1[i]];
- }
-
- sp->waa = (int *) malloc(sizeof(int)*sz*sp->len);
- if (sp->waa == NULL)
- dpAlign_fatal("Can't allocate memory for waa!\n");
- pwaa = sp->waa;
- if (matrix == NULL) {
- for (i = 0; i < sz; ++i) {
- smp = blosum62[i];
- for (j = 0; j < sp->len; ++j)
- *pwaa++ = smp[s1[j]];
- }
- }
- else {
- for (i = 0; i < sz; ++i) {
- smp = matrix->s[i];
- for (j = 0; j < sp->len; ++j) {
-//printf("hi1\t%d\t%d\t%d\t%d\n", i, j, s1[j], smp[s1[j]]);
- *pwaa++ = smp[s1[j]];
-}
- }
- }
- sp->gap = gap;
- sp->ext = ext;
- sp->type = 2;
- free(s1);
- return sp;
-}
-
-/*
- dpAlign_Local_Protein_PhilGreen compares a protein sequence profile
- with a protein sequence and return the optimal local alignment score.
- */
-int
-dpAlign_Local_Protein_PhilGreen(dpAlign_SequenceProfile * sp, char * seq2)
-{
- int i;
- int N;
- struct swstr * ss;
- int score;
- unsigned char * s2;
-
- if (seq2 == NULL)
- dpAlign_fatal("Sequence 2 is a NULL pointer!\n");
-
- N = strlen(seq2);
-
- s2 = (unsigned char *) malloc(N*sizeof(unsigned char));
- if (s2 == NULL)
- dpAlign_fatal("Cannot allocate memory for encoded sequence 2!\n");
-
- for (i = 0; i < N; ++i) {
- if (seq2[i] >= 'a' && seq2[i] <= 'z') seq2[i] -= 0x20;
- s2[i] = sp->a[seq2[i]];
- }
-
- ss = (struct swstr *) malloc(sizeof(struct swstr)*(sp->len+1));
- if (ss == NULL)
- dpAlign_fatal("Cannot allocate memory for ss array!\n");
- score = pgreen(sp->waa, sp->len, s2, N, sp->gap, sp->ext, ss);
- free(ss);
- free(s2);
- return score;
-}
-
-/*
- dpAlign_Protein_Profile creates a sequence profile from a DNA sequence.
- */
-dpAlign_SequenceProfile *
-dpAlign_DNA_Profile(char * seq1, int match, int mismatch, int gap, int ext)
-{
- int i, j;
- unsigned char * s1;
- int * pwaa;
- dpAlign_SequenceProfile * sp;
-
- if (seq1 == NULL)
- dpAlign_fatal("Sequence 1 is a NULL pointer!\n");
- sp = (dpAlign_SequenceProfile *) malloc(sizeof(dpAlign_SequenceProfile));
- if (sp == NULL)
- dpAlign_fatal("Can't allocate memory for Sequence Profile!\n");
- sp->len = strlen(seq1);
- s1 = (unsigned char *) malloc(sp->len*sizeof(unsigned char));
- if (s1 == NULL)
- dpAlign_fatal("Cannot allocate memory for encoded sequence 1!\n");
- for (i = 0; i < sp->len; ++i) {
- if (seq1[i] >= 'a' && seq1[i] <= 'z') seq1[i] -= 0x20;
- s1[i] = dna_encode(seq1[i]);
- }
- sp->waa = (int *) malloc(sizeof(int)*24*sp->len);
- if (sp->waa == NULL)
- dpAlign_fatal("Can't allocate memory for waa!\n");
- pwaa = sp->waa;
- for (i = 0; i < 17; ++i)
- for (j = 0; j < sp->len; ++j)
- *pwaa++ = s1[j] == (unsigned char) i ? match : mismatch;
- sp->gap = gap;
- sp->ext = ext;
- sp->type = 1;
- free(s1);
- return sp;
-}
-
-/*
- dpAlign_Local_DNA_PhilGreen compares a DNA sequence profile
- with a DNA sequence and return the optimal local alignment score.
- */
-int
-dpAlign_Local_DNA_PhilGreen(dpAlign_SequenceProfile * sp, char * seq2)
-{
- int i;
- int N;
- struct swstr * ss;
- int score;
- unsigned char * s2;
-
- if (seq2 == NULL)
- dpAlign_fatal("Sequence 2 is a NULL pointer!\n");
-
- N = strlen(seq2);
-
- s2 = (unsigned char *) malloc(N*sizeof(unsigned char));
- if (s2 == NULL)
- dpAlign_fatal("Cannot allocate memory for encoded sequence 2!\n");
-
- for (i = 0; i < N; ++i) {
- if (seq2[i] >= 'a' && seq2[i] <= 'z') seq2[i] -= 0x20;
- s2[i] = dna_encode(seq2[i]);
- }
-
- ss = (struct swstr *) malloc(sizeof(struct swstr)*(sp->len+1));
- if (ss == NULL)
- dpAlign_fatal("Cannot allocate memory for ss array!\n");
- score = pgreen(sp->waa, sp->len, s2, N, sp->gap, sp->ext, ss);
- free(ss);
- free(s2);
- return score;
-}
-
/*
dpAlign_Local_Protein_MillerMyers implements the Miller-Myers algorithm
that aligns protein sequences as defined in their 1988 paper. It takes
@@ -403,14 +143,10 @@ dpAlign_Local_DNA_PhilGreen(dpAlign_SequenceProfile * sp, char * seq2)
structure that can be easily converted into a Bio::SimpleAlign object.
*/
dpAlign_AlignOutput *
-dpAlign_Local_Protein_MillerMyers(char * seq1, char * seq2, dpAlign_ScoringMatrix * matrix)
+dpAlign_Local_Protein_MillerMyers(char * seq1, char * seq2, char * matrix)
{
sw_AlignStruct * as;
int ** s;
- int * a; /* alphabet array */
- int gap = 7;
- int ext = 1;
- int sz = 24; /* size of alphabet */
int i, j;
if (seq1 == NULL)
@@ -419,162 +155,36 @@ dpAlign_Local_Protein_MillerMyers(char * seq1, char * seq2, dpAlign_ScoringMatri
dpAlign_fatal("Sequence 2 is a NULL pointer!\n");
/* initialize the scoring matrix */
- if (matrix == NULL) {
- s = (int **) malloc(sz*sizeof(int *));
+ s = (int **) malloc(24*sizeof(int *));
if (s == NULL)
dpAlign_fatal("Cannot allocate memory for scoring matrix row!\n");
- for (i = 0; i < sz; ++i) {
+ for (i = 0; i < 24; ++i) {
s[i] = (int *) malloc(24*sizeof(int));
if (s[i] == NULL)
dpAlign_fatal("Cannot allocate memory for scoring matrix col!\n");
- for (j = 0; j < sz; ++j) {
+ for (j = 0; j < 24; ++j) {
s[i][j] = blosum62[i][j];
}
}
- a = (int *) malloc(256*sizeof(int));
- if (a == NULL)
- dpAlign_fatal("Cannot allocate memory for protein encoding array!\n");
-
- a['A'] = 0x00; a['R'] = 0x01; a['N'] = 0x02; a['D'] = 0x03;
- a['C'] = 0x04; a['Q'] = 0x05; a['E'] = 0x06; a['G'] = 0x07;
- a['H'] = 0x08; a['I'] = 0x09; a['L'] = 0x0a; a['K'] = 0x0b;
- a['M'] = 0x0c; a['F'] = 0x0d; a['P'] = 0x0e; a['S'] = 0x0f;
- a['T'] = 0x10; a['W'] = 0x11; a['Y'] = 0x12; a['V'] = 0x13;
- a['B'] = 0x14; a['Z'] = 0x15; a['X'] = 0x16; a['*'] = 0x17;
- }
- else {
- a = matrix->a;
- s = matrix->s;
- gap = matrix->gap;
- ext = matrix->ext;
- sz = matrix->sz;
- }
/* initialize alignment data structure */
- as = init_AlignStruct(seq1, seq2, s, gap, ext);
+ as = init_AlignStruct(seq1, seq2, s, 10, 2);
/* uppercase the sequence and encode it */
for (i = 0; i < as->len1; ++i) {
if (as->seq1[i] >= 'a' && as->seq1[i] <= 'z') as->seq1[i] -= 0x20;
- as->s1[i] = a[as->seq1[i]];
+ as->s1[i] = prot_encode(as->seq1[i]);
}
for (i = 0; i < as->len2; ++i) {
if (as->seq2[i] >= 'a' && as->seq2[i] <= 'z') as->seq2[i] -= 0x20;
- as->s2[i] = a[as->seq2[i]];
+ as->s2[i] = prot_encode(as->seq2[i]);
}
- if (matrix == NULL)
- free(a); /* free array after encoding */
-
/* locate the end points of the subsequence that results in the maximal score */
find_ends(as);
/* align the subsequences bounded by the end points */
as->score = align(as->s1 + as->start1 - 1, as->s2 + as->start2 - 1, as->end1 - as->start1 + 1, as->end2 - as->start2 + 1, as->s, as->gap, as->ext, as->FF, as->RR, as->spc1, as->spc2);
-/* free scoring matrix
- for (i = 0; i < sz; ++i) {
- free(as->s[i]);
- }
- free(as->s);
-*/
- return traceback(as);
-}
-
-/*
- dpAlign_Global_Protein_MillerMyers implements the Miller-Myers algorithm
- that aligns protein sequences as defined in their 1988 paper. It takes
- two character strings seq1 and seq2, the name of a scoring matrix.
- Currently, we only support "BLOSUM62" matrix.
- dpAlign_Local_Protein_MillerMyers returns a dpAlign_AlignOutput data
- structure that can be easily converted into a Bio::SimpleAlign object.
- */
-dpAlign_AlignOutput *
-dpAlign_Global_Protein_MillerMyers(char * seq1, char * seq2, dpAlign_ScoringMatrix * matrix)
-{
- sw_AlignStruct * as;
- int ** s;
- int * a; /* alphabet array */
- int gap = 7;
- int ext = 1;
- int sz = 24; /* size of alphabet */
- int i, j;
-
- if (seq1 == NULL)
- dpAlign_fatal("Sequence 1 is a NULL pointer!\n");
- if (seq2 == NULL)
- dpAlign_fatal("Sequence 2 is a NULL pointer!\n");
-
-/* initialize the scoring matrix */
- if (matrix == NULL) {
- s = (int **) malloc(sz*sizeof(int *));
- if (s == NULL)
- dpAlign_fatal("Cannot allocate memory for scoring matrix row!\n");
- for (i = 0; i < sz; ++i) {
- s[i] = (int *) malloc(sz*sizeof(int));
- if (s[i] == NULL)
- dpAlign_fatal("Cannot allocate memory for scoring matrix col!\n");
- for (j = 0; j < sz; ++j) {
- s[i][j] = blosum62[i][j];
- }
- }
- a = (int *) malloc(256*sizeof(int));
- if (a == NULL)
- dpAlign_fatal("Cannot allocate memory for protein encoding array!\n");
-
- a['A'] = 0x00; a['R'] = 0x01; a['N'] = 0x02; a['D'] = 0x03;
- a['C'] = 0x04; a['Q'] = 0x05; a['E'] = 0x06; a['G'] = 0x07;
- a['H'] = 0x08; a['I'] = 0x09; a['L'] = 0x0a; a['K'] = 0x0b;
- a['M'] = 0x0c; a['F'] = 0x0d; a['P'] = 0x0e; a['S'] = 0x0f;
- a['T'] = 0x10; a['W'] = 0x11; a['Y'] = 0x12; a['V'] = 0x13;
- a['B'] = 0x14; a['Z'] = 0x15; a['X'] = 0x16; a['*'] = 0x17;
- }
- else {
- a = matrix->a;
- s = matrix->s;
- gap = matrix->gap;
- ext = matrix->ext;
- sz = matrix->sz;
- }
-
-/* initialize alignment data structure */
- as = init_AlignStruct(seq1, seq2, s, gap, ext);
-
-/* uppercase the sequence and encode it */
- for (i = 0; i < as->len1; ++i) {
- if (as->seq1[i] >= 'a' && as->seq1[i] <= 'z') as->seq1[i] -= 0x20;
- as->s1[i] = a[as->seq1[i]];
- }
- for (i = 0; i < as->len2; ++i) {
- if (as->seq2[i] >= 'a' && as->seq2[i] <= 'z') as->seq2[i] -= 0x20;
- as->s2[i] = a[as->seq2[i]];
- }
-
- if (matrix == NULL)
- free(a); /* free array after encoding */
-
-/* initialize the spaces arrays */
- as->spc1 = (int *) calloc(as->len1 + 1, sizeof(int));
- if (as->spc1 == NULL)
- dpAlign_fatal("Can't allocate memory for spaces array for seq 1!\n");
- as->spc2 = (int *) calloc(as->len2 + 1, sizeof(int));
- if (as->spc2 == NULL)
- dpAlign_fatal("Can't allocate memory for spaces array for seq 2!\n");
-/* align the subsequences bounded by the end points */
- as->score = align(as->s1, as->s2, as->len1, as->len2, as->s, as->gap, as->ext, as->FF, as->RR, as->spc1, as->spc2);
-
-/* free scoring matrix
- for (i = 0; i < sz; ++i) {
- free(as->s[i]);
- }
- free(as->s);
-*/
-
-/* set start and end for global alignment */
- as->start1 = 1;
- as->end1 = as->len1;
- as->start2 = 1;
- as->end2 = as->len2;
-
return traceback(as);
}
@@ -642,6 +252,9 @@ traceback(sw_AlignStruct * as)
int i, j, k;
/* free all allocated memory before we exit this module */
+ for (i = 0; i < 4; ++i)
+ free(as->s[i]);
+ free(as->s);
free(as->s1);
free(as->s2);
free(as->FF);
@@ -651,7 +264,6 @@ traceback(sw_AlignStruct * as)
if (ao == NULL)
dpAlign_fatal("Can't allocate memory for AlignOutput!\n");
- ao->score = as->score;
/* insert gaps to sequence 1 */
k = 0;
for (i = 0; i <= aln1_len; ++i) {
@@ -768,7 +380,6 @@ find_ends(sw_AlignStruct * as)
}
found:
- as->score = score1;
/* initialize the spaces arrays */
as->spc1 = (int *) calloc(as->end1 - as->start1 + 2, sizeof(int));
if (as->spc1 == NULL)
View
4 Bio/Ext/Align/libs/makefile
@@ -42,8 +42,8 @@ OBJS = aln.o\
libsw.a : $(OBJS)
ar ru libsw.a $(OBJS)
-#wisefile.o : wisefile.c
-# $(CC) $(CFLAGS) -DNOERROR wisefile.c
+wisefile.o : wisefile.c
+ $(CC) $(CFLAGS) -DNOERROR wisefile.c
#
# For NetBSD or Sun (solaris) installs, add -fPIC to the CFLAGS lines
View
24 Bio/Ext/Align/md_20.mat
@@ -1,24 +0,0 @@
- A R N D C Q E G H I L K M F P S T W Y V B Z X
-A 10 -10 -9 -8 -10 -10 -7 -5 -12 -10 -12 -11 -9 -15 -5 -2 -1 -17 -16 -3 -9 -8 -1
-R -10 12 -10 -14 -7 -3 -11 -6 -3 -14 -12 0 -11 -18 -9 -7 -9 -6 -14 -14 -12 -7 -1
-N -9 -10 13 -1 -11 -8 -9 -8 -2 -11 -15 -4 -12 -16 -13 -1 -4 -18 -9 -14 6 -8 -1
-D -8 -14 -1 12 -16 -9 1 -6 -7 -16 -18 -11 -15 -20 -15 -9 -11 -20 -11 -12 6 -4 -1
-C -10 -7 -11 -16 17 -16 -19 -9 -9 -14 -13 -17 -12 -8 -14 -4 -11 -7 -4 -10 -14 -17 -1
-Q -10 -3 -8 -9 -16 13 -3 -12 0 -16 -9 -3 -11 -18 -5 -10 -10 -14 -12 -14 -9 5 -1
-E -7 -11 -9 1 -19 -3 11 -7 -12 -16 -17 -5 -14 -20 -14 -12 -12 -17 -18 -11 -4 4 -1
-G -5 -6 -8 -6 -9 -12 -7 11 -13 -17 -18 -12 -15 -19 -12 -5 -11 -10 -17 -11 -7 -9 -1
-H -12 -3 -2 -7 -9 0 -12 -13 15 -14 -10 -9 -12 -11 -7 -8 -10 -16 0 -15 -4 -6 -1
-I -10 -14 -11 -16 -14 -16 -16 -17 -14 12 -4 -14 -1 -8 -15 -11 -4 -16 -12 2 -13 -16 -1
-L -12 -11 -15 -18 -13 -9 -17 -18 -10 -4 10 -15 -2 -4 -7 -10 -12 -10 -13 -5 -17 -13 -1
-K -11 0 -4 -12 -17 -3 -5 -12 -9 -14 -15 12 -9 -21 -13 -10 -7 -16 -17 -15 -8 -4 -1
-M -9 -11 -12 -15 -12 -11 -15 -16 -12 -1 -2 -9 15 -10 -14 -12 -4 -13 -14 -3 -13 -13 -1
-F -15 -19 -16 -19 -8 -18 -20 -19 -11 -8 -4 -19 -10 13 -14 -8 -15 -10 0 -9 -17 -19 -1
-P -5 -9 -13 -15 -14 -5 -14 -12 -7 -15 -7 -13 -14 -14 12 -3 -7 -18 -16 -13 -14 -10 -1
-S -2 -8 -1 -9 -4 -10 -12 -5 -8 -11 -10 -10 -12 -8 -3 10 -1 -12 -9 -11 -5 -11 -1
-T -1 -9 -4 -11 -10 -10 -12 -11 -10 -4 -12 -7 -4 -15 -7 -1 11 -16 -14 -7 -7 -11 -1
-W -17 -6 -18 -18 -7 -14 -18 -10 -17 -17 -10 -17 -14 -10 -18 -12 -15 18 -9 -13 -18 -16 -1
-Y -16 -14 -9 -11 -4 -12 -18 -17 0 -12 -12 -17 -14 0 -16 -9 -13 -9 14 -15 -10 -15 -1
-V -3 -14 -14 -12 -9 -14 -11 -11 -15 2 -5 -15 -2 -9 -13 -11 -7 -13 -14 11 -13 -12 -1
-B -9 -12 6 6 -14 -9 -4 -7 -4 -13 -17 -8 -13 -18 -14 -5 -7 -19 -10 -13 12 -6 -1
-Z -12 -13 -13 -4 -27 4 10 -13 -12 -24 -21 -6 -20 -29 -17 -17 -17 -24 -24 -18 -6 12 -1
-X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
View
89 Bio/Ext/Align/test.pl
@@ -1,97 +1,38 @@
-# test framework rewritten by Jason Stajich
+#!/usr/local/bin/perl
## Test framework for pSW XS stuff
## $Id$
## We start with some black magic to print on failure.
-my $DEBUG = $ENV{'BIOPERLDEBUG'} || 0;
-BEGIN {
- eval { require Test; };
- use Test;
- plan tests => 9;
-}
+BEGIN { $| = 1; print "1..2\n"; }
+END {print "not ok 1\n" unless $loaded;}
use Bio::Ext::Align;
-use Bio::Tools::dpAlign;
-use Bio::Seq;
-use Bio::AlignIO;
+use lib '.';
$loaded = 1;
-ok(1); # modules loaded
+print "ok 1\n"; # 1st test passes.
+
+print "\n2..2\nTesting bp_sw with a protein alignment...\n\n";
&Bio::Ext::Align::change_max_BaseMatrix_kbytes(20000);
$cm = &Bio::Ext::Align::CompMat::read_Blast_file_CompMat("blosum62.bla");
-ok($cm);
$seq1 = &Bio::Ext::Align::new_Sequence_from_strings("one","WLGQRNLVSSTGGNLLNVWLKDW");
-ok($seq1);
$seq2 = &Bio::Ext::Align::new_Sequence_from_strings("two","WMGNRNVVNLLNVWFRDW");
-ok($seq2);
-
-$alb = &Bio::Ext::Align::Align_Sequences_ProteinSmithWaterman($seq1,$seq2,
- $cm,-12,-2);
-&Bio::Ext::Align::write_pretty_str_align($alb,$seq1->name,
- $seq1->seq,$seq2->name,
- $seq2->seq,15,50,STDERR) if $DEBUG;
-
-
-warn( "Testing Local Alignment case...\n") if $DEBUg;
-
-$alnout = new Bio::AlignIO(-format => 'pfam', -fh => \*STDERR);
-$aln = &Bio::Ext::Align::Align_DNA_Sequences("AATGCCATTGACGG",
- "CAGCCTCGCTTAG",3,-1,3,1,
- Bio::Tools::dpAlign::DPALIGN_LOCAL_MILLER_MYERS);
-
-$out = Bio::SimpleAlign->new();
-
-$out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln1,
- -start => $aln->start1,
- -end => $aln->end1,
- -id => "one"));
-
-$out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln2,
- -start => $aln->start2,
- -end => $aln->end2,
- -id => "two"));
-$alnout->write_aln($out) if $DEBUG;
-$aln = &Bio::Ext::Align::Align_Protein_Sequences("WLGQRNLVSSTGGNLLNVWLKDW","WMGNRNVVNLLNVWFRDW",0,
- Bio::Tools::dpAlign::DPALIGN_LOCAL_MILLER_MYERS);
-$out = Bio::SimpleAlign->new();
-ok($aln);
+$alb = &Bio::Ext::Align::Align_Sequences_ProteinSmithWaterman($seq1,$seq2,$cm,-12,-2);
-$out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln1,
- -start => $aln->start1,
- -end => $aln->end1,
- -id => "one"));
+&Bio::Ext::Align::write_pretty_str_align($alb,$seq1->name,$seq1->seq,$seq2->name,$seq2->seq,15,50,STDERR);
-$out->add_seq(Bio::LocatableSeq->new(-seq => $aln->aln2,
- -start => $aln->start2,
- -end => $aln->end2,
- -id => "two"));
-$alnout->write_aln($out) if $DEBUG;
-ok(1);
+print "ok 2\n"; # Assume 2nd test worked.
-warn( "Testing Global Alignment case...\n") if $DEBUG;
+## End of black magic.
+##
+## Insert additional test code below but remember to change
+## the print "1..x\n" in the BEGIN block to reflect the
+## total number of tests that will be run.
-$factory = new Bio::Tools::dpAlign('-alg' => Bio::Tools::dpAlign::DPALIGN_GLOBAL_MILLER_MYERS);
-$s1 = new Bio::Seq(-id => "one", -seq => "AATGCCATTGACGG", -alphabet => 'dna');
-$s2 = new Bio::Seq(-id => "two", -seq => "CAGCCTCGCTTAG", -alphabet => 'dna');
-$aln = $factory->pairwise_alignment($s1, $s2);
-$alnout->write_aln($aln) if $DEBUG;
-$factory->align_and_show($s1, $s2) if $DEBUG;
-ok(1);
-$s1 = new Bio::Seq(-id => "one", -seq => "WLGQRNLVSSTGGNLLNVWLKDW",
- -alphabet => 'protein');
-$s2 = new Bio::Seq(-id => "two", -seq => "WMGNRNVVNLLNVWFRDW",
- -alphabet => 'protein');
-$aln = $factory->pairwise_alignment($s1, $s2);
-$alnout->write_aln($aln) if $DEBUG;
-$factory->align_and_show($s1, $s2) if $DEBUG;
-ok(1);
-$prof = $factory->sequence_profile($s1);
-warn( "Optimal Alignment Score = %d\n", $factory->pairwise_alignment_score($prof, $s2)) if $DEBUG;
-ok($factory->pairwise_alignment_score($prof,$s2),77);
View
11 Bio/Ext/Align/typemap
@@ -352,21 +352,12 @@ T_bp_sw_Translation
TYPEMAP
dpAlign_AlignOutput * T_AlignOutput
-dpAlign_SequenceProfile * T_SequenceProfile
-dpAlign_ScoringMatrix * T_ScoringMatrix
INPUT
T_AlignOutput
$var = ($type) (SvROK($arg) == 0 ? ($type) NULL : ($type) SvIV((SV*)SvRV($arg)))
-T_SequenceProfile
- $var = ($type) (SvROK($arg) == 0 ? ($type) NULL : ($type) SvIV((SV*)SvRV($arg)))
-T_ScoringMatrix
- $var = ($type) (SvROK($arg) == 0 ? ($type) NULL : ($type) SvIV((SV*)SvRV($arg)))
OUTPUT
T_AlignOutput
sv_setref_pv($arg, "Bio::Ext::Align::AlignOutput", (void*) $var);
-T_SequenceProfile
- sv_setref_pv($arg, "Bio::Ext::Align::SequenceProfile", (void*) $var);
-T_ScoringMatrix
- sv_setref_pv($arg, "Bio::Ext::Align::ScoringMatrix", (void*) $var);
+
View
49 Bio/Ext/HMM/HMM.pm
@@ -1,49 +0,0 @@
-
-package Bio::Ext::HMM;
-
-use vars qw($AUTOLOAD @ISA @EXPORT_OK $dl_debug);
-use Exporter;
-use Carp;
-use strict;
-
-use DynaLoader;
-
-
-@ISA = qw(Exporter DynaLoader);
-# Items to export into callers namespace by default. Note: do not export
-# names by default without a very good reason. Use EXPORT_OK instead.
-# Do not simply export all your public functions/methods/constants.
-
-sub AUTOLOAD {
- # This AUTOLOAD is used to 'autoload' constants from the constant()
- # XS function. If a constant is not found then control is passed
- # to the AUTOLOAD in AutoLoader.
-
- my $constname;
- ($constname = $AUTOLOAD) =~ s/.*:://;
- my $val = constant($constname, @_ ? $_[0] : 0);
- if ($! != 0) {
- if ($! =~ /Invalid/) {
- $AutoLoader::AUTOLOAD = $AUTOLOAD;
- goto &AutoLoader::AUTOLOAD;
- }
- else {
- croak "Your vendor has not defined Test macro $constname";
- }
- }
- eval "sub $AUTOLOAD { $val }";
- goto &$AUTOLOAD;
-}
-
-BEGIN {
- $dl_debug = 40;
-}
-
-bootstrap Bio::Ext::HMM; # hopefully has the correct things...
-
-# Preloaded methods go here.
-
-# Autoload methods go after __END__, and are processed by the autosplit program.
-
-1;
-__END__
View
207 Bio/Ext/HMM/HMM.xs
@@ -1,207 +0,0 @@
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-#ifdef __cplusplus
-}
-#endif
-
-#include "hmm.h"
-
-MODULE = Bio::Ext::HMM PACKAGE = Bio::Ext::HMM
-
-void
-HMM_statistical_training(class, hmm, obs, hs)
- char * class
- HMM * hmm
- SV * obs
- SV * hs
- PPCODE:
- AV * obs_av = (AV *) SvRV(obs);
- AV * hs_av = (AV *) SvRV(hs);
- int i;
- int avlen = av_len(obs_av);
- char ** obs_ar = (char **) malloc(avlen*sizeof(char *));
- char ** hs_ar = (char **) malloc(avlen*sizeof(char *));
- int * obs_len = (int *) malloc(avlen*sizeof(int));
- if (obs_ar == NULL || hs_ar == NULL)
- croak("Can't allocate memory for observation and/or state arrays!\n");
- if (obs_len == NULL)
- croak("Can't allocate memory for observation length array!\n");
- for (i = 0; i < avlen; ++i) {
- obs_ar[i] = (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na);
- obs_len[i] = strlen(obs_ar[i]);
- obs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (obs_ar[i] == NULL)
- croak("Can't allocate memory for observation array!\n");
- strcpy(obs_ar[i], (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na));
- hs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (hs_ar[i] == NULL)
- croak("Can't allocate memory for state array!\n");
- strcpy(hs_ar[i], (char *) SvPV(*av_fetch(hs_av, i, 0), PL_na));
- omap(hmm, obs_ar[i], obs_len[i]);
- smap(hmm, hs_ar[i], obs_len[i]);
- }
- state_est(hmm, obs_ar, hs_ar, obs_len, avlen);
- for (i = 0; i < avlen; ++i) {
- free(obs_ar[i]);
- free(hs_ar[i]);
- }
- free(obs_ar);
- free(hs_ar);
- free(obs_len);
-
-double
-HMM_likelihood(class, hmm, seq)
- char * class
- HMM * hmm
- char * seq
- CODE:
- int T = strlen(seq);
- char obs[T+1];
- strcpy(obs, seq);
- omap(hmm, obs, T);
- RETVAL = Palpha(hmm, obs, T);
- OUTPUT:
- RETVAL
-
-void
-HMM_baum_welch_training(class, hmm, obs)
- char * class
- HMM * hmm
- SV * obs
- PPCODE:
- AV * obs_av = (AV *) SvRV(obs);
- int i;
- int avlen = av_len(obs_av);
- char ** obs_ar = (char **) malloc(avlen*sizeof(char *));
- int * obs_len = (int *) malloc(avlen*sizeof(int));
- if (obs_ar == NULL)
- croak("Can't allocate memory for observation arrays!\n");
- if (obs_len == NULL)
- croak("Can't allocate memory for observation length array!\n");
- for (i = 0; i < avlen; ++i) {
- obs_ar[i] = (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na);
- obs_len[i] = strlen(obs_ar[i]);
- obs_ar[i] = (char *) malloc((obs_len[i]+1)*sizeof(char));
- if (obs_ar[i] == NULL)
- croak("Can't allocate memory for observation array!\n");
- strcpy(obs_ar[i], (char *) SvPV(*av_fetch(obs_av, i, 0), PL_na));
- omap(hmm, obs_ar[i], obs_len[i]);
- }
- baum_welch(hmm, obs_ar, obs_len, avlen);
- for (i = 0; i < avlen; ++i)
- free(obs_ar[i]);
- free(obs_ar);
-
-SV *
-HMM_viterbi(class, hmm, seq)
- char * class
- HMM * hmm
- char * seq
- PPCODE:
- SV * sv;
- int T = strlen(seq);
- char * hss = (char *) malloc(T*sizeof(char));
- char obs[T+1];
- if (hss == NULL)
- croak("Can't allocate memory for hidden state sequence!\n");
- strcpy(obs, seq);
- omap(hmm, obs, T);
- viterbi(hmm, hss, obs, T);
- sv = newSVpv(hss, strlen(hss));
- free(hss);
- PUSHs(sv_2mortal(sv));
-
-
-MODULE = Bio::Ext::HMM PACKAGE = Bio::Ext::HMM::HMM
-
-HMM *
-new(class, symbols, states)
- char * class
- char * symbols
- char * states
- PPCODE:
- HMM * out;
- out = HMM_new(symbols, states);
- ST(0) = sv_newmortal();
- sv_setref_pv(ST(0), class, (void *) out);
- XSRETURN(1);
-
-double
-get_init_entry(class, hmm, state)
- char * class
- HMM * hmm
- char * state
- CODE:
- RETVAL = HMM_get_init_entry(hmm, state);
- OUTPUT:
- RETVAL
-
-void
-set_init_entry(class, hmm, state, val)
- char * class
- HMM * hmm
- char * state
- double val
- CODE:
- HMM_set_init_entry(hmm, state, val);
-
-double
-get_a_entry(class, hmm, state1, state2)
- char * class
- HMM * hmm
- char * state1
- char * state2
- CODE:
- RETVAL = HMM_get_a_entry(hmm, state1, state2);
- OUTPUT:
- RETVAL
-
-void
-set_a_entry(class, hmm, state1, state2, val)
- char * class
- HMM * hmm
- char * state1
- char * state2
- double val
- CODE:
- HMM_set_a_entry(hmm, state1, state2, val);
-
-double
-get_e_entry(class, hmm, state, symbol)
- char * class
- HMM * hmm
- char * state
- char * symbol
- CODE:
- RETVAL = HMM_get_e_entry(hmm, state, symbol);
- OUTPUT:
- RETVAL
-
-void
-set_e_entry(class, hmm, state, symbol, val)
- char * class
- HMM * hmm
- char * state
- char * symbol
- double val
- CODE:
- HMM_set_e_entry(hmm, state, symbol, val);
-
-void
-DESTROY(obj)
- HMM * obj
- PPCODE:
- int i;
- free(obj->init);
- for (i = 0; i < obj->N; ++i)
- free(obj->a_mat[i]);
- free(obj->a_mat);
- for (i = 0; i < obj->N; ++i)
- free(obj->e_mat[i]);
- free(obj->e_mat);
-
View
13 Bio/Ext/HMM/Makefile.PL
@@ -1,13 +0,0 @@
-use ExtUtils::MakeMaker;
-# See lib/ExtUtils/MakeMaker.pm for details of how to influence
-# the contents of the Makefile that is written.
-WriteMakefile(
- 'NAME' => 'Bio::Ext::HMM',
- 'VERSION' => '0.1',
- 'LIBS' => ['-lm'], # e.g., '-lm'
- 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING'
- 'INC' => '', # e.g., '-I/usr/include/other'
- 'OBJECT' => 'HMM.o hmmlib.o',
- 'clean' => { 'FILES' => '*.o' }
-);
-
View
36 Bio/Ext/HMM/hmm.h
@@ -1,36 +0,0 @@
-#ifndef _HMM_H_
-#define _HMM_H_
-
-typedef struct HMM
-{
- int M;
- int N;
- char * symbols;
- unsigned char omap[256];
- char * states;
- unsigned char smap[256];
- double * init;
- double ** a_mat;
- double ** e_mat;
-} HMM;
-
-#define UPPER_TOL 0.000001
-#define LOWER_TOL 1e-100
-
-HMM * init_HMM(char *, char *, double *, double **, double **);
-HMM * HMM_new(char *, char *);
-double HMM_get_init_entry(HMM *, char *);
-void HMM_set_init_entry(HMM *, char *, double);
-double HMM_get_a_entry(HMM *, char *, char *);
-void HMM_set_a_entry(HMM *, char *, char *, double);
-double HMM_get_e_entry(HMM *, char *, char *);
-void HMM_set_e_entry(HMM *, char *, char *, double);
-void omap(HMM *, char *, int);
-void smap(HMM *, char *, int);
-double Palpha(HMM *, char *, int);
-double Pbeta(HMM *, char *, int);
-void viterbi(HMM *, char *, char *, int);
-void state_est(HMM *, char **, char **, int *, int);
-void baum_welch(HMM *, char **, int *, int);
-
-#endif
View
779 Bio/Ext/HMM/hmmlib.c
@@ -1,779 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <sys/types.h>
-#include "hmm.h"
-
-static unsigned int hmm_seed;
-static void alphaT(HMM *, double *, char *, int);
-static void beta1(HMM *, double *, char *, int);
-static double * xi(HMM *);
-static void alpha_mat(HMM *, double **, char *, int);
-static void beta_mat(HMM *, double **, char *, int);
-static void HMM_fatal(char *);
-static double sumLogProbs(double *, int);
-static double sumLogProb(double, double);
-
-/*
- sumLogProbs adds up the log probabilities stored in the array
- logprobs of size sz by dividing every items with the largest
- item in the array. This prevents underflow such that calculations
- for long sequence are allowed. The function returns the log of the
- sum of log probabilities.
- log(a_0+a_1+...+a_n) = log(a_max)+log(a_0/a_max+a_1/a_max+...+a_n/a_max)
- */
-static double
-sumLogProbs(double * logprobs, int sz)
-{
- double max = 0.0;
- double p = 0.0;
- int i;
- for (i = 0; i < sz; ++i)
- if (i == 0 || logprobs[i] > max)
- max = logprobs[i];
- if (max == log(0))
- return max;
- for (i = 0; i < sz; ++i)
- p += exp(logprobs[i] - max);
- return max + log(p);
-}
-
-/*
- sumLogProb is similar to sumLogProbs except that it is adding up
- only two log probabilities p1 and p2. It returns the log of the sum
- of p1 and p2.
- */
-static double
-sumLogProb(double p1, double p2)
-{
- if (p1 == log(0) && p2 == log(0))
- return p1;
- if (p1 > p2)
- return p1 + log(1 + exp(p2 - p1));
- else
- return p2 + log(1 + exp(p1 - p2));
-}
-
-/*
- HMM_fatal prints the string s to STDERR and then exit
- */
-static void
-HMM_fatal(char * s)
-{
- fprintf(stderr, s);
- exit(-1);
-}
-
-/*
- HMM_new initializes an HMM object with string of all possible
- symbols and string of all possible states. The other parameters
- like initial probabilities (HMM->init), state transition matrix
- (HMM->a_mat) and emission matrix (HMM->e_mat) are initialized
- randomly such that it is ready for training.
- */
-HMM *
-HMM_new(char * symbols, char * states)
-{
- HMM * hmm;
- double sum, random;
- int i, j;
-
- hmm_seed = time(NULL);
- hmm = (HMM *) malloc(sizeof(HMM));
- if (hmm == NULL)
- HMM_fatal("Can't allocate memory for HMM, die!\n");
- hmm->symbols = symbols;
- hmm->states = states;
- hmm->M = strlen(symbols);
- hmm->N = strlen(states);
- for (i = 0; i < hmm->M; ++i)
- hmm->omap[symbols[i]] = i;
- for (i = 0; i < hmm->N; ++i)
- hmm->smap[states[i]] = i;
- hmm->init = (double *) malloc(hmm->N*sizeof(double));
- if (hmm->init == NULL)
- HMM_fatal("Can't allocate memory for init array of HMM, die!\n");
-/* initialize the initial state array */
- sum = 0.0;
- for (j = 0; j < hmm->N; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->init[j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->N; ++j)
- hmm->init[j] /= sum;
- hmm->e_mat = (double **) malloc(hmm->N*sizeof(double *));
- hmm->a_mat = (double **) malloc(hmm->N*sizeof(double *));
- if (hmm->a_mat == NULL)
- HMM_fatal("Can't allocate memory for transition matrix of HMM, die!\n");
- for (i = 0; i < hmm->N; ++i) {
- hmm->a_mat[i] = (double *) malloc(hmm->N*sizeof(double));
- if (hmm->a_mat[i] == NULL)
- HMM_fatal("Can't allocate memory for transition matrix of HMM, die!\n");
-
-/* randomize the transition matrix */
- sum = 0.0;
- for (j = 0; j < hmm->N; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->a_mat[i][j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->N; ++j)
- hmm->a_mat[i][j] /= sum;
- }
- hmm->e_mat = (double **) malloc(hmm->N*sizeof(double *));
- if (hmm->e_mat == NULL)
- HMM_fatal("Can't allocate memory for emission matrix of HMM, die!\n");
- for (i = 0; i < hmm->N; ++i) {
- hmm->e_mat[i] = (double *) malloc(hmm->M*sizeof(double));
- if (hmm->e_mat[i] == NULL)
- HMM_fatal("Can't allocate memory for emission matrix of HMM, die!\n");
-
-/* randomize the emission matrix */
- sum = 0.0;
- for (j = 0; j < hmm->M; ++j) {
- srand(hmm_seed++);
- random = (double) rand();
- hmm->e_mat[i][j] = random;
- sum += random;
- }
- for (j = 0; j < hmm->M; ++j)
- hmm->e_mat[i][j] /= sum;
- }
- return hmm;
-}
-
-/*
- init_HMM is similar to HMM_new except that it allows you to initialize
- the HMM with your own initial probabilities (init), your own state
- transition matrix (a_mat) and your own emission matrix (e_mat).
- */
-HMM *
-init_HMM(char * symbols, char * states, double * init, double ** a_mat, double ** e_mat)
-{
- HMM * hmm;
- int i;
-
- hmm = (HMM *) calloc(1, sizeof(HMM));
- if (hmm == NULL) {
- fprintf(stderr, "Can't allocate memory for HMM, die!\n");
- exit(-1);
- }
- hmm->symbols = symbols;
- hmm->states = states;
- hmm->M = strlen(symbols);
- hmm->N = strlen(states);
- for (i = 0; i < hmm->M; ++i)
- hmm->omap[symbols[i]] = i;
- for (i = 0; i < hmm->N; ++i)
- hmm->smap[states[i]] = i;
- hmm->init = init;
- hmm->a_mat = a_mat;
- hmm->e_mat = e_mat;
- return hmm;
-}
-
-/*
- HMM_get_init_entry returns the value of a cell in the initial
- probability array based on the state supplied.
- HMM_get_init_entry is written such that XS can access the C array.
- */
-double
-HMM_get_init_entry(HMM * hmm, char * state)
-{
- return hmm->init[hmm->smap[state[0]]];
-}
-
-/*
- HMM_set_init_entry sets the value of a cell in the initial
- probability array based on the val supplied.
- HMM_set_init_entry is written such that XS can modify the C array.
- */
-void
-HMM_set_init_entry(HMM * hmm, char * state, double val)
-{
- hmm->init[hmm->smap[state[0]]] = val;
-}
-
-/*
- HMM_get_a_entry returns the value of a cell in the state transition
- matrix based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_get_a_entry is written such that XS can access the C matrix.
- */
-double
-HMM_get_a_entry(HMM * hmm, char * state1, char * state2)
-{
- return hmm->a_mat[hmm->smap[state1[0]]][hmm->smap[state2[0]]];
-}
-
-/*
- HMM_set_a_entry sets the value of a cell in the state transition
- matrix to val based on the from-state (state1) and the to-state
- (state2) supplied.
- HMM_set_a_entry is written such that XS can modify the C matrix.
- */
-void
-HMM_set_a_entry(HMM * hmm, char * state1, char * state2, double val)
-{
- hmm->a_mat[hmm->smap[state1[0]]][hmm->smap[state2[0]]] = val;
-}
-
-/*
- HMM_get_e_entry returns the value of a cell in the emission
- matrix based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_get_e_entry is written such that XS can access the C matrix.
- */
-double
-HMM_get_e_entry(HMM * hmm, char * state, char * symbol)
-{
- return hmm->e_mat[hmm->smap[state[0]]][hmm->omap[symbol[0]]];
-}
-
-/*
- HMM_set_e_entry sets the value of a cell in the emission matrix
- to val based on the from-state (state1) and the to-state (state2)
- supplied.
- HMM_set_e_entry is written such that XS can modify the C matrix.
- */
-void
-HMM_set_e_entry(HMM * hmm, char * state, char * symbol, double val)
-{
- hmm->e_mat[hmm->smap[state[0]]][hmm->omap[symbol[0]]] = val;
-}
-
-/*
- omap takes a symbol character and return its index in the state
- transition matrix.
- */
-void
-omap(HMM * hmm, char * c, int L)
-{
- int i;
-
- for (i = 0; i < L; ++i)
- c[i] = hmm->omap[c[i]];
-}
-
-/*
- smap takes a state character and return its index in the emission
- matrix.
- */
-void
-smap(HMM * hmm, char * c, int L)
-{
- int i;
-
- for (i = 0; i < L; ++i)
- c[i] = hmm->smap[c[i]];
-}
-
-/*
- alpha function for HMM. It computes the alpha function value
- at the time T which is the length of the observation sequence
- obs for every state. Note that alpha_vec needs to be malloc'd
- before we supply it to alphaT.
- Note that the return vector is in logarithmic form.
- */
-static void
-alphaT(HMM * hmm, double * alpha_vec, char * obs, int T)
-{
- int N = hmm->N;
- double alpha_vec_prev[N];
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i)
- alpha_vec[i] = log(init[i]) + log(e_mat[i][obs[0]]);
- if (T == 1)
- return;
- if (T > 1) {
- for (i = 1; i < T; ++i) {
- for (j = 0; j < N; ++j) {
- alpha_vec_prev[j] = alpha_vec[j];
- alpha_vec[j] = log(0);
- }
-
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- alpha_vec[j] = sumLogProb(alpha_vec[j], alpha_vec_prev[k] + log(a_mat[k][j]));
- alpha_vec[j] = alpha_vec[j] + log(e_mat[j][obs[i]]);
- }
- }
- }
- return;
-}
-
-/*
- Palpha computes the probability of an observation sequence
- using the alpha function.
- */
-double
-Palpha(HMM * hmm, char * obs, int T)
-{
- double P = 0.0;
- int i;
- double * alpha_vec;
-
- alpha_vec = (double *) calloc(hmm->N, sizeof(double));
- if (alpha_vec == NULL) {
- fprintf(stderr, "Can't allocate memory for alpha vector, die!\n");
- exit(-1);
- }
- alphaT(hmm, alpha_vec, obs, T);
- P = sumLogProbs(alpha_vec, hmm->N);
- free(alpha_vec);
- return P;
-}
-
-
-/*
- beta function for HMM. It computes the beta function value
- at the time 1 for every state. Note that beta_vec should
- be allocated memory thru malloc before we use it.
- Note that the return vector is in logarithmic form.
- */
-static void
-beta1(HMM * hmm, double * beta_vec, char * obs, int T)
-{
- int N = hmm->N;
- double beta_vec_next[N];
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i)
- beta_vec[i] = 0;
- if (T == 1)
- return;
- if (T > 1) {
- for (i = T - 1; i >= 1; --i) {
- for (j = 0; j < N; ++j) {
- beta_vec_next[j] = beta_vec[j];
- beta_vec[j] = log(0);
- }
-
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- beta_vec[j] = sumLogProb(beta_vec[j], beta_vec_next[k] + log(a_mat[j][k]) + log(e_mat[k][obs[i]]));
- }
- }
- }
- return;
-}
-
-/*
- Pbeta computes the probability of an observation sequence
- using the beta function.
- */
-double
-Pbeta(HMM * hmm, char * obs, int T)
-{
- double P = log(0);
- int i;
- double * beta_vec;
-
- beta_vec = (double *) calloc(hmm->N, sizeof(double));
- if (beta_vec == NULL) {
- fprintf(stderr, "Can't allocate memory for beta vector, die!\n");
- exit(-1);
- }
- beta1(hmm, beta_vec, obs, T);
- for (i = 0; i < hmm->N; ++i) {
- P = sumLogProb(P, log(hmm->init[i]) + log(hmm->e_mat[i][obs[0]]) + beta_vec[i]);
- }
- free(beta_vec);
- return P;
-}
-
-/*
- alpha_mat computes the values for the alpha function in the TxN space
- where T is the length of the observation sequence and N is the number
- of states. Note that the alpha matrix has to be malloc'd before we
- supply it to alpha_mat. The computed values will be stored in
- alpha.
- */
-static void
-alpha_mat(HMM * hmm, double ** alpha, char * obs, int T)
-{
- int i, j, t;
- int N = hmm->N;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- double * init = hmm->init;
-
- for (i = 0; i < N; ++i)
- alpha[i][0] = log(init[i]) + log(e_mat[i][obs[0]]);
- for (t = 1; t < T; ++t) {
- for (i = 0; i < N; ++i)
- alpha[i][t] = log(0);
- for (i = 0; i < N; ++i) {
- for (j = 0; j < N; ++j)
- alpha[i][t] = sumLogProb(alpha[i][t], alpha[j][t-1] + log(a_mat[j][i]));
- alpha[i][t] += log(e_mat[i][obs[t]]);
- }
- }
-}
-
-/*
- beta_mat computes the values for the alpha function in the TxN space
- where T is the length of the observation sequence and N is the number
- of states. Note that the beta matrix has to be malloc'd before we
- supply it to beta_mat. The computed values will be stored in
- beta.
- */
-static void
-beta_mat(HMM * hmm, double ** beta, char * obs, int T)
-{
- int i, j, t;
- int N = hmm->N;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- double * init = hmm->init;
-
- for (i = 0; i < N; ++i)
- beta[i][T-1] = 0;
- for (t = T-2; t >= 0; --t) {
- for (i = 0; i < N; ++i)
- beta[i][t] = log(0);
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- beta[i][t] = sumLogProb(beta[i][t], beta[j][t+1] + log(a_mat[i][j]) + log(e_mat[j][obs[t+1]]));
- }
-}
-
-/*
- baum_welch takes an array of observation sequence strings (obs) and
- the number of strings in the array (L) to estimate the parameters
- of an HMM using a special case of Expectation Maximization called
- Baum-Welch algorithm. baum_welch doesn't return anything but upon
- its completion, it will set the init array, a_mat matrix, e_mat
- matrix with parameters that maximize the chance of seeing the
- observation sequences you supplied.
- */
-void
-baum_welch(HMM * hmm, char ** obs, int * T, int L)
-{
- int i, j, l, t;
- int maxL = -1; /* length of longest training sequence */
- double P[L];
- double newP[L];
- double S, newS, diff;
- int N = hmm->N, M = hmm->M;
- double ** alpha;
- double ** beta;
- double A[N][N];
- double E[N][M];
- double init[N];
- double a_mat[N][N];
- double e_mat[N][M];
-
- for (l = 0; l < L; ++l) {
- P[l] = Palpha(hmm, obs[l], T[l]);
- if (maxL == -1)
- maxL = T[l];
- else if (T[l] > maxL)
- maxL = T[l];
- }
-
- alpha = (double **) malloc(hmm->N*sizeof(double *));
- if (alpha == NULL) {
- fprintf(stderr, "Can't allocate memory for alpha matrix!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
- alpha[i] = (double *) malloc(maxL*sizeof(double));
- if (alpha[i] == NULL) {
- fprintf(stderr, "Can't allocate memory for rows of alpha matrix!\n");
- exit(-1);
- }
- }
- beta = (double **) malloc(hmm->N*sizeof(double *));
- if (beta == NULL) {
- fprintf(stderr, "Can't allocate memory for beta matrix!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
- beta[i] = (double *) malloc(maxL*sizeof(double));
- if (beta[i] == NULL) {
- fprintf(stderr, "Can't allocate memory for rows of beta matrix!\n");
- exit(-1);
- }
- }
-
- do { /* do...while loop */
-
-/* initialize the matrices */
- for (i = 0; i < N; ++i)
- init[i] = 0.0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- A[i][j] = 0.0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j)
- E[i][j] = 0.0;
-
-/* use all training sequences to train */
- for (l = 0; l < L; ++l) {
- alpha_mat(hmm, alpha, obs[l], T[l]);
- beta_mat(hmm, beta, obs[l], T[l]);
-
-/* estimate new initial state probability */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- init[i] += exp(alpha[i][0] + log(hmm->a_mat[i][j]) + log(hmm->e_mat[j][obs[l][1]]) + beta[j][1] - P[l]);
-
-/* estimate A matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- for (t = 0; t < T[l]-1; ++t)
- A[i][j] += exp(alpha[i][t] + log(hmm->a_mat[i][j]) + log(hmm->e_mat[j][obs[l][t+1]]) + beta[j][t+1] - P[l]);
-
-/* Estimate E matrix */
- for (i = 0; i < N; ++i)
- for (t = 0; t < T[l]; ++t)
- E[i][obs[l][t]] += exp(alpha[i][t] + beta[i][t] - P[l]);
- }
-
-/* Estimate init */
- for (i = 0; i < N; ++i)
- hmm->init[i] = init[i] / (double) L;
-
-/* Estimate a_mat */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j)
- a_mat[i][j] = hmm->a_mat[i][j];
-
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- hmm->a_mat[i][j] = 0.0;
- for (t = 0; t < N; ++t)
- hmm->a_mat[i][j] += A[i][t];
- hmm->a_mat[i][j] = A[i][j] / hmm->a_mat[i][j];
- }
-
-/* Estimate e_mat */
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j)
- e_mat[i][j] = hmm->e_mat[i][j];
-
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- hmm->e_mat[i][j] = 0.0;
- for (t = 0; t < M; ++t)
- hmm->e_mat[i][j] += E[i][t];
- hmm->e_mat[i][j] = E[i][j] / hmm->e_mat[i][j];
- }
-
-/*
-// print parameter estimates for debugging
- for (i = 0; i < N; ++i)
- printf("%.2f\t", hmm->init[i]);
- printf("\n");
- for (i = 0; i < N; ++i) {
- for (j = 0; j < N; ++j)
- printf("%.2f\t", hmm->a_mat[i][j]);
- printf("\n");
- }
- for (i = 0; i < N; ++i) {
- for (j = 0; j < M; ++j)
- printf("%.2f\t", hmm->e_mat[i][j]);
- printf("\n");
- }
-*/
-
-/* find new P */
- S = 0.0;
- newS = 0.0;
- for (l = 0; l < L; ++l) {
- S += P[l];
- newP[l] = Palpha(hmm, obs[l], T[l]);
- newS += newP[l];
- P[l] = newP[l];
- }
- diff = newS - S;
-//printf("%g(%g)\t%g(%g)\t%g\n", newS, log(newS), S, log(S), diff);
- if (diff < 0 && fabs(diff) >= LOWER_TOL) {
- fprintf(stderr, "S should be monotonic increasing!\n");
- exit(-1);
- }
- }
- while (diff >= UPPER_TOL);
-
- for (i = 0; i < N; ++i) {
- free(alpha[i]);
- free(beta[i]);
- }
- free(alpha);
- free(beta);
-}
-
-/*
- viterbi algorithm takes a observation sequence (obs) and the HMM model
- (hmm) and then returns the hidden state sequence (hss) that maximizes
- the probability of seeing obs. The hss argument supplied should be
- allocated to the same amount of memory as obs.
- */
-void
-viterbi(HMM * hmm, char * hss, char * obs, int T)
-{
- double delta_vec[hmm->N];
- double delta_vec_prev[hmm->N];
- int N = hmm->N;
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int i, j, k;
- double max;
- int max_i = -1;
- int phi_vec[T];
- int phi_mat[T][N];
-
- if (T <= 0) {
- fprintf(stderr, "Nonpositive T, die!\n");
- exit(-1);
- }
- for (i = 0; i < N; ++i) {
-// delta_vec[i] = init[i]*e_mat[i][obs[0] - '1'];
-// delta_vec[i] = i == 0 ? log(0.5) : -HUGE_VAL;
- delta_vec[i] = log(init[i]);
- if (max_i < 0) {
- max_i = i;
- max = delta_vec[i];
- }
- else if (delta_vec[i] > max) {
- max_i = i;
- max = delta_vec[i];
- }
- }
- if (T == 1) {
- phi_vec[0] = max_i;
- hss[0] = hmm->states[phi_vec[0]];
- hss[1] = '\0';
- return;
- }
- if (T > 1) {
- for (i = 0; i < T; ++i) {
- for (j = 0; j < N; ++j) {
- delta_vec_prev[j] = delta_vec[j];
- delta_vec[j] = 0;
- }
- for (j = 0; j < N; ++j) {
- max_i = -1;
- for (k = 0; k < N; ++k) {
- double val = delta_vec_prev[k] + log(a_mat[k][j]);
- if (max_i < 0) {
- max_i = k;
- max = val;
- }
- else if (val > max) {
- max_i = k;
- max = val;
- }
- }
- delta_vec[j] = max + log(e_mat[j][obs[i]]);
- phi_mat[i][j] = max_i;
- }
- }
- }
- max_i = -1;
- for (i = 0; i < N; ++i) {
- if (max_i < 0) {
- max_i = i;
- max = delta_vec[i];
- }
- else if (delta_vec[i] > max) {
- max_i = i;
- max = delta_vec[i];
- }
- }
- phi_vec[T-1] = max_i;
-// traceback
- for (i = T-2; i >= 0; --i)
- phi_vec[i] = phi_mat[i+1][phi_vec[i+1]];
- for (i = 0; i < T; ++i)
- hss[i] = hmm->states[phi_vec[i]];
- hss[T] = '\0';
- return;
-}
-
-/*
- state_est takes an array of observation sequence strings (obs),
- an array of the corresponding hidden state sequence strings (sta)
- and the number of strings (L) to find the HMM parameters that
- maximize the chance of seeing the supplied observation sequences
- and their corresponding hidden state sequences. At the completion
- of this function, init, a_mat and e_mat will be set to the
- proper values.
- */
-void
-state_est(HMM * hmm, char ** obs, char ** sta, int * T, int L)
-{
- int N = hmm->N, M = hmm->M;
- int i, j, k, l;
- double * init = hmm->init;
- double ** a_mat = hmm->a_mat;
- double ** e_mat = hmm->e_mat;
- int pi[N];
- int A[N][N];
- int E[N][M];
-
-/* initialize the matrices */
- for (i = 0; i < N; ++i)
- pi[i] = 0;
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- A[i][j] = 0;
- a_mat[i][j] = 0.0;
- }
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- E[i][j] = 0;
- e_mat[i][j] = 0.0;
- }
-
-/* count occurrences */
- for (l = 0; l < L; ++l) {
- ++pi[sta[l][0]];
-
- for (i = 0; i < T[l] - 1; ++i)
- ++A[sta[l][i]][sta[l][i+1]];
- for (i = 0; i < T[l]; ++i)
- ++E[sta[l][i]][obs[l][i]];
- }
-
-/* Estimate initial state probability */
- for (i = 0; i < N; ++i)
- init[i] = (double) pi[i] / (double) L;
-
-/* Estimate state transition probability matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < N; ++j) {
- for (k = 0; k < N; ++k)
- a_mat[i][j] += A[i][k];
- a_mat[i][j] = A[i][j] / a_mat[i][j];
- }
-
-/* Estimate Symbol Emission matrix */
- for (i = 0; i < N; ++i)
- for (j = 0; j < M; ++j) {
- for (k = 0; k < M; ++k)
- e_mat[i][j] += E[i][k];
- e_mat[i][j] = E[i][j] / e_mat[i][j];
- }
-}
View
102 Bio/Ext/HMM/test.pl
@@ -1,102 +0,0 @@
-## Test framework for HMM XS stuff
-## $Id$
-
-use Bio::Matrix::Scoring;
-use Bio::Tools::HMM;
-
-$hmm = new Bio::Tools::HMM('-symbols' => "123456", '-states' => "FL");
-
-$seq1 = "315116246446644245311321631164152133625144543631656626566666";
-$obs1 = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLL";
-$seq1 .= "651166453132651245636664631636663162326455236266666625151631";
-$obs1 .= "LLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLFFFLLLLLLLLLLLLLLFFFFFFFFF";
-$seq1 .= "222555441666566563564324364131513465146353411126414626253356";
-$obs1 .= "FFFFFFFFLLLLLLLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq1 .= "366163666466232534413661661163252562462255265252265435353336";
-$obs1 .= "LLLLLLLLFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq1 .= "233121625364414432335163243633665562466662632666612355245242";
-$obs1 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLLLLFFFFFFFFFFF";
-
-$seq2 = "544552213525245666363632432522253566166546666666533666543261";
-$obs2 = "FFFFFFFFFFFFLLLLLLLLLLLFFFFFFFFFFFFLLLLLLLLLLLLLLLLLLLFFFFFF";
-$seq2 .= "363546253252546524422555242223224344432423341365415551632161";
-$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-$seq2 .= "144212242323456563652263346116214136666156616666566421456123";
-$obs2 .= "FFFFFFLLLFFFFFFFFFFFFFFFFFFFFFFFFLFLLLLLLLLLLLLLLLLFFFFFFFFF";
-$seq2 .= "346313546514332164351242356166641344615135266642261112465663";
-$obs2 .= "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF";
-
-@seqs = ($seq1, $seq2);
-
-printf "Baum-Welch Training\n";
-printf "===================\n";
-
-
-$hmm->baum_welch_training(\@seqs);
-
-printf "Initial Probability Array:\n";
-$init = $hmm->init_prob;
-foreach $s (@{$init}) {
- printf "%g\t", $s;
-}
-printf "\n";
-
-printf "Transition Probability Matrix:\n";
-$matrix = $hmm->transition_prob;
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Emission Probability Matrix:\n";
-$matrix = $hmm->emission_prob;
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "\n";
-printf "Log Probability of sequence 1: %g\n", $hmm->likelihood($seq1);
-printf "Log Probability of sequence 2: %g\n", $hmm->likelihood($seq2);
-printf "\n";
-printf "Statistical Training\n";
-printf "====================\n";
-
-@obs = ($obs1, $obs2);
-$hmm->statistical_training(\@seqs, \@obs);
-
-printf "Initial Probability Array:\n";
-$init = $hmm->init_prob;
-$hmm->init_prob($init);
-foreach $s (@{$init}) {
- printf "%g\t", $s;
-}
-printf "\n";
-
-printf "Transition Probability Matrix:\n";
-$matrix = $hmm->transition_prob;
-$hmm->transition_prob($matrix);
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Emission Probability Matrix:\n";
-$matrix = $hmm->emission_prob;
-$hmm->emission_prob($matrix);
-foreach $r ($matrix->row_names) {
- foreach $c ($matrix->column_names) {
- printf "%g\t", $matrix->entry($r, $c);
- }
- printf "\n";
-}
-
-printf "Vitebi Algorithm:\n";
-$obs3 = $hmm->viterbi($seq1);
-printf "%s\n", $obs3;
View
10 Bio/Ext/HMM/typemap
@@ -1,10 +0,0 @@
-TYPEMAP
-HMM * T_HMM
-
-INPUT
-T_HMM
- $var = ($type) (SvROK($arg) == 0 ? ($type) NULL : ($type) SvIV((SV*)SvRV($arg)))
-
-OUTPUT
-T_HMM
- sv_setref_pv($arg, "Bio::Ext::HMM::HMM", (void*) $var);
View
7 Bio/Ext/Makefile.PL
@@ -0,0 +1,7 @@
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Bio::Ext',
+ 'VERSION' => '0.07.0'
+);
View
2  Bio/SeqIO/staden/Makefile.PL
@@ -69,7 +69,7 @@ sub MY::post_initialize {
$iolibinc = ExtUtils::MakeMaker::prompt("Please tell us where your Staden io_lib \"Read.h\" header is installed: ", $prompt);
}
- `perl -pi -e 's{(LIBS\\s*=>\\s*")([^"]+)}{\$1-L$ioliblib -lread -lz};' read.pm`;
+ `perl -pi -e 's{(LIBS\\s*=>\\s*")([^"]+)}{\$1-L$ioliblib -lread};' read.pm`;
`perl -pi -e 's{(INC\\s*=>\\s*")([^"]+)}{\$1-I$iolibinc};' read.pm`;
return '';
View
8 Bio/SeqIO/staden/read.pm
@@ -78,15 +78,15 @@ sub BEGIN {
use Inline (C => 'DATA',
VERSION => '0.01',
NAME => 'Bio::SeqIO::staden::read',
- LIBS => "-L/usr/local/lib -lread -lz", # leave these as double quotes - necessary for Makefile.PL function
- INC => "-I/usr/local/include/io_lib", # leave these as double quotes - necessary for Makefile.PL function
+ LIBS => "-L/usr/local/lib -lread", # leave these as double quotes - necessary for Makefile.PL function
+ INC => "-I/usr/local/include/io_lib" # leave these as double quotes - necessary for Makefile.PL function
);
} or Bio::Root::Root::throw( -class => 'Bio::Root::SystemException',
-text => "No Inline::C (or maybe io-lib?) support available",
);
}
-$VERSION = 1.5.1;
+$VERSION = 0.01;
my %formats = ( scf => 1,
abi => 2,
@@ -196,7 +196,7 @@ int staden_write_trace(SV *self, FILE *fh, int format,
read->leftCutoff = 0;
read->rightCutoff = len + 1;
- qualarr = (AV *) SvRV(qual);
+ qualarr = SvRV(qual);
n = av_len(qualarr) + 1;
for (i = 0 ; i < n && i < len ; i++) {
val = *(av_fetch(qualarr, i, 0));
View
3  Makefile.PL
@@ -3,8 +3,7 @@ use ExtUtils::MakeMaker;
# the contents of the Makefile that is written.
WriteMakefile(
'NAME' => 'Bio',
- 'DISTNAME' => 'bioperl-ext',
- 'VERSION' => '1.5.1-RC1',
+ 'VERSION' => '0.08.0',
'DIR' => [ qw( Bio/Ext/Align Bio/SeqIO/staden )],
'dist' => { COMPRESS => 'gzip -9f',
SUFFIX => '.gz',
View
15 README
@@ -1,6 +1,6 @@
o Version
- This is Bioperl-ext version 1.5.1 from CVS HEAD
+ This is Bioperl-ext version 1.3 from CVS HEAD
o Summary
@@ -65,14 +65,9 @@ o Notes for Bio::SeqIO::staden::read
ftp://ftp.mrc-lmb.cam.ac.uk/pub/staden/io_lib/
- Many users have noted that the io_lib install process often forgets
- to install the "os.h" file along with the rest of the include files;
- you may have to do this manually. If so, you may also have to
- install the "config.h" file for "os.h" to find all the information it
- needs. Finally, you may also need to edit "os.h" to #include
- <config.h> instead of #include "config.h" if you continue to get
- undefined symbol errors during compilation. On some OSes you may in fact
- have to do the OPPOSITE, meaning change <config.h> to "config.h".
+ Many users have noted that the io_lib install process often forgets to
+ install the "os.h" file along with the rest of the include files; you
+ may have to do this manually.
The bioperl-ext make process will prompt you for the LIB and INCLUDE
locations (usually /usr/local/lib and usr/local/include/io_lib,
@@ -87,5 +82,5 @@ o Notes for Bio::SeqIO::staden::read
A failed compilation is most likely due to an incomplete io_lib
installation; make sure that all the required io_lib ".h" files are in
- place (see above regarding "os.h" and "config.h").
+ place.
Please sign in to comment.
Something went wrong with that request. Please try again.