Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Will write tests in the next commit. Just fixing up some obvious bugs…

… in the first run Naive Bayes pseudoimplementation.
  • Loading branch information...
commit 7723f09432f0b85b419ecfc46c8fbc9552e3f5ad 1 parent dfcb4d1
@gburtini authored
Showing 1 changed file with 12 additions and 16 deletions.
  1. +12 −16 lib/parametric/naivebayes.php
View
28 lib/parametric/naivebayes.php
@@ -6,18 +6,24 @@
// $xs is an array of strings and $ys holds their labels ($ys[$i] is the label of $xs[$i]).
function ll_naivebayes($xs, $ys, $testStrings) {
- $wordCounts = _ll_computeWordCounts($xs);
- //$wordCounts = _ll_computeTotalWordCounts($wordCounts);
+ $topicWords = array();
+ foreach($xs as $i=>$x) {
+ if(isset($topicWords[$ys[$i]]))
+ $topicWords[$ys[$i]] .= $x;
+ else
+ $topicWords[$ys[$i]] = $x;
+ }
+ $topicWords = _ll_computeWordCounts($topicWords); // get the number of each word, by topic.
$probWordsGivenTopic = array(); // probability of each word in a given topic.
$countTopics = array();
- $totalWords = 0;
- foreach($wordCounts as $i=>$xWordCounts) {
+
+ foreach($topicWords as $topicIndex=>$xWordCounts) {
$totalWordsTopic = array_sum($xWordCounts);
- $countTopics[$i] = $total_wordsTopic;
+ $countTopics[$topicIndex] = $total_wordsTopic;
foreach($xCount as $hash=>$count) {
- $probWordsGivenTopic[$i][$hash] = ($count/$totalWordsTopic);
+ $probWordsGivenTopic[$topicIndex][$hash] = ($count/$totalWordsTopic);
}
}
@@ -50,16 +56,6 @@ function ll_naivebayes($xs, $ys, $testStrings) {
return $return;
}
-function _ll_computeTotalWordCounts($wordCounts) {
- $total = array();
- foreach($wordCounts as $wc) {
- foreach($wc as $hash=>$value) {
- $total[$hash] += $value;
- }
- }
- return $total;
-}
-
function _ll_computeWordCounts($strings) {
$wcs = array();
foreach($strings as $string) {
Please sign in to comment.
Something went wrong with that request. Please try again.