From 8f914f9e74469962d79d2a7a86dc5b82017c5acd Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Wed, 29 Mar 2017 19:07:15 +0300
Subject: [PATCH 1/8] Add Gaussian Function

---
 utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/utils.py b/utils.py
index ed44f1e9e..f5d960442 100644
--- a/utils.py
+++ b/utils.py
@@ -258,6 +258,10 @@ def step(x):
     """Return activation value of x with sign function"""
     return 1 if x >= 0 else 0
 
+def gaussian(mean, st_dev, x):
+    """Given the mean and standard deviation of a normal distribution, return its probability density at x."""
+    return 1/(math.sqrt(2*math.pi)*st_dev)*math.e**(-0.5*(float(x-mean)/st_dev)**2)
+
 
 try:  # math.isclose was added in Python 3.5; but we might be in 3.4
     from math import isclose

From 293a414087eb3ef5bfcdbc4f0bbc81491dda930e Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Wed, 29 Mar 2017 19:08:39 +0300
Subject: [PATCH 2/8] Added Tests

Add tests for Continuous Naive Bayes + Means/Standard Deviation
---
 tests/test_learning.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/tests/test_learning.py b/tests/test_learning.py
index 4f618f7c1..8b29437b6 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -22,6 +22,20 @@ def test_weighted_replicate():
     assert weighted_replicate('ABC', [1, 2, 1], 4) == ['A', 'B', 'B', 'C']
 
 
+def test_means_and_deviation():
+    iris = DataSet(name="iris")
+
+    means, deviations = iris.find_means_and_deviations()
+    # Compare rounded values: exact equality on computed floats is brittle.
+    assert [round(m, 3) for m in means["setosa"]] == [5.006, 3.418, 1.464, 0.244]
+    assert [round(m, 3) for m in means["versicolor"]] == [5.936, 2.77, 4.26, 1.326]
+    assert [round(m, 3) for m in means["virginica"]] == [6.588, 2.974, 5.552, 2.026]
+
+    assert round(deviations["setosa"][0], 3) == 0.352
+    assert round(deviations["versicolor"][0], 3) == 0.516
+    assert round(deviations["virginica"][0], 3) == 0.636
+
+
 def test_plurality_learner():
     zoo = DataSet(name="zoo")
 
@@ -32,8 +46,14 @@ def test_plurality_learner():
 def test_naive_bayes():
     iris = DataSet(name="iris")
 
-    nB = NaiveBayesLearner(iris)
-    assert nB([5,3,1,0.1]) == "setosa"
+    # Discrete
+    nBD = NaiveBayesLearner(iris)
+    assert nBD([5,3,1,0.1]) == "setosa"
+
+    # Continuous
+    nBC = NaiveBayesLearner(iris, continuous=True)
+    assert nBC([5,3,1,0.1]) == "setosa"
+    assert nBC([7,3,6.5,2]) == "virginica"
 
 
 def test_k_nearest_neighbors():

From 1abefb856858cc0c5443eb17727d454e5234f02c Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Wed, 29 Mar 2017 19:13:58 +0300
Subject: [PATCH 3/8] Add continuous (Gaussian) Naive Bayes learner to learning.py

---
 learning.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/learning.py b/learning.py
index ec685131d..1768782e6 100644
--- a/learning.py
+++ b/learning.py
@@ -1,7 +1,7 @@
 """Learn to estimate functions from examples. (Chapters 18-20)"""
 
 from utils import (
-    removeall, unique, product, mode, argmax, argmax_random_tie, isclose,
+    removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
     weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table, DataFile
 )
@@ -11,7 +11,7 @@
 import math
 import random
 
-from statistics import mean
+from statistics import mean, stdev
 from collections import defaultdict
 
 # ______________________________________________________________________________
@@ -174,6 +174,45 @@ def remove_examples(self, value=""):
         self.examples = [x for x in self.examples if value not in x]
         self.update_values()
 
+    def split_values_by_classes(self):
+        """Split values into buckets according to their class.
+        Returns a defaultdict mapping each class (the value at self.target) to a
+        list of items, where an item is an example with its target entry removed."""
+        buckets = defaultdict(list)
+        for v in self.examples:
+            item = list(v)
+            del item[self.target]  # Remove target by position so equal feature values survive
+            buckets[v[self.target]].append(item)  # Add item to bucket of its class
+        return buckets
+
+    def find_means_and_deviations(self):
+        """Finds the means and standard deviations of the dataset's features, per class.
+        means     : A dictionary for each class/target. Holds a list of the means
+                    of the features for the class.
+        deviations: A dictionary for each class/target. Holds a list of the sample
+                    standard deviations of the features for the class.
+        Raises statistics.StatisticsError if a class has fewer than two examples."""
+        target_names = self.values[self.target]
+        feature_numbers = len(self.inputs)
+        item_buckets = self.split_values_by_classes()
+
+        means = defaultdict(lambda: [0] * feature_numbers)
+        deviations = defaultdict(lambda: [0] * feature_numbers)
+
+        for t in target_names:
+            # Find all the item feature values for item in class t
+            features = [[] for i in range(feature_numbers)]
+            for item in item_buckets[t]:
+                for i in range(feature_numbers):
+                    features[i].append(item[i])  # In-place append keeps this linear
+
+            # Calculate means and deviations for the class
+            means[t] = [mean(f) for f in features]
+            deviations[t] = [stdev(f) for f in features]
+
+        return means, deviations
+
+
     def __repr__(self):
         return '<DataSet({}): {:d} examples, {:d} attributes>'.format(
             self.name, len(self.examples), len(self.attrs))
@@ -263,15 +302,22 @@ def predict(example):
 # ______________________________________________________________________________
 
 
-def NaiveBayesLearner(dataset):
+def NaiveBayesLearner(dataset, continuous=True):
+    """Return a Gaussian (continuous) or counting (discrete) naive Bayes predictor for dataset."""
+    if continuous:
+        return NaiveBayesContinuous(dataset)
+    return NaiveBayesDiscrete(dataset)
+
+
+def NaiveBayesDiscrete(dataset):
     """Just count how many times each value of each input attribute
     occurs, conditional on the target value. Count the different
     target values too."""
 
-    targetvals = dataset.values[dataset.target]
-    target_dist = CountingProbDist(targetvals)
+    target_vals = dataset.values[dataset.target]
+    target_dist = CountingProbDist(target_vals)
     attr_dists = {(gv, attr): CountingProbDist(dataset.values[attr])
-                  for gv in targetvals
+                  for gv in target_vals
                   for attr in dataset.inputs}
     for example in dataset.examples:
         targetval = example[dataset.target]
@@ -286,7 +332,27 @@ def class_probability(targetval):
             return (target_dist[targetval] *
                     product(attr_dists[targetval, attr][example[attr]]
                             for attr in dataset.inputs))
-        return argmax(targetvals, key=class_probability)
+        return argmax(target_vals, key=class_probability)
+
+    return predict
+
+
+def NaiveBayesContinuous(dataset):
+    means, deviations = dataset.find_means_and_deviations()
+
+    target_vals = dataset.values[dataset.target]
+    target_dist = CountingProbDist(target_vals)
+    for example in dataset.examples:  # Count each class so the prior reflects its frequency
+        target_dist.add(example[dataset.target])
+
+    def predict(example):
+        """Predict the target value for example: the class with the highest prior * likelihood."""
+        def class_probability(targetval):
+            prob = target_dist[targetval]
+            for attr in dataset.inputs:
+                prob *= gaussian(means[targetval][attr], deviations[targetval][attr], example[attr])
+            return prob
+        return argmax(target_vals, key=class_probability)
 
     return predict
 

From 5b627d4ca1e4e3f372c24bf10cc1c21ce5cc4a93 Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Wed, 29 Mar 2017 19:21:22 +0300
Subject: [PATCH 4/8] Commenting Fix

---
 learning.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/learning.py b/learning.py
index 1768782e6..caf1af60a 100644
--- a/learning.py
+++ b/learning.py
@@ -338,6 +338,8 @@ def class_probability(targetval):
 
 
 def NaiveBayesContinuous(dataset):
+    """Count how many times each target value occurs.
+    Also, find the means and deviations of input attribute values for each target value."""
     means, deviations = dataset.find_means_and_deviations()
 
     target_vals = dataset.values[dataset.target]

From cf5d7727187e4c0d2c71a37ba1d566637bcec9ad Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Thu, 30 Mar 2017 20:47:46 +0300
Subject: [PATCH 5/8] Add test for gaussian

---
 tests/test_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 76e0421b3..6e0ca160d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -136,6 +136,12 @@ def test_sigmoid():
     assert isclose(0.2689414213699951, sigmoid(-1))
 
 
+def test_gaussian():
+    assert isclose(gaussian(1, 0.5, 0.7), 0.6664492057835993)
+    assert isclose(gaussian(5, 2, 4.5), 0.19333405840142462)
+    assert isclose(gaussian(3, 1, 3), 0.3989422804014327)
+
+
 def test_step():
     assert step(1) == step(0.5) == 1
     assert step(0) == 1

From a86c1027112c541410038c5df7200cdbc53bcb51 Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Fri, 7 Apr 2017 12:27:44 +0300
Subject: [PATCH 6/8] test for every class

---
 tests/test_learning.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_learning.py b/tests/test_learning.py
index 8b29437b6..4b1d2ecb3 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -49,10 +49,13 @@ def test_naive_bayes():
     # Discrete
     nBD = NaiveBayesLearner(iris)
     assert nBD([5,3,1,0.1]) == "setosa"
+    assert nBD([6,5,3,1.5]) == "versicolor"
+    assert nBD([7,3,6.5,2]) == "virginica"
 
     # Continuous
     nBC = NaiveBayesLearner(iris, continuous=True)
     assert nBC([5,3,1,0.1]) == "setosa"
+    assert nBC([6,5,3,1.5]) == "versicolor"
     assert nBC([7,3,6.5,2]) == "virginica"
 
 

From c8ae88a9f702872c13bd3194886edddc266901d3 Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Fri, 7 Apr 2017 22:33:06 +0300
Subject: [PATCH 7/8] Add means/deviations test to test_learning.py

---
 tests/test_learning.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_learning.py b/tests/test_learning.py
index 109c7a7d4..5a3302cbf 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -35,6 +35,20 @@ def test_weighted_replicate():
     assert weighted_replicate('ABC', [1, 2, 1], 4) == ['A', 'B', 'B', 'C']
 
 
+def test_means_and_deviation():  # iris per-class feature means and sample standard deviations
+    iris = DataSet(name="iris")
+
+    means, deviations = iris.find_means_and_deviations()
+
+    assert means["setosa"] == [5.006, 3.418, 1.464, 0.244]
+    assert means["versicolor"] == [5.936, 2.77, 4.26, 1.326]
+    assert means["virginica"] == [6.588, 2.974, 5.552, 2.026]
+
+    assert round(deviations["setosa"][0],3) == 0.352
+    assert round(deviations["versicolor"][0],3) == 0.516
+    assert round(deviations["virginica"][0],3) == 0.636
+
+
 def test_plurality_learner():
     zoo = DataSet(name="zoo")
 

From 1442d42c2facfae84e939322d91c7406608bf2cf Mon Sep 17 00:00:00 2001
From: Antonis Maronikolakis <antmarakis@programmers.gr>
Date: Thu, 13 Apr 2017 15:29:10 +0300
Subject: [PATCH 8/8] Round float results to make sure test passes

---
 tests/test_learning.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_learning.py b/tests/test_learning.py
index 5a3302cbf..6f3a2ca03 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -40,13 +40,13 @@ def test_means_and_deviation():
 
     means, deviations = iris.find_means_and_deviations()
 
-    assert means["setosa"] == [5.006, 3.418, 1.464, 0.244]
-    assert means["versicolor"] == [5.936, 2.77, 4.26, 1.326]
-    assert means["virginica"] == [6.588, 2.974, 5.552, 2.026]
+    assert round(means["setosa"][0], 3) == 5.006
+    assert round(means["versicolor"][0], 3) == 5.936
+    assert round(means["virginica"][0], 3) == 6.588
 
-    assert round(deviations["setosa"][0],3) == 0.352
-    assert round(deviations["versicolor"][0],3) == 0.516
-    assert round(deviations["virginica"][0],3) == 0.636
+    assert round(deviations["setosa"][0], 3) == 0.352
+    assert round(deviations["versicolor"][0], 3) == 0.516
+    assert round(deviations["virginica"][0], 3) == 0.636
 
 
 def test_plurality_learner():