Add naive from scratch

ankit2001 · May 22, 2020 · b904c59 · b904c59
1 parent 0600064
commit b904c59
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 10 deletions.
diff --git a/NaiveBayes/Naive.py b/NaiveBayes/Naive.py
@@ -3,14 +3,32 @@
 class NaiveBayes:
 	def fit(self, X, Y):
 		total_samples, total_features = X.shape
-		self.Classes = np.unique(Y);
-		self.total_classes = Classes.length
-		self.Mean = np.zeros(total_samples, total_features, dtype = np.float64)
-		self.Var = np.zeros(total_samples, total_features, dtype = np.float64)
+		self.Classes = np.unique(Y)
+		total_classes = len(self.Classes)
+		self.Mean = np.zeros((total_classes, total_features), dtype = np.float64)
+		self.Var = np.zeros((total_classes, total_features), dtype = np.float64)
 		self.Freq = np.zeros(total_classes, dtype = np.float64)
+		for (i, c) in enumerate(self.Classes):
+			X_tmp = X[c == Y]
+			self.Mean[i] = X_tmp.mean(axis = 0)
+			self.Var[i] = X_tmp.var(axis = 0)
+			self.Freq[i] = len(X_tmp) / float(total_samples)
 
 	def predict(self, X_test):
-		y_predicted = [self._predict(x)  for x in X]
+		y_predicted = [self._predict(x) for x in X_test]
+		return y_predicted
 
 	def _predict(self, x):
-		pass
+		which_class = []
+		for i in range(len(self.Classes)):
+			which_class.append(np.log(self.Freq[i]) + np.sum(np.log(self.normal_dist(x, i))))
+		class_index = np.argmax(which_class)
+		return self.Classes[class_index]
+
+	def normal_dist(self, x, class_index):
+		variance = self.Var[class_index]
+		mean = self.Mean[class_index]
+		num = np.exp(-((x - mean) ** 2) / (2 * variance))
+		den = np.sqrt(2 * np.pi * variance)
+		return num / den
+
diff --git a/NaiveBayes/__pycache__/Naive.cpython-36.pyc b/NaiveBayes/__pycache__/Naive.cpython-36.pyc
diff --git a/NaiveBayes/nb_testing.py b/NaiveBayes/nb_testing.py
@@ -13,16 +13,16 @@ def accuracy(y_true, y_predicted):
 	return 
 
 # Generate a synthetic classification dataset for testing
-X, Y = datasets.make_classification(n_samples = 1000, n_features = 10, n_classes = 5, random_state = 121, n_informative = 5)
-X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 121)
+X, Y = datasets.make_classification(n_samples = 100000, n_features = 50, n_classes = 5, random_state = 121, n_informative = 41)
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 121)
 
 # Print the generated data so it can be inspected
 print(X)
 print(Y)
 
 nb = NaiveBayes()
-np.fit(X_train, Y_train) # fitting test data
-final_predictions = np.predict(X_test) #final predicted values for testing data
+nb.fit(X_train, Y_train) # fit the model on the training split (X_train, Y_train)
+final_predictions = nb.predict(X_test) # predicted class labels for the held-out test split
 
 # Compute accuracy by comparing the predictions against the true test labels
 accuracy(Y_test, final_predictions)