Skip to content

Commit

Permalink
Add naive from scratch
Browse files Browse the repository at this point in the history
  • Loading branch information
ankit2001 committed May 22, 2020
1 parent 0600064 commit b904c59
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
30 changes: 24 additions & 6 deletions NaiveBayes/Naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,32 @@
class NaiveBayes:
    """Gaussian Naive Bayes classifier implemented from scratch on NumPy.

    After ``fit`` the instance exposes:

    * ``Classes`` - sorted unique class labels found in the training target.
    * ``Mean``    - per-class feature means, shape (n_classes, n_features).
    * ``Var``     - per-class feature variances, same shape as ``Mean``.
    * ``Freq``    - per-class prior, i.e. the fraction of training samples
      belonging to each class.
    """

    # Added to every variance before it is used as a divisor, so a feature
    # that is constant within a class (variance 0) does not produce NaN/inf.
    _VAR_EPS = 1e-9

    def fit(self, X, Y):
        """Estimate per-class means, variances and priors.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            Y: 1-D array-like of n_samples class labels.
        """
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y)
        total_samples, total_features = X.shape
        self.Classes = np.unique(Y)
        total_classes = len(self.Classes)
        # One row of statistics per class (not per sample).
        self.Mean = np.zeros((total_classes, total_features), dtype=np.float64)
        self.Var = np.zeros((total_classes, total_features), dtype=np.float64)
        self.Freq = np.zeros(total_classes, dtype=np.float64)
        for i, c in enumerate(self.Classes):
            X_tmp = X[Y == c]
            self.Mean[i] = X_tmp.mean(axis=0)
            self.Var[i] = X_tmp.var(axis=0)
            self.Freq[i] = len(X_tmp) / float(total_samples)

    def predict(self, X_test):
        """Return a list with the predicted class label of every row in X_test."""
        y_predicted = [self._predict(x) for x in X_test]
        return y_predicted

    def _predict(self, x):
        """Return the class with the highest log-posterior for one sample."""
        # Work in log space end to end: taking log() of an already
        # exponentiated density underflows to log(0) = -inf for samples far
        # from every class mean, which made all classes tie.
        which_class = [
            np.log(self.Freq[i]) + np.sum(self._log_normal_dist(x, i))
            for i in range(len(self.Classes))
        ]
        class_index = np.argmax(which_class)
        return self.Classes[class_index]

    def _log_normal_dist(self, x, class_index):
        """Return the per-feature Gaussian log-density of x for one class."""
        variance = self.Var[class_index] + self._VAR_EPS
        mean = self.Mean[class_index]
        return -0.5 * np.log(2 * np.pi * variance) - ((x - mean) ** 2) / (2 * variance)

    def normal_dist(self, x, class_index):
        """Return the per-feature Gaussian density of x for one class."""
        variance = self.Var[class_index] + self._VAR_EPS
        mean = self.Mean[class_index]
        num = np.exp(-((x - mean) ** 2) / (2 * variance))
        den = np.sqrt(2 * np.pi * variance)
        return num / den

Binary file added NaiveBayes/__pycache__/Naive.cpython-36.pyc
Binary file not shown.
8 changes: 4 additions & 4 deletions NaiveBayes/nb_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ def accuracy(y_true, y_predicted):
return

# Build a synthetic multi-class dataset and hold out 20% of it for testing.
X, Y = datasets.make_classification(n_samples = 100000, n_features = 50, n_classes = 5, random_state = 121, n_informative = 41)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 121)

# Print the generated data for a quick visual check.
print(X)
print(Y)

nb = NaiveBayes()
nb.fit(X_train, Y_train)  # fit the classifier on the training split
final_predictions = nb.predict(X_test)  # predicted classes for the held-out test split

# Compare the predictions against the true classes of the test split.
accuracy(Y_test, final_predictions)
Expand Down

0 comments on commit b904c59

Please sign in to comment.