In [6]:
class MultinomialNB:
    """Naive Bayes classifier for categorical features with additive smoothing.

    Every column of ``X`` is treated as a categorical variable whose
    categories are the distinct values seen in that column during ``fit``.
    """

    def fit(self, X, y, ls=0.01):
        """Estimate class priors and smoothed per-feature category likelihoods.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of class labels, one per row of ``X``.
            ls: Additive smoothing constant added to every category count.

        Returns:
            ``self``, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.ls = ls
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        self.x_classes = [np.unique(column) for column in X.T]
        self.phi_y = 1.0 * y_counts / y_counts.sum()
        self.phi_x = self.mean_X(X, y)
        self.c_x = self.count_x(X, y)
        return self

    def mean_X(self, X, y):
        """Return ``phi_x[k][j]``: likelihood vector of feature ``j`` for class ``k``."""
        return [
            [self.ls_mean_x(X, y, k, j) for j in range(len(self.x_classes))]
            for k in self.y_classes
        ]

    def ls_mean_x(self, X, y, k, j):
        """Return smoothed P(x_j = c | y = k) for every known category ``c``.

        The denominator adds ``ls`` once per category of feature ``j`` so the
        returned vector sums to 1.
        """
        categories = self.x_classes[j]
        # One row per sample of class k, one boolean column per category.
        hits = X[:, j][y == k].reshape(-1, 1) == categories
        return (hits.sum(axis=0) + self.ls) / (len(hits) + len(categories) * self.ls)

    def get_mean_x(self, y, j):
        """Return the smoothed likelihood of a category never seen in ``fit``.

        Args:
            y: Index into ``y_classes`` (not the label itself).
            j: Feature (column) index.
        """
        # Zero observed occurrences, so only the smoothing mass remains.
        return self.ls / (self.c_x[y][j] + len(self.x_classes[j]) * self.ls)

    def count_x(self, X, y):
        """Return ``c_x[k][j]``: number of training rows of class ``k``.

        The count does not depend on ``j``; it is repeated per feature so the
        result can be indexed like ``phi_x``.
        """
        return [
            [int(np.sum(y == k))] * len(self.x_classes)
            for k in self.y_classes
        ]

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        return np.apply_along_axis(self.compute_probs, 1, X)

    def compute_probs(self, x):
        """Return the label whose joint score is largest for the sample ``x``."""
        probs = np.array(
            [self.compute_prob(x, idx) for idx in range(len(self.y_classes))]
        )
        return self.y_classes[np.argmax(probs)]

    def compute_prob(self, x, y):
        """Return P(y) * prod_j P(x_j | y) for one sample.

        Args:
            x: 1-D sequence of feature values.
            y: Index into ``y_classes`` (not the label itself).
        """
        joint = self.phi_y[y]
        for j, value in enumerate(x):
            matches = np.flatnonzero(self.x_classes[j] == value)
            if matches.size:
                joint *= self.phi_x[y][j][matches[0]]  # p(xj|y)
            else:
                # Category absent from the training data: smoothing-only mass.
                joint *= self.get_mean_x(y, j)
        return joint

    def evaluate(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return (self.predict(X) == y).mean()

In [5]:
import numpy as np   

class GDAClassifier:
    """Gaussian discriminant analysis with one covariance shared by all classes."""

    def fit(self, X, y, epsilon=1e-10):
        """Estimate class priors, class means and the pooled covariance.

        Args:
            X: 2-D NumPy array, one row per sample.
            y: 1-D NumPy array of class labels, one per row of ``X``.
            epsilon: Constant added to every covariance entry before inversion.

        Returns:
            ``self``, so calls can be chained.
        """
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        self.phi_y = 1.0 * y_counts / len(y)
        self.u = np.array([X[y == k].mean(axis=0) for k in self.y_classes])
        self.E = self.compute_sigma(X, y)
        # NOTE(review): the offset is applied to every entry, not only the
        # diagonal; a diagonal ridge is the more common choice - confirm intent.
        self.E += np.ones_like(self.E) * epsilon
        self.invE = np.linalg.pinv(self.E)
        return self

    def compute_sigma(self, X, y):
        """Return the covariance of ``X`` after subtracting each row's class mean."""
        X_u = X.astype('float64')  # astype copies, so the caller's X is untouched
        for i in range(len(self.u)):
            X_u[y == self.y_classes[i]] -= self.u[i]
        return X_u.T.dot(X_u) / len(y)

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        return np.apply_along_axis(self.get_prob, 1, X)

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return (self.predict(X) == y).mean()

    def get_prob(self, x):
        """Return the most probable class label for the single sample ``x``.

        Scores are compared in log space, so samples far from every class mean
        are still ranked correctly instead of all underflowing to zero.
        """
        centered = x - self.u
        mahalanobis = np.sum(centered.dot(self.invE) * centered, axis=1)
        log_posterior = -0.5 * mahalanobis + np.log(self.phi_y)
        # Map the winning index back to its label so score() works for any labels.
        return self.y_classes[np.argmax(log_posterior)]

In [4]:
# %load gaussian_naive_bayes.py
import numpy as np
     
class GaussianNB:
    """Naive Bayes classifier with an independent Gaussian per feature and class."""

    def fit(self, X, y, epsilon=1e-10):
        """Estimate class priors and per-class feature means and variances.

        Args:
            X: 2-D NumPy array, one row per sample.
            y: 1-D NumPy array of class labels, one per row of ``X``.
            epsilon: Constant added to every variance so it is never zero.

        Returns:
            ``self``, so calls can be chained.
        """
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        # Kept as a list: columns may have different numbers of distinct
        # values, which cannot be packed into a rectangular array.
        self.x_classes = [np.unique(column) for column in X.T]
        self.phi_y = 1.0 * y_counts / y_counts.sum()
        self.u = np.array([X[y == k].mean(axis=0) for k in self.y_classes])
        self.var_x = np.array([X[y == k].var(axis=0) + epsilon for k in self.y_classes])
        return self

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        return np.apply_along_axis(self.compute_probs, 1, X)

    def compute_probs(self, x):
        """Return the label with the highest joint score for the sample ``x``."""
        # Compare log scores: the raw product underflows to 0 for every class
        # when there are many features or the sample is far from all means.
        log_scores = np.array(
            [self._log_joint(x, idx) for idx in range(len(self.y_classes))]
        )
        return self.y_classes[np.argmax(log_scores)]

    def _log_joint(self, x, y):
        """Return log P(y) + sum_j log N(x_j; u[y][j], var_x[y][j])."""
        variance = self.var_x[y]
        log_likelihood = -0.5 * np.sum(
            np.log(2.0 * np.pi * variance) + np.square(x - self.u[y]) / variance
        )
        return log_likelihood + np.log(self.phi_y[y])

    def compute_prob(self, x, y):
        """Return P(y) * prod_j N(x_j; mean, variance) for one sample.

        Args:
            x: 1-D array of feature values.
            y: Index into ``y_classes`` (not the label itself).
        """
        return np.exp(self._log_joint(x, y))

    def evaluate(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return (self.predict(X) == y).mean()

In [3]:
# %load gda.py
import numpy as np

class GDABinaryClassifier:
    """Two-class Gaussian discriminant analysis with a shared covariance.

    Labels must be 0 and 1: the class prior is taken as ``y.mean()`` and the
    class means are computed for ``k in [0, 1]``.
    """

    def fit(self, X, y):
        """Estimate the prior of class 1, both class means and the covariance.

        Args:
            X: 2-D NumPy array, one row per sample.
            y: 1-D NumPy array of 0/1 labels, one per row of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        self.fi = y.mean()
        self.u = np.array([X[y == k].mean(axis=0) for k in [0, 1]])
        # Work on a float copy: subtracting float means in place from an
        # integer array raises a casting error.
        X_u = X.astype('float64')
        for k in [0, 1]:
            X_u[y == k] -= self.u[k]
        self.E = X_u.T.dot(X_u) / len(y)
        self.invE = np.linalg.pinv(self.E)
        return self

    def predict(self, X):
        """Return the predicted 0/1 label for every row of ``X``."""
        # Log scores keep the ranking intact when exp() would underflow to 0.
        return np.argmax([self._log_prob(X, i) for i in range(len(self.u))], axis=0)

    def _prior(self, i):
        """Return the prior of class ``i``: ``fi`` for 1 and ``1 - fi`` for 0."""
        return (self.fi) ** i * (1 - self.fi) ** (1 - i)

    def _mahalanobis(self, X, i):
        """Return the squared Mahalanobis distance of each row to mean ``i``."""
        centered = X - self.u[i]
        return np.sum(centered.dot(self.invE) * centered, axis=1)

    def _log_prob(self, X, i):
        """Return the log of ``compute_prob(X, i)`` without exponentiating."""
        with np.errstate(divide='ignore'):  # a zero prior maps to -inf
            log_prior = np.log(self._prior(i))
        return -0.5 * self._mahalanobis(X, i) + log_prior

    def compute_prob(self, X, i):
        """Return the unnormalised posterior of class ``i`` for each row of ``X``.

        The value is ``exp(-0.5 * d) * prior`` where ``d`` is the squared
        Mahalanobis distance to the mean of class ``i``.
        """
        return np.exp(-0.5 * self._mahalanobis(X, i)) * self._prior(i)

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return (self.predict(X) == y).mean()

In [2]:
# %load naive_bayes.py
import numpy as np

class NaiveBayesBinaryClassifier:
    """Naive Bayes classifier whose per-feature likelihood is p**x * (1-p)**(1-x).

    NOTE(review): the likelihood form presumably expects 0/1 features - confirm
    against callers.
    """

    def fit(self, X, y):
        """Store the class labels, class priors and per-class feature means.

        Returns ``self`` so calls can be chained.
        """
        labels, counts = np.unique(y, return_counts=True)
        self.y_classes = labels
        self.phi_y = 1.0 * counts / counts.sum()
        self.phi_x = [1.0 * X[y == label].mean(axis=0) for label in labels]
        return self

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        return np.apply_along_axis(self.compute_probs, 1, X)

    def compute_probs(self, x):
        """Return the label with the largest joint score for the sample ``x``."""
        scores = [self.compute_prob(x, idx) for idx, _ in enumerate(self.y_classes)]
        best = np.argmax(scores)
        return self.y_classes[best]

    def compute_prob(self, x, y):
        """Return P(y) * prod_j P(x_j | y); ``y`` is an index into ``y_classes``."""
        likelihood = 1
        for j, x_j in enumerate(x):
            p_on = self.phi_x[y][j]  # p(xj=1|y)
            p_off = 1 - p_on  # p(xj=0|y)
            likelihood *= (p_on ** x_j) * (p_off ** (1 - x_j))
        return likelihood * self.phi_y[y]

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        correct = self.predict(X) == y
        return correct.mean()

In [1]:
import pandas as pd

class BayesianClassifier:
    """Naive Bayes over the categorical columns of a CSV file."""

    def __init__(self, path, col):
        """Load the CSV and split it into feature columns and the target column.

        Args:
            path: Anything ``pandas.read_csv`` accepts as its first argument.
            col: Name of the target (class) column.
        """
        self.cols = {}     # cols[label][feature][value] -> P(value | label)
        self.op_cols = {}  # op_cols[label] -> number of rows with that label
        self.col = col
        self.data = pd.read_csv(path)
        self.dataM = self.data  # full table, target column included
        self.op = self.data[col]
        self.data = self.data.drop(col, axis=1)
        self.total = len(self.dataM)

    def get_probability_table(self):
        """Fill ``cols`` and ``op_cols`` and print one line per table entry.

        Each probability is the number of rows of a class holding a given
        feature value divided by the number of rows of that class.
        """
        print("COLUMN\tVALUE\tOUTPUT\tPROBABILITY")
        for label in self.op.unique():
            class_rows = self.dataM[self.dataM[self.col] == label]
            class_size = len(class_rows)
            self.op_cols[label] = class_size
            self.cols[label] = {}
            for feature in self.data.columns:
                table = {}
                # dict.fromkeys keeps first-seen order and drops duplicates.
                for value in dict.fromkeys(self.data[feature]):
                    matches = int((class_rows[feature] == value).sum())
                    # Guard against a label that matches no row (e.g. NaN).
                    prob = matches / class_size if class_size else 0.0
                    table[value] = prob
                    print("{}--->{}--->{}--->{}".format(feature, value, label, prob))
                self.cols[label][feature] = table
        print(self.op_cols)

    def classify(self, values):
        """Return the normalised posterior of every class for one observation.

        Args:
            values: Mapping of feature column name to observed value.

        Returns:
            Dict mapping each class label to its posterior probability. When
            every class scores zero the unnormalised zeros are returned
            instead of dividing by zero.

        Raises:
            KeyError: If ``get_probability_table`` has not been called, or a
                feature name or value is not present in the table.
        """
        scores = {}
        for label in self.op.unique():
            likelihood = 1
            for key, value in values.items():
                likelihood *= self.cols[label][key][value]
            scores[label] = likelihood * self.op_cols[label] / self.total

        evidence = sum(scores.values())
        if evidence == 0:
            return scores
        return {label: score / evidence for label, score in scores.items()}
            
# Build the classifier from new.csv, using "Weather" as the target column,
# then print its conditional-probability table.
b = BayesianClassifier("new.csv", "Weather")
b.get_probability_table()

# Print the class posterior for one hand-written observation.
print(
    b.classify(
        {"Temp": 'Mild', "Windy": "Low", "Humidity": "High", "Cloudy": "Yes"}
    )
)

FileNotFoundError: [Errno 2] No such file or directory: 'new.csv'