In [11]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt

from scipy.stats import multivariate_normal as mvn
from scipy.stats import multinomial as mlvn
from scipy.stats import bernoulli as brn
# multivariate_normal (mvn) is a multivariate normal random variable; think of a 3-dimensional Gaussian.

%matplotlib inline

In [56]:
class GenBayes():
    """Generative Bayes classifier with a selectable class-conditional model.

    ``DistStr`` selects the likelihood that is fitted for every class:

    * ``"Gauss"``       - multivariate normal with a full covariance matrix.
    * ``"Multinomial"`` - per-class feature proportions; X holds non-negative counts.
    * ``"Bernoulli"``   - independent per-feature Bernoulli; X holds 0/1 values.

    ``predict`` must be called with the same ``DistStr`` that was used in ``fit``,
    because the parameters stored in ``self.likelihoods`` differ per model.
    """

    _DISTRIBUTIONS = ("Gauss", "Multinomial", "Bernoulli")

    def fit(self, X, y, DistStr, epsilon = 1e-3):
        """Estimate class priors and class-conditional parameters.

        Parameters
        ----------
        X : ndarray of shape (N, D)
            Training features.
        y : ndarray of shape (N,)
            Class labels; they are compared against their integer cast.
        DistStr : {"Gauss", "Multinomial", "Bernoulli"}
            Which class-conditional distribution to fit.
        epsilon : float
            "Gauss": ridge added to the covariance diagonal so it stays invertible.
            "Multinomial": additive smoothing so no feature probability is zero.
            Not used by "Bernoulli" (which uses add-one smoothing).

        Raises
        ------
        ValueError
            If ``DistStr`` is not one of the supported names.
        """
        # Validate first so a bad call cannot wipe a previously fitted model.
        if DistStr not in self._DISTRIBUTIONS:
            raise ValueError(
                "DistStr must be one of %s, got %r" % (self._DISTRIBUTIONS, DistStr)
            )

        self.likelihoods = dict()
        self.priors = dict()
        self.K = set(y.astype(int))

        for k in self.K:
            X_k = X[y == k, :]   # rows belonging to class k
            N_k, D = X_k.shape   # N_k observations, D features

            if DistStr == "Gauss":
                mu_k = X_k.mean(axis=0)
                centered = X_k - mu_k
                # centered.T @ centered is the scatter matrix; dividing by N_k - 1
                # gives the sample covariance. max(..., 1) keeps a one-sample
                # class from dividing by zero (its covariance is then epsilon * I).
                self.likelihoods[k] = {
                    'mean': mu_k,
                    'cov': (1 / max(N_k - 1, 1)) * np.matmul(centered.T, centered)
                           + epsilon * np.identity(D),
                }
            elif DistStr == "Multinomial":
                counts = X_k.sum(axis=0)
                # Smoothed share of the class's total count mass per feature.
                self.likelihoods[k] = {
                    "P": (counts + epsilon) / (counts.sum() + epsilon * D)
                }
            else:  # "Bernoulli"
                # Add-one smoothing keeps every probability strictly inside (0, 1)
                # for 0/1 features, so both logs in predict() are finite.
                self.likelihoods[k] = {"P": (X_k.sum(axis=0) + 1) / (N_k + 2)}

            self.priors[k] = N_k / len(X)

    def predict(self, X, DistStr):
        """Return the maximum-a-posteriori class label for every row of ``X``.

        Parameters
        ----------
        X : ndarray of shape (N, D)
        DistStr : {"Gauss", "Multinomial", "Bernoulli"}
            Must match the value passed to ``fit``.

        Returns
        -------
        ndarray of shape (N,)
            Predicted labels, taken from the integer labels seen in ``fit``.

        Raises
        ------
        ValueError
            If ``DistStr`` is not one of the supported names.
        """
        if DistStr not in self._DISTRIBUTIONS:
            raise ValueError(
                "DistStr must be one of %s, got %r" % (self._DISTRIBUTIONS, DistStr)
            )

        N, _ = X.shape
        # Columns follow the sorted labels rather than the label values, so
        # labels need not be exactly 0..K-1.
        classes = sorted(self.likelihoods)
        P_hat = np.zeros((N, len(classes)))

        for col, k in enumerate(classes):
            l = self.likelihoods[k]
            if DistStr == "Gauss":
                log_lik = mvn.logpdf(X, l['mean'], l['cov'])
            elif DistStr == "Multinomial":
                # The multinomial coefficient depends only on the row, not on
                # the class, so it is dropped: it cannot change the argmax.
                log_lik = np.matmul(X, np.log(l['P']))
            else:  # "Bernoulli"
                log_lik = (np.matmul(X, np.log(l['P']))
                           + np.matmul(1 - X, np.log(1 - l['P'])))
            P_hat[:, col] = log_lik + np.log(self.priors[k])

        return np.asarray(classes)[P_hat.argmax(axis=1)]
        

In [47]:
def accuracy(y,y_hat):
    """Return the mean of the comparison ``y == y_hat``.

    For two equally shaped NumPy arrays this is the fraction of positions
    where the true label and the predicted label agree.
    """
    hits = (y == y_hat)
    return np.mean(hits)

In [40]:
# Read xor.csv (a relative path, so resolved against the current working directory) into a DataFrame.
df = pd.read_csv('xor.csv')

In [41]:
# Convert the frame to a 2-D array, then split it column-wise:
# every column except the last is a feature, the last column holds the labels.
X = df.to_numpy()
X, y = X[:, :-1], X[:, -1]

In [42]:
# One GenBayes instance; the distribution is chosen per call through the DistStr argument of fit/predict.
g = GenBayes()
# Unused placeholders for separate instances (left commented out):
# gm = GenBayes()
# gb = GenBayes()

In [43]:
# Fit the Gaussian class-conditional model on every row of X (no train/test split is made here).
g.fit(X, y,'Gauss')

In [45]:
# Predict on the same rows that were used for fitting, so any score computed from y_hat is a training-set score.
y_hat = g.predict(X, 'Gauss')

## Naive Gaussian Bayes

In [1]:
class GaussNB():
    """Gaussian naive Bayes: every feature is modelled as an independent normal per class."""

    def fit(self, X, y, epsilon = 1e-2):
        """Estimate per-class feature means, feature variances and class priors.

        Parameters
        ----------
        X : ndarray of shape (N, D)
            Training features.
        y : ndarray of shape (N,)
            Class labels; they are compared against their integer cast.
        epsilon : float
            Added to every variance so that a constant feature does not
            produce a zero variance.
        """
        self.likelihoods = dict()
        self.priors = dict()
        self.K = set(y.astype(int))

        for k in self.K:
            X_k = X[y == k, :]
            # "cov" is a 1-D vector of per-feature variances; scipy's
            # multivariate_normal interprets a 1-D cov as a diagonal matrix,
            # which is exactly the naive (independent features) assumption.
            self.likelihoods[k] = {
                "mean": X_k.mean(axis=0),
                "cov": X_k.var(axis=0) + epsilon,
            }
            self.priors[k] = len(X_k) / len(X)

    def predictNB(self, X):
        """Return the maximum-a-posteriori class label for every row of ``X``.

        Parameters
        ----------
        X : ndarray of shape (N, D)

        Returns
        -------
        ndarray of shape (N,)
            Predicted labels, taken from the integer labels seen in ``fit``.
        """
        N, _ = X.shape
        # Columns follow the sorted labels rather than the label values, so
        # labels need not be exactly 0..K-1.
        classes = sorted(self.likelihoods)
        P_hat = np.zeros((N, len(classes)))

        for col, k in enumerate(classes):
            l = self.likelihoods[k]
            # log-likelihood + log-prior = unnormalised log-posterior
            P_hat[:, col] = mvn.logpdf(X, l["mean"], l["cov"]) + np.log(self.priors[k])

        return np.asarray(classes)[P_hat.argmax(axis=1)]
        

## Gaussian Bayes 

In [2]:
class GaussBayes(): # not naive
    """Gaussian Bayes classifier with a full covariance matrix per class."""

    def fit(self, X, y, epsilon = 1e-2):
        """Estimate per-class means, covariance matrices and class priors.

        Parameters
        ----------
        X : ndarray of shape (N, D)
            Training features.
        y : ndarray of shape (N,)
            Class labels; they are compared against their integer cast.
        epsilon : float
            Ridge added to the covariance diagonal so the matrix stays invertible.
        """
        # The attribute spelling "likihoods" is kept as-is so existing code
        # that inspects a fitted instance keeps working.
        self.likihoods = dict()
        self.priors = dict()
        self.K = set(y.astype(int))

        for k in self.K:
            X_k = X[y == k, :]
            N_k, D = X_k.shape
            mu_k = X_k.mean(axis=0)
            centered = X_k - mu_k
            # centered.T @ centered is the (D, D) scatter matrix: entry (i, j)
            # sums the products of feature i and feature j deviations.
            # Dividing by N_k - 1 turns it into the sample covariance;
            # max(..., 1) keeps a one-sample class from dividing by zero.
            cov_k = (1 / max(N_k - 1, 1)) * np.matmul(centered.T, centered) + epsilon * np.identity(D)
            self.likihoods[k] = {"mean": mu_k, "cov": cov_k}
            self.priors[k] = len(X_k) / len(X)

    def predictGB(self, X):
        """Return the maximum-a-posteriori class label for every row of ``X``.

        Parameters
        ----------
        X : ndarray of shape (N, D)

        Returns
        -------
        ndarray of shape (N,)
            Predicted labels, taken from the integer labels seen in ``fit``.
        """
        N, _ = X.shape
        # Columns follow the sorted labels rather than the label values, so
        # labels need not be exactly 0..K-1.
        classes = sorted(self.likihoods)
        P_hat = np.zeros((N, len(classes)))

        for col, k in enumerate(classes):
            l = self.likihoods[k]
            # log density under the class Gaussian plus the log prior
            P_hat[:, col] = mvn.logpdf(X, l["mean"], l["cov"]) + np.log(self.priors[k])

        return np.asarray(classes)[P_hat.argmax(axis=1)]  # maximum a posteriori estimate

## Bernoulli Bayes 

In [3]:
class BernBayes():
  """Bernoulli naive Bayes: every feature is an independent 0/1 variable per class."""

  def fit(self, X, y, epsilon = 1e-3):
    """Estimate per-class feature probabilities and class priors.

    Parameters
    ----------
    X : ndarray of shape (N, D)
        Training features, expected to lie in [0, 1] (typically 0/1).
    y : ndarray of shape (N,)
        Class labels; they are compared against their integer cast.
    epsilon : float
        Added to the stored Bernoulli variance ``p * (1 - p)``.
    """
    self.likelihoods = {}
    self.priors = {}
    self.K = set(y.astype(int))

    for k in self.K:
      X_k = X[y == k, :]
      # Add-one smoothing: keeps p strictly inside (0, 1) for 0/1 features so
      # the logs taken in predict() are finite. The column sum is vectorised
      # instead of adding the rows one by one in Python.
      p = (X_k.sum(axis=0) + 1) / (len(X_k) + 2)
      # 'cov' is stored for inspection only; predict() does not read it.
      self.likelihoods[k] = {'mean': p, 'cov': p * (1 - p) + epsilon}
      self.priors[k] = len(X_k) / len(X)

  def predict(self, X):
    """Return the maximum-a-posteriori class label for every row of ``X``.

    Parameters
    ----------
    X : ndarray of shape (N, D)

    Returns
    -------
    ndarray of shape (N,)
        Predicted labels, taken from the integer labels seen in ``fit``.
    """
    N, _ = X.shape
    # Columns follow the sorted labels rather than the label values, so
    # labels need not be exactly 0..K-1.
    classes = sorted(self.likelihoods)
    P_hat = np.zeros((N, len(classes)))

    for col, k in enumerate(classes):
      p = self.likelihoods[k]['mean']
      # Bernoulli log-likelihood summed over features:
      #   sum_d x_d * log(p_d) + (1 - x_d) * log(1 - p_d)
      # Each matmul collapses the feature axis, giving one number per row.
      # abs() is a no-op when features lie in [0, 1]; it is retained so
      # out-of-range inputs still produce finite scores as before.
      P_hat[:, col] = (np.log(self.priors[k])
                       + np.matmul(X, np.log(p))
                       + np.matmul((1 - X), np.log(abs(1 - p))))

    return np.asarray(classes)[P_hat.argmax(axis=1)]

## Multinomial Bayes

In [5]:
class GaussMB(): # not naive
    """Multinomial Bayes classifier for count features.

    The class name is kept unchanged for callers; the model fitted here is
    multinomial, not Gaussian.
    """

    def fit(self, X, y, epsilon = 1e-3):
        """Estimate per-class feature proportions and class priors.

        Parameters
        ----------
        X : ndarray of shape (N, D)
            Training features, expected to be non-negative counts.
        y : ndarray of shape (N,)
            Class labels; they are compared against their integer cast.
        epsilon : float
            Additive smoothing applied to every feature count so that no
            probability is exactly zero (log(0) would otherwise appear in
            ``predictMB``).
        """
        self.likelihoods = dict()
        self.priors = dict()
        self.K = set(y.astype(int))

        for k in self.K:
            X_k = X[y == k, :]   # rows belonging to class k
            N_k, D = X_k.shape   # N_k observations, D features
            counts = X_k.sum(axis=0)
            # Smoothed share of the class's total count mass per feature;
            # the D entries sum to 1.
            self.likelihoods[k] = {
                "P": (counts + epsilon) / (counts.sum() + epsilon * D)
            }
            self.priors[k] = N_k / len(X)

    def predictMB(self, X):
        """Return the maximum-a-posteriori class label for every row of ``X``.

        Parameters
        ----------
        X : ndarray of shape (N, D)

        Returns
        -------
        ndarray of shape (N,)
            Predicted labels, taken from the integer labels seen in ``fit``.
        """
        N, _ = X.shape
        # Columns follow the sorted labels rather than the label values, so
        # labels need not be exactly 0..K-1.
        classes = sorted(self.likelihoods)
        P_hat = np.zeros((N, len(classes)))

        for col, k in enumerate(classes):
            # Multinomial log-likelihood up to the multinomial coefficient,
            # which depends only on the row and therefore cannot change which
            # class attains the maximum.
            log_lik = np.matmul(X, np.log(self.likelihoods[k]["P"]))
            P_hat[:, col] = log_lik + np.log(self.priors[k])

        return np.asarray(classes)[P_hat.argmax(axis=1)]

## K - Nearest Neighbors

In [6]:
class KNNB():
    """Distance-weighted K-nearest-neighbours classifier."""

    def fit(self, X, y):
        """Store the training set; all work happens at prediction time.

        Parameters
        ----------
        X : ndarray of shape (N_train, D)
        y : ndarray of shape (N_train,)
            Numeric class labels (integer or float valued, any sign).
        """
        self.X = X
        self.y = y

    def predictKNN(self,X, K, epsilon = 1e-3):
        """Predict a label for every row of ``X`` by a weighted vote of its K nearest training rows.

        Parameters
        ----------
        X : ndarray of shape (N, D)
            Query points (distinct from the stored ``self.X``).
        K : int
            Number of neighbours that vote; must be at least 1.
        epsilon : float
            Added to every distance before inversion to avoid division by
            zero when a query coincides with a training point.

        Returns
        -------
        ndarray of shape (N,), dtype float
            Predicted labels.

        Raises
        ------
        ValueError
            If ``K`` is smaller than 1.
        """
        if K < 1:
            raise ValueError("K must be at least 1, got %r" % (K,))

        # np.bincount only accepts non-negative integers, while labels sliced
        # out of a float matrix are floats. Vote on each label's position in
        # the sorted unique labels and map back afterwards.
        classes, y_idx = np.unique(np.asarray(self.y).ravel(), return_inverse=True)
        y_idx = y_idx.reshape(-1)

        N = len(X)
        y_hat = np.zeros(N)

        for i in range(N):
            # squared Euclidean distance from query i to every training row
            dist2 = np.sum((self.X - X[i])**2, axis=1)
            # indices of the K closest training rows
            idxt = np.argsort(dist2)[:K]
            # inverse-distance weights: closer neighbours get a larger vote
            gamma_K = 1/(np.sqrt(dist2[idxt]) + epsilon)
            # sum the weights per class; the class with the largest total wins
            # (ties go to the smallest label, as argmax returns the first maximum)
            votes = np.bincount(y_idx[idxt], weights = gamma_K, minlength = len(classes))
            y_hat[i] = classes[votes.argmax()]

        return y_hat

In [None]:
# Accuracy restricted to the rows whose true label is c, for c = 0..9
# (i.e. the share of each class that was predicted correctly).
gnb_A = [
    accuracy(y_test[y_test == label], y_hat[y_test == label])
    for label in range(10)
]

# Keep the individual per-class names available as well.
(gnbA_0, gnbA_1, gnbA_2, gnbA_3, gnbA_4,
 gnbA_5, gnbA_6, gnbA_7, gnbA_8, gnbA_9) = gnb_A

In [None]:
# NOTE(review): `gb_A` is not assigned in any visible cell (the list built in the previous cell is `gnb_A`) — confirm it is defined elsewhere or that `gnb_A` was intended.
gb_A