## Do NOT loop over the number of samples N; `for` loops are allowed ONLY over the number of classes C

In [1]:
import numpy as np
from sklearn.datasets import load_iris, load_digits, load_digits
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.preprocessing import Binarizer

def rel_error(x, y):
    """Return the largest element-wise relative error between ``x`` and ``y``.

    Each absolute difference is divided by ``|x| + |y|``; that denominator is
    floored at 1e-8 so that entries where both inputs are zero do not divide
    by zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

# Gaussian Discriminant Analysis

In [2]:
# Load the iris dataset; features and integer labels are kept as separate arrays.
data = load_iris()
X_train = data.data
y_train = data.target

In [3]:
def compute_priors(X, y):
    """
    Prior probability for each class, estimated as its relative frequency in y.

    Inputs:
    - X: array of shape (N, D) (unused; kept so all estimators share a signature)
    - y: array of shape (N,) holding non-negative integer labels 0..C-1

    Returns:
    - priors : array of shape (C,), where C = max(y) + 1
    """
    n_samples = len(y)
    n_classes = np.max(y) + 1
    # One vectorised division replaces the per-class loop.
    counts = np.bincount(y, minlength=n_classes)
    return counts / n_samples

In [4]:
# Fit scikit-learn's QDA on the training data to obtain reference class priors.
sk_model = QuadraticDiscriminantAnalysis()
sk_model.fit(X_train, y_train)

# Compare our priors against sk_model.priors_; the relative error must stay below 1e-12.
priors = compute_priors(X_train, y_train)
error = rel_error(sk_model.priors_, priors)
print(error)
assert  error < 1e-12

0.0


In [5]:
def compute_means(X, y):
    """
    Mean estimate for each class, NO FOR LOOP ON number of samples N but ONLY ON number of classes C

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding non-negative integer labels 0..C-1

    Returns:
    - means : array of shape (C, D); row c is the mean of the samples with label c
    """
    X = np.asarray(X)
    y = np.asarray(y)
    _, n_features = X.shape
    n_classes = np.max(y) + 1
    means = np.zeros((n_classes, n_features))

    for c in range(n_classes):
        # Boolean row selection keeps only the samples of class c, so no
        # (N, D) indicator matrix has to be materialised.
        means[c] = X[y == c].mean(axis=0)
    return means
    

In [6]:
# Fit scikit-learn's QDA on the training data to obtain reference class means.
sk_model = QuadraticDiscriminantAnalysis()
sk_model.fit(X_train, y_train)

# Compare our class means against sk_model.means_; the relative error must stay below 1e-12.
means = compute_means(X_train, y_train)
error = rel_error(sk_model.means_, means)
print(error)
assert  error < 1e-12

8.372722659314906e-17


In [7]:
def compute_sigmas_gda(X, y, means):
    """
    Covariance estimate for each class, NO FOR LOOP ON number of samples N but ONLY ON number of classes C
    DO NOT USE np.cov

    Each class covariance is the scatter of its samples around means[c],
    divided by (Nc - 1), where Nc is the number of samples in class c.

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding non-negative integer labels 0..C-1
    - means: array of shape (C, D)

    Returns:
    - covariances : array of shape (C, D, D)
    """
    X = np.asarray(X)
    y = np.asarray(y)
    _, n_features = X.shape
    n_classes = np.max(y) + 1
    covariances = np.zeros((n_classes, n_features, n_features))

    for c in range(n_classes):
        # Centre only the rows of class c; broadcasting subtracts means[c]
        # from every row without building an (N, D) copy of the mean.
        centered = X[y == c] - means[c]
        covariances[c] = (centered.T @ centered) / (centered.shape[0] - 1)
    return covariances

In [8]:
# store_covariance=True is requested so that sk_model.covariance_ can be read below.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# Our per-class covariances are computed from scikit-learn's means, then compared
# against sk_model.covariance_; the relative error must stay below 1e-12.
covariances = compute_sigmas_gda(X_train, y_train, sk_model.means_)
error = rel_error(np.asarray(sk_model.covariance_), covariances)
print(error)
assert  error < 1e-12

5.920351755031412e-16


In [9]:
def compute_sigma_lda(X, y, means):
    """
    Covariance estimate for LDA, NO FOR LOOP ON number of samples N but ONLY ON number of classes C
    DO NOT USE np.cov

    The shared covariance is the sum over classes of the scatter of each
    class around its own mean, divided by the total number of samples N.

    Inputs:
    - X: array of shape (N, D)
    - y: array of shape (N,) holding non-negative integer labels 0..C-1
    - means: array of shape (C, D)

    Returns:
    - covariance : array of shape (D, D)
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples, n_features = X.shape
    n_classes = np.max(y) + 1
    covariance = np.zeros((n_features, n_features))

    for c in range(n_classes):
        # Within-class scatter of class c, accumulated into the pooled matrix.
        centered = X[y == c] - means[c]
        covariance += centered.T @ centered
    return covariance / n_samples

In [10]:
# store_covariance=True is requested so that sk_model.covariance_ can be read below.
sk_model = LinearDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# Our shared covariance is computed from scikit-learn's means, then compared
# against sk_model.covariance_; the relative error must stay below 1e-12.
covariances = compute_sigma_lda(X_train, y_train, sk_model.means_)
error = rel_error(np.asarray(sk_model.covariance_), covariances)
print(error)
assert  error < 1e-12

1.0823081332523129e-16


In [11]:
def compute_log_posterior_lda(X, C, priors, means, covariance):
    """
    Unnormalised log posterior for each class and observation under LDA,
    NO FOR LOOP ON number of samples N but ONLY ON number of classes C
    DO NOT USE scipy or np multivariate gaussian

    For class c the score is
        log(priors[c]) - 0.5 * (x - means[c])^T covariance^{-1} (x - means[c]).
    Terms that do not depend on the class (the Gaussian normaliser of the
    shared covariance) are left out, so the argmax over classes is unaffected
    but the values are not normalised log probabilities.

    Inputs:
    - X: array of shape (N, D)
    - C: number of classes
    - priors : array of shape (C,)
    - means : array of shape (C, D)
    - covariance : array of shape (D, D)

    Returns:
    - log_posterior : array of shape (N, C)
    """
    n_samples, _ = X.shape
    log_posterior = np.zeros((n_samples, C))

    # The covariance is shared by all classes, so invert it once instead of
    # once per class.
    precision = np.linalg.inv(covariance)

    for c in range(C):
        diff = X - means[c]
        # Row-wise quadratic form diff_n^T precision diff_n.
        mahalanobis = np.sum((diff @ precision) * diff, axis=1)
        log_posterior[:, c] = np.log(priors[c]) - (1 / 2.0) * mahalanobis
    return log_posterior

In [12]:
# NO TEST FOR LOG-POSTERIOR LDA. It is covered by the combined test further below.

In [13]:
def compute_log_posterior_gda(X, C, priors, means, covariances):
    """
    Unnormalised log posterior for each class and observation under GDA/QDA,
    NO FOR LOOP ON number of samples N but ONLY ON number of classes C
    DO NOT USE scipy or np multivariate gaussian

    For class c the score is
        log(priors[c])
        - 0.5 * (x - means[c])^T covariances[c]^{-1} (x - means[c])
        - 0.5 * log|covariances[c]|.
    The class-independent constant of the Gaussian density is left out.

    Inputs:
    - X: array of shape (N, D)
    - C: number of classes
    - priors : array of shape (C,)
    - means : array of shape (C, D)
    - covariances : array of shape (C, D, D) (or a length-C sequence of (D, D) arrays)

    Returns:
    - log_posterior : array of shape (N, C)
    """
    n_samples, _ = X.shape
    log_posterior = np.zeros((n_samples, C))

    for c in range(C):
        cov_c = np.asarray(covariances[c])
        diff = X - means[c]
        # Row-wise quadratic form diff_n^T cov_c^{-1} diff_n.
        mahalanobis = np.sum((diff @ np.linalg.inv(cov_c)) * diff, axis=1)
        # slogdet returns log|det| directly; log(det(...)) underflows to -inf
        # when D is large and the variances are small.
        _, log_det = np.linalg.slogdet(cov_c)
        log_posterior[:, c] = np.log(priors[c]) - (1 / 2.0) * mahalanobis
        log_posterior[:, c] -= (1 / 2.0) * log_det
    return log_posterior

In [14]:
# store_covariance=True is requested so that sk_model.covariance_ can be read below.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)

# Number of classes, assuming labels are the integers 0..C-1.
C = (np.max(y_train) + 1)
# Our log posterior is evaluated with scikit-learn's fitted parameters so that
# only the log-posterior formula itself is being compared.
log_posterior = compute_log_posterior_gda(X_train, C, sk_model.priors_, sk_model.means_, sk_model.covariance_)
# NOTE(review): _decision_function is an underscore-prefixed (private) scikit-learn
# method and may change between versions; confirm it still exists when upgrading.
error = rel_error(np.asarray(sk_model._decision_function(X_train)), log_posterior)
print(error)
assert  error < 1e-12

6.605541792973586e-14


In [15]:
class ProbClassifier():
    """Base class for classifiers defined through per-class log posteriors.

    Subclasses override ``fit`` and ``compute_log_posterior``; ``predict`` and
    ``predict_proba`` are derived from the (N, C) array that
    ``compute_log_posterior`` returns.
    """

    def fit(self, X, y):
        """Estimate the model parameters (no-op here; overridden by subclasses)."""
        pass

    def compute_log_posterior(self, X):
        """Return an (N, C) array of log posteriors (overridden by subclasses)."""
        pass

    def predict(self, X):
        """Return, for each row of X, the index of the class with the highest log posterior."""
        log_post = self.compute_log_posterior(X)
        return np.argmax(log_post, axis=1)

    def predict_proba(self, X):
        """Return an (N, C) array of class probabilities whose rows sum to 1.

        The log posteriors are normalised with a softmax over the class axis.
        """
        log_post = self.compute_log_posterior(X)
        # Subtract the row-wise maximum before exponentiating: the softmax is
        # unchanged, but very negative log posteriors no longer underflow to
        # an all-zero row (0/0 = NaN) and large ones no longer overflow.
        shifted = log_post - np.max(log_post, axis=1, keepdims=True)
        unnormalized = np.exp(shifted)
        return unnormalized / np.sum(unnormalized, axis=1, keepdims=True)

In [16]:
class LDA(ProbClassifier):
    """Linear discriminant analysis: per-class means with one shared covariance."""

    def __init__(self):
        # Every parameter stays None until fit() is called.
        self.priors = None
        self.means = None
        self.cov = None
        self.C = None

    def fit(self, X, y):
        """Estimate the class count, priors, class means and shared covariance from (X, y)."""
        self.C = np.max(y) + 1
        self.priors = compute_priors(X, y)
        class_means = compute_means(X, y)
        self.means = class_means
        # The shared covariance is measured around the class means estimated above.
        self.cov = compute_sigma_lda(X, y, class_means)

    def compute_log_posterior(self, X):
        """Return the (N, C) LDA log posteriors of X under the fitted parameters."""
        return compute_log_posterior_lda(
            X, self.C, self.priors, self.means, self.cov
        )

In [17]:
# Reference predictions from scikit-learn's LDA fitted on the training data.
sk_model = LinearDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our LDA, fitted and evaluated on the same data.
lda = LDA()
lda.fit(X_train, y_train)
pred = lda.predict(X_train)

# Every predicted label must match scikit-learn's; this is also the only check
# that exercises compute_log_posterior_lda.
assert (sk_pred == pred).all()
print("Accuracy scikit-learn : ", accuracy_score(y_train, sk_pred))
print("Your Accuracy : ", accuracy_score(y_train, pred))

Accuracy scikit-learn :  0.98
Your Accuracy :  0.98


In [18]:
class QDA(ProbClassifier):
    """Quadratic discriminant analysis: per-class means and per-class covariances."""

    def __init__(self):
        # Every parameter stays None until fit() is called.
        self.priors = None
        self.means = None
        self.cov = None
        self.C = None

    def fit(self, X, y):
        """Estimate the class count, priors, class means and class covariances from (X, y)."""
        self.C = np.max(y) + 1
        self.priors = compute_priors(X, y)
        class_means = compute_means(X, y)
        self.means = class_means
        # Each class covariance is measured around that class's own mean.
        self.cov = compute_sigmas_gda(X, y, class_means)

    def compute_log_posterior(self, X):
        """Return the (N, C) QDA log posteriors of X under the fitted parameters."""
        return compute_log_posterior_gda(
            X, self.C, self.priors, self.means, self.cov
        )

In [19]:
# Reference predictions from scikit-learn's QDA fitted on the training data.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our QDA, fitted and evaluated on the same data.
qda = QDA()
qda.fit(X_train, y_train)
pred = qda.predict(X_train)

# Every predicted label must match scikit-learn's.
assert (sk_pred == pred).all()
print("Accuracy scikit-learn : ", accuracy_score(y_train, sk_pred))
print("Your Accuracy : ", accuracy_score(y_train, pred))

Accuracy scikit-learn :  0.98
Your Accuracy :  0.98


In [20]:
# Reference class probabilities from scikit-learn's QDA fitted on the training data.
sk_model = QuadraticDiscriminantAnalysis(store_covariance=True)
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict_proba(X_train)

# Class probabilities from our QDA, via ProbClassifier.predict_proba.
qda = QDA()
qda.fit(X_train, y_train)
pred = qda.predict_proba(X_train)

# The two probability arrays must agree to a relative error below 1e-12.
error = rel_error(pred, sk_pred)
print(error)
assert error < 1e-12

1.157450494397167e-14


# Naive Bayes Classifiers

## Bernoulli Naive Bayes

In [21]:
# Load the digits dataset and derive a binarized copy of the features with
# Binarizer's default settings, for the Bernoulli model.
data = load_digits()
X_train2 = data.data
y_train2 = data.target
X_train2_transf = Binarizer().fit_transform(X_train2)

In [106]:
class BernouilliNaiveBayes(ProbClassifier):
    """Naive Bayes classifier with an independent Bernoulli model per feature and class."""

    def __init__(self):
        # Parameters stay None until fit() is called.
        self.priors = None
        self.C = None
        self.theta = None

    def fit(self, X, y):
        """
        Estimate the class priors and the parameter theta
        NO FOR LOOP ON number of samples N but ONLY ON number of classes C
        DO NOT USE scipy or np density

        theta[d, c] is the mean of feature d over the samples of class c;
        priors[c] is the fraction of samples with label c.
        """
        n_samples, n_features = X.shape
        self.C = np.max(y) + 1
        self.theta = np.zeros((n_features, self.C))
        counts = np.bincount(y)
        self.priors = counts / n_samples
        for c, n_c in enumerate(counts):
            self.theta[:, c] = X[y == c].sum(axis=0) / n_c

    def compute_log_posterior(self, X):
        """Return the (N, C) array of log prior plus summed per-feature log likelihood."""
        n_samples, _ = X.shape
        log_post = np.zeros((n_samples, self.C))
        # Complement indicator: 0 where the feature equals 1, and 1 elsewhere.
        off = np.where(X == 1, 0, 1)
        for c in range(self.C):
            theta_c = self.theta[:, c]
            # Per-feature likelihood: theta where the feature is 1, (1 - theta)
            # where it is 0; the 1e-8 offset keeps the log finite at 0.
            likelihood = (X * theta_c) + off * (1 - theta_c) + 1e-8
            log_post[:, c] = np.log(self.priors[c]) + np.log(likelihood).sum(axis=1)
        return log_post

In [107]:
# Reference predictions from scikit-learn's BernoulliNB on the binarized digits.
sk_model = BernoulliNB()
sk_model.fit(X_train2_transf, y_train2)
sk_pred = sk_model.predict(X_train2_transf)

# Predictions from our Bernoulli naive Bayes on the same data.
model = BernouilliNaiveBayes()
model.fit(X_train2_transf, y_train2)
pred = model.predict(X_train2_transf)

sk_acc = accuracy_score(y_train2, sk_pred)
model_acc = accuracy_score(y_train2, pred)
print("Accuracy scikit-learn : ", sk_acc)
print("Your Accuracy : ", model_acc)
# One-sided check: fails only if our accuracy is 0.01 or more below scikit-learn's.
assert sk_acc - model_acc < 0.01

Accuracy scikit-learn :  0.8636616583194212
Your Accuracy :  0.8742348358375069


## Gaussian Naive Bayes

In [66]:
class GaussianNaiveBayes(ProbClassifier):
    """Naive Bayes classifier with an independent Gaussian per feature and class."""

    def __init__(self):
        # Parameters stay None until fit() is called.
        self.priors = None
        self.C = None
        self.mu = None
        self.sigma = None

    def fit(self, X, y):
        """
        Estimate the parameters mu and sigma
        NO FOR LOOP ON number of samples N but ONLY ON number of classes C
        DO NOT USE scipy or np density

        After fitting:
        - priors : array of shape (C,), fraction of samples in each class
        - mu : array of shape (D, C), per-class feature means
        - sigma : array of shape (D, C), per-class feature VARIANCES
          (squared deviations divided by the class count Nc)
        """
        N, D = X.shape
        self.C = (np.max(y) + 1)
        self.sigma = np.zeros((D, self.C))
        self.mu = np.zeros((D, self.C))

        Nc = np.bincount(y)
        self.priors = Nc / N

        for c in range(self.C):
            # Select the rows of class c once and reuse them for both moments.
            X_c = X[y == c]
            self.mu[:, c] = np.sum(X_c, axis=0) / Nc[c]
            self.sigma[:, c] = np.sum((X_c - self.mu[:, c]) ** 2, axis=0) / Nc[c]

    def compute_log_posterior(self, X):
        """Return the (N, C) array of log prior plus summed per-feature Gaussian log density.

        NOTE: a feature with zero variance inside a class makes the division
        and the log below non-finite; no variance smoothing is applied.
        """
        N, D = X.shape
        log_post = np.zeros((N, self.C))

        for c in range(self.C):
            mu_c = self.mu[:, c]
            var_c = self.sigma[:, c]
            # Sum over features of -0.5 * [log(2*pi*var) + (x - mu)^2 / var].
            log_likelihood = -(1 / 2.) * np.sum(
                np.log(2 * np.pi * var_c) + ((X - mu_c) ** 2) / var_c, axis=1
            )
            # The prior must enter as log(prior): adding the raw prior is only
            # harmless when all classes are equally frequent.
            log_post[:, c] = np.log(self.priors[c]) + log_likelihood
        return log_post

In [67]:
# Reference predictions from scikit-learn's GaussianNB on the iris training data.
sk_model = GaussianNB()
sk_model.fit(X_train, y_train)
sk_pred = sk_model.predict(X_train)

# Predictions from our Gaussian naive Bayes on the same data.
model = GaussianNaiveBayes()
model.fit(X_train, y_train)
pred = model.predict(X_train)

sk_acc = accuracy_score(y_train, sk_pred)
model_acc = accuracy_score(y_train, pred)
print("Accuracy scikit-learn : ", sk_acc)
print("Your Accuracy : ", model_acc)
# One-sided check: fails only if our accuracy is 0.01 or more below scikit-learn's.
assert sk_acc - model_acc < 0.01

Accuracy scikit-learn :  0.96
Your Accuracy :  0.96
