In [1]:
import mat73
import numpy as np
from scipy.stats import norm, multivariate_normal
from naive_bayes import NaiveBayes

In [2]:
test_data = mat73.loadmat('preprocessed_data/all_subjects/test.mat')

In [20]:
train_data = mat73.loadmat('preprocessed_data/all_subjects/train.mat')

In [59]:
test_data_one_sample = mat73.loadmat('preprocessed_data/one_subject/test.mat')
train_data_one_sample = mat73.loadmat('preprocessed_data/one_subject/train.mat')

In [None]:
# Which class are we predicting
CLASS = 'visible'

In [3]:
cov = np.load(('distributions/' + CLASS + '/cov.npy'))
mu_bar = np.load(('distributions/' + CLASS + '/mu_bar.npy'))
single_normal = np.load(('distributions/' + CLASS + '/single_normal.npy'))
parents = np.load(('distributions/' + CLASS + '/parents.npy'))
parents = parents.astype(int)

In [71]:
import numpy as np
from scipy.stats import multivariate_normal, norm
import math

class Model():

    def __init__(self, 
                 single_normal, 
                 mu_bar, 
                 cov, 
                 parents, 
                 priors,
                 num_classes=2, 
                 num_dims=300):

        self.single_normal = single_normal
        self.mu_bar = mu_bar
        self.cov = cov
        self.parents = parents
        self.priors = priors
        self.num_classes = num_classes
        self.num_dims = num_dims
        self.mu_parents = single_normal[parents, :, 0]
        self.sig_parents = single_normal[parents, :, 1]

    def _normal_pdf(self, x, mu, sigma):
        coef = 1 / (sigma * math.sqrt(2 * math.pi))
        return coef * math.exp(-0.5 * math.pow((x - mu) / sigma, 2))

    def classify(self, x):
        likelihood = np.ones((self.num_classes,))
        for c in range(self.num_classes):

            node_probabilities = np.ones((self.num_dims, ))

            mu_0 = self.single_normal[0][c][0]
            sigma_0 = self.single_normal[0][c][1]

            node_probabilities[0] = self._normal_pdf(x[0], mu_0, sigma_0) 

            for node in range(1, self.num_dims):
                mu_bar = self.mu_bar[node, c]
                cov = self.cov[node, c]

                mu_parent = self.mu_parents[node, c] 
                sigma_parent = self.sig_parents[node, c] 

                node_value = x[node]
                parent_value = x[self.parents[node]]
                joint_value = np.array([node_value, parent_value])

                joint_prob = multivariate_normal.pdf(joint_value, mu_bar, cov)
                parent_prob = self._normal_pdf(parent_value, mu_parent, sigma_parent)
                node_probabilities[node] = joint_prob / parent_prob

            log_node_probabilities = np.log(node_probabilities) 
            likelihood[c] = np.sum(log_node_probabilities) + math.log(self.priors[c])

        pred = np.argmax(likelihood)
        return pred, likelihood

    def run_test(self, X, y):
        N, _ = X.shape

        correct = 0
        correct_local = 0

        predictions = []
        for i in range(N):
            x = X[i]
            prediction, likelihood = self.classify(x)
            predictions.append(prediction)

            if prediction == y[i]:
                correct += 1
                correct_local += 1

            if i % 100 == 0 and i != 0:
                print(f'Running Accuracy: {correct / i} windowed: {correct_local / 100} for {i} samples')
                print(predictions)
                predictions = []
                correct_local = 0

        accuracy = correct / N
        print(f'Accuracy is {accuracy}')

### Naive Bayes

In [38]:
import numpy as np
from scipy.stats import norm, multivariate_normal

class NaiveBayes():

    def __init__(self, single_normal, priors, num_classes=2, num_dims=300):
        self.single_normal = single_normal
        self.num_classes = num_classes
        self.num_dims = num_dims
        self.priors = priors

    def classify(self, x):
        likelihood = np.zeros((self.num_classes, ))

        for c in range(self.num_classes):
            mus = self.single_normal[:, c, 0]
            sigmas = self.single_normal[:, c, 1]
            node_probs = norm.pdf(x, mus, sigmas)
            log_node_probs = np.log(node_probs)
            likelihood[c] = np.sum(log_node_probs) + self.priors[c]

        return np.argmax(likelihood)

    # def classi(self, x):
    #     node_probs = norm.pdf(
    #         x, 
    #         self.single_normal[:, :, 0], 
    #         self.single_normal[:, :, 1]
    #     )
    #     x_given_class = np.prod(node_probs, axis=0)
    #     return np.argmax(x_given_class)

    def test(self, X, y):
        N, _ = X.shape
        correct = 0

        num_predicted_0 = 0
        num_predicted_1 = 0

        for i in range(N):
            x = X[i]
            prediction = self.classify(x)

            if prediction == 0:
                num_predicted_0 += 1
            elif prediction == 1:
                num_predicted_1 += 1
            else:
                assert False

            if prediction == y[i]:
                correct += 1

            if i % 500 == 0 and i != 0:
                print(f'Running Accuracy is {correct / i} for {i} samples')
                print(f'    num 1s: {num_predicted_1}, num 0s: {num_predicted_0}')

### Test

In [47]:
test_data_dict = test_data['test']
test_X = test_data_dict['data']
test_X = np.array(test_X)
test_y = test_data_dict[CLASS]
test_y = np.array(test_y)

train_data_dict = train_data['train']
train_y = train_data_dict[CLASS]

# Calculate Priors
prior_1 = np.sum(train_y) / len(train_y)
prior_0 = 1 - prior_1
priors_state = [prior_0, prior_1]


model = Model(single_normal, mu_bar, cov, parents, priors_state)
model.run_test(test_X, test_y)

# nb = NaiveBayes(single_normal, priors)
# nb.test(test_X, test_y)


Running Accuracy: 0.42 windowed: 0.42 for 100 samples
Running Accuracy: 0.405 windowed: 0.39 for 200 samples
Running Accuracy: 0.4 windowed: 0.39 for 300 samples
Running Accuracy: 0.4 windowed: 0.4 for 400 samples
Running Accuracy: 0.402 windowed: 0.41 for 500 samples
Running Accuracy: 0.41833333333333333 windowed: 0.5 for 600 samples
Running Accuracy: 0.41285714285714287 windowed: 0.38 for 700 samples
Running Accuracy: 0.41375 windowed: 0.42 for 800 samples
Running Accuracy: 0.4166666666666667 windowed: 0.44 for 900 samples
Running Accuracy: 0.416 windowed: 0.41 for 1000 samples
Running Accuracy: 0.4218181818181818 windowed: 0.48 for 1100 samples
Running Accuracy: 0.42083333333333334 windowed: 0.41 for 1200 samples
Running Accuracy: 0.42230769230769233 windowed: 0.44 for 1300 samples
Running Accuracy: 0.42 windowed: 0.39 for 1400 samples
Running Accuracy: 0.4226666666666667 windowed: 0.46 for 1500 samples
Running Accuracy: 0.4225 windowed: 0.42 for 1600 samples
Running Accuracy: 0.419

KeyboardInterrupt: 

In [57]:
cov_state_intuit = np.load('data/node_probs_state_intuit/cov.npy')
mu_bar_state_intuit = np.load('data/node_probs_state_intuit/mu_bar.npy')
single_normal_state_intuit = np.load('data/node_probs_state_intuit/single_normal.npy')
parents_intuit = np.arange(-1, 299)

In [56]:
test_data_dict = test_data['test']
test_X = test_data_dict['data']
test_X = np.array(test_X)
test_y_vis = test_data_dict['visible']
test_y_vis = np.array(test_y_vis)

train_data_dict = train_data['train']
train_y_vis = train_data_dict['visible']
prior_1 = np.sum(train_y_vis) / len(train_y_vis)
prior_0 = 1 - prior_1
priors_state = [prior_0, prior_1]


In [58]:
model = Model(single_normal_state_intuit, mu_bar_state_intuit, cov_state_intuit, parents_intuit, priors_state)
model.run_test(test_X, test_y)

Running Accuracy: 0.42 windowed: 0.42 for 100 samples
Running Accuracy: 0.41 windowed: 0.4 for 200 samples
Running Accuracy: 0.4033333333333333 windowed: 0.39 for 300 samples
Running Accuracy: 0.4 windowed: 0.39 for 400 samples
Running Accuracy: 0.402 windowed: 0.41 for 500 samples
Running Accuracy: 0.41833333333333333 windowed: 0.5 for 600 samples
Running Accuracy: 0.41285714285714287 windowed: 0.38 for 700 samples
Running Accuracy: 0.41375 windowed: 0.42 for 800 samples
Running Accuracy: 0.4177777777777778 windowed: 0.45 for 900 samples
Running Accuracy: 0.417 windowed: 0.41 for 1000 samples
Running Accuracy: 0.4218181818181818 windowed: 0.47 for 1100 samples
Running Accuracy: 0.42083333333333334 windowed: 0.41 for 1200 samples
Running Accuracy: 0.42230769230769233 windowed: 0.44 for 1300 samples
Running Accuracy: 0.42 windowed: 0.39 for 1400 samples
Running Accuracy: 0.42333333333333334 windowed: 0.47 for 1500 samples
Running Accuracy: 0.423125 windowed: 0.42 for 1600 samples
Runnin

  log_node_probabilities = np.log(node_probabilities)


Running Accuracy: 0.40479166666666666 windowed: 0.43 for 4800 samples
Running Accuracy: 0.40551020408163263 windowed: 0.44 for 4900 samples
Running Accuracy: 0.4088 windowed: 0.57 for 5000 samples
Running Accuracy: 0.41 windowed: 0.47 for 5100 samples


  node_probabilities[node] = joint_prob / parent_prob


Running Accuracy: 0.4125 windowed: 0.54 for 5200 samples
Running Accuracy: 0.4130188679245283 windowed: 0.44 for 5300 samples
Running Accuracy: 0.4137037037037037 windowed: 0.45 for 5400 samples
Running Accuracy: 0.4172727272727273 windowed: 0.61 for 5500 samples
Running Accuracy: 0.41875 windowed: 0.5 for 5600 samples
Running Accuracy: 0.4203508771929825 windowed: 0.51 for 5700 samples
Running Accuracy: 0.42120689655172416 windowed: 0.47 for 5800 samples
Running Accuracy: 0.4216949152542373 windowed: 0.45 for 5900 samples
Running Accuracy: 0.4221666666666667 windowed: 0.45 for 6000 samples
Running Accuracy: 0.4242622950819672 windowed: 0.55 for 6100 samples
Running Accuracy: 0.42612903225806453 windowed: 0.54 for 6200 samples
Running Accuracy: 0.4265079365079365 windowed: 0.45 for 6300 samples


KeyboardInterrupt: 

In [60]:
root = 'data/node_probs_state_intuit_one_patient/'
cov_state_intuit_one = np.load(root + 'cov.npy')
mu_bar_state_intuit_one = np.load(root + 'mu_bar.npy')
single_normal_state_intuit_one = np.load(root + 'single_normal.npy')
parents_intuit = np.arange(-1, 299)

In [61]:
test_data_dict = test_data_one_sample['test']
test_X = test_data_dict['data']
test_X = np.array(test_X)
test_y = test_data_dict['visible']
test_y = np.array(test_y)

train_data_dict = train_data_one_sample['train']
train_y = train_data_dict['visible']
prior_1 = np.sum(train_y) / len(train_y)
prior_0 = 1 - prior_1
priors = [prior_0, prior_1]

[0.37524240465416936, 0.6247575953458306]


In [73]:
model = Model(
    single_normal=single_normal_state_intuit_one, 
    mu_bar=mu_bar_state_intuit_one, 
    cov=cov_state_intuit_one,
    parents=parents_intuit,
    priors=priors
    )

nb = NaiveBayes(single_normal=single_normal_state_intuit_one, priors=priors)
nb.test(test_X, test_y)

Running Accuracy is 0.524 for 500 samples
    num 1s: 104, num 0s: 397
