In [56]:
import numpy as np
from load_data import *

In [57]:
# Locations of the ATIS train and test CSV files, relative to the working
# directory the notebook is run from.
TRAIN_PATH = './atis/atis_train_actual.csv'
TEST_PATH = './atis/atis_test_actual.csv'

In [58]:
# Build the loader from the train/test CSVs. No validation file is supplied
# (valid_path=None); a validation split is requested from the loader instead.
data_loader = SnipsDataLoader(train_path=TRAIN_PATH, valid_path=None, test_path=TEST_PATH)
# NOTE(review): presumably holds out 5% of the training examples for validation
# while preserving per-class proportions -- confirm against load_data.
data_loader.split_train_valid(valid_size=0.05, keep_class_ratios=True)

In [59]:
# Pull the train and validation splits out of the loader; each getter returns
# a pair that is unpacked here as (inputs, labels).
X_train, y_train = data_loader.get_train_data()
X_valid, y_valid = data_loader.get_valid_data()

In [60]:
# Turn the train/validation inputs into the encodings the classifier consumes.
# NOTE(review): keep_words_threshold=5 presumably drops words seen fewer than
# five times -- confirm against FeatureExtractor in load_data.
feature_extractor = FeatureExtractor(X_train, X_valid)
feature_extractor.extract_features(keep_words_threshold=5)
# CAUTION: X_train / X_valid are rebound from the loader's outputs to their
# encodings, so this cell is not idempotent. Re-run the cell that calls
# get_train_data() / get_valid_data() before re-running this one; otherwise the
# encodings themselves are passed back into FeatureExtractor.
X_train = feature_extractor.get_train_encodings()
X_valid = feature_extractor.get_valid_encodings()

In [61]:
class MultinomialNaiveBayes():
    """Multinomial Naive Bayes classifier with additive (Laplace) smoothing.

    Parameters
    ----------
    alpha : float, default 1.0
        Additive smoothing strength. The default reproduces classic add-one
        (Laplace) smoothing. Must be strictly positive so that every
        per-class word probability is non-zero and its log is finite.

    Attributes (set by ``fit``)
    ---------------------------
    vocab_probs : ndarray of shape (vocab_size, num_labels)
        Smoothed probability of each feature column given each label.
    prior_probs : ndarray of shape (num_labels,)
        Fraction of training rows carrying each label.
    vocab_log_probs, prior_log_probs : ndarray
        Element-wise natural logs of the two arrays above.
    """

    def __init__(self, alpha=1.0):
        if alpha <= 0:
            raise ValueError("alpha must be positive, got {!r}".format(alpha))
        self.alpha = alpha

    def fit(self, X, y):
        """Estimate per-label feature probabilities and label priors.

        Parameters
        ----------
        X : 2-D array of shape (num_examples, vocab_size)
            Per-example feature values (e.g. word counts).
        y : array-like of non-negative integer labels, one per row of ``X``
            The number of labels is taken to be ``max(y) + 1``.
        """
        # Unpacking (rather than indexing) keeps the implicit "X must be 2-D" check.
        _, vocab_size = X.shape
        labels = np.asarray(y)
        num_labels = np.amax(labels) + 1
        # Row i of the identity matrix is the one-hot vector for label i.
        y_one_hot = np.eye(num_labels)[labels]
        X_row_sum = np.sum(X, axis=1, keepdims=True)

        # (vocab_size, num_labels): total of each feature over the rows of each label.
        feature_totals = np.dot(X.T, y_one_hot)
        # (1, num_labels): total of all features over the rows of each label.
        label_totals = np.dot(X_row_sum.T, y_one_hot)

        self.vocab_probs = (self.alpha + feature_totals) / (self.alpha * vocab_size + label_totals)
        self.prior_probs = np.mean(y_one_hot, axis=0)
        self.vocab_log_probs = np.log(self.vocab_probs)
        # A label index below max(y) that never occurs in y has prior 0. Its log
        # prior is intentionally -inf (that label can never win the argmax), so
        # silence the divide-by-zero warning np.log would otherwise emit.
        with np.errstate(divide="ignore"):
            self.prior_log_probs = np.log(self.prior_probs)

    def predict(self, X):
        """Return the most probable label index for each row of ``X``.

        ``X`` must have the same number of columns as the matrix passed to
        ``fit``. Scores are unnormalised log-posteriors: the feature-weighted
        sum of log feature probabilities plus the log prior of each label.
        """
        log_posteriors = np.dot(X, self.vocab_log_probs) + self.prior_log_probs
        return np.argmax(log_posteriors, axis=1)

In [62]:
def calculate_accuracy(predictions, targets):
    """Return the fraction of positions where ``predictions`` equals ``targets``.

    Both arguments may be any array-like (NumPy arrays, lists, tuples). They
    are converted to arrays first so the comparison is element-wise; comparing
    two plain lists with ``==`` would yield a single bool instead.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape. Without this check a
        (N,) vs (N, 1) pair would broadcast to (N, N) and silently produce a
        meaningless score.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if predictions.shape != targets.shape:
        raise ValueError(
            "predictions and targets must have the same shape, got {} and {}".format(
                predictions.shape, targets.shape
            )
        )
    return np.mean(predictions == targets)

In [63]:
# Fit the classifier on the encoded training split, predict labels for the
# encoded validation split, and show the validation accuracy as the cell output.
model = MultinomialNaiveBayes()
model.fit(X_train, y_train)
y_predict = model.predict(X_valid)
calculate_accuracy(y_predict, y_valid)

0.9256198347107438