### Naive Bayes Training Script
#### Author: Austin Fernandez
#### Date Modified: April 25, 2020

In [1]:
# dependencies
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.naive_bayes import CategoricalNB, ComplementNB, MultinomialNB, GaussianNB
# from sklearn.naive_bayes import ComplementNB, MultinomialNB
import numpy as np
from scipy.stats import uniform, norm
from sklearn.metrics import plot_confusion_matrix
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.preprocessing import MinMaxScaler
from XLB import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the labeled training set and rescale every feature column with a
# MinMaxScaler using its default settings. extract_data is not defined in
# this notebook; presumably it is supplied by the XLB wildcard import.
# NOTE(review): the scaler is fitted on the full training set before any
# cross-validation split, so validation folds influence the scaling — confirm
# this is acceptable.
x_train, y_train = extract_data("TrainingSet (Labeled Only).csv")
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)

In [None]:
# Build two resampled copies of the scaled training data, one with SMOTE and
# one with RandomOverSampler. Both use sampling_strategy="not majority" and
# the same seed; the resulting array shapes are printed as a sanity check.
# NOTE(review): resampling happens before cross-validation, so resampled
# rows can land in both the training and validation folds — confirm the
# reported scores are meant to be read with that in mind.
rand_seed = 3454132

# SMOTE-balanced copy.
oversampler = SMOTE(sampling_strategy="not majority", random_state=rand_seed)
x_smote, y_smote = oversampler.fit_resample(x_train, y_train)
print(x_smote.shape, y_smote.shape)

# RandomOverSampler-balanced copy; `oversampler` is deliberately rebound.
oversampler = RandomOverSampler(
    sampling_strategy="not majority",
    random_state=rand_seed,
)
x_os, y_os = oversampler.fit_resample(x_train, y_train)
print(x_os.shape, y_os.shape)

In [3]:
# GaussianNB, exhaustive grid search with 5-fold cross-validation.
k_folds = 5

# var_smoothing candidates: mantissa / 10**exponent for exponent 1..9 and
# mantissa 1..9, i.e. 81 values spanning 1e-9 .. 0.9, grouped by exponent.
parameters = {
    'var_smoothing': [
        mantissa / np.power(10, exponent)
        for exponent in range(1, 10)
        for mantissa in range(1, 10)
    ],
}

# NOTE(review): the name `cnb` holds a GaussianNB here; the ComplementNB
# cells later in the notebook rebind the same name.
cnb = GaussianNB()
grid_search_gaussian = GridSearchCV(
    estimator=cnb,
    param_grid=parameters,
    cv=k_folds,
    n_jobs=-1,
)

TypeError: __init__() got an unexpected keyword argument 'random_state'

In [None]:
# GaussianNB grid search on the scaled, un-resampled training data.
print_res("Vanilla", x_train, y_train, grid_search_gaussian)

In [None]:
# GaussianNB grid search on the RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os, y_os, grid_search_gaussian)

In [None]:
# GaussianNB grid search on the SMOTE-resampled data.
print_res("SMOTE", x_smote, y_smote, grid_search_gaussian)

In [4]:
# MultinomialNB, exhaustive grid search with 5-fold cross-validation.
k_folds = 5

# alpha sweeps 0.01 .. 1.00 in steps of 0.01 (100 values); each is tried
# with and without learned class priors.
parameters = {
    'alpha': [0.01 * step for step in range(1, 101)],
    'fit_prior': [True, False],
}

mnb = MultinomialNB()
grid_search_multinomial = GridSearchCV(
    estimator=mnb,
    param_grid=parameters,
    cv=k_folds,
    n_jobs=-1,
)

TypeError: __init__() got an unexpected keyword argument 'random_state'

In [None]:
# MultinomialNB grid search on the first four feature columns only.
# NOTE(review): the RandomizedSearch MultinomialNB cells pass every column —
# confirm which feature subset is intended.
print_res("Vanilla", x_train[:, :4], y_train, grid_search_multinomial)

In [None]:
# MultinomialNB grid search on the first four columns of the
# RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os[:, :4], y_os, grid_search_multinomial)

In [None]:
# MultinomialNB grid search on the first four columns of the SMOTE data.
print_res("SMOTE", x_smote[:, :4], y_smote, grid_search_multinomial)

In [5]:
# CategoricalNB, exhaustive grid search with 5-fold cross-validation.
k_folds = 5

# alpha sweeps 0.01 .. 1.00 in steps of 0.01 (100 values); each is tried
# with and without learned class priors.
parameters = {
    'alpha': [0.01 * step for step in range(1, 101)],
    'fit_prior': [True, False],
}

canb = CategoricalNB()
# NOTE(review): unlike the other searches in this notebook, no n_jobs is
# passed here, so the search uses the default — confirm that is deliberate.
grid_search_categorical = GridSearchCV(
    estimator=canb,
    param_grid=parameters,
    cv=k_folds,
)

TypeError: __init__() got an unexpected keyword argument 'random_state'

In [None]:
# CategoricalNB grid search on the first five feature columns only.
# NOTE(review): x_train was MinMax-scaled earlier; CategoricalNB is documented
# for integer-encoded categories, so fractional values are presumably
# truncated on input — confirm these columns should be passed scaled.
print_res("Vanilla", x_train[:, 0:5], y_train, grid_search_categorical)

In [None]:
# CategoricalNB grid search on the first five columns of the
# RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os[:, 0:5], y_os, grid_search_categorical)

In [None]:
# CategoricalNB grid search on the first five columns of the SMOTE data.
# NOTE(review): SMOTE output is built from already-scaled rows, so these
# columns are presumably not integer category codes — confirm.
print_res("SMOTE", x_smote[:, 0:5], y_smote, grid_search_categorical)

In [7]:
# ComplementNB, exhaustive grid search with 5-fold cross-validation.
k_folds = 5

# alpha sweeps 0.01 .. 1.00 in steps of 0.01 (100 values), crossed with both
# settings of fit_prior and norm.
parameters = {
    'alpha': [0.01 * step for step in range(1, 101)],
    'fit_prior': [True, False],
    'norm': [True, False],
}

# Rebinds `cnb`, which the GaussianNB cell also assigns.
cnb = ComplementNB()
grid_search_complement = GridSearchCV(
    estimator=cnb,
    param_grid=parameters,
    cv=k_folds,
    n_jobs=-1,
)

In [None]:
# ComplementNB grid search on the scaled, un-resampled training data.
print_res("Vanilla", x_train, y_train, grid_search_complement)

In [None]:
# ComplementNB grid search on the RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os, y_os, grid_search_complement)

In [None]:
# ComplementNB grid search on the SMOTE-resampled data.
print_res("SMOTE", x_smote, y_smote, grid_search_complement)

In [8]:
# ComplementNB, randomized search: 1000 sampled configurations, 5-fold CV.
random_search_iterations = 1000
k_folds = 5
# Rebinding rand_seed here also changes the seed read by every cell executed
# after this one (the later RandomizedSearchCV cells use it).
rand_seed = 3249807

# alpha is drawn from a uniform distribution with loc=0 and scale=1.0;
# fit_prior and norm are each sampled from {True, False}.
parameters = {
    'alpha': uniform(loc=0, scale=1.0),
    'fit_prior': [True, False],
    'norm': [True, False],
}

cnb = ComplementNB()
random_search_complement = RandomizedSearchCV(
    estimator=cnb,
    param_distributions=parameters,
    cv=k_folds,
    n_iter=random_search_iterations,
    random_state=rand_seed,
    n_jobs=-1,
)

TypeError: __init__() got an unexpected keyword argument 'random_state'

In [None]:
# ComplementNB randomized search on the scaled, un-resampled training data.
print_res("Vanilla", x_train, y_train, random_search_complement)

In [None]:
# ComplementNB randomized search on the RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os, y_os, random_search_complement)

In [None]:
# ComplementNB randomized search on the SMOTE-resampled data.
print_res("SMOTE", x_smote, y_smote, random_search_complement)

In [9]:
# CategoricalNB, randomized search: 1000 sampled configurations, 5-fold CV.
random_search_iterations = 1000
k_folds = 5

# alpha is drawn from a uniform distribution with loc=0 and scale=1.0;
# fit_prior is sampled from {True, False}.
parameters = {
    'alpha': uniform(loc=0, scale=1.0),
    'fit_prior': [True, False],
}

canb = CategoricalNB()
# rand_seed is not set in this cell: it is whatever an earlier-executed cell
# bound. NOTE(review): no n_jobs is passed here, unlike the ComplementNB and
# MultinomialNB randomized searches — confirm that is deliberate.
random_search_categorical = RandomizedSearchCV(
    estimator=canb,
    param_distributions=parameters,
    cv=k_folds,
    n_iter=random_search_iterations,
    random_state=rand_seed,
)

TypeError: __init__() got an unexpected keyword argument 'random_state'

In [None]:
# CategoricalNB randomized search on the first five feature columns only.
# NOTE(review): x_train was MinMax-scaled earlier; CategoricalNB is documented
# for integer-encoded categories, so fractional values are presumably
# truncated on input — confirm these columns should be passed scaled.
print_res("Vanilla", x_train[:, 0:5], y_train, random_search_categorical)

In [None]:
# CategoricalNB randomized search on the first five columns of the
# RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os[:, 0:5], y_os, random_search_categorical)

In [None]:
# CategoricalNB randomized search on the first five columns of the SMOTE data.
print_res("SMOTE", x_smote[:, 0:5], y_smote, random_search_categorical)

In [None]:
# MultinomialNB, randomized search: 1000 sampled configurations, 5-fold CV.
random_search_iterations = 1000
k_folds = 5

# alpha is drawn from a uniform distribution with loc=0 and scale=1.0;
# fit_prior is sampled from {True, False}.
parameters = {
    'alpha': uniform(loc=0, scale=1.0),
    'fit_prior': [True, False],
}

mnb = MultinomialNB()
# rand_seed is not set in this cell: it is whatever an earlier-executed cell
# bound.
random_search_multinomial = RandomizedSearchCV(
    estimator=mnb,
    param_distributions=parameters,
    cv=k_folds,
    n_iter=random_search_iterations,
    random_state=rand_seed,
    n_jobs=-1,
)

In [None]:
# MultinomialNB randomized search on all columns of the scaled training data.
# NOTE(review): the MultinomialNB grid-search cells pass only the first four
# columns — confirm which feature subset is intended.
print_res("Vanilla", x_train, y_train, random_search_multinomial)

In [None]:
# MultinomialNB randomized search on the RandomOverSampler-resampled data.
print_res("Random Oversampling", x_os, y_os, random_search_multinomial)

In [None]:
# MultinomialNB randomized search on the SMOTE-resampled data.
print_res("SMOTE", x_smote, y_smote, random_search_multinomial)