In [16]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns
from Rashtriya_Raksha_University_Gaussian_NB import rru_gaussian_nb
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import L
from sklearn.metrics import accuracy_score,precision_score,recall_score

In [17]:
class rru_gaussian_nb_scania(rru_gaussian_nb):
    
    def __init__(xerox_copy,data,non_missing_threshold,split_ratio,apply_pca_or_not,n_principal_components):
        
        data.replace(to_replace='na',value=np.nan,inplace=True)
        
        data.dropna(axis=1,inplace=True,thresh=int(non_missing_threshold*data.shape[0]))
        
        data_labels = data['class']
        
        imputer = SimpleImputer()
        
        data_array = imputer.fit_transform(X=data.iloc[:,1:])
        
        data_columns = data.columns
        
        data = pd.DataFrame(data=data_array,columns=data_columns[1:])
        
        np_array_list = list()
        
        for column in data.columns:
    
            data[column] = pd.qcut(x=data[column],q=10,duplicates='drop').cat.codes
        
            np_array_list.append(np.eye(10,10)[data[column]])
            
        data_array = np.concatenate(np_array_list,axis=1)
        
        data = pd.DataFrame(data=data_array)

        data['class'] = data_labels
        
        xerox_copy.data = data
        
        super().__init__(features=data.iloc[:,0:data.shape[1]-1],labels=data['class'],data_split_ratio=split_ratio,
                         apply_pca=apply_pca_or_not,n_components=n_principal_components)

In [18]:
data = pd.read_csv("./aps_failure_training_set.csv",header=None,skiprows=20)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [19]:
column_names = data.iloc[0]

data = pd.read_csv("./aps_failure_training_set.csv",header=None,skiprows=21,names=column_names)

In [20]:
naive_bayes_configs = dict()

for non_na_thresh in np.arange(0.7,1,0.1):
    
    for n_comp in np.arange(20,170,50):
        
        naive_bayes_configs[(non_na_thresh,n_comp)] = rru_gaussian_nb_scania(data,non_na_thresh,(0.8,0.2,0.0),True,n_comp)

In [21]:
naive_bayes_configs

{(0.7, 20): <__main__.rru_gaussian_nb_scania at 0x1c1e48de340>,
 (0.7, 70): <__main__.rru_gaussian_nb_scania at 0x1c1e48deac0>,
 (0.7, 120): <__main__.rru_gaussian_nb_scania at 0x1c1e48de2b0>,
 (0.7999999999999999, 20): <__main__.rru_gaussian_nb_scania at 0x1c1e48de5e0>,
 (0.7999999999999999, 70): <__main__.rru_gaussian_nb_scania at 0x1c1e48de5b0>,
 (0.7999999999999999, 120): <__main__.rru_gaussian_nb_scania at 0x1c1c00d5400>,
 (0.8999999999999999, 20): <__main__.rru_gaussian_nb_scania at 0x1c1f26cd0d0>,
 (0.8999999999999999, 70): <__main__.rru_gaussian_nb_scania at 0x1c1f26cd820>,
 (0.8999999999999999, 120): <__main__.rru_gaussian_nb_scania at 0x1c1e48de430>,
 (0.9999999999999999, 20): <__main__.rru_gaussian_nb_scania at 0x1c1e51ec8b0>,
 (0.9999999999999999, 70): <__main__.rru_gaussian_nb_scania at 0x1c1e5090b20>,
 (0.9999999999999999, 120): <__main__.rru_gaussian_nb_scania at 0x1c1e5090c70>}

In [22]:
naive_bayes = list()

cv_data_list = list()

for obj in naive_bayes_configs.values():
    
    X_resampled,y_resampled = SMOTE(sampling_strategy='minority').fit_sample(X=obj.X_new,y=data['class'])
    
    data_resampled = pd.DataFrame(data=X_resampled)
    
    data_resampled['class'] = data['class']
    
    train_data,cv_data,test_data = obj.data_splitting(data_resampled)
    
    cv_data_list.append(cv_data)
    
    naive_bayes.append(GaussianNB().fit(X=np.array(train_data.iloc[:,0:train_data.shape[1]-1]),y=train_data['label']))

In [23]:
metrics = dict()

for obj,cv_data,config in tuple(zip(naive_bayes,cv_data_list,naive_bayes_configs.keys())):
    
    predicted_category = obj.predict(X=np.array(cv_data.iloc[:,0:cv_data.shape[1]-1]))
    
    acc = accuracy_score(y_true=np.array(cv_data['label']),y_pred=predicted_category)
    
    metrics[config] = {'acc':acc}

In [24]:
metrics

{(0.7, 20): {'acc': 0.9361016949152542},
 (0.7, 70): {'acc': 0.9513559322033899},
 (0.7, 120): {'acc': 0.9513559322033899},
 (0.7999999999999999, 20): {'acc': 0.9408474576271186},
 (0.7999999999999999, 70): {'acc': 0.9533898305084746},
 (0.7999999999999999, 120): {'acc': 0.956864406779661},
 (0.8999999999999999, 20): {'acc': 0.9374576271186441},
 (0.8999999999999999, 70): {'acc': 0.9509322033898305},
 (0.8999999999999999, 120): {'acc': 0.9536440677966102},
 (0.9999999999999999, 20): {'acc': 0.9141525423728813},
 (0.9999999999999999, 70): {'acc': 0.9141525423728813},
 (0.9999999999999999, 120): {'acc': 0.9141525423728813}}