In [None]:
%matplotlib inline
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import minmax_scale
from sklearn.decomposition import PCA


In [None]:
# Load the dataset; '.' marks missing values and 'Label' is read as a categorical column.
dados = pd.read_csv('/home/erasmor/Downloads/2017/todos_apenas_baixa_representatividade.csv',sep=",",encoding = 'utf-8',  header=0,na_values='.',dtype={'Label':'category'})

# Turn infinite values (both signs) into NaN so they are imputed together with
# the other missing values. replace() returns a new frame, so assign it back.
dados = dados.replace([np.inf, -np.inf], np.nan)

# Impute NaN with the column mean. numeric_only=True keeps the categorical
# 'Label' column out of the mean computation.
dados = dados.fillna(dados.mean(numeric_only=True))


In [None]:
# Inspect the dataset: show the dtype of every column
# (the commented-out line below previews the first rows instead).
#dados.head()
dados.dtypes

In [None]:
# Memory used by each column, in bytes; deep=True also inspects object-dtype contents.
dados.memory_usage(deep=True)

In [None]:
# Check how many instances (rows) and attributes (columns) the dataset contains.
print("numero de linhas e colunas: ",dados.shape)

In [None]:
# Show the class distribution: number of rows per value of the target column 'Label'.
print(dados.groupby('Label').size())

In [None]:
# Column names of the DataFrame as a list.
cols = list(dados.columns)
# 'Label' is the target column and must not be rescaled.
cols.remove('Label')
# Drop every row that contains NaN or +/-inf in any column. `axis` is passed by
# keyword because recent pandas versions no longer accept it positionally, and
# .copy() makes the filtered frame independent so the assignment below does not
# write into a view of the original data.
dados = dados[~dados.isin([np.nan, np.inf, -np.inf]).any(axis=1)].copy()
# Rescale each remaining column; by default minmax_scale maps to the range [0, 1].
dados[cols] = dados[cols].apply(minmax_scale)


In [None]:
# Split the frame into the feature matrix and the target vector.
# Both are selected through the column name 'Label' rather than through
# hard-coded positions, so the split stays correct if the number or order of
# feature columns changes.
X_raw = dados.drop(columns=['Label']).values  # features
y_raw = dados['Label'].values  # attack class
# Cast to float32 and replace any remaining NaN/inf with finite numbers.
X_raw = np.nan_to_num(X_raw.astype(np.float32))


In [None]:
# Encode the categorical attack-class labels in y_raw as integer codes.
labelencoder_y = LabelEncoder()
y_raw = labelencoder_y.fit_transform(y_raw)

In [None]:
# Instantiate a PCA. n_components is the number of dimensions the original
# data will be reduced to.
pca = PCA(n_components=10, whiten=True,random_state=42)

In [None]:
# Fit the PCA on the feature matrix and replace X_raw with its projection.
# NOTE(review): X_raw is overwritten, so re-running this cell applies the PCA
# to data that has already been reduced.
X_raw = pca.fit_transform(X_raw)

In [None]:
def _run_committee_sampling(estimator, tag, history, X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Run one committee-based active-learning experiment on a single fold.

    Shared implementation behind the ``committee_sampling_*`` functions, which
    differ only in the estimator, the file-name tag and the history list.

    Parameters
    ----------
    estimator : unfitted scikit-learn classifier wrapped in the ActiveLearner.
    tag : short lowercase name (e.g. ``"knn"``) used in the output file names;
        its upper-case form is used in the progress message and report header.
    history : list that receives the test accuracy measured before the first
        query and after every query; it is also saved as CSV at the end.
    X_raw, y_raw : full feature matrix and encoded target vector.
    idx_data : indexable by fold; ``idx_data[idx_dobra][TRAIN]`` and
        ``idx_data[idx_dobra][TEST]`` are used to index ``X_raw``/``y_raw``.
    idx_dobra : fold identifier; also part of the output file names.
    TRAIN, TEST : keys/positions selecting the train and test indices of a fold.
    t_inicial : number of labelled samples to start with in addition to one
        per class present in the training part of the fold.

    Side effects
    ------------
    Appends to ``committee_performance_<tag>_dobra_<fold>.txt``, writes
    ``committee_history_<tag>_dobra_<fold>.csv``, appends to ``history`` and
    prints the accuracy after every query (clearing the cell output first).

    Reads the module-level names ``BATCH_SIZE`` and ``N_QUERIES``, which are
    not defined in this part of the notebook.
    """
    from functools import partial

    from IPython.display import clear_output
    from modAL.disagreement import vote_entropy_sampling
    from modAL.models import ActiveLearner, Committee
    from sklearn.metrics import classification_report, precision_score, recall_score
    # Imported locally like the other dependencies of this function; no
    # notebook-level import of train_test_split is visible in this part of the file.
    from sklearn.model_selection import train_test_split

    label = tag.upper()
    fold = str(idx_dobra)
    report_path = "committee_performance_" + tag + "_dobra_" + fold + ".txt"
    history_path = "committee_history_" + tag + "_dobra_" + fold + ".csv"

    # Training part of the fold is the query pool; the test part is only scored.
    train_idx = idx_data[idx_dobra][TRAIN]
    test_idx = idx_data[idx_dobra][TEST]
    X_pool, y_pool = X_raw[train_idx], y_raw[train_idx]
    X_teste, y_teste = X_raw[test_idx], y_raw[test_idx]

    # Initial labelled set: a stratified draw of (number of classes + t_inicial)
    # samples from the pool. The stratified split is used only for this purpose
    # and is intended to start with every class represented.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    learner = ActiveLearner(estimator=estimator, X_training=X_train, y_training=y_train)
    # NOTE(review): the committee holds the SAME learner object twice, exactly
    # as in the original code. Both members therefore always vote identically
    # and every teach() call reaches that one learner once per list entry.
    # Kept unchanged to preserve the experiment; confirm whether distinct
    # members were intended.
    committee = Committee(
        learner_list=[learner, learner],
        query_strategy=partial(vote_entropy_sampling, n_instances=BATCH_SIZE),
    )

    # The context manager closes the report even if a query or fit raises.
    with open(report_path, "a") as report:
        # Accuracy on the test part with only the initial training set.
        history.append(committee.score(X_teste, y_teste))
        predictions = None

        for index in range(N_QUERIES):
            # Ask the query strategy which pool samples to label next.
            query_index, query_instance = committee.query(X_pool)

            # Teach the queried samples to the committee.
            committee.teach(
                X=X_pool[query_index].reshape(BATCH_SIZE, -1),
                y=y_pool[query_index].reshape(BATCH_SIZE, ),
            )

            # Remove the queried samples from the pool.
            X_pool = np.delete(X_pool, query_index, axis=0)
            y_pool = np.delete(y_pool, query_index)

            # Evaluate on the test part after adding the new samples.
            score = committee.score(X_teste, y_teste)
            predictions = committee.predict(X_teste)
            precision = precision_score(y_teste, predictions, average='macro', zero_division=1)
            recall = recall_score(y_teste, predictions, average='macro', zero_division=1)
            # F1 as the harmonic mean of macro precision and macro recall;
            # guarded so a zero denominator yields 0.0 instead of an error/NaN.
            denominator = precision + recall
            f1score = 2 * ((precision * recall) / denominator) if denominator else 0.0

            clear_output(wait=True)
            print('Accuracy %s after query no. %d: %f' % (label, index + 1, score))
            history.append(score)
            report.write('Accuracy after query no. %d: %f \n' % (index + 1, score))
            report.write('Precision after query no. %d: %f \n' % (index + 1, precision))
            report.write('Recall after query no. %d: %f \n' % (index + 1, recall))
            report.write('F1 score after query no. %d: %f \n' % (index + 1, f1score))
            report.write('======================================== \n')

        if predictions is None:
            # No query was run (N_QUERIES == 0): report on the initial model.
            predictions = committee.predict(X_teste)

        report.write("\n Avaliação final " + label + " \n")
        report.write(classification_report(y_teste, predictions, zero_division=1))

    np.savetxt(history_path, history, delimiter=",")


def committee_sampling_knn(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Committee active learning on one fold with a 5-nearest-neighbours classifier.

    Accuracies are appended to the module-level list ``performance_history_knn``
    (not defined in this part of the notebook); output files use the tag ``knn``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    _run_committee_sampling(
        KNeighborsClassifier(n_neighbors=5), "knn", performance_history_knn,
        X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial,
    )


def committee_sampling_rf(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Committee active learning on one fold with a random forest (random_state=42).

    Accuracies are appended to the module-level list ``performance_history_rf``
    (not defined in this part of the notebook); output files use the tag ``rf``.
    """
    from sklearn.ensemble import RandomForestClassifier

    _run_committee_sampling(
        RandomForestClassifier(random_state=42), "rf", performance_history_rf,
        X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial,
    )


def committee_sampling_tree(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Committee active learning on one fold with a decision tree.

    Accuracies are appended to the module-level list ``performance_history_tree``
    (not defined in this part of the notebook); output files use the tag ``tree``.
    """
    from sklearn.tree import DecisionTreeClassifier

    _run_committee_sampling(
        DecisionTreeClassifier(), "tree", performance_history_tree,
        X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial,
    )


def committee_sampling_mlp(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Committee active learning on one fold with an MLP classifier (max_iter=2000).

    Accuracies are appended to the module-level list ``performance_history_mlp``
    (not defined in this part of the notebook); output files use the tag ``mlp``.
    The progress message is labelled MLP.
    """
    from sklearn.neural_network import MLPClassifier

    _run_committee_sampling(
        MLPClassifier(max_iter = 2000), "mlp", performance_history_mlp,
        X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial,
    )


def committee_sampling_xgb(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Committee active learning on one fold with gradient boosting.

    Despite the ``xgb`` name, the estimator is scikit-learn's
    GradientBoostingClassifier, so the xgboost package is not needed here.
    Accuracies are appended to the module-level list ``performance_history_xgb``
    (not defined in this part of the notebook); output files use the tag ``xgb``.
    """
    from sklearn.ensemble import GradientBoostingClassifier

    _run_committee_sampling(
        GradientBoostingClassifier(n_estimators=7, learning_rate=1.0,max_depth=1, random_state=42),
        "xgb", performance_history_xgb,
        X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial,
    )

def committee_sampling_svm(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Run the committee active-learning loop with a linear SVC on one fold.

    An initial labelled set is drawn with a stratified split of the fold's
    training indices, a modAL ``Committee`` is built on it, and up to
    ``N_QUERIES`` batches of ``BATCH_SIZE`` samples are queried from the pool
    with vote-entropy sampling and taught to the committee. After every query
    the accuracy, macro precision, macro recall and an F1 value are appended to
    ``committee_performance_svm_dobra_<idx_dobra>.txt``. At the end a
    classification report is appended to that file and the accuracy history is
    saved to ``committee_history_svm_dobra_<idx_dobra>.csv``.

    Relies on module-level names: ``np``, ``BATCH_SIZE``, ``N_QUERIES`` and the
    list ``performance_history_svm`` (appended to in place).

    Args:
        X_raw: Feature array, indexed with the index arrays stored in ``idx_data``.
        y_raw: Label array aligned with ``X_raw``.
        idx_data: Sequence where ``idx_data[idx_dobra][TRAIN]`` and
            ``idx_data[idx_dobra][TEST]`` are the index arrays of the fold.
        idx_dobra: Fold number; also used in the output file names.
        TRAIN: Position of the training indices inside an ``idx_data`` entry.
        TEST: Position of the test indices inside an ``idx_data`` entry.
        t_inicial: Added to the number of distinct training labels to get the
            size of the initial labelled set.

    Returns:
        None. Results go to the two output files and ``performance_history_svm``.
    """
    from functools import partial

    from IPython.display import clear_output
    from modAL.disagreement import vote_entropy_sampling
    from modAL.models import ActiveLearner, Committee
    from sklearn import svm
    from sklearn.metrics import classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split

    # Suffix that identifies the fold in the output file names.
    indica_pool = str(idx_dobra)
    arquivo_history_svm = "committee_history_svm_dobra_" + indica_pool + ".csv"

    # Test samples and training pool of the fold in use.
    X_teste, y_teste = X_raw[idx_data[idx_dobra][TEST]], y_raw[idx_data[idx_dobra][TEST]]
    X_pool, y_pool = X_raw[idx_data[idx_dobra][TRAIN]], y_raw[idx_data[idx_dobra][TRAIN]]

    # Initial labelled set. The split is stratified and sized as (number of
    # distinct labels + t_inicial) so the draw is not a plain random sample,
    # which would not guarantee starting with every class represented.
    # NOTE(review): these samples are not removed from X_pool, so they can be
    # queried again later - confirm this is intended.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    # Active-learning classifier and batch query strategy.
    preset_batch = partial(vote_entropy_sampling, n_instances=BATCH_SIZE)
    learner_svm = ActiveLearner(
        estimator=svm.SVC(kernel='linear', probability=True),
        X_training=X_train,
        y_training=y_train,
    )
    # NOTE(review): the committee holds the same learner object twice (kept
    # as in the original code). Both entries are one model, so they can never
    # disagree - presumably unintended; confirm before changing, since it
    # would alter the experiment results.
    learner_list = [learner_svm, learner_svm]
    committee = Committee(learner_list=learner_list, query_strategy=preset_batch)

    # "with" guarantees the log file is closed even if a query/teach step raises.
    with open("committee_performance_svm_dobra_" + indica_pool + ".txt", "a") as arquivo_performance_svm:
        # Score on the test portion after the initial training only.
        uncertain_sample_score_svm = committee.score(X_teste, y_teste)
        performance_history_svm.append(uncertain_sample_score_svm)

        predictions = None
        for index in range(N_QUERIES):
            # The reshape below needs exactly BATCH_SIZE rows, so stop cleanly
            # when the pool can no longer supply a full batch.
            if len(X_pool) < BATCH_SIZE:
                arquivo_performance_svm.write('Pool exhausted before query no. %d \n' % (index+1))
                break

            # Pick samples from the pool according to the query strategy.
            query_index, query_instance = committee.query(X_pool)

            # Teach the queried samples to the committee.
            committee.teach(X=X_pool[query_index].reshape(BATCH_SIZE, -1), y=y_pool[query_index].reshape(BATCH_SIZE, ))

            # Remove the queried samples from the pool.
            X_pool = np.delete(X_pool, query_index, axis=0)
            y_pool = np.delete(y_pool, query_index)

            # Evaluate on the test portion after adding the new data.
            committee_sample_score_svm = committee.score(X_teste, y_teste)
            predictions = committee.predict(X_teste)
            clear_output(wait=True)
            print('Accuracy SVM after query no. %d: %f' % (index+1, committee_sample_score_svm))
            arquivo_performance_svm.write('Accuracy after query no. %d: %f \n' % (index+1, committee_sample_score_svm))
            performance_history_svm.append(committee_sample_score_svm)

            # Compute each metric once and reuse it for the log and the F1 value.
            precision = precision_score(y_teste, predictions, average='macro', zero_division=1)
            recall = recall_score(y_teste, predictions, average='macro', zero_division=1)
            arquivo_performance_svm.write('Precision after query no. %d: %f \n' % (index+1, precision))
            arquivo_performance_svm.write('Recall after query no. %d: %f \n' % (index+1, recall))

            # F1 here is the harmonic mean of macro precision and macro recall
            # (not sklearn's per-class macro F1); guard the 0/0 case.
            if precision + recall == 0:
                f1score = 0.0
            else:
                f1score = 2 * ((precision * recall) / (precision + recall))
            arquivo_performance_svm.write('F1 score after query no. %d: %f \n' % (index+1, f1score))
            arquivo_performance_svm.write('======================================== \n')

        # When no query ran, the report still needs predictions.
        if predictions is None:
            predictions = committee.predict(X_teste)

        arquivo_performance_svm.write("\n Avaliação final SVM \n")
        arquivo_performance_svm.write(classification_report(y_teste, predictions, zero_division=1))

    # NOTE(review): performance_history_svm is a module-level list that is not
    # reset here, so this CSV holds everything appended so far, not only the
    # values of this fold - confirm this is intended.
    np.savetxt(arquivo_history_svm, performance_history_svm, delimiter=",")
    
def committee_sampling_nb(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Run the committee active-learning loop with Gaussian Naive Bayes on one fold.

    An initial labelled set is drawn with a stratified split of the fold's
    training indices, a modAL ``Committee`` is built on it, and up to
    ``N_QUERIES`` batches of ``BATCH_SIZE`` samples are queried from the pool
    with vote-entropy sampling and taught to the committee. After every query
    the accuracy, macro precision, macro recall and an F1 value are appended to
    ``committee_performance_nb_dobra_<idx_dobra>.txt``. At the end a
    classification report is appended to that file and the accuracy history is
    saved to ``committee_history_nb_dobra_<idx_dobra>.csv``.

    Relies on module-level names: ``np``, ``BATCH_SIZE``, ``N_QUERIES`` and the
    list ``performance_history_nb`` (appended to in place).

    Args:
        X_raw: Feature array, indexed with the index arrays stored in ``idx_data``.
        y_raw: Label array aligned with ``X_raw``.
        idx_data: Sequence where ``idx_data[idx_dobra][TRAIN]`` and
            ``idx_data[idx_dobra][TEST]`` are the index arrays of the fold.
        idx_dobra: Fold number; also used in the output file names.
        TRAIN: Position of the training indices inside an ``idx_data`` entry.
        TEST: Position of the test indices inside an ``idx_data`` entry.
        t_inicial: Added to the number of distinct training labels to get the
            size of the initial labelled set.

    Returns:
        None. Results go to the two output files and ``performance_history_nb``.
    """
    from functools import partial

    from IPython.display import clear_output
    from modAL.disagreement import vote_entropy_sampling
    from modAL.models import ActiveLearner, Committee
    from sklearn.metrics import classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    # Suffix that identifies the fold in the output file names.
    indica_pool = str(idx_dobra)
    arquivo_history_nb = "committee_history_nb_dobra_" + indica_pool + ".csv"

    # Test samples and training pool of the fold in use.
    X_teste, y_teste = X_raw[idx_data[idx_dobra][TEST]], y_raw[idx_data[idx_dobra][TEST]]
    X_pool, y_pool = X_raw[idx_data[idx_dobra][TRAIN]], y_raw[idx_data[idx_dobra][TRAIN]]

    # Initial labelled set. The split is stratified and sized as (number of
    # distinct labels + t_inicial) so the draw is not a plain random sample,
    # which would not guarantee starting with every class represented.
    # NOTE(review): these samples are not removed from X_pool, so they can be
    # queried again later - confirm this is intended.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    # Active-learning classifier and batch query strategy.
    preset_batch = partial(vote_entropy_sampling, n_instances=BATCH_SIZE)
    learner_nb = ActiveLearner(estimator=GaussianNB(), X_training=X_train, y_training=y_train)
    # NOTE(review): the committee holds the same learner object twice (kept
    # as in the original code). Both entries are one model, so they can never
    # disagree - presumably unintended; confirm before changing, since it
    # would alter the experiment results.
    learner_list = [learner_nb, learner_nb]
    committee = Committee(learner_list=learner_list, query_strategy=preset_batch)

    # "with" guarantees the log file is closed even if a query/teach step raises.
    with open("committee_performance_nb_dobra_" + indica_pool + ".txt", "a") as arquivo_performance_nb:
        # Score on the test portion after the initial training only.
        uncertain_sample_score_nb = committee.score(X_teste, y_teste)
        performance_history_nb.append(uncertain_sample_score_nb)

        predictions = None
        for index in range(N_QUERIES):
            # The reshape below needs exactly BATCH_SIZE rows, so stop cleanly
            # when the pool can no longer supply a full batch.
            if len(X_pool) < BATCH_SIZE:
                arquivo_performance_nb.write('Pool exhausted before query no. %d \n' % (index+1))
                break

            # Pick samples from the pool according to the query strategy.
            query_index, query_instance = committee.query(X_pool)

            # Teach the queried samples to the committee.
            committee.teach(X=X_pool[query_index].reshape(BATCH_SIZE, -1), y=y_pool[query_index].reshape(BATCH_SIZE, ))

            # Remove the queried samples from the pool.
            X_pool = np.delete(X_pool, query_index, axis=0)
            y_pool = np.delete(y_pool, query_index)

            # Evaluate on the test portion after adding the new data.
            committee_sample_score_nb = committee.score(X_teste, y_teste)
            predictions = committee.predict(X_teste)
            clear_output(wait=True)
            print('Accuracy NB after query no. %d: %f' % (index+1, committee_sample_score_nb))
            arquivo_performance_nb.write('Accuracy after query no. %d: %f \n' % (index+1, committee_sample_score_nb))
            performance_history_nb.append(committee_sample_score_nb)

            # Compute each metric once and reuse it for the log and the F1 value.
            precision = precision_score(y_teste, predictions, average='macro', zero_division=1)
            recall = recall_score(y_teste, predictions, average='macro', zero_division=1)
            arquivo_performance_nb.write('Precision after query no. %d: %f \n' % (index+1, precision))
            arquivo_performance_nb.write('Recall after query no. %d: %f \n' % (index+1, recall))

            # F1 here is the harmonic mean of macro precision and macro recall
            # (not sklearn's per-class macro F1); guard the 0/0 case.
            if precision + recall == 0:
                f1score = 0.0
            else:
                f1score = 2 * ((precision * recall) / (precision + recall))
            arquivo_performance_nb.write('F1 score after query no. %d: %f \n' % (index+1, f1score))
            arquivo_performance_nb.write('======================================== \n')

        # When no query ran, the report still needs predictions.
        if predictions is None:
            predictions = committee.predict(X_teste)

        arquivo_performance_nb.write("\n Avaliação final NB \n")
        arquivo_performance_nb.write(classification_report(y_teste, predictions, zero_division=1))

    # NOTE(review): performance_history_nb is a module-level list that is not
    # reset here, so this CSV holds everything appended so far, not only the
    # values of this fold - confirm this is intended.
    np.savetxt(arquivo_history_nb, performance_history_nb, delimiter=",")


import time
import sys
import threading
from datetime import datetime
from datetime import date
from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit, train_test_split
from timeit import Timer
import time
import functools
# importing the multiprocessing module
import multiprocessing

# Start the wall-clock timer for the whole experiment.
t1 = time.time()
# Number of cross-validation splits (folds).
n_dobras = 10
# Added to the number of distinct labels to size the initial labelled set
# that every strategy starts from.
t_inicial = 10
# Counter of samples used by the strategy.
# NOTE(review): not read anywhere in this part of the notebook - confirm it is still needed.
sample_size = 0

# Position of the train / test index arrays inside each idx_data entry:
# idx_data[fold][TRAIN] and idx_data[fold][TEST].
TRAIN = 0
TEST = 1

# Build the folds: split() yields one (train indices, test indices) pair per
# split, and the datasets of a fold are obtained by indexing X_raw / y_raw
# with those arrays.
data_cv = StratifiedShuffleSplit(n_splits=n_dobras, random_state=42)
idx_data = []
for train_index, test_index in data_cv.split(X_raw, y_raw):
    idx_data.append([train_index, test_index])

# Report the number of training instances per fold. The first fold is used so
# this does not break when fewer than four folds are configured.
print("tamanho de cada dobra: ", idx_data[0][TRAIN].shape)

# Accuracy histories, one list per classifier; the strategy functions append to them.
performance_history_knn = []
performance_history_rf = []
performance_history_tree = []
performance_history_mlp = []
performance_history_xgb = []
performance_history_svm = []
performance_history_nb = []

# Query budget: N_QUERIES batches of BATCH_SIZE samples each.
BATCH_SIZE = 400
N_RAW_SAMPLES = 20000
N_QUERIES = N_RAW_SAMPLES // BATCH_SIZE


# Run every enabled strategy on every fold.
#
# The strategies are called directly, one after another. Writing
# multiprocessing.Process(target=f(args)) would also run f right here in this
# process (the call is evaluated before Process is built) and then start a
# child with no target. Real child processes are not used because the
# strategies append to the module-level performance_history_* lists that the
# plotting cell reads; a child process would append to its own copy, so
# parallelising this first requires returning the histories explicitly.
if __name__ == "__main__":
    estrategias = (
        committee_sampling_knn,
        committee_sampling_rf,
        committee_sampling_nb,
        committee_sampling_tree,
        # Currently disabled:
        # committee_sampling_mlp,
        # committee_sampling_xgb,
        # committee_sampling_svm,
    )
    for idx_dobra in range(n_dobras):
        for estrategia in estrategias:
            estrategia(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)

        # All strategies finished for this fold.
        print("Terminado!")


# Report the total elapsed time as HH:MM:SS.ss.
t2 = time.time()
time_elapsed = (t2 - t1)
hours, rem = divmod(time_elapsed, 3600)
minutes, seconds = divmod(rem, 60)
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))

In [None]:
# Plot the accuracy recorded after each query for every enabled classifier.
fig, ax = plt.subplots(figsize=(8.5, 6), dpi=130)

# One (history, colour, legend label) entry per curve. The labels make the
# figure readable on its own instead of relying on the colour order.
curvas = [
    (performance_history_knn, "blue", "KNN"),
    (performance_history_rf, "red", "RF"),
    (performance_history_tree, "green", "TREE"),
    # Currently disabled:
    # (performance_history_mlp, "yellow", "MLP"),
    # (performance_history_xgb, "orange", "XGB"),
    # (performance_history_svm, "brown", "SVM"),
    (performance_history_nb, "pink", "NB"),
]
for historico, cor, rotulo in curvas:
    ax.plot(historico, color=cor, label=rotulo)

ax.xaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=5, integer=True))
ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=10))
# Histories hold accuracies in [0, 1]; show them as percentages.
ax.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1))

ax.set_ylim(bottom=0, top=1)
ax.grid(True)

ax.set_title('Incremental classification accuracy')
ax.set_xlabel('Query iteration')
ax.set_ylabel('Classification Accuracy')
ax.legend(loc='lower right')

plt.show()