In [None]:
%matplotlib inline
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import minmax_scale
from sklearn.decomposition import PCA


In [None]:
# Load the dataset. '.' is read as a missing value and 'Label' is stored as a
# pandas category.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable DATA_DIR.
dados = pd.read_csv('/home/erasmor/Downloads/2017/todos_apenas_baixa_representatividade.csv',sep=",",encoding = 'utf-8',  header=0,na_values='.',dtype={'Label':'category'})
# Turn infinite values (both signs) into NaN. replace() returns a new frame,
# so the result has to be assigned back for the change to take effect.
dados = dados.replace([np.inf, -np.inf], np.nan)
# Fill NaN with the mean of each numeric column. numeric_only=True keeps the
# categorical 'Label' column out of the mean computation.
dados = dados.fillna(dados.mean(numeric_only=True))


In [None]:
# Display the dtype of every column of the loaded frame
dados.dtypes

In [None]:
# Display the memory used by each column, in bytes (deep=True asks pandas to
# inspect object-dtype contents instead of only counting references)
dados.memory_usage(deep=True)

In [None]:
# Check how many instances (rows) and attributes (columns) the dataset contains
print("numero de linhas e colunas: ",dados.shape)

In [None]:
# Show the class distribution: number of rows for each value of the target
# column ('Label' is the name of the target column in the CSV)
print(dados.groupby('Label').size())

In [None]:
cols = list(dados.columns)
# Columns that will not be normalized
cols.remove('Label')
# Drop every row that still contains NaN or +/-inf. The axis is passed by
# keyword because pandas 2.x no longer accepts it positionally, and .copy()
# makes the result an independent frame so the assignment below does not
# write into a slice of the original (SettingWithCopyWarning).
dados = dados[~dados.isin([np.nan, np.inf, -np.inf]).any(axis=1)].copy()
# Rescale each continuous column independently. By default minmax_scale maps
# the column to the range [0, 1].
dados[cols] = dados[cols].apply(minmax_scale)

In [None]:
# Split the frame into the feature matrix and the target vector. The target is
# selected by its column name ('Label') instead of a hard-coded position, so
# the split stays correct if the number of attribute columns changes.
X_raw = dados.drop(columns='Label').values # attributes
y_raw = dados['Label'].values # attack class
# Cast to float32 and replace any remaining NaN/inf with finite numbers
X_raw = np.nan_to_num(X_raw.astype(np.float32))

In [None]:
# Convert the categorical target y (attack classes) into integer labels
labelencoder_y = LabelEncoder()
y_raw = labelencoder_y.fit_transform(y_raw)

In [None]:
# Instantiate a PCA. The n_components parameter is the number of dimensions
# the original data will be reduced to (10 here); random_state is fixed so the
# decomposition is repeatable.
pca = PCA(n_components=10, whiten=True,random_state=42)

In [None]:
# Fit the PCA on the feature matrix and replace X_raw with its projection.
# X_raw is overwritten, so re-running this cell would apply the PCA to data
# that has already been projected.
# NOTE(review): the PCA is fit on every row of X_raw, before the per-fold
# train/test indices are applied further down - confirm this is intended.
X_raw = pca.fit_transform(X_raw)

In [None]:
def _run_random_sampling(learner, tag, X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Run the random-sampling baseline for one classifier on one fold.

    The classifier is first trained on a small stratified seed set and then
    retrained after each of N_QUERIES rounds, where every round moves
    BATCH_SIZE randomly chosen rows from the pool into the training set.
    Scores on the fold's test rows are recorded after every fit.

    Args:
        learner: unfitted estimator exposing fit(X, y) and predict(X).
        tag: short classifier name used in the output file names and in the
            progress message (e.g. "knn").
        X_raw: feature array, indexed with the fold's index arrays.
        y_raw: label array aligned with X_raw.
        idx_data: per-fold index container; idx_data[idx_dobra][TRAIN] and
            idx_data[idx_dobra][TEST] select the training and test rows.
            Presumably a list of (train, test) index pairs - verify against
            the caller.
        idx_dobra: fold number; also part of every output file name.
        TRAIN: key/position of the training indices inside a fold entry.
        TEST: key/position of the test indices inside a fold entry.
        t_inicial: number of seed samples in addition to one per class.

    Reads the module-level N_QUERIES and BATCH_SIZE when called.

    Side effects (all in the current working directory):
        random_accuracy_performance_<tag>_dobra_<fold>.txt  (appended)
        random_accuracy_history_<tag>_dobra_<fold>.csv
        random_F1_Score_history_<tag>_dobra_<fold>.csv
        random_tempo_history_<tag>_dobra_<fold>.csv

    Returns:
        None.
    """
    # Local imports, matching the style of this notebook: the helper stays
    # runnable even when the top import cell does not provide these names.
    import time
    from IPython.display import clear_output
    from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split

    def _scores(y_true, y_pred):
        # Each metric is computed once per fit. F1 is the harmonic mean of
        # macro precision and macro recall, as in the original notebook, and
        # is 0.0 when both are zero instead of dividing by zero.
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='macro', zero_division=1)
        recall = recall_score(y_true, y_pred, average='macro', zero_division=1)
        total = precision + recall
        f1 = 2 * ((precision * recall) / total) if total > 0 else 0.0
        return accuracy, precision, recall, f1

    fold = str(idx_dobra)

    # Rows of the current fold
    train_idx = idx_data[idx_dobra][TRAIN]
    test_idx = idx_data[idx_dobra][TEST]
    X_fold, y_fold = X_raw[train_idx], y_raw[train_idx]
    X_teste, y_teste = X_raw[test_idx], y_raw[test_idx]

    # Stratified seed set: one sample per class present in the fold plus
    # t_inicial extra ones, so the first fit sees every label. The remainder
    # of the split becomes the pool, which guarantees that a seed sample can
    # never be drawn again later.
    n_inicial = len(np.unique(y_fold)) + t_inicial
    X_train, X_pool, y_train, y_pool = train_test_split(
        X_fold, y_fold, train_size=n_inicial, stratify=y_fold)

    accuracy_history = []
    f1_history = []
    tempo_history = []

    # Score on the test rows after training on the seed set only
    learner.fit(X_train, y_train)
    predictions = learner.predict(X_teste)
    accuracy, _, _, f1 = _scores(y_teste, predictions)
    accuracy_history.append(accuracy)
    f1_history.append(f1)

    # The context manager closes the report even if a later fit raises
    with open("random_accuracy_performance_" + tag + "_dobra_" + fold + ".txt", "a") as report:
        for index in range(N_QUERIES):
            n_pool = X_pool.shape[0]
            if n_pool == 0:
                # Pool exhausted: nothing left to sample
                break

            # Start timing the strategy (query + insertion + refit)
            t1 = time.time()
            # Draw without replacement so no row is added twice and every
            # drawn row is really removed from the pool
            batch = np.random.choice(n_pool, size=min(BATCH_SIZE, n_pool), replace=False)
            X_train = np.append(X_train, X_pool[batch], axis=0)
            y_train = np.append(y_train, y_pool[batch], axis=0)
            learner.fit(X_train, y_train)
            predictions = learner.predict(X_teste)

            # Remove the queried rows from the pool
            X_pool = np.delete(X_pool, batch, axis=0)
            y_pool = np.delete(y_pool, batch, axis=0)

            # Stop timing and store it as HH:MM:SS
            elapsed = time.time() - t1
            hours, rem = divmod(elapsed, 3600)
            minutes, seconds = divmod(rem, 60)
            tempo_history.append("{:0>2}:{:0>2}:{:0>2}".format(int(hours), int(minutes), int(seconds)))

            accuracy, precision, recall, f1 = _scores(y_teste, predictions)
            accuracy_history.append(accuracy)
            f1_history.append(f1)

            # Progress on screen, details in the report file
            clear_output(wait=True)
            print ('Accuracy_' + tag + ' after query no. %d: %f' % (index+1, accuracy))
            report.write('Accuracy after query no. %d: %f \n' % (index+1, accuracy))
            report.write('Precision after query no. %d: %f \n' % (index+1, precision))
            report.write('Recall after query no. %d: %f \n' % (index+1, recall))
            report.write('F1 score after query no. %d: %f \n' % (index+1, f1))
            report.write('======================================== \n')

        # Per-class evaluation of the last fitted model
        report.write("\n Avaliação por classe \n")
        report.write(classification_report(y_teste, predictions, zero_division=1))

    np.savetxt("random_accuracy_history_" + tag + "_dobra_" + fold + ".csv", accuracy_history, delimiter=",")
    np.savetxt("random_F1_Score_history_" + tag + "_dobra_" + fold + ".csv", f1_history)
    np.savetxt("random_tempo_history_" + tag + "_dobra_" + fold + ".csv", tempo_history, fmt="%s")


def random_sampling_knn(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline with a 5-nearest-neighbours classifier.

    Output files carry the tag "knn"; see _run_random_sampling for the
    arguments and the files written.
    """
    from sklearn.neighbors import KNeighborsClassifier

    _run_random_sampling(KNeighborsClassifier(n_neighbors=5), "knn",
                         X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)


def random_sampling_rf(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline with a random forest (random_state=42).

    Output files carry the tag "rf"; see _run_random_sampling for the
    arguments and the files written.
    """
    from sklearn.ensemble import RandomForestClassifier

    _run_random_sampling(RandomForestClassifier(random_state=42), "rf",
                         X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)


def random_sampling_tree(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline with a depth-limited decision tree.

    Output files carry the tag "tree"; see _run_random_sampling for the
    arguments and the files written.
    """
    from sklearn.tree import DecisionTreeClassifier

    learner_tree = DecisionTreeClassifier(max_depth=5, min_samples_split=2, min_samples_leaf=2)
    _run_random_sampling(learner_tree, "tree",
                         X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)


def random_sampling_mlp(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline with a multi-layer perceptron (max_iter=2000).

    Output files carry the tag "mlp"; see _run_random_sampling for the
    arguments and the files written.
    """
    from sklearn.neural_network import MLPClassifier

    _run_random_sampling(MLPClassifier(max_iter=2000), "mlp",
                         X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)


def random_sampling_xgb(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline with gradient boosting.

    Despite the "xgb" tag used in the function and file names, the model is
    scikit-learn's GradientBoostingClassifier; the xgboost package is not
    needed.

    See _run_random_sampling for the arguments and the files written.
    """
    from sklearn.ensemble import GradientBoostingClassifier

    learner_xgb = GradientBoostingClassifier(n_estimators=7, learning_rate=1.0, max_depth=1, random_state=42)
    _run_random_sampling(learner_xgb, "xgb",
                         X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)
    

def random_sampling_svm(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline for one fold using a linear-kernel SVC.

    Fits the classifier on a small stratified seed set, then runs N_QUERIES
    rounds in which BATCH_SIZE rows are drawn at random from the pool, moved
    into the training set, and the classifier is refit and scored on the
    fold's test split.

    Relies on the module-level names ``np``, ``N_QUERIES`` and ``BATCH_SIZE``
    being defined before the call.

    Parameters:
        X_raw: feature array indexed by the fold index arrays.
        y_raw: label array aligned with ``X_raw``.
        idx_data: per-fold sequence of ``[train_index, test_index]``.
        idx_dobra: fold number; also used in the output file names.
        TRAIN, TEST: positions of the train / test indices inside a fold entry.
        t_inicial: number of seed samples added on top of one-per-class.

    Side effects:
        Appends a text report to ``random_accuracy_performance_svm_dobra_<fold>.txt``
        and writes the accuracy, F1 and per-query time histories to three CSV
        files in the current working directory. Prints the accuracy of the
        latest query (clearing the previous cell output).
    """
    import time

    from IPython.display import clear_output
    from sklearn import svm
    from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split

    # Fold number as text, used as the suffix of every output file
    indica_pool = str(idx_dobra)

    fold_train_idx = idx_data[idx_dobra][TRAIN]
    fold_test_idx = idx_data[idx_dobra][TEST]
    # Test split of the fold in use
    X_teste, y_teste = X_raw[fold_test_idx], y_raw[fold_test_idx]
    # Training split of the fold in use; this is the pool that queries draw from.
    # NOTE(review): the seed rows selected below are not removed from the pool,
    # so they can be drawn again by a later query - confirm this is intended.
    X_pool, y_pool = X_raw[fold_train_idx], y_raw[fold_train_idx]

    # Stratified seed set of size (number of classes + t_inicial). Per the original
    # author's note, stratifying is meant to start with every class represented,
    # which a purely random draw would not guarantee.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    def _evaluate(predicted):
        """Return (accuracy, macro precision, macro recall, F1) on the test split."""
        accuracy = accuracy_score(y_teste, predicted)
        precision = precision_score(y_teste, predicted, average='macro', zero_division=1)
        recall = recall_score(y_teste, predicted, average='macro', zero_division=1)
        denominator = precision + recall
        # F1 here is the harmonic mean of the macro-averaged precision and recall
        # (not sklearn's macro f1_score); defined as 0.0 when both are zero.
        f1 = 2 * ((precision * recall) / denominator) if denominator else 0.0
        return accuracy, precision, recall, f1

    learner_svm = svm.SVC(kernel='linear', probability=True)
    report_path = "random_accuracy_performance_svm_dobra_" + indica_pool + ".txt"
    arquivo_accuracy_history_svm = "random_accuracy_history_svm_dobra_" + indica_pool + ".csv"
    arquivo_f1_score_history_svm = "random_F1_Score_history_svm_dobra_" + indica_pool + ".csv"
    arquivo_tempo_history_svm = "random_tempo_history_svm_dobra_" + indica_pool + ".csv"

    # Score on the test split after training on the seed set only
    learner_svm.fit(X_train, y_train)
    predictions = learner_svm.predict(X_teste)
    accuracy, _, _, f1score_svm = _evaluate(predictions)
    performance_history_svm = [accuracy]
    f1_score_history_svm = [f1score_svm]
    tempo_history_svm = []

    # The context manager closes the report even if a fit or predict call raises
    with open(report_path, "a") as arquivo_accuracy_svm:
        for index in range(N_QUERIES):
            n_pool = X_pool.shape[0]
            if n_pool == 0:
                # Nothing left to query
                break

            # Time the whole query: sampling, refit, prediction and pool update
            t1_svm = time.time()
            # Draw without replacement so a row cannot enter the training set
            # twice within one batch and the pool shrinks by the full batch.
            queried = np.random.choice(n_pool, size=min(BATCH_SIZE, n_pool), replace=False)
            X_train = np.append(X_train, X_pool[queried], axis=0)
            y_train = np.append(y_train, y_pool[queried], axis=0)
            learner_svm.fit(X_train, y_train)
            predictions = learner_svm.predict(X_teste)
            accuracy, precision, recall, f1score_svm = _evaluate(predictions)
            performance_history_svm.append(accuracy)

            # Remove the queried rows from the pool
            X_pool = np.delete(X_pool, queried, axis=0)
            y_pool = np.delete(y_pool, queried, axis=0)

            time_elapsed_svm = time.time() - t1_svm
            hours_svm, rem_svm = divmod(time_elapsed_svm, 3600)
            minutes_svm, seconds_svm = divmod(rem_svm, 60)
            tempo_history_svm.append("{:0>2}:{:0>2}:{:0>2}".format(int(hours_svm),int(minutes_svm),int(seconds_svm)))

            # Show only the latest accuracy in the cell output; log everything to the report
            clear_output(wait=True)
            print ('Accuracy_svm after query no. %d: %f' % (index+1, accuracy))
            arquivo_accuracy_svm.write('Accuracy after query no. %d: %f \n' % (index+1, accuracy))
            arquivo_accuracy_svm.write('Precision after query no. %d: %f \n' % (index+1, precision))
            arquivo_accuracy_svm.write('Recall after query no. %d: %f \n' % (index+1, recall))
            arquivo_accuracy_svm.write('F1 score after query no. %d: %f \n' % (index+1, f1score_svm))
            f1_score_history_svm.append(f1score_svm)
            arquivo_accuracy_svm.write('======================================== \n')

        # Per-class report for the predictions of the last fit
        arquivo_accuracy_svm.write("\n Avaliação por classe \n")
        arquivo_accuracy_svm.write(classification_report(y_teste, predictions,zero_division=1))

    np.savetxt(arquivo_accuracy_history_svm, performance_history_svm,delimiter=",")
    np.savetxt(arquivo_f1_score_history_svm, f1_score_history_svm)
    np.savetxt(arquivo_tempo_history_svm,tempo_history_svm,fmt="%s")
    

def random_sampling_nb(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline for one fold using Gaussian Naive Bayes.

    Fits the classifier on a small stratified seed set, then runs N_QUERIES
    rounds in which BATCH_SIZE rows are drawn at random from the pool, moved
    into the training set, and the classifier is refit and scored on the
    fold's test split.

    Relies on the module-level names ``np``, ``N_QUERIES`` and ``BATCH_SIZE``
    being defined before the call.

    Parameters:
        X_raw: feature array indexed by the fold index arrays.
        y_raw: label array aligned with ``X_raw``.
        idx_data: per-fold sequence of ``[train_index, test_index]``.
        idx_dobra: fold number; also used in the output file names.
        TRAIN, TEST: positions of the train / test indices inside a fold entry.
        t_inicial: number of seed samples added on top of one-per-class.

    Side effects:
        Appends a text report to ``random_accuracy_performance_nb_dobra_<fold>.txt``
        and writes the accuracy, F1 and per-query time histories to three CSV
        files in the current working directory. Prints the accuracy of the
        latest query (clearing the previous cell output).
    """
    import time

    from IPython.display import clear_output
    from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    # Fold number as text, used as the suffix of every output file
    indica_pool = str(idx_dobra)

    fold_train_idx = idx_data[idx_dobra][TRAIN]
    fold_test_idx = idx_data[idx_dobra][TEST]
    # Test split of the fold in use
    X_teste, y_teste = X_raw[fold_test_idx], y_raw[fold_test_idx]
    # Training split of the fold in use; this is the pool that queries draw from.
    # NOTE(review): the seed rows selected below are not removed from the pool,
    # so they can be drawn again by a later query - confirm this is intended.
    X_pool, y_pool = X_raw[fold_train_idx], y_raw[fold_train_idx]

    # Stratified seed set of size (number of classes + t_inicial). Per the original
    # author's note, stratifying is meant to start with every class represented,
    # which a purely random draw would not guarantee.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    def _evaluate(predicted):
        """Return (accuracy, macro precision, macro recall, F1) on the test split."""
        accuracy = accuracy_score(y_teste, predicted)
        precision = precision_score(y_teste, predicted, average='macro', zero_division=1)
        recall = recall_score(y_teste, predicted, average='macro', zero_division=1)
        denominator = precision + recall
        # F1 here is the harmonic mean of the macro-averaged precision and recall
        # (not sklearn's macro f1_score); defined as 0.0 when both are zero.
        f1 = 2 * ((precision * recall) / denominator) if denominator else 0.0
        return accuracy, precision, recall, f1

    learner_nb = GaussianNB()
    report_path = "random_accuracy_performance_nb_dobra_" + indica_pool + ".txt"
    arquivo_accuracy_history_nb = "random_accuracy_history_nb_dobra_" + indica_pool + ".csv"
    arquivo_f1_score_history_nb = "random_F1_Score_history_nb_dobra_" + indica_pool + ".csv"
    arquivo_tempo_history_nb = "random_tempo_history_nb_dobra_" + indica_pool + ".csv"

    # Score on the test split after training on the seed set only
    learner_nb.fit(X_train, y_train)
    predictions = learner_nb.predict(X_teste)
    accuracy, _, _, f1score_nb = _evaluate(predictions)
    performance_history_nb = [accuracy]
    f1_score_history_nb = [f1score_nb]
    tempo_history_nb = []

    # The context manager closes the report even if a fit or predict call raises
    with open(report_path, "a") as arquivo_accuracy_nb:
        for index in range(N_QUERIES):
            n_pool = X_pool.shape[0]
            if n_pool == 0:
                # Nothing left to query
                break

            # Time the whole query: sampling, refit, prediction and pool update
            t1_nb = time.time()
            # Draw without replacement so a row cannot enter the training set
            # twice within one batch and the pool shrinks by the full batch.
            queried = np.random.choice(n_pool, size=min(BATCH_SIZE, n_pool), replace=False)
            X_train = np.append(X_train, X_pool[queried], axis=0)
            y_train = np.append(y_train, y_pool[queried], axis=0)
            learner_nb.fit(X_train, y_train)
            predictions = learner_nb.predict(X_teste)
            accuracy, precision, recall, f1score_nb = _evaluate(predictions)
            performance_history_nb.append(accuracy)

            # Remove the queried rows from the pool
            X_pool = np.delete(X_pool, queried, axis=0)
            y_pool = np.delete(y_pool, queried, axis=0)

            time_elapsed_nb = time.time() - t1_nb
            hours_nb, rem_nb = divmod(time_elapsed_nb, 3600)
            minutes_nb, seconds_nb = divmod(rem_nb, 60)
            tempo_history_nb.append("{:0>2}:{:0>2}:{:0>2}".format(int(hours_nb),int(minutes_nb),int(seconds_nb)))

            # Show only the latest accuracy in the cell output; log everything to the report
            clear_output(wait=True)
            print ('Accuracy_nb after query no. %d: %f' % (index+1, accuracy))
            arquivo_accuracy_nb.write('Accuracy after query no. %d: %f \n' % (index+1, accuracy))
            arquivo_accuracy_nb.write('Precision after query no. %d: %f \n' % (index+1, precision))
            arquivo_accuracy_nb.write('Recall after query no. %d: %f \n' % (index+1, recall))
            arquivo_accuracy_nb.write('F1 score after query no. %d: %f \n' % (index+1, f1score_nb))
            f1_score_history_nb.append(f1score_nb)
            arquivo_accuracy_nb.write('======================================== \n')

        # Per-class report for the predictions of the last fit
        arquivo_accuracy_nb.write("\n Avaliação por classe \n")
        arquivo_accuracy_nb.write(classification_report(y_teste, predictions,zero_division=1))

    np.savetxt(arquivo_accuracy_history_nb, performance_history_nb,delimiter=",")
    np.savetxt(arquivo_f1_score_history_nb, f1_score_history_nb)
    np.savetxt(arquivo_tempo_history_nb,tempo_history_nb,fmt="%s")
    
def random_sampling_adb(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial):
    """Random-sampling baseline for one fold using AdaBoost (100 estimators).

    Fits the classifier on a small stratified seed set, then runs N_QUERIES
    rounds in which BATCH_SIZE rows are drawn at random from the pool, moved
    into the training set, and the classifier is refit and scored on the
    fold's test split.

    Relies on the module-level names ``np``, ``N_QUERIES`` and ``BATCH_SIZE``
    being defined before the call.

    Parameters:
        X_raw: feature array indexed by the fold index arrays.
        y_raw: label array aligned with ``X_raw``.
        idx_data: per-fold sequence of ``[train_index, test_index]``.
        idx_dobra: fold number; also used in the output file names.
        TRAIN, TEST: positions of the train / test indices inside a fold entry.
        t_inicial: number of seed samples added on top of one-per-class.

    Side effects:
        Appends a text report to ``random_accuracy_performance_adb_dobra_<fold>.txt``
        and writes the accuracy, F1 and per-query time histories to three CSV
        files in the current working directory. Prints the accuracy of the
        latest query (clearing the previous cell output).
    """
    import time

    from IPython.display import clear_output
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score
    from sklearn.model_selection import train_test_split

    # Fold number as text, used as the suffix of every output file
    indica_pool = str(idx_dobra)

    fold_train_idx = idx_data[idx_dobra][TRAIN]
    fold_test_idx = idx_data[idx_dobra][TEST]
    # Test split of the fold in use
    X_teste, y_teste = X_raw[fold_test_idx], y_raw[fold_test_idx]
    # Training split of the fold in use; this is the pool that queries draw from.
    # NOTE(review): the seed rows selected below are not removed from the pool,
    # so they can be drawn again by a later query - confirm this is intended.
    X_pool, y_pool = X_raw[fold_train_idx], y_raw[fold_train_idx]

    # Stratified seed set of size (number of classes + t_inicial). Per the original
    # author's note, stratifying is meant to start with every class represented,
    # which a purely random draw would not guarantee.
    X_train, _, y_train, _ = train_test_split(
        X_pool,
        y_pool,
        train_size=len(np.unique(y_pool)) + t_inicial,
        stratify=y_pool,
    )

    def _evaluate(predicted):
        """Return (accuracy, macro precision, macro recall, F1) on the test split."""
        accuracy = accuracy_score(y_teste, predicted)
        precision = precision_score(y_teste, predicted, average='macro', zero_division=1)
        recall = recall_score(y_teste, predicted, average='macro', zero_division=1)
        denominator = precision + recall
        # F1 here is the harmonic mean of the macro-averaged precision and recall
        # (not sklearn's macro f1_score); defined as 0.0 when both are zero.
        f1 = 2 * ((precision * recall) / denominator) if denominator else 0.0
        return accuracy, precision, recall, f1

    learner_adb = AdaBoostClassifier(n_estimators=100, random_state=0)
    report_path = "random_accuracy_performance_adb_dobra_" + indica_pool + ".txt"
    arquivo_accuracy_history_adb = "random_accuracy_history_adb_dobra_" + indica_pool + ".csv"
    arquivo_f1_score_history_adb = "random_F1_Score_history_adb_dobra_" + indica_pool + ".csv"
    arquivo_tempo_history_adb = "random_tempo_history_adb_dobra_" + indica_pool + ".csv"

    # Score on the test split after training on the seed set only
    learner_adb.fit(X_train, y_train)
    predictions = learner_adb.predict(X_teste)
    accuracy, _, _, f1score_adb = _evaluate(predictions)
    performance_history_adb = [accuracy]
    f1_score_history_adb = [f1score_adb]
    tempo_history_adb = []

    # The context manager closes the report even if a fit or predict call raises
    with open(report_path, "a") as arquivo_accuracy_adb:
        for index in range(N_QUERIES):
            n_pool = X_pool.shape[0]
            if n_pool == 0:
                # Nothing left to query
                break

            # Time the whole query: sampling, refit, prediction and pool update
            t1_adb = time.time()
            # Draw without replacement so a row cannot enter the training set
            # twice within one batch and the pool shrinks by the full batch.
            queried = np.random.choice(n_pool, size=min(BATCH_SIZE, n_pool), replace=False)
            X_train = np.append(X_train, X_pool[queried], axis=0)
            y_train = np.append(y_train, y_pool[queried], axis=0)
            learner_adb.fit(X_train, y_train)
            predictions = learner_adb.predict(X_teste)
            accuracy, precision, recall, f1score_adb = _evaluate(predictions)
            performance_history_adb.append(accuracy)

            # Remove the queried rows from the pool
            X_pool = np.delete(X_pool, queried, axis=0)
            y_pool = np.delete(y_pool, queried, axis=0)

            time_elapsed_adb = time.time() - t1_adb
            hours_adb, rem_adb = divmod(time_elapsed_adb, 3600)
            minutes_adb, seconds_adb = divmod(rem_adb, 60)
            tempo_history_adb.append("{:0>2}:{:0>2}:{:0>2}".format(int(hours_adb),int(minutes_adb),int(seconds_adb)))

            # Show only the latest accuracy in the cell output; log everything to the report
            clear_output(wait=True)
            print ('Accuracy_adb after query no. %d: %f' % (index+1, accuracy))
            arquivo_accuracy_adb.write('Accuracy after query no. %d: %f \n' % (index+1, accuracy))
            arquivo_accuracy_adb.write('Precision after query no. %d: %f \n' % (index+1, precision))
            arquivo_accuracy_adb.write('Recall after query no. %d: %f \n' % (index+1, recall))
            arquivo_accuracy_adb.write('F1 score after query no. %d: %f \n' % (index+1, f1score_adb))
            f1_score_history_adb.append(f1score_adb)
            arquivo_accuracy_adb.write('======================================== \n')

        # Per-class report for the predictions of the last fit
        arquivo_accuracy_adb.write("\n Avaliação por classe \n")
        arquivo_accuracy_adb.write(classification_report(y_teste, predictions,zero_division=1))

    np.savetxt(arquivo_accuracy_history_adb, performance_history_adb,delimiter=",")
    np.savetxt(arquivo_f1_score_history_adb, f1_score_history_adb)
    np.savetxt(arquivo_tempo_history_adb,tempo_history_adb,fmt="%s")
    
    
    
import time
import sys
import threading
from datetime import datetime
from datetime import date

from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit, train_test_split
from modAL.uncertainty import classifier_uncertainty
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling
from timeit import Timer
import time
import functools
# importing the multiprocessing module
import multiprocessing

# Start the wall-clock timer for the whole experiment
t1 = time.time()
# Number of stratified shuffle-split folds
n_dobras = 10
# Seed-set size added on top of one sample per class (every strategy starts from this minimum)
t_inicial = 10

# Each entry of idx_data is [train_index, test_index]; TRAIN / TEST select one of the two,
# e.g. idx_data[0][TEST] holds the test indices of the first fold.
TRAIN = 0
TEST = 1

# Stratified shuffle-split folds with fixed train / test sizes.
# NOTE(review): 99996 + 99996 rows are requested per split, so X_raw presumably
# needs at least that many rows - confirm against the dataset in use.
data_cv = StratifiedShuffleSplit(n_splits=n_dobras, train_size=99996, test_size=99996, random_state=42)
# split() yields one (train_index, test_index) pair per fold
idx_data = [[train_index, test_index] for train_index, test_index in data_cv.split(X_raw, y_raw)]

# Query budget: N_RAW_SAMPLES rows in total, BATCH_SIZE rows per query
BATCH_SIZE = 400
N_RAW_SAMPLES = 20000
N_QUERIES = N_RAW_SAMPLES // BATCH_SIZE

# Baselines to run on every fold; uncomment the ones wanted.
# The earlier `threading.Thread(target=f(...)).start()` form called f synchronously
# and handed its return value to Thread, so a plain call does the same work.
RANDOM_SAMPLING_RUNNERS = (
    # random_sampling_knn,
    # random_sampling_rf,
    # random_sampling_nb,
    # random_sampling_mlp,
    # random_sampling_tree,
    # random_sampling_xgb,
    # random_sampling_svm,
    # random_sampling_adb,
)

# Run every selected baseline on every fold
for idx_dobra in range(n_dobras):
    for run_baseline in RANDOM_SAMPLING_RUNNERS:
        run_baseline(X_raw, y_raw, idx_data, idx_dobra, TRAIN, TEST, t_inicial)

# Report the total elapsed time as HH:MM:SS
t2 = time.time()
time_elapsed = (t2-t1)
hours, rem = divmod(time_elapsed, 3600)
minutes, seconds = divmod(rem, 60)
print("Tempo Total: {:0>2}:{:0>2}:{:0>2}".format(int(hours),int(minutes),int(seconds)))
