In [21]:
import matplotlib.pyplot as plt
import numpy as np
import csv 
import pandas as pd
import xgboost as xgb
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier


percentualTreinamento = 0.7


dataset = {}
# Define the numeric labels to filter
labels={'M','B'}
labels_list = list(labels)


# Define column names based on dataset documentation
columns = [
    "ID", "Diagnosis",
    "Radius_mean", "Texture_mean", "Perimeter_mean", "Area_mean", "Smoothness_mean",
    "Compactness_mean", "Concavity_mean", "Concave_points_mean", "Symmetry_mean", "Fractal_dimension_mean",
    "Radius_se", "Texture_se", "Perimeter_se", "Area_se", "Smoothness_se",
    "Compactness_se", "Concavity_se", "Concave_points_se", "Symmetry_se", "Fractal_dimension_se",
    "Radius_worst", "Texture_worst", "Perimeter_worst", "Area_worst", "Smoothness_worst",
    "Compactness_worst", "Concavity_worst", "Concave_points_worst", "Symmetry_worst", "Fractal_dimension_worst"
]

# Load the dataset
dadosBrutos = pd.read_csv("../../dataset/wdbc.data", names=columns, header=None)

# Drop the ID column (not useful for analysis)
dadosBrutos.drop(columns=["ID"], inplace=True)


# Filter rows where a column equals one of the labels
dadosBrutosLabel0 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[0]] 
dadosBrutosLabel1 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[1]] 

# metadata 
#print(dadosBrutos.head())



In [None]:
# Total number of samples

nAmostras_treinamento0 = len(dadosBrutosLabel0)
tamanhoTreinamento0 = int(nAmostras_treinamento0 * percentualTreinamento)

# Randomly select indices for group 1 - treinamento
indices_label0_treinamento = np.random.choice(dadosBrutosLabel0.index, size=tamanhoTreinamento0, replace=False)
# Select remaining indices for group 1 - teste
indices_label0_teste = dadosBrutosLabel0.index.difference(indices_label0_treinamento)

# Determine the size of group 2
nAmostras_treinamento1 = len(dadosBrutosLabel1)
tamanhoTreinamento1 = int(nAmostras_treinamento1 * percentualTreinamento)

# Randomly select indices for group 2 - treinamento
indices_label1_treinamento = np.random.choice(dadosBrutosLabel1.index, size=tamanhoTreinamento1, replace=False)
indices_label1_teste = dadosBrutosLabel0.index.difference(indices_label1_treinamento)

trainData = dadosBrutosLabel0.loc[indices_label0_treinamento]
buffer_trainData = dadosBrutosLabel1.loc[indices_label1_treinamento]
testData = dadosBrutosLabel0.loc[indices_label0_teste]
buffer_testData = dadosBrutosLabel1.loc[indices_label1_treinamento]

filtered_rows_Train = pd.concat([trainData,buffer_trainData], axis=0)
filtered_rows_Test = pd.concat([testData,buffer_testData], axis=0)

trainData = filtered_rows_Train.iloc[:, 1:31].to_numpy()
trainLabels= filtered_rows_Train.iloc[:, 0]
testData = filtered_rows_Test.iloc[:, 1:31].to_numpy()
testLabels= filtered_rows_Test.iloc[:, 0]

# aleatoriza ordem de treinamento
#indicesShuffledTranData = np.random.choice(trainData.index, size=1, replace=False)
#trainData = trainData.loc[indicesShuffledTranData]
#trainLabels = trainLabels.loc[indicesShuffledTranData]


trainLabels = trainLabels.replace({labels_list[0]: 0, labels_list[1]: 1}).to_numpy()
testLabels = testLabels.replace({labels_list[0]: 0, labels_list[1]: 1}) .to_numpy()

print(trainLabels)


## Experimento 1: Ruído distribuído de forma igual aos dois rótulos

In [10]:
import statistics
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


percentualErrosMaximo=0.05
stepErros=0.006

vector = np.arange(0, percentualErrosMaximo, stepErros)
print("Blocos de erro entrada: ", vector)

SEED = 42
np.random.seed(SEED)

classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators = 15,random_state = SEED), # n_estimators =100
    "SVM (RBF)": SVC(C=100,kernel='rbf', gamma=0.001,class_weight='balanced',random_state = SEED),
    "XGBoost": XGBClassifier(n_estimators = 20, objective = 'binary:logistic', tree_method = 'hist', eta = 0.1, # n_estimators = 100
                             max_depth = 3, enable_categorical = True,random_state = SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='sgd',
                    max_iter=500, alpha=0.001,random_state = SEED)
}
# MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='lbfgs',
                    #max_iter=3000, alpha=0.001)

nRepeticoes=10
resultadosConsolidados_treino = []
resultadosConsolidados_teste = []
blocos = []
all_results=[]
resultadosAcc=[]


for nivelRuidoAtual in tqdm(vector):
    for indiceRepeticao in range(nRepeticoes):
        # Total number of samples

        dadosBrutosLabel0 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[0]] 
        dadosBrutosLabel1 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[1]] 

        nAmostras_treinamento0 = len(dadosBrutosLabel0)
        tamanhoTreinamento0 = int(nAmostras_treinamento0 * percentualTreinamento)
        tamanhoErroTreinamento0 = int(nAmostras_treinamento0 * (nivelRuidoAtual/2) )

        # Randomly select indices for group 1 - treinamento
        indices_label0_treinamento = np.random.choice(dadosBrutosLabel0.index, size=tamanhoTreinamento0, replace=False)
        indices_label0_erro =np.random.choice(indices_label0_treinamento, size=tamanhoErroTreinamento0, replace=False)

        # Select remaining indices for group 1 - teste
        indices_label0_teste = dadosBrutosLabel0.index.difference(indices_label0_treinamento)

        # Determine the size of group 2
        nAmostras_treinamento1 = len(dadosBrutosLabel1)
        tamanhoTreinamento1 = int(nAmostras_treinamento1 * percentualTreinamento)
        tamanhoErroTreinamento1 = int(nAmostras_treinamento0 * (nivelRuidoAtual/2) )

        # Randomly select indices for group 2 - treinamento
        indices_label1_treinamento = np.random.choice(dadosBrutosLabel1.index, size=tamanhoTreinamento1, replace=False)
        indices_label1_teste = dadosBrutosLabel0.index.difference(indices_label1_treinamento)
        indices_label1_erro =np.random.choice(indices_label1_treinamento, size=tamanhoErroTreinamento1, replace=False)

        # introduz erro
        dadosLabel0=dadosBrutosLabel0
        dadosLabel1=dadosBrutosLabel1
        dadosLabel0.loc[indices_label0_erro, "Diagnosis"] = dadosLabel0.loc[indices_label0_erro, "Diagnosis"].map({"M": "B", "B": "M"})
        dadosLabel1.loc[indices_label1_erro, "Diagnosis"] = dadosLabel1.loc[indices_label1_erro, "Diagnosis"].map({"M": "B", "B": "M"})

        trainData = dadosLabel0.loc[indices_label0_treinamento]
        buffer_trainData = dadosLabel1.loc[indices_label1_treinamento]
        testData = dadosLabel0.loc[indices_label0_teste]
        buffer_testData = dadosLabel1.loc[indices_label1_treinamento]

        filtered_rows_Train = pd.concat([trainData,buffer_trainData], axis=0)
        filtered_rows_Test = pd.concat([testData,buffer_testData], axis=0)

        #aleatoriza ordem de treinamento
        indicesShuffledTranData = np.random.choice(filtered_rows_Train.index, size=1, replace=False)
        trainData = filtered_rows_Train.loc[indicesShuffledTranData]
        trainLabels = filtered_rows_Train.loc[indicesShuffledTranData]


        trainData = filtered_rows_Train.iloc[:, 1:31]
        trainLabels= filtered_rows_Train.iloc[:, 0]
        testData = filtered_rows_Test.iloc[:, 1:31]
        testLabels= filtered_rows_Test.iloc[:, 0]


        trainLabels = filtered_rows_Train.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()
        testLabels = filtered_rows_Test.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()
          
        # Calcula o nivel de ruido real baseado no número de indices flipados divididio pelo numero total de indices 
        nivelRuidoAtual_real=(len(indices_label0_erro)+len(indices_label1_erro))/len(trainLabels)
        
        ### Treinamentos
        for name, model in classifiers.items():
            model.fit(trainData, trainLabels)
            resultadoTreinamento=model.predict(trainData)
            resultadoTeste=model.predict(testData)
            acc_teste = ( sum((testLabels==resultadoTeste))/len(testLabels) )
            resultadosAcc.append([nivelRuidoAtual_real, name, indiceRepeticao,acc_teste])
            for true_label, pred_label in zip(resultadoTeste, testLabels):
                all_results.append([nivelRuidoAtual_real, name, indiceRepeticao, true_label, pred_label, acc_teste])

        
      
    # print("observados_treinamento=",observados_treinamento)
    # print("soma=",sum(trainLabels==observados_treinamento))æ
    # print("len=",len(trainLabels))

    # print(sum(trainLabels==observados_treinamento)/len(trainLabels))

print(blocos)

df_results = pd.DataFrame(all_results, columns=["Noise Level", "Classifier", "IndexRep", "True Label", "Predicted Label", "Accuracy"])
resultadosExp = pd.DataFrame(resultadosAcc, columns=["Noise Level", "Classifier", "IndexRep", "Accuracy"])


Blocos de erro entrada:  [0.    0.006]


100%|██████████| 2/2 [00:08<00:00,  4.18s/it]

[]





In [19]:
import os
print(resultadosExp)
string = "../resultados/resultadosExperimento1.csv" 
if not os.path.exists(string):
    resultadosExp.to_csv(string, index=True)


    Noise Level     Classifier  IndexRep  Accuracy
0           0.0  Random Forest         0  0.984026
1           0.0      SVM (RBF)         0  0.977636
2           0.0        XGBoost         0  0.984026
3           0.0            MLP         0  0.904153
4           0.0  Random Forest         1  0.980831
..          ...            ...       ...       ...
75          0.0            MLP         8  0.648562
76          0.0  Random Forest         9  0.980831
77          0.0      SVM (RBF)         9  0.974441
78          0.0        XGBoost         9  0.990415
79          0.0            MLP         9  0.961661

[80 rows x 4 columns]


## Experimento 2: Médico tendencioso a classificar como tumor maligno

In [16]:
import statistics
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


percentualErrosMaximo=0.05
stepErros=0.006

vector = np.arange(0, percentualErrosMaximo, stepErros)
print("Blocos de erro entrada: ", vector)

SEED = 42
np.random.seed(SEED)

classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators = 15,random_state = SEED), # n_estimators =100
    "SVM (RBF)": SVC(C=100,kernel='rbf', gamma=0.001,class_weight='balanced',random_state = SEED),
    "XGBoost": XGBClassifier(n_estimators = 20, objective = 'binary:logistic', tree_method = 'hist', eta = 0.1, # n_estimators = 100
                             max_depth = 3, enable_categorical = True,random_state = SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='sgd',
                    max_iter=500, alpha=0.001,random_state = SEED)
}
# MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='lbfgs',
                    #max_iter=3000, alpha=0.001)

nRepeticoes=10
resultadosConsolidados_treino = []
resultadosConsolidados_teste = []
blocos = []
all_results=[]
resultadosAcc=[]


for nivelRuidoAtual in tqdm(vector):
    for indiceRepeticao in range(nRepeticoes):
        # Total number of samples

        dadosBrutosLabel0 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[0]] # maligno
        dadosBrutosLabel1 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[1]] 

        nAmostras_treinamento0 = len(dadosBrutosLabel0)
        tamanhoTreinamento0 = int(nAmostras_treinamento0 * percentualTreinamento)
        tamanhoErroTreinamento0 = 0 # tendencioso a classificar maligno

        # Randomly select indices for group 1 - treinamento
        indices_label0_treinamento = np.random.choice(dadosBrutosLabel0.index, size=tamanhoTreinamento0, replace=False)
        indices_label0_erro =np.random.choice(indices_label0_treinamento, size=tamanhoErroTreinamento0, replace=False)

        # Select remaining indices for group 1 - teste
        indices_label0_teste = dadosBrutosLabel0.index.difference(indices_label0_treinamento)

        # Determine the size of group 2
        nAmostras_treinamento1 = len(dadosBrutosLabel1)
        tamanhoTreinamento1 = int(nAmostras_treinamento1 * percentualTreinamento)
        tamanhoErroTreinamento1 = int(nAmostras_treinamento0 * (nivelRuidoAtual) ) # tendencioso a classificar maligno

        # Randomly select indices for group 2 - treinamento
        indices_label1_treinamento = np.random.choice(dadosBrutosLabel1.index, size=tamanhoTreinamento1, replace=False)
        indices_label1_teste = dadosBrutosLabel0.index.difference(indices_label1_treinamento)
        indices_label1_erro =np.random.choice(indices_label1_treinamento, size=tamanhoErroTreinamento1, replace=False)

        # introduz erro
        dadosLabel0=dadosBrutosLabel0
        dadosLabel1=dadosBrutosLabel1
        dadosLabel0.loc[indices_label0_erro, "Diagnosis"] = dadosLabel0.loc[indices_label0_erro, "Diagnosis"].map({"M": "B", "B": "M"})
        dadosLabel1.loc[indices_label1_erro, "Diagnosis"] = dadosLabel1.loc[indices_label1_erro, "Diagnosis"].map({"M": "B", "B": "M"})

        trainData = dadosLabel0.loc[indices_label0_treinamento]
        buffer_trainData = dadosLabel1.loc[indices_label1_treinamento]
        testData = dadosLabel0.loc[indices_label0_teste]
        buffer_testData = dadosLabel1.loc[indices_label1_treinamento]

        filtered_rows_Train = pd.concat([trainData,buffer_trainData], axis=0)
        filtered_rows_Test = pd.concat([testData,buffer_testData], axis=0)

        #aleatoriza ordem de treinamento
        indicesShuffledTranData = np.random.choice(filtered_rows_Train.index, size=1, replace=False)
        trainData = filtered_rows_Train.loc[indicesShuffledTranData]
        trainLabels = filtered_rows_Train.loc[indicesShuffledTranData]


        trainData = filtered_rows_Train.iloc[:, 1:31]
        trainLabels= filtered_rows_Train.iloc[:, 0]
        testData = filtered_rows_Test.iloc[:, 1:31]
        testLabels= filtered_rows_Test.iloc[:, 0]


        trainLabels = filtered_rows_Train.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()
        testLabels = filtered_rows_Test.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()


        # Calcula o nivel de ruido real baseado no número de indices flipados divididio pelo numero total de indices 
        nivelRuidoAtual_real=(len(indices_label0_erro)+len(indices_label1_erro))/len(trainLabels)
        
        ### Treinamentos
        for name, model in classifiers.items():
            model.fit(trainData, trainLabels)
            resultadoTreinamento=model.predict(trainData)
            resultadoTeste=model.predict(testData)
            acc_teste = ( sum((testLabels==resultadoTeste))/len(testLabels) )
            resultadosAcc.append([nivelRuidoAtual_real, name, indiceRepeticao,acc_teste])
            for true_label, pred_label in zip(resultadoTeste, testLabels):
                all_results.append([nivelRuidoAtual_real, name, indiceRepeticao, true_label, pred_label, acc_teste])

        
      
    # print("observados_treinamento=",observados_treinamento)
    # print("soma=",sum(trainLabels==observados_treinamento))æ
    # print("len=",len(trainLabels))

    # print(sum(trainLabels==observados_treinamento)/len(trainLabels))

print(blocos)

df_results2 = pd.DataFrame(all_results, columns=["Noise Level", "Classifier", "IndexRep", "True Label", "Predicted Label", "Accuracy"])
resultadosExp2 = pd.DataFrame(resultadosAcc, columns=["Noise Level", "Classifier", "IndexRep", "Accuracy"])


Blocos de erro entrada:  [0.    0.006 0.012 0.018 0.024 0.03  0.036 0.042 0.048]


  0%|          | 0/9 [00:00<?, ?it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 11%|█         | 1/9 [00:00<00:03,  2.32it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 22%|██▏       | 2/9 [00:00<00:02,  2.53it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 33%|███▎      | 3/9 [00:01<00:04,  1.47it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 44%|████▍     | 4/9 [00:02<00:03,  1.60it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 56%|█████▌    | 5/9 [00:02<00:02,  1.74it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 67%|██████▋   | 6/9 [00:03<00:01,  1.71it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1]


 78%|███████▊  | 7/9 [00:03<00:01,  1.87it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 89%|████████▉ | 8/9 [00:04<00:00,  1.89it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


100%|██████████| 9/9 [00:04<00:00,  1.83it/s]

[]





In [18]:
import os
print(resultadosExp2)
string = "../resultados/resultadosExperimento2.csv" 
if not os.path.exists(string):
    resultadosExp2.to_csv(string, index=True)

    Noise Level     Classifier  IndexRep  Accuracy
0      0.000000  Random Forest         0  0.984026
1      0.000000      SVM (RBF)         0  0.977636
2      0.000000        XGBoost         0  0.984026
3      0.000000            MLP         0  0.904153
4      0.002519  Random Forest         0  0.987220
5      0.002519      SVM (RBF)         0  0.968051
6      0.002519        XGBoost         0  0.980831
7      0.002519            MLP         0  0.904153
8      0.005038  Random Forest         0  0.993610
9      0.005038      SVM (RBF)         0  0.987220
10     0.005038        XGBoost         0  0.987220
11     0.005038            MLP         0  0.945687
12     0.007557  Random Forest         0  0.961661
13     0.007557      SVM (RBF)         0  0.984026
14     0.007557        XGBoost         0  0.961661
15     0.007557            MLP         0  0.900958
16     0.012594  Random Forest         0  0.977636
17     0.012594      SVM (RBF)         0  0.987220
18     0.012594        XGBoost 

## Experimento 3: Médico tendencioso a classificar como tumor benigno

In [15]:
import statistics
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


percentualErrosMaximo=0.05
stepErros=0.006

vector = np.arange(0, percentualErrosMaximo, stepErros)
print("Blocos de erro entrada: ", vector)

SEED = 42
np.random.seed(SEED)

classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators = 15,random_state = SEED), # n_estimators =100
    "SVM (RBF)": SVC(C=100,kernel='rbf', gamma=0.001,class_weight='balanced',random_state = SEED),
    "XGBoost": XGBClassifier(n_estimators = 20, objective = 'binary:logistic', tree_method = 'hist', eta = 0.1, # n_estimators = 100
                             max_depth = 3, enable_categorical = True,random_state = SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='sgd',
                    max_iter=500, alpha=0.001,random_state = SEED)
}
# MLPClassifier(hidden_layer_sizes=(50), activation='tanh', solver='lbfgs',
                    #max_iter=3000, alpha=0.001)

nRepeticoes=10
resultadosConsolidados_treino = []
resultadosConsolidados_teste = []
blocos = []
all_results=[]
resultadosAcc=[]


for nivelRuidoAtual in tqdm(vector):
    for indiceRepeticao in range(nRepeticoes):
        # Total number of samples

        dadosBrutosLabel0 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[0]] # maligno
        dadosBrutosLabel1 = dadosBrutos[dadosBrutos['Diagnosis'] == labels_list[1]] # benigno

        nAmostras_treinamento0 = len(dadosBrutosLabel0)
        tamanhoTreinamento0 = int(nAmostras_treinamento0 * percentualTreinamento)
        tamanhoErroTreinamento0 = int(nAmostras_treinamento0 * (nivelRuidoAtual) )

        # Randomly select indices for group 1 - treinamento
        indices_label0_treinamento = np.random.choice(dadosBrutosLabel0.index, size=tamanhoTreinamento0, replace=False)
        indices_label0_erro =np.random.choice(indices_label0_treinamento, size=tamanhoErroTreinamento0, replace=False)

        # Select remaining indices for group 1 - teste
        indices_label0_teste = dadosBrutosLabel0.index.difference(indices_label0_treinamento)

        # Determine the size of group 2
        nAmostras_treinamento1 = len(dadosBrutosLabel1)
        tamanhoTreinamento1 = int(nAmostras_treinamento1 * percentualTreinamento)
        tamanhoErroTreinamento1 = 0

        # Randomly select indices for group 2 - treinamento
        indices_label1_treinamento = np.random.choice(dadosBrutosLabel1.index, size=tamanhoTreinamento1, replace=False)
        indices_label1_teste = dadosBrutosLabel0.index.difference(indices_label1_treinamento)
        indices_label1_erro =np.random.choice(indices_label1_treinamento, size=tamanhoErroTreinamento1, replace=False)

        # introduz erro
        dadosLabel0=dadosBrutosLabel0
        dadosLabel1=dadosBrutosLabel1
        dadosLabel0.loc[indices_label0_erro, "Diagnosis"] = dadosLabel0.loc[indices_label0_erro, "Diagnosis"].map({"M": "B", "B": "M"})
        dadosLabel1.loc[indices_label1_erro, "Diagnosis"] = dadosLabel1.loc[indices_label1_erro, "Diagnosis"].map({"M": "B", "B": "M"})

        trainData = dadosLabel0.loc[indices_label0_treinamento]
        buffer_trainData = dadosLabel1.loc[indices_label1_treinamento]
        testData = dadosLabel0.loc[indices_label0_teste]
        buffer_testData = dadosLabel1.loc[indices_label1_treinamento]

        filtered_rows_Train = pd.concat([trainData,buffer_trainData], axis=0)
        filtered_rows_Test = pd.concat([testData,buffer_testData], axis=0)

        #aleatoriza ordem de treinamento
        indicesShuffledTranData = np.random.choice(filtered_rows_Train.index, size=1, replace=False)
        trainData = filtered_rows_Train.loc[indicesShuffledTranData]
        trainLabels = filtered_rows_Train.loc[indicesShuffledTranData]
        

        trainData = filtered_rows_Train.iloc[:, 1:31]
        trainLabels= filtered_rows_Train.iloc[:, 0]
        testData = filtered_rows_Test.iloc[:, 1:31]
        testLabels= filtered_rows_Test.iloc[:, 0]


        trainLabels = filtered_rows_Train.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()
        testLabels = filtered_rows_Test.loc[:, "Diagnosis"].map({"M": 0, "B": 1}).to_numpy()

        # Calcula o nivel de ruido real baseado no número de indices flipados divididio pelo numero total de indices 
        nivelRuidoAtual_real=(len(indices_label0_erro)+len(indices_label1_erro))/len(trainLabels)
        
        ### Treinamentos
        for name, model in classifiers.items():
            model.fit(trainData, trainLabels)
            resultadoTreinamento=model.predict(trainData)
            resultadoTeste=model.predict(testData)
            acc_teste = ( sum((testLabels==resultadoTeste))/len(testLabels) )
            resultadosAcc.append([nivelRuidoAtual_real, name, indiceRepeticao,acc_teste])
            for true_label, pred_label in zip(resultadoTeste, testLabels):
                all_results.append([nivelRuidoAtual_real, name, indiceRepeticao, true_label, pred_label, acc_teste])

        
      
    # print("observados_treinamento=",observados_treinamento)
    # print("soma=",sum(trainLabels==observados_treinamento))æ
    # print("len=",len(trainLabels))

    # print(sum(trainLabels==observados_treinamento)/len(trainLabels))

print(blocos)

df_results3 = pd.DataFrame(all_results, columns=["Noise Level", "Classifier", "IndexRep", "True Label", "Predicted Label", "Accuracy"])
resultadosExp3 = pd.DataFrame(resultadosAcc, columns=["Noise Level", "Classifier", "IndexRep", "Accuracy"])


Blocos de erro entrada:  [0.    0.006 0.012 0.018 0.024 0.03  0.036 0.042 0.048]


  0%|          | 0/9 [00:00<?, ?it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 11%|█         | 1/9 [00:00<00:03,  2.27it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 22%|██▏       | 2/9 [00:01<00:03,  1.94it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 33%|███▎      | 3/9 [00:01<00:03,  1.99it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 44%|████▍     | 4/9 [00:01<00:02,  2.16it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 56%|█████▌    | 5/9 [00:02<00:02,  1.83it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 67%|██████▋   | 6/9 [00:03<00:01,  1.85it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 78%|███████▊  | 7/9 [00:03<00:01,  1.78it/s]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


 89%|████████▉ | 8/9 [00:04<00:00,  1.91it/s]

[0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


100%|██████████| 9/9 [00:04<00:00,  1.94it/s]

[]





In [17]:
import os
print(resultadosExp3)
string = "../resultados/resultadosExperimento3.csv" 
if not os.path.exists(string):
    resultadosExp3.to_csv(string, index=True)

    Noise Level     Classifier  IndexRep  Accuracy
0      0.000000  Random Forest         0  0.984026
1      0.000000      SVM (RBF)         0  0.977636
2      0.000000        XGBoost         0  0.984026
3      0.000000            MLP         0  0.904153
4      0.002519  Random Forest         0  0.987220
5      0.002519      SVM (RBF)         0  0.968051
6      0.002519        XGBoost         0  0.974441
7      0.002519            MLP         0  0.843450
8      0.005038  Random Forest         0  0.987220
9      0.005038      SVM (RBF)         0  0.984026
10     0.005038        XGBoost         0  0.987220
11     0.005038            MLP         0  0.958466
12     0.007557  Random Forest         0  0.974441
13     0.007557      SVM (RBF)         0  0.984026
14     0.007557        XGBoost         0  0.974441
15     0.007557            MLP         0  0.952077
16     0.012594  Random Forest         0  0.987220
17     0.012594      SVM (RBF)         0  0.984026
18     0.012594        XGBoost 