In [26]:
import numpy as np

def load_iris_data():
    # Carregar os dados do conjunto de dados iris
   
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data
    y = iris.target
    return X, y

def shuffle_data(X, y, random_state=None):
    # Embaralhar os dados mantendo a correspondência entre features e rótulos
    
    if random_state is not None:
        np.random.seed(random_state)
    indices = np.random.permutation(len(X))
    return X[indices], y[indices]

def split_data(X, y, test_size=0.3):
    # Dividir os dados em conjuntos de treinamento e teste
    split_index = int(len(X) * (1 - test_size))
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]
    return X_train, X_test, y_train, y_test

def calculate_means(X_train, y_train):
    # Calcular as médias de cada classe
    unique_classes = np.unique(y_train)
    class_means = []
    for cls in unique_classes:
        class_means.append(np.mean(X_train[y_train == cls], axis=0))
    return class_means

def calculate_covariance_matrices(X_train, y_train, means):
    # Calcular as matrizes de covariância de cada classe
    unique_classes = np.unique(y_train)
    cov_matrices = []
    for cls, mean in zip(unique_classes, means):
        cov_matrix = np.cov(X_train[y_train == cls], rowvar=False)
        cov_matrices.append(cov_matrix)
    return cov_matrices

def gaussian_pdf(x, mean, cov):
    # Função de densidade de probabilidade gaussiana multivariada
    n = len(x)
    exp_term = np.exp(-0.5 * np.dot(np.dot((x - mean).T, np.linalg.inv(cov)), (x - mean)))
    coef = 1 / ((2 * np.pi) ** (n / 2) * np.linalg.det(cov) ** 0.5)
    return coef * exp_term

def predict_class(X_test, class_means, cov_matrices):
    # Predição da classe para cada amostra no conjunto de teste
    y_pred = []
    for x in X_test:
        probabilities = []
        for mean, cov in zip(class_means, cov_matrices):
            probabilities.append(gaussian_pdf(x, mean, cov))
        y_pred.append(np.argmax(probabilities))
    return np.array(y_pred)

def accuracy_score(y_true, y_pred):
    # Calcular a acurácia
    return np.mean(y_true == y_pred)

def confusion_matrix(y_true, y_pred, num_classes):
    # Calcular a matriz de confusão
    conf_matrix = np.zeros((num_classes, num_classes), dtype=int)
    for true, pred in zip(y_true, y_pred):
        conf_matrix[true][pred] += 1
    return conf_matrix

def holdout_experiment(X, y, n_runs=20, test_size=0.3):
    accuracies = []

    for _ in range(n_runs):
        # Embaralhar e dividir os dados
        X_shuffled, y_shuffled = shuffle_data(X, y, random_state=_)
        X_train, X_test, y_train, y_test = split_data(X_shuffled, y_shuffled, test_size=test_size)

        # Calcular as médias e as matrizes de covariância de cada classe
        class_means = calculate_means(X_train, y_train)
        cov_matrices = calculate_covariance_matrices(X_train, y_train, class_means)

        # Realizar a predição
        y_pred = predict_class(X_test, class_means, cov_matrices)

        # Calcular a acurácia e armazenar
        accuracies.append(accuracy_score(y_test, y_pred))

    accuracies = np.array(accuracies)
    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    best_index = np.argmax(accuracies)

    # Calcular a matriz de confusão da melhor realização
    X_train, X_test, y_train, y_test = split_data(X, y, test_size=test_size)
    class_means = calculate_means(X_train, y_train)
    cov_matrices = calculate_covariance_matrices(X_train, y_train, class_means)
    y_pred = predict_class(X_test, class_means, cov_matrices)
    conf_matrix = confusion_matrix(y_test, y_pred, len(np.unique(y)))

    return mean_accuracy, std_accuracy, conf_matrix

# Carregar os dados iris
X, y = load_iris_data()

# Executar o experimento holdout
mean_accuracy, std_accuracy, best_conf_matrix = holdout_experiment(X, y)

print("Mean Accuracy:", mean_accuracy)
print("Standard Deviation of Accuracy:", std_accuracy)
print("Best Confusion Matrix:")
print(best_conf_matrix)


Mean Accuracy: 0.9644444444444444
Standard Deviation of Accuracy: 0.028458329944145988
Best Confusion Matrix:
[[ 0  0  0]
 [ 0  0  0]
 [ 0 34 11]]


In [25]:
import numpy as np

def load_iris_data():
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data
    y = iris.target
    return X, y

def shuffle_data(X, y, random_state=None):
    if random_state is not None:
        np.random.seed(random_state)
    indices = np.random.permutation(len(X))
    return X[indices], y[indices]

def split_data(X, y, test_size=0.3):
    split_index = int(len(X) * (1 - test_size))
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]
    return X_train, X_test, y_train, y_test

def calculate_means(X_train, y_train):
    unique_classes = np.unique(y_train)
    class_means = []
    for cls in unique_classes:
        class_means.append(np.mean(X_train[y_train == cls], axis=0))
    return class_means

def calculate_covariance_matrices(X_train, y_train, means):
    unique_classes = np.unique(y_train)
    cov_matrices = []
    for cls, mean in zip(unique_classes, means):
        cov_matrix = np.cov(X_train[y_train == cls], rowvar=False)
        cov_matrices.append(cov_matrix)
    return cov_matrices

def gaussian_pdf(x, mean, cov):
    n = len(x)
    exp_term = np.exp(-0.5 * np.dot(np.dot((x - mean).T, np.linalg.inv(cov)), (x - mean)))
    coef = 1 / ((2 * np.pi) ** (n / 2) * np.linalg.det(cov) ** 0.5)
    return coef * exp_term

def predict_class(X_test, class_means, cov_matrices):
    y_pred = []
    for x in X_test:
        probabilities = []
        for mean, cov in zip(class_means, cov_matrices):
            probabilities.append(gaussian_pdf(x, mean, cov))
        y_pred.append(np.argmax(probabilities))
    return np.array(y_pred)

def accuracy_score(y_true, y_pred):
    return np.mean(y_true == y_pred)

def confusion_matrix(y_true, y_pred, num_classes):
    conf_matrix = np.zeros((num_classes, num_classes), dtype=int)
    for true, pred in zip(y_true, y_pred):
        conf_matrix[true, pred] += 1
    return conf_matrix

def holdout_experiment(X, y, n_runs=20, test_size=0.3):
    accuracies = []

    for _ in range(n_runs):
        X_shuffled, y_shuffled = shuffle_data(X, y, random_state=_)
        X_train, X_test, y_train, y_test = split_data(X_shuffled, y_shuffled, test_size=test_size)

        class_means = calculate_means(X_train, y_train)
        cov_matrices = calculate_covariance_matrices(X_train, y_train, class_means)

        y_pred = predict_class(X_test, class_means, cov_matrices)

        accuracies.append(accuracy_score(y_test, y_pred))

    accuracies = np.array(accuracies)
    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    best_index = np.argmax(accuracies)

    X_train, X_test, y_train, y_test = split_data(X, y, test_size=test_size)
    class_means = calculate_means(X_train, y_train)
    cov_matrices = calculate_covariance_matrices(X_train, y_train, class_means)
    y_pred = predict_class(X_test, class_means, cov_matrices)
    
    # Calcular a matriz de confusão manualmente
    conf_matrix = confusion_matrix(y_test, y_pred, num_classes=len(np.unique(y)))

    return mean_accuracy, std_accuracy, conf_matrix

X, y = load_iris_data()
mean_accuracy, std_accuracy, best_conf_matrix = holdout_experiment(X, y)

print("Mean Accuracy:", mean_accuracy)
print("Standard Deviation of Accuracy:", std_accuracy)
print("Best Confusion Matrix:")
print(best_conf_matrix)



Mean Accuracy: 0.9644444444444444
Standard Deviation of Accuracy: 0.028458329944145988
Best Confusion Matrix:
[[ 0  0  0]
 [ 0  0  0]
 [ 0 34 11]]


In [22]:
import numpy as np

# Carregar o conjunto de dados Iris
def load_iris_dataset():
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data
    y = iris.target
    return X, y

# Função para dividir os dados em conjunto de treinamento e teste
def train_test_split_custom(X, y, test_size=0.2):
    n_samples = X.shape[0]
    n_test = int(n_samples * test_size)
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    X_shuffled = X[indices]
    y_shuffled = y[indices]
    X_train = X_shuffled[:-n_test]
    X_test = X_shuffled[-n_test:]
    y_train = y_shuffled[:-n_test]
    y_test = y_shuffled[-n_test:]
    return X_train, X_test, y_train, y_test

# Modelo de classificação (substitua esta parte pelo seu modelo)
class Model:
    def fit(self, X_train, y_train):
        pass

    def predict(self, X_test):
        return np.random.randint(low=0, high=3, size=X_test.shape[0])

model = Model()

# Listas para armazenar os resultados
accuracies = []
confusion_matrices = []

# Iteração do experimento holdout
for _ in range(10):
    # Carregar o conjunto de dados Iris
    X, y = load_iris_dataset()
    
    # Dividir os dados em conjunto de treinamento e teste
    X_train, X_test, y_train, y_test = train_test_split_custom(X, y, test_size=0.2)
    
    # Treinar o modelo
    model.fit(X_train, y_train)
    
    # Prever os rótulos para o conjunto de teste
    y_pred = model.predict(X_test)
    
    # Calcular a precisão
    correct_predictions = np.sum(y_pred == y_test)
    accuracy = correct_predictions / len(y_test)
    
    # Calcular a matriz de confusão
    confusion_matrix = np.zeros((3, 3), dtype=int)
    for i in range(len(y_test)):
        confusion_matrix[y_test[i]][y_pred[i]] += 1
    
    # Armazenar os resultados
    accuracies.append(accuracy)
    confusion_matrices.append(confusion_matrix)

# Exibir os resultados
print("Mean Accuracy:", np.mean(accuracies))
print("Standard Deviation of Accuracy:", np.std(accuracies))
best_confusion_matrix_index = np.argmax(accuracies)
best_confusion_matrix = confusion_matrices[best_confusion_matrix_index]
print("Best Confusion Matrix:")
print(best_confusion_matrix)


Mean Accuracy: 0.35
Standard Deviation of Accuracy: 0.060092521257733164
Best Confusion Matrix:
[[2 3 2]
 [1 6 3]
 [2 6 5]]
