# Test Dataset 24

### Mushroom Classifier

In [16]:
import pandas as pd
import numpy as np
import openml
import matplotlib.pyplot as plt

from AdaBoostWorkyWorky import AdaBoost

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score ,confusion_matrix, ConfusionMatrixDisplay

In [17]:
# get the data from OpenMl

suite = openml.study.get_suite(99)

task_id = 24 # Mushrooms Classification
task = openml.tasks.get_task(task_id)    

dataset = openml.datasets.get_dataset(task.dataset_id)

X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="array", target=dataset.default_target_attribute
)

#y Visualizar o dataset
df = pd.DataFrame(X, columns = attribute_names)

# converter:
#   0 -> -1
#   1 -> 1
df['target'] = 2*y-1 

df = df.dropna(how='any', axis=0)
X = df.drop(columns=['target'], axis=1)
y = df.target

df

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises%3F,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,target
0,5.0,2.0,4.0,1.0,6.0,2.0,0.0,1.0,4.0,0.0,...,7.0,7.0,0.0,2.0,1.0,5.0,2.0,3.0,5.0,1
1,5.0,2.0,9.0,1.0,0.0,2.0,0.0,0.0,4.0,0.0,...,7.0,7.0,0.0,2.0,1.0,5.0,3.0,2.0,1.0,-1
2,0.0,2.0,8.0,1.0,3.0,2.0,0.0,0.0,5.0,0.0,...,7.0,7.0,0.0,2.0,1.0,5.0,3.0,2.0,3.0,-1
3,5.0,3.0,8.0,1.0,6.0,2.0,0.0,1.0,5.0,0.0,...,7.0,7.0,0.0,2.0,1.0,5.0,2.0,3.0,5.0,1
4,5.0,2.0,3.0,0.0,5.0,2.0,2.0,0.0,4.0,1.0,...,7.0,7.0,0.0,2.0,1.0,1.0,3.0,0.0,1.0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7986,0.0,3.0,4.0,0.0,5.0,2.0,0.0,0.0,10.0,0.0,...,4.0,4.0,0.0,2.0,2.0,5.0,7.0,5.0,4.0,-1
8001,5.0,3.0,4.0,0.0,5.0,2.0,0.0,0.0,10.0,0.0,...,4.0,4.0,0.0,2.0,2.0,5.0,7.0,5.0,4.0,-1
8038,5.0,3.0,3.0,1.0,5.0,2.0,0.0,0.0,10.0,0.0,...,7.0,7.0,0.0,2.0,2.0,5.0,7.0,5.0,4.0,-1
8095,5.0,3.0,1.0,0.0,4.0,2.0,0.0,0.0,11.0,0.0,...,1.0,1.0,0.0,2.0,0.0,4.0,7.0,1.0,0.0,1


In [18]:
'''# Splitting the data set into train and test datasets
X = df.drop(['target'], axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.3, random_state = 1234, shuffle = True)'''

"# Splitting the data set into train and test datasets\nX = df.drop(['target'], axis=1)\ny = df['target']\nX_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.3, random_state = 1234, shuffle = True)"

In [19]:
# holdout estimation function

def holdout_estimation(model, alpha, n_classifiers, x, y, test_size_value=0.3, seed=1111):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size_value, random_state=seed)
    model.fit(x_train, y_train, alpha, n_classifiers)
    y_pred = model.predict(x_test)
    #print(f"Estimated accuracy by holdout {accuracy_score(y_test, y_pred):.5f}, using {model.__class__.__name__}")
    return accuracy_score(y_test, y_pred)

In [20]:
# get the best number of classifiers in the AdaBoost for each alpha type of calc
best_n_interators = {}
for alpha in range(3):
    print("Calculating the best n for alpha =", alpha)
    best_n_interators[alpha] = (0, 0)
    for n in range(50, 151, 10):
        ab = AdaBoost()
        accuracy = holdout_estimation(ab, alpha, n, X, y)
        if (accuracy > best_n_interators[alpha][1]):
                best_n_interators[alpha] = (n, accuracy)

print(best_n_interators)

Calculating the best n for alpha = 0
Calculating the best n for alpha = 1
Calculating the best n for alpha = 2
{0: (150, 0.9911452184179457), 1: (130, 0.9858323494687131), 2: (130, 0.9940968122786304)}


In [21]:
# getting the accuracy results by holdout for 50 runs on the default and modified models

scores = {}
for alpha in range(3):
    scores[alpha] = []
    print("Alpha calc method =", alpha)
    for _ in range(50):
        ab = AdaBoost()
        scores[alpha].append(holdout_estimation(ab, alpha, best_n_interators[alpha][0], X, y))

for key in scores.keys():
    print(f"{key}: {scores.get(key)}")

Alpha calc method = 0
Alpha calc method = 1
Alpha calc method = 2
0: [0.9940968122786304, 0.9852420306965761, 0.9893742621015348, 0.9893742621015348, 0.987603305785124, 0.9887839433293979, 0.9911452184179457, 0.9905548996458088, 0.9905548996458088, 0.9905548996458088, 0.9698937426210154, 0.9899645808736718, 0.9811097992916175, 0.9846517119244392, 0.9887839433293979, 0.9881936245572609, 0.9905548996458088, 0.9858323494687131, 0.9935064935064936, 0.987012987012987, 0.9929161747343566, 0.987603305785124, 0.9905548996458088, 0.9940968122786304, 0.9887839433293979, 0.9887839433293979, 0.9775678866587958, 0.9923258559622196, 0.9822904368358913, 0.9899645808736718, 0.9899645808736718, 0.98642266824085, 0.98642266824085, 0.9917355371900827, 0.9834710743801653, 0.9852420306965761, 0.9858323494687131, 0.9887839433293979, 0.9923258559622196, 0.9852420306965761, 0.9887839433293979, 0.9899645808736718, 0.9846517119244392, 0.9834710743801653, 0.9887839433293979, 0.9887839433293979, 0.991735537190082

In [22]:
# check statistics for each model

averages = {}
stddev = {}
for alpha in range(3):
    averages[alpha] = np.mean(scores[alpha])
    stddev[alpha] = np.std(scores[alpha])

print("Averages for each method to calculate the alpha:\n\t", averages)
print()
print("Standard deviation for each method to calculate the alpha:\n\t", stddev)

Averages for each method to calculate the alpha:
	 {0: 0.9879929161747343, 1: 0.9766942148760331, 2: 0.9857733175914994}

Standard deviation for each method to calculate the alpha:
	 {0: 0.004205141895275554, 1: 0.006531220307650744, 2: 0.005881009104241817}


In [23]:
# compare models using paired t-test or wilcoxon signed rank test

default_scores = np.array(scores[0])
alpha1_scores = np.array(scores[1])
alpha2_scores = np.array(scores[2])