In [57]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [58]:
#Carregando o dataset
df = pd.read_excel("https://raw.githubusercontent.com/irvin-s/smd_project/main/dataset/dry_bean_dataset.xls")

In [59]:
#atribuindo os labes para a classe reposta
labels = ["Barbunya", "Bombay", "Cali", "Dermason", "Horoz", "Seker", "Sira"]

#Dividindo a base em treino e teste
X = df.drop("Class", axis=1)
y = df["Class"]

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=123)

#Normalizando os dados
ss = StandardScaler()
X_train_norm = ss.fit_transform(X_train)
X_test_norm = ss.fit_transform(X_test)

#Transformando a variável cartegorica em binária
labelencoder = LabelEncoder()
y_train_bin = labelencoder.fit_transform(y_train)
y_test_bin = labelencoder.fit_transform(y_test)

In [51]:
#Aplicando o modelo KNN

#Definindo o valor de vizinhos
classifier = KNeighborsClassifier(n_neighbors=15)

#Treinar o modelo, com os dados de treinamento
classifier.fit(X_train_norm, y_train_bin)

#Prever os valores de y com dos dados de X_test
y_pred = classifier.predict(X_test_norm)

# Imprimindo a matriz confusa
print("Matriz Confusa KNN: ")
print(confusion_matrix(y_test_bin, y_pred), "\n")  

# Imprimindo o relatório de classificação
print("Relatório de classificação KNN: \n", classification_report(y_test_bin, y_pred, target_names=labels))  

# Imprimindo o quão acurado foi o modelo
acu_knn = accuracy_score(y_test_bin, y_pred) * 100
print("Acurácia KNN: {:.2f}%".format(acu_knn))

Matriz Confusa: 
[[ 384    0   34    0    2    3   13]
 [   0  171    0    0    0    0    0]
 [  11    0  507    0    5    2    7]
 [   0    0    0 1064    0   12   68]
 [   0    0   14    2  620    0   18]
 [   2    0    0   19    1  628   24]
 [   1    0    0   93    9    6  772]] 

Relatório de classificação: 
               precision    recall  f1-score   support

    Barbunya       0.96      0.88      0.92       436
      Bombay       1.00      1.00      1.00       171
        Cali       0.91      0.95      0.93       532
    Dermason       0.90      0.93      0.92      1144
       Horoz       0.97      0.95      0.96       654
       Seker       0.96      0.93      0.95       674
        Sira       0.86      0.88      0.87       881

    accuracy                           0.92      4492
   macro avg       0.94      0.93      0.93      4492
weighted avg       0.92      0.92      0.92      4492

Acurácia KNN: 92.30%


In [52]:
#Aplicando a Árvore de decisão

#Instanciando o modelo
model = DecisionTreeClassifier()

# Treinar o modelo
model.fit(X_train, y_train_bin)

# Aplicar o modelo ao treinamento e ao teste
predicted_test_y = model.predict(X_test)

predicted_train_y = model.predict(X_train)

In [56]:
# Imprimindo a matriz confusa
print("Matriz Confusa Decision Tree: ")
print(confusion_matrix(y_test_bin, predicted_test_y), "\n")  

# Imprimindo o relatório de classificação
print("Relatório de classificação Decision Tree: \n", classification_report(y_test_bin, predicted_test_y, target_names=labels)) 

#Imprimindo a acurácia do modelo
accuracy_dt = accuracy_score(y_test_bin, predicted_test_y) * 100
print ("Acurácia Decision Tree: {:.2f}%.".format(accuracy_dt))

Matriz Confusa Decision Tree: 
[[ 377    0   35    0    7    5   12]
 [   1  170    0    0    0    0    0]
 [  26    0  488    0   11    2    5]
 [   0    0    0 1039    5   16   84]
 [   2    0   12    5  614    0   21]
 [   8    0    0   29    0  613   24]
 [   7    0    1   91   26   15  741]] 

Relatório de classificação Decision Tree: 
               precision    recall  f1-score   support

    Barbunya       0.90      0.86      0.88       436
      Bombay       1.00      0.99      1.00       171
        Cali       0.91      0.92      0.91       532
    Dermason       0.89      0.91      0.90      1144
       Horoz       0.93      0.94      0.93       654
       Seker       0.94      0.91      0.93       674
        Sira       0.84      0.84      0.84       881

    accuracy                           0.90      4492
   macro avg       0.91      0.91      0.91      4492
weighted avg       0.90      0.90      0.90      4492

Acurácia Decision Tree: 89.98%.


In [60]:
#Aplicando a Random Forest

#Instanciando o modelo
rf = RandomForestClassifier(80, max_depth=10, random_state=42)
rf.fit(X_train,y_train_bin)

#Resultado do modelo
score_rf = cross_val_score(rf, X, y, cv=5, scoring='accuracy').mean()
score_rf = score_rf * 100
print("Acurácia Random Forest: {:.2f}%.".format(score_rf))

Acurácia Random Forest: 61.78%.


In [61]:
#Aplicando a Rede Neural MLP

#Instanciado o modelo
modelNN = Sequential()
modelNN.add(Dense(128, input_shape=(16,), activation="sigmoid"))
modelNN.add(Dense(64, activation="sigmoid"))
modelNN.add(Dense(7, activation="softmax"))

#Vetorizar a classe resposta
lb = LabelBinarizer()
y_train_vet = lb.fit_transform(y_train)
y_test_vet = lb.fit_transform(y_test)

#Realizando o treinamento
modelNN.compile(optimizer=SGD(0.01), loss="categorical_crossentropy", metrics=["accuracy"])
H = modelNN.fit(X_train_norm, y_train_vet, batch_size=128, epochs=10, verbose=2, validation_data=(X_test_norm, y_test_vet))

#Avalidando a Rede Neural 
predictions = modelNN.predict(X_test_norm, batch_size=128)
print(classification_report(y_test_vet.argmax(axis=1), predictions.argmax(axis=1), target_names=labels))

#Matriz confusa
cnf_matrix = confusion_matrix(X_test_norm, predictions.argmax(axis=1))

Epoch 1/10
72/72 - 1s - loss: 1.8717 - accuracy: 0.2402 - val_loss: 1.8030 - val_accuracy: 0.2547
Epoch 2/10
72/72 - 0s - loss: 1.7805 - accuracy: 0.2634 - val_loss: 1.7698 - val_accuracy: 0.2547
Epoch 3/10
72/72 - 0s - loss: 1.7532 - accuracy: 0.2634 - val_loss: 1.7445 - val_accuracy: 0.2547
Epoch 4/10
72/72 - 0s - loss: 1.7281 - accuracy: 0.2634 - val_loss: 1.7197 - val_accuracy: 0.2547
Epoch 5/10
72/72 - 0s - loss: 1.7023 - accuracy: 0.2635 - val_loss: 1.6932 - val_accuracy: 0.2549
Epoch 6/10
72/72 - 0s - loss: 1.6749 - accuracy: 0.2643 - val_loss: 1.6648 - val_accuracy: 0.2558
Epoch 7/10
72/72 - 0s - loss: 1.6458 - accuracy: 0.2706 - val_loss: 1.6347 - val_accuracy: 0.2812
Epoch 8/10
72/72 - 0s - loss: 1.6149 - accuracy: 0.3091 - val_loss: 1.6039 - val_accuracy: 0.2861
Epoch 9/10
72/72 - 0s - loss: 1.5827 - accuracy: 0.3253 - val_loss: 1.5706 - val_accuracy: 0.3533
Epoch 10/10
72/72 - 0s - loss: 1.5489 - accuracy: 0.3745 - val_loss: 1.5363 - val_accuracy: 0.3713
              preci

  _warn_prf(average, modifier, msg_start, len(result))


ValueError: ignored

In [None]:
#Ensamble de Redes Neurais


In [34]:
#Ensamble heterogêneo

#Instanciando o modelo
rfr = RandomForestRegressor(n_estimators = 200, random_state = 42)

# Treinando o modelo no dataset de treino
rfr.fit(X_train, y_train_bin)

#Avaliando o desempenho através do erro médio absoluto
score = -1*cross_val_score(rfr, X_test, y_test_bin, cv = 10, scoring = 'neg_mean_absolute_error').mean()

print(score)

#Aplicando o modelo
rfr.predict(X_test)

0.2891363424894827


array([3.   , 5.995, 1.   , ..., 6.   , 3.905, 4.74 ])