In [40]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [41]:
#Carregando o dataset
df = pd.read_excel("https://raw.githubusercontent.com/irvin-s/smd_project/main/dataset/dry_bean_dataset.xls")

In [42]:
#Dividindo a base em treino e teste
X = df.drop("Class", axis=1)
y = df["Class"]

#Transformando a variável cartegorica em binária
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=123)

#Normalizando os dados
ss = StandardScaler()
X_train_norm = ss.fit_transform(X_train)
X_test_norm = ss.fit_transform(X_test)

In [59]:
#Aplicando o modelo KNN

#Definindo o valor de vizinhos
classifier = KNeighborsClassifier(n_neighbors=15)

#Treinar o modelo, com os dados de treinamento
classifier.fit(X_train_norm, y_train)

#Prever os valores de y com dos dados de X_test
y_pred = classifier.predict(X_test_norm)

# Imprimindo a matriz confusa
print("Matriz Confusa: ")
print(confusion_matrix(y_test, y_pred), "\n")  

# Imprimindo o relatório de classificação
print("Relatório de classificação: \n", classification_report(y_test, y_pred))  

# Imprimindo o quão acurado foi o modelo
acu_knn = accuracy_score(y_test, y_pred) * 100
print("Acurácia KNN: {:.2f}%".format(acu_knn))

Matriz Confusa: 
[[ 384    0   34    0    2    3   13]
 [   0  171    0    0    0    0    0]
 [  11    0  507    0    5    2    7]
 [   0    0    0 1064    0   12   68]
 [   0    0   14    2  620    0   18]
 [   2    0    0   19    1  628   24]
 [   1    0    0   93    9    6  772]] 

Relatório de classificação: 
               precision    recall  f1-score   support

           0       0.96      0.88      0.92       436
           1       1.00      1.00      1.00       171
           2       0.91      0.95      0.93       532
           3       0.90      0.93      0.92      1144
           4       0.97      0.95      0.96       654
           5       0.96      0.93      0.95       674
           6       0.86      0.88      0.87       881

    accuracy                           0.92      4492
   macro avg       0.94      0.93      0.93      4492
weighted avg       0.92      0.92      0.92      4492

Acurácia KNN: 92.30%


In [44]:
#Aplicando a Árvore de decisão

#Instanciando o modelo
model = DecisionTreeClassifier()

# Treinar o modelo
model.fit(X_train, y_train)

# Aplicar o modelo ao treinamento e ao teste
predicted_test_y = model.predict(X_test)

predicted_train_y = model.predict(X_train)

In [45]:
#Imprimindo a acurácia do modelo
accuracy = accuracy_score(y_test, predicted_test_y) * 100
print ("Acurácia Decision Tree: {:.2f}%.".format(accuracy))

Acurácia Decision Tree: 89.83%.


In [47]:
#Aplicando a Random Forest

#Instanciando o modelo
rf = RandomForestClassifier(80, max_depth=10, random_state=42)
rf.fit(X_train,y_train)

#Resultado do modelo
score_rf = cross_val_score(rf, X, y, cv=5, scoring='accuracy').mean()
score_rf = score_rf * 100
print("Acurácia Random Forest: {:.2f}%.".format(score_rf))

Acurácia Random Forest: 61.78%.


In [26]:
#Aplicando a Rede Neural MLP

#Instanciado o modelo
modelNN = Sequential()
modelNN.add(Dense(128, input_shape=(16,), activation="sigmoid"))
modelNN.add(Dense(64, activation="sigmoid"))
modelNN.add(Dense(7, activation="softmax"))

#Realizando o treinamento
modelNN.compile(optimizer=SGD(0.01), loss="categorical_crossentropy", metrics=["accuracy"])
H = modelNN.fit(X_train, y_train, batch_size=128, epochs=10, verbose=2, validation_data=(X_test, y_test))

In [None]:
#Ensamble de Redes Neurais


In [49]:
#Ensamble heterogêneo

#Instanciando o modelo
rfr = RandomForestRegressor(n_estimators = 200, random_state = 42)

# Treinando o modelo no dataset de treino
rfr.fit(X_train, y_train)

#Avaliando o desempenho através do erro médio absoluto
score = -1*cross_val_score(rfr, X_test, y_test, cv = 10, scoring = 'neg_mean_absolute_error').mean()

print(score)

#Aplicando o modelo
rfr.predict(X_test)

0.2891363424894827


array([3.   , 5.995, 1.   , ..., 6.   , 3.905, 4.74 ])