### Configuração inicial:


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt

In [46]:
SEED_VALUE = 202407

vehicles = pd.read_csv("data/6 - Veiculos - Dados.csv")
vehicles

vehicles = vehicles.drop(columns=['a'])

X = vehicles.drop(columns=['tipo'])
y = vehicles['tipo']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=SEED_VALUE)

def space():
    print("\n\n")

### KNN:


In [32]:
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

knn_predictions = knn_model.predict(X_test)
print("KNN confusion matrix:")
print(confusion_matrix(y_test, knn_predictions))
print("KNN Accuracy Score:", accuracy_score(y_test, knn_predictions))

KNN confusion matrix:
[[40  1  3  3]
 [ 4 26 17  2]
 [ 2 16 22  2]
 [ 1  2  2 27]]
KNN Accuracy Score: 0.6764705882352942


### RNA:


In [33]:
## HOLD OUT
mlp_model = MLPClassifier(max_iter=2000, random_state=SEED_VALUE)
mlp_model.fit(X_train, y_train)
mlp_predictions = mlp_model.predict(X_test)
print("MLP Hold-out Confusion Matrix:")
print(confusion_matrix(y_test, mlp_predictions))
print("MLP Hold-out Accuracy Score:", accuracy_score(y_test, mlp_predictions))

space()

## CV
param_grid = {
    'hidden_layer_sizes': [(i,) for i in range(1, 46, 10)],
    'alpha':np.arange(0.0001, 0.001, 0.01)
}
grid_search = GridSearchCV(MLPClassifier(max_iter=2000, random_state=SEED_VALUE), param_grid, cv=10, n_jobs=1)
grid_search.fit(X_train, y_train)
print("Best Parameters:", grid_search.best_params_)
mlp_cv_model = grid_search.best_estimator_
mlp_cv_predictions = mlp_cv_model.predict(X_test)
print("MLP Cross-Validation Confusion Matrix:")
print(confusion_matrix(y_test, mlp_cv_predictions))
print("MLP Cross-Validation Accuracy Score:", accuracy_score(y_test, mlp_cv_predictions))



MLP Hold-out Confusion Matrix:
[[43  2  0  2]
 [ 2 46  0  1]
 [ 4 34  3  1]
 [ 2  4  0 26]]
MLP Hold-out Accuracy Score: 0.6941176470588235



Best Parameters: {'alpha': 0.0001, 'hidden_layer_sizes': (11,)}
MLP Cross-Validation Confusion Matrix:
[[40  5  0  2]
 [ 4 34  5  6]
 [ 2 24 11  5]
 [ 0  6  2 24]]
MLP Cross-Validation Accuracy Score: 0.6411764705882353


### SVM:

In [37]:
## HOLD OUT
svm_model = SVC(kernel='rbf', random_state=SEED_VALUE)
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
print("SVM Hold-out Confusion Matrix:")
print(confusion_matrix(y_test, svm_predictions))
print("SVM Hold-out Accuracy Score:", accuracy_score(y_test, svm_predictions))

space()

## CV
param_grid_svm = {
    'C': np.arange(1, 101, 10),
    'gamma': np.arange(0.01, 0.21, 0.05)
}
grid_search_svm = GridSearchCV(SVC(kernel='rbf', random_state=SEED_VALUE), param_grid_svm, cv=10, n_jobs=1)
grid_search_svm.fit(X_train, y_train)
print("Best Parameters:", grid_search_svm.best_params_)
cv_model_svm = grid_search_svm.best_estimator_
cv_svm_predictions = cv_model_svm.predict(X_test)
print("SVM Cross-Validation Confusion Matrix:")
print(confusion_matrix(y_test, cv_svm_predictions))
print("SVM Cross-Validation Accuracy Score:", accuracy_score(y_test, cv_svm_predictions))



SVM Hold-out Confusion Matrix:
[[19  0  5 23]
 [ 6 17 12 14]
 [ 9 15 12  6]
 [ 3  0  0 29]]
SVM Hold-out Accuracy Score: 0.45294117647058824



Best Parameters: {'C': 11, 'gamma': 0.01}
SVM Cross-Validation Confusion Matrix:
[[25  0 21  1]
 [ 1 11 36  1]
 [ 1  7 34  0]
 [ 0  1 13 18]]
SVM Cross-Validation Accuracy Score: 0.5176470588235295


### RF:

In [43]:
# HOLD OUT
rf_model = RandomForestClassifier(random_state=SEED_VALUE)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
print("RF Hold-out Confusion Matrix:")
print(confusion_matrix(y_test, rf_predictions))
print("RF Hold-out Accuracy Score:", accuracy_score(y_test, rf_predictions))

space()

# CV
param_grid_rf = {
    'n_estimators': np.arange(100, 301, 100),
    'max_features': np.arange(2, 10, 3)
}
grid_search_rf = GridSearchCV(RandomForestClassifier(random_state=SEED_VALUE), param_grid_rf, cv=10, n_jobs=-1)
grid_search_rf.fit(X_train, y_train)
print("Best RF Parameters:", grid_search_rf.best_params_)
rf_cv_model = grid_search_rf.best_estimator_
rf_cv_predictions = rf_cv_model.predict(X_test)
print("RF Cross-Validation Confusion Matrix:")
print(confusion_matrix(y_test, rf_cv_predictions))
print("RF Cross-Validation Accuracy Score:", accuracy_score(y_test, rf_cv_predictions))



RF Hold-out Confusion Matrix:
[[47  0  0  0]
 [ 0 26 22  1]
 [ 3 11 27  1]
 [ 0  0  1 31]]
RF Hold-out Accuracy Score: 0.7705882352941177



Best RF Parameters: {'max_features': 8, 'n_estimators': 200}
RF Cross-Validation Confusion Matrix:
[[47  0  0  0]
 [ 1 28 19  1]
 [ 3 14 24  1]
 [ 0  0  1 31]]
RF Cross-Validation Accuracy Score: 0.7647058823529411


### TESTE DE NOVOS CASOS COM O MELHOR MODELO: RF - HOLD OUT

In [47]:
vehicles_new_case = pd.read_csv("data/6 - Veiculos - Novos_Dados.csv")
vehicles_new_case_predictions = rf_model.predict(vehicles_new_case.drop(columns='tipo'))
vehicles_new_case['tipo'] = vehicles_new_case_predictions
print(vehicles_new_case)

   Comp  Circ  DCirc  RadRa  PrAxisRa  MaxLRa  ScatRa  Elong  PrAxisRect  \
0    84    37     53    121        59       5     123     55          17   
1   105    55     96    181        56       9     219     30          25   
2    94    44     70    186        72       8     153     42          19   
3   105    51     93    160        51       7     217     30          24   

   MaxLRect  ScVarMaxis  ScVarmaxis  RaGyr  SkewMaxis  Skewmaxis  Kurtmaxis  \
0       125         141         221    133         82          7          1   
1       175         231         713    216         74          4          5   
2       144         171         361    178         67          7          2   
3       165         240         703    208         81          9         25   

   KurtMaxis  HollRa  tipo  
0        179     183   van  
1        187     194  opel  
2        199     206   bus  
3        188     188   bus  
