In [None]:
# ===== Pacotes / Packages =====
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


In [None]:
# ===== 1. Read the .XLSX data =====
# Load the Excel workbook into `dados` and print its first five rows as a
# quick sanity check of the columns that were parsed.
dados = pd.read_excel(io="NAME.xlsx")
print(dados.head(n=5))

In [None]:
# === Read the .CSV data ===
# CSV loader: this cell also assigns `dados`, so whatever it reads replaces
# any frame previously stored under that name.
dados = pd.read_csv(filepath_or_buffer='NAME.csv', sep=',')  # explicit field separator
print(dados.head(n=5))

In [None]:
# ===== 2. Clean the data and ensure predictors are numeric =====
# Rows with missing values are removed *before* X and y are built, so that no
# NaN reaches the scaler or the SVM (SVC.fit rejects NaN input).
dados_cleaned = dados.dropna()

# The class label lives in the first column; every other column is a predictor.
# Selecting by position keeps X and y aligned row by row.
# pd.to_numeric raises ValueError if a predictor holds non-numeric text.
X = dados_cleaned.iloc[:, 1:].apply(pd.to_numeric)
y = dados_cleaned.iloc[:, 0].values.ravel()

# ===== 3. Standardize the data =====
# Zero mean / unit variance per predictor, computed over all cleaned rows.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# ===== 4. Train/test split =====
# Split the *unscaled* predictors first and only then standardize, fitting the
# scaler on the training rows alone. Splitting an already-standardized matrix
# would let the test rows contribute to the mean/std used for training
# (data leakage) and make the test accuracy optimistic.
# The split depends only on y, the sample count and the seed, so the same rows
# land in each partition as when splitting a pre-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234, stratify=y
)

# `scaler` is rebound to the training-only fit so that any later
# `scaler.transform(...)` matches the features the model is trained on.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

print("Treino:", X_train.shape, y_train.shape)
print("Teste:", X_test.shape, y_test.shape)

Treino: (6723, 9) (6723,)
Teste: (1681, 9) (1681,)


In [None]:
# ===== 5. Train SVM =====
# Hyper-parameters are gathered in one place; other kernels worth trying are
# poly, rbf and sigmoid.
svm_config = {"kernel": "linear", "probability": True, "random_state": 1234}
modelo_svm = SVC(**svm_config)
modelo_svm.fit(X_train, y_train)

In [None]:
# ===== 6. Prediction =====
# Predict on both partitions so training and test accuracy can be compared.
y_pred_train = modelo_svm.predict(X_train)
y_pred_test = modelo_svm.predict(X_test)

acc_treino = accuracy_score(y_train, y_pred_train)
acc_teste = accuracy_score(y_test, y_pred_test)

print("Acurácia Treino (Training Accuracy):", acc_treino)
print("Acurácia Teste (Test Accuracy):", acc_teste)

In [None]:
# ===== 7. Metrics =====
# np.unique already returns the distinct labels in sorted order.
classes = list(np.unique(y))

# Confusion matrix with a fixed label order, so rows/columns always cover every
# class found in y, even one that is missing from the test predictions.
conf_mat = confusion_matrix(y_test, y_pred_test, labels=classes)
cm_df = pd.DataFrame(conf_mat, index=classes, columns=classes)

# Heatmap of the confusion matrix (rows = true class, columns = predicted class).
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predict")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix - SVM")
plt.show()

# Classification report.
# `labels=classes` pins the rows to the same classes that `target_names` is
# built from; without it the report infers labels from y_test/y_pred_test only,
# and a class absent from the test split makes the names misalign or raise.
print("\nClassification Report Teste:\n")
print(
    classification_report(
        y_test,
        y_pred_test,
        labels=classes,
        target_names=[str(c) for c in classes],
    )
)

In [None]:
# ===== 8. K-Fold Cross-Validation =====
# The scaler is placed inside a pipeline together with the classifier, so it is
# re-fitted on the training part of every fold. Cross-validating on a matrix
# that was standardized beforehand would let each validation fold influence the
# mean/std used to scale its own training data (data leakage).
# A fresh SVC with the same hyper-parameters is used, leaving the already
# fitted `modelo_svm` untouched.
modelo_cv = make_pipeline(StandardScaler(), SVC(**modelo_svm.get_params()))

# 5-fold cross-validation on the unscaled predictors.
cv_scores = cross_val_score(modelo_cv, X, y, cv=5)

print("Acurácias por Fold (Accuracies per Fold):", cv_scores)
print("Acurácia Média (K-Fold) (Average Accuracy (K-Fold)):", np.mean(cv_scores))
print("Desvio Padrão da Acurácia (K-Fold) (Standard Deviation of Accuracy (K-Fold)):", np.std(cv_scores))

In [None]:
# ===== 9. Feature Importance (linear kernel only) =====
TOP_N = 10  # number of features shown in the bar chart

# `coef_` is only defined for the linear kernel; fail early with a clear
# message instead of an opaque attribute lookup error when another kernel
# (poly, rbf, sigmoid) is being tested.
if modelo_svm.kernel != "linear":
    raise AttributeError(
        "Feature importance from coef_ requires kernel='linear' "
        f"(got kernel={modelo_svm.kernel!r})."
    )

# One row of coefficients per separating hyperplane: a single row for a binary
# problem, one row per pair of classes for a multi-class problem.
matriz_coef = modelo_svm.coef_

# Signed coefficients of the first hyperplane (the only one in binary problems).
coeficientes = matriz_coef[0]

# Importance = mean absolute coefficient over all hyperplanes, so every class
# pair contributes in multi-class problems. With a single hyperplane this is
# exactly the absolute value of `coeficientes`.
importancia = np.abs(matriz_coef).mean(axis=0)

# Feature names come from the predictor columns.
nomes_features = X.columns

# Table of features and their importance, most important first.
feature_importance = pd.DataFrame({
    'feature': nomes_features,
    'importance': importancia,
}).sort_values('importance', ascending=False)

# Plot the TOP_N most important features; the title reports how many bars are
# actually drawn (fewer than TOP_N when the data has fewer predictors).
top_features = feature_importance.head(TOP_N)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='importance', y='feature', data=top_features, palette='viridis', ax=ax)
ax.set_title(f'Top {len(top_features)} Features by Importance (Linear SVM Coefficients)')
ax.set_xlabel('Importance (Mean Absolute Coefficient Value)')
ax.set_ylabel('Feature')
plt.show()