In [14]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

# Definir o caminho relativo para o arquivo de dados
file_path = 'sample_data/mammographic_masses.data.txt'

# Carregar e limpar os dados
col_names = ['BI_RADS', 'age', 'shape', 'margin', 'density', 'severity']
df = pd.read_csv(file_path, names=col_names, header=None, na_values='?')
df.dropna(inplace=True)

# Definir recursos e classes
features = ['age', 'shape', 'margin', 'density']
target = 'severity'

# Dividir os dados em conjuntos de treino e teste
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.25, random_state=0)

# Escalar os dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Inicializar variáveis para armazenar a melhor acurácia e o melhor kernel
best_accuracy = 0
best_kernel = None

# Treinar e avaliar o classificador SVM para diferentes kernels
kernels = ['linear', 'rbf', 'sigmoid', 'poly']
for kernel in kernels:
    clf = svm.SVC(kernel=kernel, C=1)
    clf.fit(X_train_scaled, y_train)
    accuracy = clf.score(X_test_scaled, y_test)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_kernel = kernel

print(f'Maximum accuracy: {best_accuracy:.2f} with kernel={best_kernel}')


Maximum accuracy: 0.78 with kernel=linear
