### Preparação dos dados

In [1]:
import sys
import os

# Adiciona o diretório raiz ao sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from src.utils import accuracy


# 1. Load the MNIST dataset from OpenML.
print("Carregando dataset...")
mnist = fetch_openml("mnist_784", version=1)
x, y = mnist.data, mnist.target

# 2. Keep only two classes (digits 3 and 8) so the task is binary.
print("Filtrando dataset...")
classes = ['3', '8']
filter_idx = np.isin(y, classes)
x, y = x[filter_idx], y[filter_idx]
# Encode the labels as -1 (first class) and +1 (second class).
y = np.where(y == classes[0], -1, 1)

# 3. Split into training and test sets BEFORE scaling, so that no statistic
#    computed from the test samples can influence the preprocessing.
print("Dividindo o dataset...")
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# 4. Normalize: learn the per-feature min/max on the training set only and
#    apply that same transformation to the test set (avoids data leakage).
print("Normalizando dataset...")
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

Carregando dataset...
Filtrando dataset...
Normalizando dataset...
Dividindo o dataset...


### Usando scikit-learn

In [4]:
from sklearn import svm

# Baseline: scikit-learn's SVC with a linear kernel and C=1, trained on the
# training split (fit returns the estimator itself, so the call is chained).
clf = svm.SVC(kernel='linear', C=1).fit(x_train, y_train)

# Score the predictions for the held-out split with the project's accuracy helper.
predict = clf.predict(x_test)
acc = accuracy(y_test, predict)
print(f"Accuracy of {acc}")

Accuracy of 0.9717251252684324


### Usando minha própria implementação do SVM

In [3]:
from src.svm import SVM

# Train the project's own SVM implementation with its default settings.
# The instance is named `custom_svm` so it does not rebind `svm`, the name the
# scikit-learn cell uses for the `sklearn.svm` module.
custom_svm = SVM()
custom_svm.fit(x_train, y_train)
y_pred = custom_svm.predict(x_test)

# Score on the same held-out split used for the scikit-learn baseline.
acc = accuracy(y_test, y_pred)
print(f"Accuracy of {acc}")

Accuracy of 0.9674302075876879
