### Speed Up Comparison

In [1]:
from tqdm.notebook import trange, tqdm
import multiprocessing, requests, sys, time, itertools, dill, random, os, pickle, copy
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics, svm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

from skrebate import ReliefF
from ucimlrepo import fetch_ucirepo
from sklearn.datasets import fetch_openml

### Definindo comparação

In [2]:
def test_speed(n_tests, n_features, X, y, CLASSIFIER):
    start = time.time()
    f1 = []
    for i in trange(n_tests):
        skf = StratifiedKFold(n_splits=5, shuffle=True) # Kfolding usado para separar em treino e teste
        clf = CLASSIFIER  # treino usando modelo SVM
        f1.append(cross_val_score(clf, X[:,:40], y, cv=skf, scoring='f1_macro')) # Computar f1
    return np.array(f1).mean(), time.time() - start

### Modelos

In [3]:
# Linear SVM (liblinear-style estimator); these hyperparameters were suggested by DeepSeek.
#   dual=False    -> primal formulation (meant for n_samples > n_features)
#   penalty='l2'  -> L2 regularisation
#   tol=1e-3      -> higher tolerance
#   max_iter=2000 -> iteration cap
model_linear_svc = make_pipeline(
    StandardScaler(),
    svm.LinearSVC(penalty='l2', dual=False, tol=1e-3, max_iter=2000),
)

# Decision tree limited to depth 6, behind the same scaling step as the other models.
model_dt = make_pipeline(StandardScaler(), DecisionTreeClassifier(max_depth=6))

# Kernel SVC restricted to a linear kernel.
model_svc_kernel = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))

### Dataset 20
62 Samples

In [4]:
# Download OpenML dataset 45087 and expose it as plain NumPy arrays.
colon = fetch_openml(data_id=45087, parser='auto')
X = np.array(colon.data)
# Flatten the target to a 1-D label vector.
y = np.array(colon.target).ravel()

In [5]:
n_tests = 100
n_features = 40

# Benchmark every model on the currently loaded X / y and report the mean
# macro-F1 together with the elapsed time.
for model_label, model in (
    ('Decision Tree', model_dt),
    ('SVM LinarSVC', model_linear_svc),
    ('SVC Kernel Linear', model_svc_kernel),
):
    test = test_speed(n_tests, n_features, X, y, model)
    print(f'{model_label} - f1_score:', test[0], 'time:', test[1])

  0%|          | 0/100 [00:00<?, ?it/s]

Decision Tree - f1_score: 0.7504332405382715 time: 4.684973239898682


  0%|          | 0/100 [00:00<?, ?it/s]

SVM LinarSVC - f1_score: 0.7488127131726203 time: 4.633829832077026


  0%|          | 0/100 [00:00<?, ?it/s]

SVC Kernel Linear - f1_score: 0.7496683927183927 time: 4.31135630607605


### Dataset 17
856 Samples

In [6]:
# Download OpenML dataset 1468 and expose it as plain NumPy arrays.
cnae = fetch_openml(data_id=1468, parser='auto')
X = np.array(cnae.data)
# Flatten the target to a 1-D label vector.
y = np.array(cnae.target).ravel()

In [7]:
n_tests = 100
n_features = 40

# Benchmark every model on the currently loaded X / y and report the mean
# macro-F1 together with the elapsed time.
for model_label, model in (
    ('Decision Tree', model_dt),
    ('SVM LinarSVC', model_linear_svc),
    ('SVC Kernel Linear', model_svc_kernel),
):
    test = test_speed(n_tests, n_features, X, y, model)
    print(f'{model_label} - f1_score:', test[0], 'time:', test[1])

  0%|          | 0/100 [00:00<?, ?it/s]

Decision Tree - f1_score: 0.13997875232650953 time: 4.559659719467163


  0%|          | 0/100 [00:00<?, ?it/s]

SVM LinarSVC - f1_score: 0.17801757175767563 time: 45.48831272125244


  0%|          | 0/100 [00:00<?, ?it/s]

SVC Kernel Linear - f1_score: 0.1709018089392465 time: 44.51150131225586
