In [2]:
#Sobreamostragem ou oversampling
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler, SMOTE
from collections import Counter
from sklearn import svm
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)

In [3]:
X, y = make_classification(n_samples = 1000, weights=[0.9, 0.1], random_state = 42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [4]:
#treinando com o conjunto de treino original (desbalanceado)
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train,y_train)
y_test_predictions = svm_model.predict(X_test)
print('Dados originais / Desbalanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")

print("_____________________________________")


#Random over sampling
random_over = RandomOverSampler(random_state = 42)
X_train_over, y_train_over = random_over.fit_resample(X_train, y_train) #aplicar apenas no treino

print(Counter(y_train), Counter(y_train_over))

#treinando com o conjunto de treino balanceado
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train_over,y_train_over)
y_test_predictions = svm_model.predict(X_test)


print('Random over sampler / dados balanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")

Dados originais / Desbalanceados:
Acuracia = 0.885
Precision = 0.38461538461538464
Recall = 0.25
F1 Score = 0.30303030303030304
_____________________________________
Counter({0: 717, 1: 83}) Counter({0: 717, 1: 717})
Random over sampler / dados balanceados:
Acuracia = 0.845
Precision = 0.36585365853658536
Recall = 0.75
F1 Score = 0.4918032786885247


In [5]:
#SMOTE
#treinando com o conjunto de treino original (desbalanceado)
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train,y_train)
y_test_predictions = svm_model.predict(X_test)
print('Dados originais / Desbalanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")

print("_____________________________________")


#SMOTE
smote = SMOTE(random_state = 42)
X_train_over, y_train_over = smote.fit_resample(X_train, y_train) #aplicar apenas no treino

print(Counter(y_train), Counter(y_train_over))

#treinando com o conjunto de treino balanceado com SMOTE
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train_over,y_train_over)
y_test_predictions = svm_model.predict(X_test)


print('SMOTE / dados balanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")

Dados originais / Desbalanceados:
Acuracia = 0.885
Precision = 0.38461538461538464
Recall = 0.25
F1 Score = 0.30303030303030304
_____________________________________
Counter({0: 717, 1: 83}) Counter({0: 717, 1: 717})
SMOTE / dados balanceados:
Acuracia = 0.845
Precision = 0.35135135135135137
Recall = 0.65
F1 Score = 0.456140350877193


In [6]:
#Subamostragem ou undersampling
from imblearn.under_sampling import RandomUnderSampler, TomekLinks


#treinando com o conjunto de treino original (desbalanceado)
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train,y_train)
y_test_predictions = svm_model.predict(X_test)
print('Dados originais / Desbalanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")
print("_____________________________________")


#Random under sampler
random_under = RandomUnderSampler(random_state = 42)
X_train_under, y_train_under = random_under.fit_resample(X_train, y_train)


print(Counter(y_train), Counter(y_train_under))

#treinando com o conjunto de treino balanceado com RandomUnderSampler
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train_under,y_train_under)
y_test_predictions = svm_model.predict(X_test)

print('Random under sampler / dados balanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")


Dados originais / Desbalanceados:
Acuracia = 0.885
Precision = 0.38461538461538464
Recall = 0.25
F1 Score = 0.30303030303030304
_____________________________________
Counter({0: 717, 1: 83}) Counter({0: 83, 1: 83})
Random under sampler / dados balanceados:
Acuracia = 0.85
Precision = 0.375
Recall = 0.75
F1 Score = 0.5


In [7]:
#Tomek links
#treinando com o conjunto de treino original (desbalanceado)
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train,y_train)
y_test_predictions = svm_model.predict(X_test)
print('Dados originais / Desbalanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")
print("_____________________________________")


#TomekLinks
tl = TomekLinks()
X_train_tl, y_train_tl = tl.fit_resample(X_train, y_train)


print(Counter(y_train), Counter(y_train_under))

#treinando com o conjunto de treino balanceado com TomekLinks
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train_tl,y_train_tl)
y_test_predictions = svm_model.predict(X_test)


print('Tomek Links / dados balanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")


Dados originais / Desbalanceados:
Acuracia = 0.885
Precision = 0.38461538461538464
Recall = 0.25
F1 Score = 0.30303030303030304
_____________________________________
Counter({0: 717, 1: 83}) Counter({0: 83, 1: 83})
Tomek Links / dados balanceados:
Acuracia = 0.895
Precision = 0.45454545454545453
Recall = 0.25
F1 Score = 0.3225806451612903


In [72]:
#Combinação 
from imblearn.combine import SMOTETomek
#treinando com o conjunto de treino original (desbalanceado)
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train,y_train)
y_test_predictions = svm_model.predict(X_test)
print('Dados originais / Desbalanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")
print("_____________________________________")


#Random under sampler
smote_tl = SMOTETomek(random_state = 42)
X_train_cb, y_train_cb = smote_tl.fit_resample(X_train, y_train)

print(Counter(y_train), Counter(y_train_cb))

#treinando com o conjunto de treino balanceado com SMOTE
svm_model = svm.SVC(kernel ='sigmoid').fit(X_train_cb,y_train_cb)
y_test_predictions = svm_model.predict(X_test)


print('Tomek Links / dados balanceados:')
acuracia = accuracy_score(y_test, y_test_predictions)
precision = precision_score(y_test, y_test_predictions)
recall = recall_score(y_test, y_test_predictions)
f1score = f1_score(y_test, y_test_predictions)

print(f"Acuracia = {acuracia}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 Score = {f1score}")

Dados originais / Desbalanceados:
Acuracia = 0.885
Precision = 0.38461538461538464
Recall = 0.25
F1 Score = 0.30303030303030304
_____________________________________
Counter({0: 717, 1: 83}) Counter({0: 716, 1: 716})
Tomek Links / dados balanceados:
Acuracia = 0.845
Precision = 0.35135135135135137
Recall = 0.65
F1 Score = 0.456140350877193
