# Biased SVM

Biased SVMs are a variant of the standard SVM method that penalizes misclassified positive and negative examples differently.

In [None]:
import numpy as np
import pandas as pd

### Load the SCAR dataset

In [None]:
from data import load_scar, load_sar, load_pg

# load_scar() returns four values: the train, validation and test splits, plus `c`.
# Each split is later unpacked into (xs, ys, ss, es).
# NOTE(review): `c` is presumably the label frequency of the SCAR setting — confirm in data.py.
train, valid, test, c = load_scar()

### Learn a biased SVM

A biased SVM is trained by treating unlabeled data as negative, while penalizing misclassified positive and negative examples differently. The best class weight is selected according to $F'_1$ on the validation set.

In [None]:
from sklearn.svm import SVC
from utils import f1_prime

# Unpack the train/validation splits; only the features (xs) and the `ss`
# targets are used for model selection in this cell.
train_xs, train_ys, train_ss, train_es = train
valid_xs, valid_ys, valid_ss, valid_es = valid

# Track the classifier with the highest validation F1' seen so far.
best_clf = None
best_f1_prime = -1.0

# Sweep the weight on class 1; class 0 receives the complementary weight so
# the two weights always sum to 1.
for class_weight_pos in np.arange(0.5, 1.0, 0.05):
    class_weight = {0: 1.0 - class_weight_pos, 1: class_weight_pos}

    # Fit against `ss` (not `ys`): per the notebook text, unlabeled examples
    # are treated as negatives during training.
    clf = SVC(class_weight=class_weight, random_state=0, probability=True)
    clf.fit(train_xs, train_ss)

    # Score this candidate on the validation split.
    valid_ss_hat = clf.predict(valid_xs)
    f1_prime_ = f1_prime(valid_ss, valid_ss_hat)

    # Keep the incumbent unless this candidate is strictly better. Written as
    # a negated `>` so a NaN score never replaces the incumbent.
    if not (f1_prime_ > best_f1_prime):
        continue

    print(f"The best classifier is updated: class weight is {class_weight}.")
    best_f1_prime = f1_prime_
    best_clf = clf

### Predict

In [None]:
# Unpack the held-out test split.
test_xs, test_ys, test_ss, test_es = test

# Predict with `best_clf`, the model selected by validation F1' in the search
# loop. `clf` is merely the last candidate fitted (the largest class weight),
# so using it here would silently discard the model selection.
test_ys_hat = best_clf.predict(test_xs)
# Column 1 of predict_proba is the estimated probability of class 1.
test_ys_prob = best_clf.predict_proba(test_xs)[:, 1]

### Evaluate the performance

In [None]:
from sklearn.metrics import f1_score

# F1 of the predicted labels against the test labels `test_ys`; left as a
# bare expression so the notebook cell displays the value.
f1_score(y_true=test_ys, y_pred=test_ys_hat)

### Visualize the result

In [None]:
from utils import plot_x_y, plot_x_y_proba

In [None]:
# Plot the test inputs together with the test labels `test_ys`, for reference.
plot_x_y(test_xs, test_ys)

In [None]:
# Plot the same test inputs with the classifier's predicted labels `test_ys_hat`.
plot_x_y(test_xs, test_ys_hat)

In [None]:
# Plot the test inputs with the predicted class-1 probabilities `test_ys_prob`.
plot_x_y_proba(test_xs, test_ys_prob)