# Biased SVM

A biased SVM is a variant of the standard SVM that penalizes misclassified positive and negative examples differently.

In [1]:
import numpy as np
import pandas as pd

### Load the SCAR dataset

In [2]:
from sar import load_sar

# Each split is later unpacked as a 3-tuple; presumably (features x, true labels y,
# observed labels s) judging by the downstream names — confirm against `sar.load_sar`.
train, valid, test = load_sar()

### Learn a biased SVM

A biased SVM is trained by treating unlabeled data as negative, while penalizing misclassified positive and negative examples differently through class weights. The best class weights are selected by maximizing $F'_1$ on the validation set.

In [None]:
from sklearn.svm import SVC
from utils import f1_prime

# Only the features and the `s` labels of each split are used for model
# selection; the middle element of each split is discarded here.
train_x, _, train_s = train
valid_x, _, valid_s = valid

# Sweep the weight of class 1 over [0.5, 1.0) in steps of 0.05; class 0 gets
# the complementary weight. Keep the classifier with the highest F1' on the
# validation split.
best_clf = None
best_f1_prime = -1.0
for positive_weight in np.arange(0.5, 1.0, 0.05):
    class_weight = {0: 1.0 - positive_weight, 1: positive_weight}

    # probability=True so the fitted model can later serve predict_proba.
    clf = SVC(class_weight=class_weight, random_state=0, probability=True)
    clf.fit(train_x, train_s)

    candidate_f1_prime = f1_prime(valid_s, clf.predict(valid_x))
    improved = candidate_f1_prime > best_f1_prime
    if improved:
        print(f"The best classifier is updated: class weight is {class_weight}.")
        best_f1_prime, best_clf = candidate_f1_prime, clf

The best classifier is updated: class weight is {0: 0.5, 1: 0.5}.
The best classifier is updated: class weight is {0: 0.44999999999999996, 1: 0.55}.
The best classifier is updated: class weight is {0: 0.3999999999999999, 1: 0.6000000000000001}.
The best classifier is updated: class weight is {0: 0.34999999999999987, 1: 0.6500000000000001}.
The best classifier is updated: class weight is {0: 0.2999999999999998, 1: 0.7000000000000002}.
The best classifier is updated: class weight is {0: 0.24999999999999978, 1: 0.7500000000000002}.
The best classifier is updated: class weight is {0: 0.19999999999999973, 1: 0.8000000000000003}.


### Predict

In [None]:
test_x, test_y, _ = test

# Predict with the classifier selected on the validation set (`best_clf`).
# `clf` only holds the last candidate fitted in the search loop, which is not
# necessarily the one with the best validation F1'.
test_y_hat = best_clf.predict(test_x)
# Column 1 of predict_proba corresponds to the second entry of
# `best_clf.classes_`, i.e. label 1 given the 0/1 class weights used in training.
test_y_prob_hat = best_clf.predict_proba(test_x)[:, 1]

### Evaluate the performance

In [None]:
from sklearn.metrics import f1_score

# F1 of the predicted labels against the test labels; the bare expression is
# the cell's displayed result.
f1_score(y_true=test_y, y_pred=test_y_hat)

### Visualize the result

In [None]:
from utils import plot_x_y, plot_x_y_proba

In [None]:
# Test features with the `test_y` labels (used as the reference in the F1 evaluation).
plot_x_y(test_x, test_y)

In [None]:
# Same helper, now with the classifier's predicted labels for visual comparison.
plot_x_y(test_x, test_y_hat)

In [None]:
# Test features with the predicted probability of class 1 (from predict_proba).
plot_x_y_proba(test_x, test_y_prob_hat)