# Rebalancing Methods

This method learns $P(y|x)$ from PU data by incorporating the labeling mechanism.

In [None]:
import numpy as np
import pandas as pd

### Load the dataset

In [None]:
from data import load_scar, load_sar, load_pg

# load_scar() yields three splits plus a scalar `c`; `c` is the constant used
# in the class weights further down.
# NOTE(review): presumably `c` is the label frequency P(s=1 | y=1) under the
# SCAR assumption -- confirm against data.load_scar.
train, valid, test, c = load_scar()

### Create a new dataset

- Create a new dataset by regarding labeled and unlabeled data as positive and negative data, respectively.
- Assign a weight to each training example in the new dataset as follows:
  - Assign $w^+ = \frac{\tau}{c\tau}$ to positive data.
  - Assign $w^- = \frac{1 - \tau}{1 - c\tau}$ to negative data.

In [None]:
train_xs, train_ys, train_ss, train_es = train

# Non-traditional dataset: the features are unchanged, but the target is the
# "labeled" indicator (train_ss), so labeled -> class 1, unlabeled -> class 0.
new_train_xs = train_xs[:]
new_train_ys = train_ss[:]

# Target decision threshold on P(y=1 | x). It equals the 0.5 cutoff that
# LogisticRegression.predict applies to the fitted (weighted) model.
tau = 0.5

# Rebalancing weights. A weighted classifier's posterior is
#     w1 * p / (w1 * p + w0 * (1 - p)),   p = P(s=1 | x) = c * P(y=1 | x).
# Requiring p = c * tau to map onto tau gives
#     w1 / w0 = (1 - c * tau) / (c * (1 - tau)),
# which is satisfied by w1 = tau / (c * tau) and w0 = (1 - tau) / (1 - c * tau).
# Sanity check: c == 1 (every positive is labeled) yields w0 == w1 == 1.
# The previous w0 = 1 - (1 - tau) / (1 - c * tau) gave w0 == 0 in that case,
# i.e. it discarded all negative examples.
class_weight = {
    0: (1 - tau) / (1 - c * tau),
    1: tau / (c * tau),
}

### Learn a classifier

In [None]:
from sklearn.linear_model import LogisticRegression

# Build the class-weighted logistic regression first, then train it on the
# labeled-vs-unlabeled dataset. fit() hands back the estimator itself, so
# `clf` ends up bound to the fitted model.
clf = LogisticRegression(class_weight=class_weight, random_state=0)
clf = clf.fit(new_train_xs, new_train_ys)

### Predict

In [None]:
# Unpack the test split in the same (xs, ys, ss, es) order used for `train`.
test_xs, test_ys, test_ss, test_es = test

# Hard class predictions from the fitted classifier.
test_ys_hat = clf.predict(test_xs)
# Second column of predict_proba, i.e. the probability of clf.classes_[1].
# NOTE(review): this is the positive class only if the labels are {0, 1} -- confirm.
test_ys_prob = clf.predict_proba(test_xs)[:, 1]

### Evaluate the performance

In [None]:
from sklearn.metrics import f1_score

# F1 of the hard predictions against test_ys; as the cell's last expression
# the score is shown as the cell output.
# NOTE(review): assumes test_ys holds the true (not PU) class labels -- confirm.
f1_score(test_ys, test_ys_hat)

### Visualize the result

In [None]:
from utils import plot_x_y, plot_x_y_proba

In [None]:
# Pass the test features and the predicted labels to the plotting helper.
# NOTE(review): presumably draws the points colored by predicted class --
# confirm against utils.plot_x_y.
plot_x_y(test_xs, test_ys_hat)