# Postprocessing

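Post-processing calculates $P(y|x)$ by scaling $P(s|x)$ by $\frac{1}{c}$, where $c = P(s=1|y=1)$ is the label frequency, i.e. the probability that a positive example is labeled.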
Note that this method is applicable only when the SCAR assumption holds.

In [None]:
import numpy as np
import pandas as pd

### Load the dataset

In [None]:
from data import load_scar, load_sar, load_pg

# load_scar() yields the train / validation / test splits plus the constant c
# that the prediction step below divides P(s|x) by.
# NOTE(review): c is presumably the label frequency P(s=1|y=1) -- confirm
# against data.load_scar.
train, valid, test, c = load_scar()

### Learn a non-traditional classifier $P(s|x)$

In [None]:
from sklearn.linear_model import LogisticRegression

# Split the training set into its four components. Only the features
# (train_xs) and the observed labels (train_ss) are used in this cell.
train_xs, train_ys, train_ss, train_es = train

# Fit the "non-traditional" classifier: the target is the labeling
# indicator s, not the class y, so the model estimates P(s|x).
# random_state is pinned so repeated runs give the same model.
clf = LogisticRegression(random_state=0).fit(
    X=train_xs,
    y=train_ss,
)

### Predict

If the SCAR assumption holds, $P(y|x)$ is calculated by scaling $P(s|x)$ by $\frac{1}{c}$.

In [None]:
# Split the test set into its four components; test_ss and test_es are not
# used in this cell.
test_xs, test_ys, test_ss, test_es = test

# Second column of predict_proba = probability of the classifier's second
# class, i.e. the estimate of P(s=1|x) (assuming s is encoded as 0/1).
test_ss_prob = clf.predict_proba(X=test_xs)[:, 1]

# Rescale by 1/c to get P(y=1|x). The quotient can exceed 1, so it is capped
# at 1.0 to remain a valid probability.
test_ys_prob = np.minimum(1.0, test_ss_prob / c)

# Hard 0/1 predictions: positive when the rescaled probability is above 0.5.
test_ys_hat = np.greater(test_ys_prob, 0.5).astype(np.int32)

### Evaluate the performance

In [None]:
from sklearn.metrics import f1_score

# F1 score of the thresholded predictions against the test labels test_ys.
# Left as the final bare expression so the notebook displays the value.
f1_score(y_true=test_ys, y_pred=test_ys_hat)

### Visualize the result

In [None]:
from utils import plot_x_y, plot_x_y_proba

In [None]:
# Plot the test inputs together with the predicted 0/1 labels.
# NOTE(review): the rendering itself lives in utils.plot_x_y, which is not
# visible here -- see that helper for what the figure shows.
plot_x_y(test_xs, test_ys_hat)