In [1]:
%load_ext autoreload
%autoreload 2
import os, sys

sys.path.append(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
)
import numpy as np
import pandas as pd
from ppi_py.datasets import load_dataset
from ppi_py import ppi_logistic_ci, eff_ppi_logistic_ci
from tqdm import tqdm
from scipy.optimize import brentq
from scipy.special import expit
from baseline_utils import *
from tqdm import tqdm

In [8]:
# Monte Carlo comparison of two prediction-powered confidence intervals for
# logistic-regression coefficients: `ppi_logistic_ci` vs. `eff_ppi_logistic_ci`.
# Every trial draws a fresh synthetic problem, computes both intervals at each
# level in `alphas`, and records the interval widths and a coverage flag.
#
# Results read by later cells:
#   sizes, eff_sizes          -- shape (num_trials, len(alphas), d): widths
#   includeds, eff_includeds  -- shape (num_trials, len(alphas)): 0/1 flags

SEED = 0  # fixed so that Restart & Run All reproduces the same numbers
rng = np.random.default_rng(SEED)

alphas = np.array([0.2, 0.1, 0.05])  # error levels passed as `alpha`
n = 1000  # labeled samples per trial
N = 10000  # unlabeled samples per trial
d = 2  # number of covariates / coefficients
num_trials = 100

eff_includeds = np.zeros((num_trials, len(alphas)))
eff_sizes = np.zeros((num_trials, len(alphas), d))
includeds = np.zeros((num_trials, len(alphas)))
sizes = np.zeros((num_trials, len(alphas), d))

for i in tqdm(range(num_trials)):
    # Synthetic logistic-regression problem with true coefficients `beta`.
    X = rng.standard_normal((n, d))
    beta = 5 * rng.standard_normal(d)
    # The "model" predicts with a perturbed, shifted copy of the true
    # coefficients, so Yhat is informative about Y but systematically off.
    beta_prediction = beta + rng.standard_normal(d) + 2
    Y = rng.binomial(1, expit(X.dot(beta)))
    Yhat = expit(X.dot(beta_prediction))

    # Unlabeled covariates; only the model predictions are available for them.
    X_unlabeled = rng.standard_normal((N, d))
    Yhat_unlabeled = expit(X_unlabeled.dot(beta_prediction))

    for j, alpha in enumerate(alphas):
        eff_ppi_ci = eff_ppi_logistic_ci(
            X, Y, Yhat, X_unlabeled, Yhat_unlabeled, alpha=alpha, grad_tol=1e-1
        )
        ppi_ci = ppi_logistic_ci(
            X,
            Y,
            Yhat,
            X_unlabeled,
            Yhat_unlabeled,
            alpha=alpha,
            grad_tol=1e-1,
            grid_size=2000,
        )

        # Each result is indexed as (lower, upper); width = upper - lower,
        # taken for the first d coordinates.
        eff_sizes[i, j, :] = np.asarray(eff_ppi_ci[1])[:d] - np.asarray(eff_ppi_ci[0])[:d]
        sizes[i, j, :] = np.asarray(ppi_ci[1])[:d] - np.asarray(ppi_ci[0])[:d]

        # Coverage is checked for the first coefficient only.
        # NOTE(review): widths are recorded for all d coordinates but coverage
        # only for coordinate 0 -- confirm this asymmetry is intended.
        eff_includeds[i, j] = float(eff_ppi_ci[0][0] <= beta[0] <= eff_ppi_ci[1][0])
        includeds[i, j] = float(ppi_ci[0][0] <= beta[0] <= ppi_ci[1][0])

100%|██████████████████████████████████████████████████████████████████████████████████| 100/100 [02:35<00:00,  1.55s/it]


In [9]:
# Trial-averaged summaries, in order: ppi_logistic_ci widths,
# eff_ppi_logistic_ci widths, ppi_logistic_ci coverage flags,
# eff_ppi_logistic_ci coverage flags. Blocks are separated by a blank line.
for per_trial in (sizes, eff_sizes, includeds):
    print(per_trial.mean(axis=0))
    print()
print(eff_includeds.mean(axis=0))

[[2.05925926 2.24      ]
 [2.30666667 2.48      ]
 [2.44       2.62074074]]

[[1.25418676 1.43103425]
 [1.60973128 1.83671258]
 [1.91811312 2.18857802]]

[0.96 0.98 0.98]

[0.92 0.94 0.97]


In [77]:
# Debug view: print the full per-trial coverage-flag matrix recorded for the
# ppi_logistic_ci intervals (rows = trials, columns = entries of `alphas`).
# NOTE(review): the saved output below has fewer rows than the `num_trials`
# set in the simulation cell, and the execution count is out of sequence --
# this output looks stale; confirm by re-running from a fresh kernel.
print(includeds)

[[1. 1. 0.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [0. 1. 1.]
 [0. 0. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [0. 0. 1.]
 [1. 1. 1.]
 [0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
