In [2]:
import pool_alice_code
import numpy as np
from typing import List, Tuple

## Exploring how to calculate U_hat efficiently

In [3]:
# Toy stand-in for an unlabeled pool: a 5 x 3 array of standard-normal draws.
# NOTE: no seed is set in the cells shown here, so the values differ between runs.
dummy_pool_shape = (5, 3)
dummy_x_unlabeled = np.random.standard_normal(dummy_pool_shape)
dummy_x_unlabeled

array([[-0.21114576,  1.2127227 ,  0.14841182],
       [ 1.42749795,  1.18437981,  0.62335293],
       [ 0.43484084,  0.76926533,  1.01784891],
       [ 1.25864123,  0.32134642, -0.65060902],
       [-0.19197823,  0.2527082 , -0.25559072]])

In [4]:
# Column-wise feature map: basis function number `col` is applied to column
# `col` of the dummy pool, so ϕ_mtx has the same shape as dummy_x_unlabeled.
dummy_basis_funcs = [np.square, np.sin, np.exp]
t = dummy_x_unlabeled.shape[1]  # number of columns to transform
ϕ_mtx: np.ndarray = np.empty(shape=dummy_x_unlabeled.shape)

for col in range(t):
    basis_func = dummy_basis_funcs[col]
    ϕ_mtx[:, col] = basis_func(dummy_x_unlabeled[:, col])

In [5]:
# Display the transformed matrix; column l should equal dummy_basis_funcs[l]
# applied to column l of dummy_x_unlabeled.
ϕ_mtx

array([[0.04458253, 0.9365737 , 1.1599905 ],
       [2.03775038, 0.92626552, 1.86517135],
       [0.18908655, 0.69560762, 2.76723579],
       [1.58417775, 0.31584435, 0.52172794],
       [0.03685564, 0.25002706, 0.77445888]])

In [6]:
# Recompute each column transform directly, one row of text per column, so the
# printout can be compared by eye against the columns of ϕ_mtx.
comparison_rows = [
    f"dummy_basis_funcs[{col}](dummy_x_unlabeled[:,{col}]) = {dummy_basis_funcs[col](dummy_x_unlabeled[:,col])}"
    for col in range(3)
]
# The leading and trailing newlines frame the three rows with blank lines.
comparison = "\n" + "\n".join(comparison_rows) + "\n"
print(comparison)


dummy_basis_funcs[0](dummy_x_unlabeled[:,0]) = [0.04458253 2.03775038 0.18908655 1.58417775 0.03685564]
dummy_basis_funcs[1](dummy_x_unlabeled[:,1]) = [0.9365737  0.92626552 0.69560762 0.31584435 0.25002706]
dummy_basis_funcs[2](dummy_x_unlabeled[:,2]) = [1.1599905  1.86517135 2.76723579 0.52172794 0.77445888]



## Computing probabilities from b_lambda

In [7]:
# Computing probabilities from b_lambda: divide each entry by the total so the
# resulting vector sums to 1.

dummy_b_lambda_set = np.array([11,21,16])

probs = dummy_b_lambda_set/np.sum(dummy_b_lambda_set)

# Compare the total against 1 with a tolerance rather than `==`: floating-point
# division can leave the sum a few ULPs away from exactly 1.0 for other inputs,
# which would make an exact equality check report False spuriously.
probs_total = np.sum(probs)
print(f"probs={probs}\nsum to 1?: {(probs_total, np.isclose(probs_total, 1.0))}")

probs=[0.22916667 0.4375     0.33333333]
sum to 1?: (1.0, True)


In [8]:
# Sanity check: np.random.choice should refuse a probability vector `p` that
# contains negative entries (this one still sums to 1, so only the sign check
# can trip).
choice_error_message = None  # stays None if np.random.choice raises nothing
try:
    np.random.choice(a=[1,2,3,4,5],size=2,replace=False,p=[-1,1,-1,1,1])
except ValueError as err:
    # Catch only ValueError and echo NumPy's own message, so an unrelated
    # failure is not silently reported as the expected error.
    choice_error_message = str(err)
    print(f"Gives: ValueError: {choice_error_message}")
else:
    print("No ValueError raised: the negative probabilities were accepted")
# Great! Since np.random.choice rejects negative probabilities, a successful call
# helps confirm that my b_set (computed with the full-matrix method) is a valid
# set of numbers.

Gives: ValueError: probabilities are not non-negative


# P-ALICE implementation (imported code from pool_alice_code.py)

## Load data

In [9]:
import os
# Show the current working directory; the relative '../data/...' paths used when
# loading the data are resolved against it.
os.getcwd()

'/home/ethan/02750-automation/automation_final_project/P-ALICE'

In [10]:
# Load the abalone-age arrays from the sibling data directory: X.npy holds the
# pool inputs and y.npy the values later passed to pool_alice as the oracle.
X_unlabeled, y_unlabeled = (
    np.load(npy_path)
    for npy_path in ('../data/abalone_age/X.npy', '../data/abalone_age/y.npy')
)

## make some basis functions

In [11]:
# Version 1 of the basis: one entry per column of X_unlabeled, each entry being
# the same pool_alice_code.identity_basis function.
# The annotation says np.ufunc while identity_basis is a plain function, hence
# the `type: ignore`.
num_features = X_unlabeled.shape[1]
basis_funcs_v1: List[np.ufunc] = [pool_alice_code.identity_basis] * num_features # type: ignore
basis_funcs_v1

[<function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>,
 <function pool_alice_code.identity_basis(x: numpy.ndarray)>]

In [12]:
# Run pool_alice with the v1 basis functions; the original note describes this
# as performing simple ordinary least squares.
# Signature, for reference:
# def pool_alice(X_unlabeled: np.ndarray, oracle: np.ndarray, basis_funcs: List[np.ufunc], n_tr: int, prng_seed:int)->np.ndarray:
n_tr = int(0.30 * len(X_unlabeled))  # 30% of the pool size, truncated to an int
prng_seed_v1 = 1001
weights_v1 = pool_alice_code.pool_alice(
    X_unlabeled=X_unlabeled,
    oracle=y_unlabeled,
    basis_funcs=basis_funcs_v1,
    n_tr=n_tr,
    prng_seed=prng_seed_v1,
)

λ_val=0.0
λ_val=0.1
λ_val=0.2
λ_val=0.30000000000000004
λ_val=0.4
λ_val=0.5
λ_val=0.6000000000000001
λ_val=0.7000000000000001
λ_val=0.8
λ_val=0.9
λ_val=1.0
λ_val=0.4
λ_val=0.41000000000000003
λ_val=0.42000000000000004
λ_val=0.43000000000000005
λ_val=0.44000000000000006
λ_val=0.45000000000000007
λ_val=0.4600000000000001
λ_val=0.4700000000000001
λ_val=0.4800000000000001
λ_val=0.4900000000000001
λ_val=0.5000000000000001
λ_val=0.5100000000000001
λ_val=0.5200000000000001
λ_val=0.5300000000000001
λ_val=0.5400000000000001
λ_val=0.5500000000000002
λ_val=0.5600000000000002
λ_val=0.5700000000000002
λ_val=0.5800000000000002
λ_val=0.5900000000000002
λ_val=0.6000000000000002
max_λ_val=1.0


In [13]:
# Inspect the weight vector returned by pool_alice.
weights_v1

array([ -0.17899819,   3.60659815,  10.02759623,  40.03522819,
        10.40095623, -21.92212008, -16.62536434,   3.26951855])

In [14]:
# Shape check on the weights, for comparison with the number of basis functions
# passed to pool_alice.
weights_v1.shape

(8,)

In [16]:
# Predict for every row of the pool by multiplying the raw inputs by the weights.
# NOTE(review): skipping the basis-function transform here presumably relies on
# every entry of basis_funcs_v1 being an identity map — confirm.
y_pred = np.matmul(X_unlabeled, weights_v1)

In [17]:
# Peek at the predictions computed from X_unlabeled @ weights_v1.
y_pred

array([ 8.34026299,  7.1094937 , 11.10137846, ..., 12.07295876,
        9.31555228, 10.83073077])

In [18]:
# Shape check: y_pred is X_unlabeled @ weights_v1, so expect one prediction per
# row of X_unlabeled.
y_pred.shape

(4177,)

In [19]:
# Shape of the pool as (rows, columns), for comparison with y_pred.shape and
# weights_v1.shape.
X_unlabeled.shape

(4177, 8)