# Example usage

To use `okridge` in a project:

## Import Necessary Packages

In [2]:
import os
import numpy as np
from okridge.tree import BNBTree
from okridge.utils import download_file_from_google_drive
from pathlib import Path

## Download Sample Synthetic Data

In [4]:
# Local path where the sample synthetic dataset is cached.
data_file_path = "../tests/Synthetic_n=6000_p=3000_k=10_rho=0.5_snr=5.0_seed=0.npy"

# Download only when no local copy exists, so re-running this cell does not
# fetch the file again.
if not os.path.isfile(data_file_path):
    download_file_from_google_drive('1lizlnufRBmEzMNpr0OlgE-P7otC8opkX', data_file_path)

# SECURITY: allow_pickle=True unpickles the file contents, and unpickling can
# execute arbitrary code. Only load files obtained from a source you trust.
loaded_data = np.load(data_file_path, allow_pickle=True)

# .item() unwraps the single stored object (a mapping with "X" and "y"
# entries). Unwrap it once and index directly, so a missing entry fails right
# here with a KeyError instead of yielding None and failing later.
dataset = loaded_data.item()
X, y = dataset["X"], dataset["y"]

print("Shape of feature matrix is", X.shape)
print("There are {} number of samples".format(len(y)))

Shape of feature matrix is (6000, 3000)
There are 6000 number of samples


## Apply OKRidge Software

In [8]:
# --- Solver settings ---
# cardinality constraint
k = 10
# l2 regularization parameter
lambda2 = 0.1
# optimality gap tolerance
gap_tol = 1e-4
# print out the progress
verbose = True
# time limit in seconds
time_limit = 180

# Construct the BNBTree optimizer on the data with the chosen l2 penalty.
BnB_optimizer = BNBTree(X=X, y=y, lambda2=lambda2)

# solve() returns five values, unpacked here in order.
(
    upper_bound,
    betas,
    optimality_gap,
    max_lower_bound,
    running_time,
) = BnB_optimizer.solve(
    k=k,
    gap_tol=gap_tol,
    verbose=verbose,
    time_limit=time_limit,
)

we take lambda to be 513.4124044068881
max_memory_GB is larger than available memory. Using all available memory (2.2282333374023438 GB) in the machine
max number of saved solutions is 2226571
initializing took 0.002254009246826172 seconds
0 levels of depth used
there are 0 nodes left in the bfs queue
l: 0, (d: -659444.6478942618, u: -657798.0645024217, g: 0.002503174576966306, t: 15.029995679855347 s
there are 0 nodes left in the bfs queue
l: 1, (d: -659387.8270570671, u: -657798.0645024217, g: 0.0024167942115305044, t: 26.396912336349487 s
there are 0 nodes left in the bfs queue
l: 2, (d: -659317.8396916108, u: -657798.0645024217, g: 0.0023103977819374216, t: 39.599390268325806 s
there are 0 nodes left in the bfs queue
l: 3, (d: -659250.2763933846, u: -657798.0645024217, g: 0.002207686476033345, t: 51.607298135757446 s
there are 0 nodes left in the bfs queue
l: 4, (d: -659169.0430832554, u: -657798.0645024217, g: 0.002084193698366704, t: 64.0999104976654 s
there are 0 nodes left in t

In [11]:
# Summarize the solver output: loss, selected coefficients, gap, and timing.

# Positions along the first axis where betas is nonzero.
nonzero_indices = np.nonzero(betas != 0)[0]
# Scale the gap by 100 so it is displayed as a percentage.
gap_percent = optimality_gap * 100

print("Loss of best solution is", upper_bound)
print("indices of nonzero coefficients are", nonzero_indices)
print("Optimality gap is {}%".format(gap_percent))
print("Running time is {} seconds".format(running_time))

Loss of best solution is -657798.0645024217
indices of nonzero coefficients are [   0  300  600  900 1200 1500 1800 2100 2400 2700]
Optimality gap is 0.0%
Running time is 127.77510619163513 seconds
