In [1]:
import os
import sys
sys.path.append("../")
from scipy.io import savemat, loadmat
from IPG.src.lossfunction import LogisticLoss
from IPG.src.regularizer_noidentification import NatOG as NatOGNO
from IPG.src.regularizer import NatOG
from IPG.src.solver import IpgSolver
import IPG.src.utils as utils
from IPG.src.params import *
import yaml
import numpy as np

In [2]:
def main(lam_shrink=0.1, datasetName='a9a', dbDir='../../GroupFaRSA/db', grp_size=10, overlap_ratio=0.1):
    """Solve one overlapping-group logistic problem with both NatOG variants.

    The same problem is solved twice with ``IpgSolver`` using the 'yd' inexact
    proximal-gradient option: once with ``NatOG`` from ``IPG.src.regularizer``
    (reported as "with id") and once with ``NatOG`` from
    ``IPG.src.regularizer_noidentification`` (reported as "Without id").
    A summary of both runs is printed, saved to
    ``projnoproj_ckpt/<datasetName>_info.npy`` and returned.

    Parameters
    ----------
    lam_shrink : float
        Multiplier applied to ``lammax``; the regularizer penalty is
        ``lammax * lam_shrink``.
    datasetName : str
        Dataset key. It is looked up in ``fileTypeDict`` (not defined in this
        cell; presumably provided by ``from IPG.src.params import *`` --
        TODO confirm) and used in the cache / checkpoint file names.
    dbDir : str
        Directory passed to ``utils.set_up_xy`` and used to cache ``lammax``.
    grp_size : int
        Requested group size; capped at half the number of columns of ``X``.
    overlap_ratio : float
        Overlap ratio forwarded to ``utils.GenOverlapGroup``.

    Returns
    -------
    dict
        Keys ``lambda_shrink``, ``lambda``, ``grp_size``, ``overlap_ratio`` and
        two sub-dicts ``id`` / ``noid`` holding the ``nz``, ``nnz`` and ``F``
        entries reported by the corresponding solver run.
    """
    loss = 'logit'
    fileType = fileTypeDict[datasetName]
    print("Working on: {}...".format(datasetName))
    X, y = utils.set_up_xy(datasetName, fileType, dbDir)
    # `loss` is fixed to 'logit' above, so the logistic loss is always used;
    # the former `if loss == 'logit'` guard could never be false.
    f = LogisticLoss(X, y, datasetName)
    p = X.shape[1]
    grp_size = min(p // 2, grp_size)
    generator = utils.GenOverlapGroup(
        p, grp_size=grp_size, overlap_ratio=overlap_ratio)
    groups, starts, ends = generator.get_group()

    # lammax is cached on disk per (dataset, group size, overlap ratio).
    lammax_path = f'{dbDir}/lammax-{datasetName}-{grp_size}-{overlap_ratio}.mat'
    if os.path.exists(lammax_path):
        # loadmat returns a 2-D array, hence the [0][0] to get the scalar.
        lammax = loadmat(lammax_path)["lammax"][0][0]
        print(f"loading lammax from: {lammax_path}")
    else:
        lammax = utils.lam_max(X, y, starts, ends, loss)
        savemat(lammax_path, {"lammax": lammax})
        print(f"save lammax to: {lammax_path}")

    # Single source of truth for the penalty used by both regularizers,
    # the log line and the saved summary.
    penalty = lammax * lam_shrink
    r = NatOG(penalty=penalty, groups=groups, weights=None)
    rno = NatOGNO(penalty=penalty, groups=groups, weights=None)

    alpha_init = 1.0
    print(f"Lambda use:{penalty} | overlap ratio:{overlap_ratio} | group size:{grp_size}")
    print("Inexact subprobsolve: yd")
    with open('../IPG/src/config.yaml', "r") as stream:
        config = yaml.load(stream, Loader=yaml.SafeLoader)
    # Override the file defaults so both runs use the 'yd' inexact PG option.
    config['mainsolver']['exact_pg_computation'] = False
    config['mainsolver']['inexact_pg_computation'] = 'yd'
    config['inexactpg']['yd']['gamma'] = 0.1

    solver = IpgSolver(f, r, config)
    info_yd = solver.solve(alpha_init=alpha_init, save_ckpt=False)
    print(f"Yd with id:    #nz:{info_yd['nz']:5d}/#nnz:{info_yd['nnz']:5d}/F:{info_yd['F']:.6e}")
    solver_no = IpgSolver(f, rno, config)
    info_yd_no = solver_no.solve(alpha_init=alpha_init, save_ckpt=False)
    print(f"Yd Without id: #nz:{info_yd_no['nz']:5d}/#nnz:{info_yd_no['nnz']:5d}/F:{info_yd_no['F']:.6e}")

    result = {
        # Store the plain number: `{lam_shrink}` would be a one-element set.
        "lambda_shrink": lam_shrink,
        "lambda": penalty,
        "grp_size": grp_size,
        "overlap_ratio": overlap_ratio,
        "id": {"nz": info_yd['nz'], "nnz": info_yd['nnz'], "F": info_yd['F']},
        "noid": {"nz": info_yd_no['nz'], "nnz": info_yd_no['nnz'], "F": info_yd_no['F']},
    }
    # Make sure the checkpoint directory exists so a fresh checkout does not
    # lose the finished solver runs to a FileNotFoundError.
    os.makedirs("projnoproj_ckpt", exist_ok=True)
    # np.save pickles the dict into a 0-d object array; reload it with
    # np.load(..., allow_pickle=True).item().
    np.save("projnoproj_ckpt/{}_info.npy".format(datasetName), result)
    return result

In [4]:
# Run the comparison on the a9a dataset.
result = main(
    lam_shrink=0.1,
    datasetName='a9a',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: a9a...
loading lammax from: ../../GroupFaRSA/db/lammax-a9a-10-0.1.mat
Lambda use:0.013458112455945873 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:   12/#nnz:    2/F:5.083373e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:   14/F:5.083373e-01


In [7]:
# Run the comparison on the colon_cancer dataset.
result = main(
    lam_shrink=0.1,
    datasetName='colon_cancer',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: colon_cancer...
loading lammax from: ../../GroupFaRSA/db/lammax-colon_cancer-10-0.1.mat
Lambda use:0.01775094387544969 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:  213/#nnz:   10/F:3.362699e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    1/#nnz:  222/F:3.362699e-01


In [None]:
# Run the comparison on the duke dataset.
result = main(
    lam_shrink=0.1,
    datasetName='duke',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

In [4]:
# Run the comparison on the w8a dataset.
result = main(
    lam_shrink=0.1,
    datasetName='w8a',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: w8a...
loading lammax from: ../../GroupFaRSA/db/lammax-w8a-10-0.1.mat
Lambda use:0.006686974129108611 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:   24/#nnz:   10/F:4.290289e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:   34/F:4.290289e-01


In [5]:
# The summary was saved as a pickled dict inside a 0-d object array, so it
# needs allow_pickle=True and .item() to get the dict back.
np.load(
    "projnoproj_ckpt/w8a_info.npy",
    allow_pickle=True,
).item()

{'lambda_shrink': {0.1},
 'lambda': 0.006686974129108611,
 'grp_size': 10,
 'overlap_ratio': 0.1,
 'id': {'nz': 24, 'nnz': 10, 'F': 0.4290289109838439},
 'noid': {'nz': 0, 'nnz': 34, 'F': 0.42902891100442325}}

In [None]:
# datasetName = 'w8a'
# lam_shrink = 0.1;  dbDir = '../../GroupFaRSA/db'; grp_size = 10; overlap_ratio = 0.1
# loss = 'logit'
# fileType = fileTypeDict[datasetName]
# print("Working on: {}...".format(datasetName))
# X, y = utils.set_up_xy(datasetName, fileType, dbDir)
# if loss == 'logit':
#     f = LogisticLoss(X, y, datasetName)
# p = X.shape[1]
# grp_size = min(p // 2, grp_size)
# generator = utils.GenOverlapGroup(
#     p, grp_size=grp_size, overlap_ratio=overlap_ratio)
# groups, starts, ends = generator.get_group()

# lammax_path = f'{dbDir}/lammax-{datasetName}-{grp_size}-{overlap_ratio}.mat'
# if os.path.exists(lammax_path):
#     lammax = loadmat(lammax_path)["lammax"][0][0]
#     print(f"loading lammax from: {lammax_path}")
# else:
#     lammax = utils.lam_max(X, y, starts, ends, loss)
#     savemat(lammax_path, {"lammax": lammax})
#     print(f"save lammax to: {lammax_path}")

# r = NatOG(penalty=lammax * lam_shrink, groups=groups, weights=None)
# rno = NatOGNO(penalty=lammax * lam_shrink, groups=groups, weights=None)

# alpha_init = 1.0

# print(f"Lambda use:{lammax * lam_shrink} | overlap ratio:{overlap_ratio} | group size:{grp_size}")

# print("Inexact subprobsolve: yd")
# with open('../IPG/src/config.yaml', "r") as stream:
#     config = yaml.load(stream, Loader=yaml.SafeLoader)
# config['mainsolver']['exact_pg_computation'] = False
# config['mainsolver']['inexact_pg_computation'] = 'yd'
# config['inexactpg']['yd']['gamma'] = 0.1    

# solver = IpgSolver(f, r, config)
# info_yd = solver.solve(alpha_init=alpha_init,  save_ckpt=False)

# solver_no = IpgSolver(f, rno, config)
# info_yd_no = solver_no.solve(alpha_init=alpha_init,  save_ckpt=False)

# print(f"Yd with id:    #nz:{info_yd['nz']:5d}/#nnz:{info_yd['nnz']:5d}/F:{info_yd['F']:.6e}\n   Without id: #nz:{info_yd_no['nz']:5d}/#nnz:{info_yd_no['nnz']:5d}/F:{info_yd_no['F']:.6e}")