In [14]:
import os
import sys
sys.path.append("../")
from scipy.io import savemat, loadmat
from IPG.src.lossfunction import LogisticLoss
from IPG.src.regularizer_noidentification import NatOG as NatOGNO
from IPG.src.regularizer import NatOG
from IPG.src.solver import IpgSolver
import IPG.src.utils as utils
from IPG.src.params import *
import yaml
import numpy as np
import pandas as pd

In [2]:
def main(lam_shrink=0.1, datasetName='a9a', dbDir='../../GroupFaRSA/db', grp_size=10, overlap_ratio=0.1):
    """Solve one overlapping-group logistic problem with both NatOG variants and save a summary.

    The dataset is loaded through ``utils.set_up_xy``, overlapping groups are
    generated with ``utils.GenOverlapGroup``, and the penalty is set to
    ``lammax * lam_shrink``. ``lammax`` is cached as a ``.mat`` file under
    ``dbDir`` and only recomputed when that file is missing. The same problem
    is then solved twice with ``IpgSolver`` (inexact 'yd' subproblem solver,
    gamma = 0.1): once with ``NatOG`` from ``regularizer`` and once with the
    ``NatOG`` from ``regularizer_noidentification``.

    Args:
        lam_shrink: Factor multiplied with ``lammax`` to obtain the penalty.
        datasetName: Key into ``fileTypeDict``; also used in cache and
            checkpoint file names.
        dbDir: Directory passed to ``utils.set_up_xy`` and used for the
            ``lammax`` cache file.
        grp_size: Requested group size; capped at ``p // 2`` where ``p`` is the
            number of columns of ``X``.
        overlap_ratio: Overlap ratio forwarded to ``utils.GenOverlapGroup``.

    Returns:
        dict with keys ``lambda_shrink``, ``lambda``, ``grp_size``,
        ``overlap_ratio`` and two sub-dicts ``id`` / ``noid`` holding the
        ``nz``, ``nnz`` and ``F`` entries reported by each solver run
        (presumably zero-group count, nonzero-group count and final objective
        -- confirm against ``IpgSolver.solve``).

    Side effects:
        Prints progress, may write the ``lammax`` cache file, and writes the
        returned dict to ``projnoproj_ckpt/<datasetName>_info.npy``.
    """
    loss = 'logit'
    fileType = fileTypeDict[datasetName]
    print("Working on: {}...".format(datasetName))
    X, y = utils.set_up_xy(datasetName, fileType, dbDir)
    if loss == 'logit':
        f = LogisticLoss(X, y, datasetName)
    else:
        # Fail loudly instead of hitting an unbound `f` further down.
        raise ValueError(f"Unsupported loss: {loss}")
    p = X.shape[1]
    grp_size = min(p // 2, grp_size)
    generator = utils.GenOverlapGroup(
        p, grp_size=grp_size, overlap_ratio=overlap_ratio)
    groups, starts, ends = generator.get_group()

    # lammax is cached per (dataset, group size, overlap ratio) and reused when present.
    lammax_path = f'{dbDir}/lammax-{datasetName}-{grp_size}-{overlap_ratio}.mat'
    if os.path.exists(lammax_path):
        # loadmat returns 2-D arrays, hence the [0][0] to recover the scalar.
        lammax = loadmat(lammax_path)["lammax"][0][0]
        print(f"loading lammax from: {lammax_path}")
    else:
        lammax = utils.lam_max(X, y, starts, ends, loss)
        savemat(lammax_path, {"lammax": lammax})
        print(f"save lammax to: {lammax_path}")

    penalty = lammax * lam_shrink
    r = NatOG(penalty=penalty, groups=groups, weights=None)
    rno = NatOGNO(penalty=penalty, groups=groups, weights=None)

    alpha_init = 1.0
    print(f"Lambda use:{penalty} | overlap ratio:{overlap_ratio} | group size:{grp_size}")
    print("Inexact subprobsolve: yd")
    with open('../IPG/src/config.yaml', "r") as stream:
        config = yaml.load(stream, Loader=yaml.SafeLoader)
    # Override the file defaults so both runs use the inexact 'yd' subproblem solver.
    config['mainsolver']['exact_pg_computation'] = False
    config['mainsolver']['inexact_pg_computation'] = 'yd'
    config['inexactpg']['yd']['gamma'] = 0.1

    solver = IpgSolver(f, r, config)
    info_yd = solver.solve(alpha_init=alpha_init, save_ckpt=False)
    print(f"Yd with id:    #nz:{info_yd['nz']:5d}/#nnz:{info_yd['nnz']:5d}/F:{info_yd['F']:.6e}")
    solver_no = IpgSolver(f, rno, config)
    info_yd_no = solver_no.solve(alpha_init=alpha_init, save_ckpt=False)
    print(f"Yd Without id: #nz:{info_yd_no['nz']:5d}/#nnz:{info_yd_no['nnz']:5d}/F:{info_yd_no['F']:.6e}")

    def _summary(run_info):
        # Keep only the fields that go into the comparison table.
        return {"nz": run_info['nz'], "nnz": run_info['nnz'], "F": run_info['F']}

    result = {
        # Store the scalar itself; `{lam_shrink}` would create a one-element set.
        "lambda_shrink": lam_shrink,
        "lambda": penalty,
        "grp_size": grp_size,
        "overlap_ratio": overlap_ratio,
        "id": _summary(info_yd),
        "noid": _summary(info_yd_no),
    }
    # np.save does not create missing parent directories.
    ckpt_dir = "projnoproj_ckpt"
    os.makedirs(ckpt_dir, exist_ok=True)
    np.save(f"{ckpt_dir}/{datasetName}_info.npy", result)
    return result

In [3]:
# Run the with-id / without-id comparison on a9a.
result = main(
    lam_shrink=0.1,
    datasetName='a9a',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: a9a...
loading lammax from: ../../GroupFaRSA/db/lammax-a9a-10-0.1.mat
Lambda use:0.013458112455945873 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:   12/#nnz:    2/F:5.083373e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:   14/F:5.083373e-01


In [4]:
# Run the with-id / without-id comparison on colon_cancer.
result = main(
    lam_shrink=0.1,
    datasetName='colon_cancer',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: colon_cancer...
loading lammax from: ../../GroupFaRSA/db/lammax-colon_cancer-10-0.1.mat
Lambda use:0.01775094387544969 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:  213/#nnz:   10/F:3.362699e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    1/#nnz:  222/F:3.362699e-01


In [5]:
# Run the with-id / without-id comparison on duke.
result = main(
    lam_shrink=0.1,
    datasetName='duke',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: duke...
loading lammax from: ../../GroupFaRSA/db/lammax-duke-10-0.1.mat
Lambda use:0.01619798453280325 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:  779/#nnz:   13/F:2.469097e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    2/#nnz:  790/F:2.469097e-01


In [6]:
# Run the with-id / without-id comparison on leu.
result = main(
    lam_shrink=0.1,
    datasetName='leu',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: leu...
loading lammax from: ../../GroupFaRSA/db/lammax-leu-10-0.1.mat
Lambda use:0.020514136347770287 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:  781/#nnz:   11/F:2.586271e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:  792/F:2.586274e-01


In [7]:
# Run the with-id / without-id comparison on mushrooms.
result = main(
    lam_shrink=0.1,
    datasetName='mushrooms',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: mushrooms...
loading lammax from: ../../GroupFaRSA/db/lammax-mushrooms-10-0.1.mat
Lambda use:0.009528306037321311 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:   10/#nnz:    3/F:3.161381e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:   13/F:3.161381e-01


In [8]:
# Run the with-id / without-id comparison on w8a.
result = main(
    lam_shrink=0.1,
    datasetName='w8a',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: w8a...
loading lammax from: ../../GroupFaRSA/db/lammax-w8a-10-0.1.mat
Lambda use:0.006686974129108611 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:   24/#nnz:   10/F:4.290289e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    0/#nnz:   34/F:4.290289e-01


In [9]:
# Run the with-id / without-id comparison on gisette.
result = main(
    lam_shrink=0.1,
    datasetName='gisette',
    dbDir='../../GroupFaRSA/db',
    grp_size=10,
    overlap_ratio=0.1,
)

Working on: gisette...
save lammax to: ../../GroupFaRSA/db/lammax-gisette-10-0.1.mat
Lambda use:0.012003455878695352 | overlap ratio:0.1 | group size:10
Inexact subprobsolve: yd

Exit: Optimal Solution Found

Yd with id:    #nz:  536/#nnz:   20/F:4.026708e-01

Exit: Early stoppiong. (2 correction steps cap reached).

Yd Without id: #nz:    2/#nnz:  554/F:4.026711e-01


In [11]:
# result = main(lam_shrink = 0.1, datasetName = 'madelon', dbDir = '../../GroupFaRSA/db', grp_size = 10, overlap_ratio = 0.1)

In [12]:
# Gather the per-dataset checkpoints written by main() into column lists for the summary table.
datasets = ['a9a', 'colon_cancer', 'duke', 'gisette', 'leu', 'mushrooms', 'w8a']
info = {
    column: []
    for column in ('dataset', 'lambda', 'group size', 'overlap ratio',
                   '#nz (our/PGA)', '#nnz (our/PGA)', 'F (our/PGA)')
}
for dataset in datasets:
    # The checkpoint was saved from a dict, so it is loaded with allow_pickle and unwrapped via .item().
    result = np.load(f"projnoproj_ckpt/{dataset}_info.npy", allow_pickle=True).item()
    with_id, without_id = result['id'], result['noid']
    row = {
        'dataset': dataset,
        'lambda': result['lambda'],
        'group size': result['grp_size'],
        'overlap ratio': result['overlap_ratio'],
        # Paired cells are rendered as "<with id>/<without id>".
        '#nz (our/PGA)': f"{with_id['nz']}/{without_id['nz']}",
        '#nnz (our/PGA)': f"{with_id['nnz']}/{without_id['nnz']}",
        'F (our/PGA)': f"{with_id['F']:7f}/{without_id['F']:7f}",
    }
    for column, value in row.items():
        info[column].append(value)

In [19]:
# Build the summary table, then swap the short dataset keys for the display names shown in the LaTeX table.
df = pd.DataFrame.from_dict(info)
# Relabel by key rather than by position so the names stay correct if `datasets` is reordered or extended;
# keys without an entry here are kept unchanged.
display_names = {
    'colon_cancer': 'colon-cancer',
    'duke': 'duke breast-cancer',
    'leu': 'leukemia',
}
df['dataset'] = df['dataset'].map(lambda name: display_names.get(name, name))

In [21]:
# Emit the whole summary table as LaTeX; selecting only the 'dataset' column would not
# reproduce the seven-column table recorded as this cell's output.
print(df.to_latex(index=False))

\begin{tabular}{lrrrlll}
\toprule
            dataset &    lambda &  group size &  overlap ratio & \#nz (our/PGA) & \#nnz (our/PGA) &        F (our/PGA) \\
\midrule
                a9a &  0.013458 &          10 &            0.1 &          12/0 &           2/14 &  0.508337/0.508337 \\
       colon-cancer &  0.017751 &          10 &            0.1 &         213/1 &         10/222 &  0.336270/0.336270 \\
 duke breast-cancer &  0.016198 &          10 &            0.1 &         779/2 &         13/790 &  0.246910/0.246910 \\
            gisette &  0.012003 &          10 &            0.1 &         536/2 &         20/554 &  0.402671/0.402671 \\
           leukemia &  0.020514 &          10 &            0.1 &         781/0 &         11/792 &  0.258627/0.258627 \\
          mushrooms &  0.009528 &          10 &            0.1 &          10/0 &           3/13 &  0.316138/0.316138 \\
                w8a &  0.006687 &          10 &            0.1 &          24/0 &          10/34 &  0.429029/0.429029 \\
\bottomrule
\end{tabular}

In [None]:
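# NOTE(review): scratch copy of main()'s body unrolled for the 'w8a' dataset; it duplicates main() and is kept commented out.
# datasetName = 'w8a'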
# lam_shrink = 0.1;  dbDir = '../../GroupFaRSA/db'; grp_size = 10; overlap_ratio = 0.1
# loss = 'logit'
# fileType = fileTypeDict[datasetName]
# print("Working on: {}...".format(datasetName))
# X, y = utils.set_up_xy(datasetName, fileType, dbDir)
# if loss == 'logit':
#     f = LogisticLoss(X, y, datasetName)
# p = X.shape[1]
# grp_size = min(p // 2, grp_size)
# generator = utils.GenOverlapGroup(
#     p, grp_size=grp_size, overlap_ratio=overlap_ratio)
# groups, starts, ends = generator.get_group()

# lammax_path = f'{dbDir}/lammax-{datasetName}-{grp_size}-{overlap_ratio}.mat'
# if os.path.exists(lammax_path):
#     lammax = loadmat(lammax_path)["lammax"][0][0]
#     print(f"loading lammax from: {lammax_path}")
# else:
#     lammax = utils.lam_max(X, y, starts, ends, loss)
#     savemat(lammax_path, {"lammax": lammax})
#     print(f"save lammax to: {lammax_path}")

# r = NatOG(penalty=lammax * lam_shrink, groups=groups, weights=None)
# rno = NatOGNO(penalty=lammax * lam_shrink, groups=groups, weights=None)

# alpha_init = 1.0

# print(f"Lambda use:{lammax * lam_shrink} | overlap ratio:{overlap_ratio} | group size:{grp_size}")

# print("Inexact subprobsolve: yd")
# with open('../IPG/src/config.yaml', "r") as stream:
#     config = yaml.load(stream, Loader=yaml.SafeLoader)
# config['mainsolver']['exact_pg_computation'] = False
# config['mainsolver']['inexact_pg_computation'] = 'yd'
# config['inexactpg']['yd']['gamma'] = 0.1    

# solver = IpgSolver(f, r, config)
# info_yd = solver.solve(alpha_init=alpha_init,  save_ckpt=False)

# solver_no = IpgSolver(f, rno, config)
# info_yd_no = solver_no.solve(alpha_init=alpha_init,  save_ckpt=False)

# print(f"Yd with id:    #nz:{info_yd['nz']:5d}/#nnz:{info_yd['nnz']:5d}/F:{info_yd['F']:.6e}\n   Without id: #nz:{info_yd_no['nz']:5d}/#nnz:{info_yd_no['nnz']:5d}/F:{info_yd_no['F']:.6e}")