In [1]:
import glob
import os
import sys
import warnings
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import rc
rc('text', usetex=True)

In [2]:
def create_paths(logdir='../IPG/test/log', date='09_10_2021', inexact_type='lee', loss='logit',
                 tol = 1e-6, lam_shrink=[0.1], group_size=[10], overlap_ratio=[0.1], excludes=None, **kwargs):
    """Collect the ``.npy`` checkpoint paths for a grid of experiment settings.

    For every combination of ``lam_shrink`` x ``group_size`` x ``overlap_ratio``
    the directory
    ``{logdir}/{date}/{inexact_type}/{loss}/ckpt/{tol}/{lam}_{grp}_{r}[_{extra}...]``
    is globbed for ``*.npy`` files, where each ``extra`` is the value of one
    keyword argument in ``kwargs`` (appended in the order they were passed).

    Parameters
    ----------
    logdir, date, inexact_type, loss, tol
        Components of the checkpoint directory path (formatted with ``str``).
    lam_shrink, group_size, overlap_ratio : iterable
        Grid values; one directory is searched per combination.
    excludes : iterable of str, optional
        Dataset names to drop. A file's dataset name is its base name without
        the extension and without the final ``_``-separated token, so
        ``colon_cancer_info.npy`` belongs to dataset ``colon_cancer``.
    **kwargs
        Extra settings whose values are appended to the directory name.

    Returns
    -------
    list of str
        Matching paths, sorted within each directory, in grid order.
    """
    # A set makes membership tests cheap and tolerates repeated names
    # (the previous list.remove approach raised ValueError on duplicates).
    excluded = set(excludes) if excludes is not None else set()
    # Only the values of the extra settings take part in the directory name.
    extra_suffix = ''.join(f'_{value}' for value in kwargs.values())

    list_all_npy_path = []
    for lam in lam_shrink:
        for grp in group_size:
            for r in overlap_ratio:
                minimal_dir = f'{logdir}/{date}/{inexact_type}/{loss}/ckpt/{tol}/{lam}_{grp}_{r}{extra_suffix}'
                for path in sorted(glob.glob(f'{minimal_dir}/*.npy')):
                    # Strip the extension and the trailing "_info"-style token;
                    # taking only the first "_" token would truncate dataset
                    # names that themselves contain underscores.
                    stem = os.path.splitext(os.path.basename(path))[0]
                    datasetname = stem.rsplit('_', 1)[0]
                    if datasetname not in excluded:
                        list_all_npy_path.append(path)
    return list_all_npy_path
def load_df_from_paths(list_all_npy_path, cols=['datasetid', 'status', 'time', 'iteration', 'subits',
                                                'F', 'optim', 'nz', 'nnz']):
    """Load run-info records from ``.npy`` files into one DataFrame.

    Each file is read with ``np.load(...).item()`` and the resulting objects
    are stacked with ``pd.DataFrame`` (presumably one dict per run -- verify
    against the code that writes the checkpoints). A one-line summary per
    distinct ``status`` value is printed.

    Parameters
    ----------
    list_all_npy_path : iterable of str
        Paths of the ``.npy`` files to load.
    cols : sequence of str
        Columns to keep, in order. If the selection fails and ``'F'`` is
        requested, the lookup is retried with ``'f'`` in its place.

    Returns
    -------
    pandas.DataFrame
        One row per file, restricted to the selected columns.

    Raises
    ------
    KeyError
        If the requested columns (after the ``'F'`` -> ``'f'`` retry, when
        applicable) are not all present.
    """
    # SECURITY: allow_pickle=True unpickles arbitrary objects; only use this
    # on checkpoint files that come from a trusted source.
    info_lst = [np.load(path, allow_pickle=True).item() for path in list_all_npy_path]
    records = pd.DataFrame(info_lst)

    selected = list(cols)
    try:
        df = records[selected]
    except KeyError:
        # The lower-case retry only makes sense when 'F' was requested;
        # otherwise surface the original KeyError instead of a ValueError
        # from list.index.
        if 'F' not in selected:
            raise
        selected[selected.index('F')] = 'f'
        df = records[selected]

    # summarize status
    total = df.shape[0]
    formatter = f'{len(str(total))}d'
    for code in df['status'].unique():
        count = int((df['status'] == code).sum())
        # The ':2d' format requires integer status codes.
        print(f" {count:{formatter}}/{total} instances terminate with status: {code:2d}")
    return df

In [3]:
def get_all(logdir, date, inexact_type, loss, tol,
            lam_shrink, group_size, overlap_ratio,
            excludes=None, param_lst=None):
    """Load the results of one inexact strategy for several parameter values.

    For each ``p`` in ``param_lst`` the checkpoint paths are collected with
    ``create_paths(..., p=p)`` and loaded with ``load_df_from_paths``. The
    algorithm label and, when nothing is found, the marker ``' empty'`` are
    printed along the way.

    Parameters
    ----------
    logdir, date, inexact_type, loss, tol, lam_shrink, group_size, overlap_ratio, excludes
        Forwarded unchanged to ``create_paths``.
    param_lst : iterable, optional
        Parameter values to load. ``None`` (the default) is treated as an
        empty list.

    Returns
    -------
    dict
        Maps ``f'{inexact_type}-{p}'`` to the loaded DataFrame, or to ``None``
        when no checkpoint file was found for that parameter value.
    """
    algo_df_dict = {}
    # Iterating over the default None would raise TypeError; treat it as
    # "no parameter values requested".
    if param_lst is None:
        param_lst = []
    for p in param_lst:
        algorithm = f'{inexact_type}-{p}'
        print(f'{algorithm}')
        paths = create_paths(logdir, date, inexact_type, loss, tol,
                             lam_shrink, group_size, overlap_ratio, excludes, p=p)
        if not paths:
            print(' empty')
            df = None
        else:
            df = load_df_from_paths(paths)
        algo_df_dict[algorithm] = df
    return algo_df_dict

In [4]:
# Load the 'lee' strategy results for every parameter value in the grid.
algo_df_dict = get_all(
    logdir='../IPG/test/log',
    date='09_10_2021',
    inexact_type='lee',
    loss='logit',
    tol=1e-6,
    lam_shrink=[0.1, 0.01],
    group_size=[10, 100],
    overlap_ratio=[0.1, 0.2, 0.3],
    excludes=None,
    param_lst=[0.1, 0.2, 0.3, 0.4, 0.5],
)

lee-0.1
 59/59 instances terminate with status: 404
lee-0.2
 60/60 instances terminate with status: 404
lee-0.3
 60/60 instances terminate with status: 404
lee-0.4
 59/59 instances terminate with status: 404
lee-0.5
 55/55 instances terminate with status: 404


In [5]:
# algo_df_dict['lee-0.5'].sort_values('datasetid')

In [6]:
# ls ../IPG/test/log/09_10_2021/lee/logit/ckpt/1e-06/0.1_10_0.2_0.5/ 

In [7]:
# cat ../IPG/test/log/09_10_2021/lee/logit/logfile/0.1_10_0.2_0.5/colon_cancer.txt

In [8]:
# Load the 'yd' strategy results; this rebinds algo_df_dict.
algo_df_dict = get_all(
    logdir='../IPG/test/log',
    date='09_10_2021',
    inexact_type='yd',
    loss='logit',
    tol=1e-6,
    lam_shrink=[0.1, 0.01],
    group_size=[10, 100],
    overlap_ratio=[0.1, 0.2, 0.3],
    excludes=None,
    param_lst=[0.1, 0.2, 0.3, 0.4, 0.5],
)

yd-0.1
 50/50 instances terminate with status: 404
yd-0.2
 51/51 instances terminate with status: 404
yd-0.3
 54/54 instances terminate with status: 404
yd-0.4
 54/54 instances terminate with status: 404
yd-0.5
 55/55 instances terminate with status: 404


In [9]:
# Show the records stored under 'yd-0.1', ordered by dataset id.
# NOTE(review): algo_df_dict is rebound by every get_all cell, so this only
# works when the 'yd' load was the most recent one to run.
algo_df_dict['yd-0.1'].sort_values('datasetid')

Unnamed: 0,datasetid,status,time,iteration,subits,F,optim,nz,nnz
26,a9a_0.01_10_0.1,404,26.903623,1473,1474,0.366938,9.720507e-07,7,7
31,a9a_0.01_10_0.2,404,34.651336,1877,1878,0.369733,9.801378e-07,8,8
36,a9a_0.01_10_0.3,404,24.431704,1317,1318,0.375264,9.997245e-07,8,10
0,a9a_0.1_10_0.1,404,3.499795,91,93,0.508337,8.694658e-07,12,2
6,a9a_0.1_10_0.2,404,6.186013,238,240,0.530008,8.125296e-07,11,5
12,a9a_0.1_10_0.3,404,3.385603,82,84,0.521574,7.233579e-07,16,2
41,colon_cancer_0.01_100_0.1,404,18.114334,4080,4426,0.067146,4.919811e-06,13,10
44,colon_cancer_0.01_100_0.2,404,20.448548,4090,4975,0.070428,4.772045e-06,14,11
47,colon_cancer_0.01_100_0.3,404,24.38137,3434,8643,0.08072,4.295093e-06,13,16
27,colon_cancer_0.01_10_0.1,404,32.274284,3146,3534,0.068706,4.79277e-06,201,22


In [14]:
ls ../IPG/test/log/09_10_2021/yd/logit/ckpt/1e-06/0.1_100_0.2_0.1/ 

colon_cancer_info.npy


In [13]:

cat ../IPG/test/log/09_10_2021/yd/logit/logfile/0.1_100_0.2_0.1/w8a.txt

cat: ../IPG/test/log/09_10_2021/yd/logit/logfile/0.1_100_0.2_0.1/w8a.txt: No such file or directory


In [36]:
# Load the 'schimdt' strategy results; this rebinds algo_df_dict.
# The spelling 'schimdt' is kept as-is because it is used to build the log path.
algo_df_dict = get_all(
    logdir='../IPG/test/log',
    date='09_10_2021',
    inexact_type='schimdt',
    loss='logit',
    tol=1e-6,
    lam_shrink=[0.1, 0.01],
    group_size=[10, 100],
    overlap_ratio=[0.1, 0.2, 0.3],
    excludes=None,
    param_lst=[1e0, 1e1, 1e2, 1e3, 1e4],
)

schimdt-1.0
 57/57 instances terminate with status: 404
schimdt-10.0
 60/60 instances terminate with status: 404
schimdt-100.0
 60/60 instances terminate with status: 404
schimdt-1000.0
 60/60 instances terminate with status: 404
schimdt-10000.0
 60/60 instances terminate with status: 404


In [33]:
ls ../IPG/test/log/09_10_2021/lee/logit/ckpt/1e-06/0.1_10_0.3_0.1/ 

a9a_info.npy           duke_info.npy  mushrooms_info.npy
colon_cancer_info.npy  leu_info.npy


In [25]:
# Inspect the 'optim' and 'status' entries of one checkpoint (tol 1e-06).
path = "../IPG/test/log/09_10_2021/lee/logit/ckpt/1e-06/0.01_100_0.1_0.1/w8a_info.npy"
# Load once and pass a real boolean: the string "True" only worked because any
# non-empty string is truthy. allow_pickle unpickles objects -- trusted files only.
w8a_info = np.load(path, allow_pickle=True).item()
w8a_info['optim'], w8a_info['status']

(7.485627446075131e-07, 404)

In [15]:
# Inspect the 'optim' entry of the same run saved under tol 1e-05.
path = "../IPG/test/log/09_10_2021/lee/logit/ckpt/1e-05/0.01_100_0.1_0.1/w8a_info.npy"
# Pass a real boolean; the string "True" only worked because it is truthy.
np.load(path, allow_pickle=True).item()['optim']

6.23955952580761e-06

In [17]:
# Inspect the 'optim' entry of the same run saved under tol 0.0001.
path = "../IPG/test/log/09_10_2021/lee/logit/ckpt/0.0001/0.01_100_0.1_0.1/w8a_info.npy"
# Pass a real boolean; the string "True" only worked because it is truthy.
np.load(path, allow_pickle=True).item()['optim']

6.563115869618423e-06

In [24]:
cat ../IPG/test/log/09_10_2021/yd/logit/logfile/0.01_10_0.1_0.1/a9a.txt


       Inexact Proximal Gradient Type Method   (version:0.1 (2021-08-29))  
Problem Summary: Excuted at 2021-9-10 2:34
Dataset:./log/09_10_2021/yd/logit/logfile/0.01_10_0.1_0.1/a9a
Data Size:......................................n=32561, p=123
Loss Function:..................................Logistic
Regularizer:....................................Overlapping Group L1
Penalty Parameter:..............................lambda=0.0013
Number of groups:..............................14

Algorithm Parameters:
Termination Conditions: accuracy: 1e-06 | time limits:7200 | iteration limits:10000
Inexact Strategy: yd: gamma:1.000e-01
Lineserch Parameters: eta:1e-05 | xi:0.5 | beta:1.1
Proximal Stepsize update: heuristic
****************************************************************************************************
 Iters.   Obj.    alphak   |  aoptim   its.   Flag  Stepsize  baks    Gap       tarGap    #pz  #pnz | baks    stepsize  |dtaken| |
     0 6.931e-01 1.000e+00 | 6.613e-01    1 desired 