In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras

from sklearn.decomposition import IncrementalPCA, MiniBatchNMF
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score, make_scorer

import random
import pickle
import pydicom
import matplotlib.pyplot as plt


import data_splitter as ds
import data_loader as dl
import feature_extraction as fex
import run_model as rm

In [2]:
# explicitly require this experimental feature
from sklearn.experimental import enable_halving_search_cv # noqa
# now you can import normally from model_selection
from sklearn.model_selection import HalvingGridSearchCV, HalvingRandomSearchCV

## Image Resolution 256 x 256

In [4]:
X = np.load('sample_flatten_X.npy')
y = np.load('sample_flatten_y.npy')

In [5]:
from collections import Counter
Counter(y)

Counter({0.0: 8570, 1.0: 181})

In [7]:
nmf = MiniBatchNMF(
    max_iter = 100000,
    tol = 1e-4, #control early stopping based on changes in H
    max_no_improvement = 3, #control early stopping based on smoothed cost function
    forget_factor = 1, #rescaling factor of past
    fresh_restarts = False, #completely solve for W at each step 
    random_state = 42
    )

logit = LogisticRegression(
    warm_start = True,
    solver = 'saga',
    max_iter = 20,
    l1_ratio = 0.5,
    random_state = 42
    )

pipe = Pipeline([
    ('feature_extraction', nmf),
    ('classify', logit)
])

param_grid = {
    'feature_extraction__n_components': np.arange(2, 51, step=1),
    'feature_extraction__init': ['random', 'nndsvd', 'nndsvda'],
    'feature_extraction__beta_loss': ['frobenius', 'kullback-leibler'],
    'feature_extraction__alpha_W': np.arange(0, 1, step=0.1), #np.logspace(0, -4, num=5, base=10, endpoint=True),
    'feature_extraction__l1_ratio': np.arange(0, 1, step=0.1)
}

In [8]:
RANDOM_STATE = 256

grid = HalvingRandomSearchCV(
    pipe,
    cv = StratifiedKFold(n_splits=5, random_state=RANDOM_STATE, shuffle=True),
    param_distributions = param_grid,
    
    factor = 5, # 1/5 candidates selected in each iteration
    aggressive_elimination = True,
    n_candidates = 'exhaust',
    min_resources = 400,
    scoring = make_scorer(average_precision_score, needs_proba=True),
    refit = False,
    
    n_jobs = -1,
    random_state = 42,
    verbose = 2
)
    
grid.fit(X, y)

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 400
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 21
n_resources: 400
Fitting 5 folds for each of 21 candidates, totalling 105 fits










[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=19; total time= 6.5min
[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=45; total time=21.3min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.5, feature_extraction__n_components=36; total time= 3.9min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=10; total time= 7.0min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=



[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=10; total time= 7.4min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=18; total time=22.2min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.5, feature_extraction__n_components=36; total time= 3.5min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=2; total time= 7.3min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.0, featu



[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=2; total time= 8.3min
[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=12; total time=10.0min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=8; total time= 3.6min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=43; total time=14.9min




[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=19; total time= 5.7min
[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=45; total time= 7.1min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=19; total time=24.8min




[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=38; total time=10.2min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=19; total time=32.6min




[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=17; total time= 4.3min
[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=21; total time=14.8min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=8; total time= 3.4min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=21; total time=19.8min




[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=38; total time=10.6min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=19; total time=34.2min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=29; total time= 3.4min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=16; total time=16.5min
[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extr



[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=2; total time= 7.7min
[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=12; total time= 9.4min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=46; total time=32.5min
[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=19; total time= 5.8min
[CV] END feature_extraction__alpha_W=0.5, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, featu



[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=29; total time= 3.8min
[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=21; total time=12.0min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=43; total time=46.6min




[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=29; total time= 3.8min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=16; total time= 1.2min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=44; total time=10.1min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=43; total time=47.9min




----------
iter: 1
n_candidates: 5
n_resources: 2000
Fitting 5 folds for each of 5 candidates, totalling 25 fits




[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=19; total time=  23.6s




[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=19; total time= 4.6min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=19; total time= 4.9min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=19; total time= 5.1min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=16; total time= 3.1min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extr



In [9]:
print(grid.best_params_)

{'feature_extraction__n_components': 36, 'feature_extraction__l1_ratio': 0.5, 'feature_extraction__init': 'nndsvd', 'feature_extraction__beta_loss': 'frobenius', 'feature_extraction__alpha_W': 0.0}


In [10]:
print(grid.best_score_)

0.03512903804190755


In [11]:
import joblib

#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_256_minre400__round8_randomstate256.pkl')

['models/nmf_hp_search_256_minre400__round8_randomstate256.pkl']

[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=16; total time=15.8min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=43; total time=54.3min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=18; total time=42.9min
[CV] END feature_extraction__alpha_W=0.7000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=17; total time= 4.5min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, featur

## Image Resolution 512 x 512

In [6]:
X = np.load('/scratch/siads699w23_class_root/siads699w23_class/irinb/sample_flatten_X_512.npy')
y = np.load('/scratch/siads699w23_class_root/siads699w23_class/irinb/sample_flatten_y_512.npy')

In [7]:
from collections import Counter
Counter(y)

Counter({0.0: 8570, 1.0: 181})

In [8]:
nmf = MiniBatchNMF(
    max_iter = 100000,
    tol = 1e-4, #control early stopping based on changes in H
    max_no_improvement = 3, #control early stopping based on smoothed cost function
    forget_factor = 1, #rescaling factor of past
    fresh_restarts = False, #completely solve for W at each step 
    random_state = 42
    )

logit = LogisticRegression(
    warm_start = True,
    solver = 'saga',
    max_iter = 20,
    l1_ratio = 0.5,
    random_state = 42
    )

pipe = Pipeline([
    ('feature_extraction', nmf),
    ('classify', logit)
])

param_grid = {
    'feature_extraction__n_components': np.arange(2, 31, step=1),
    'feature_extraction__init': ['random', 'nndsvd', 'nndsvda'],
    'feature_extraction__beta_loss': ['frobenius', 'kullback-leibler'],
    'feature_extraction__alpha_W': np.arange(0, 1, step=0.1), #np.logspace(0, -4, num=5, base=10, endpoint=True),
    'feature_extraction__l1_ratio': np.arange(0, 1, step=0.1)
}

In [None]:
RANDOM_STATE = 42

grid = HalvingRandomSearchCV(
    pipe,
    cv = StratifiedKFold(n_splits=5, random_state=RANDOM_STATE, shuffle=True),
    param_distributions = param_grid,
    
    factor = 5, # 1/5 candidates selected in each iteration
    aggressive_elimination = True,
    n_candidates = 'exhaust',
    min_resources = 400,
    scoring = make_scorer(average_precision_score, needs_proba=True),
    refit = False,
    
    n_jobs = -1,
    random_state = 42,
    verbose = 2
)
    
grid.fit(X, y)

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 400
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 21
n_resources: 400
Fitting 5 folds for each of 21 candidates, totalling 105 fits










[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=5; total time=19.0min
[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=12; total time=18.2min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=5; total time=64.8min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=18; total time= 9.9min




[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=21; total time=64.2min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=28; total time=39.1min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=18; total time=10.7min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.5, feature_extraction__n_components=27; total time=32.2min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsv



[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=18; total time=118.6min




[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=21; total time=72.8min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=6; total time=27.9min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=13; total time=19.0min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=21; total time=26.9min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, featu



[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=3; total time= 1.2min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=20; total time=122.9min




[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=3; total time=125.9min




[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.5, feature_extraction__n_components=27; total time=32.3min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=30; total time=96.9min
[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=18; total time=130.3min




[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=18; total time=132.7min




[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=3; total time=135.7min




[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=3; total time=138.4min
[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=18; total time=139.4min
[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=18; total time=140.3min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.5, feature_extraction__n_components=27; total time=35.3min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=ku



[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=5; total time=18.8min
[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=12; total time=14.6min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=5; total time=48.9min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=7; total time=69.4min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extracti



[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=21; total time=20.4min
[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=26; total time=37.3min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=17; total time=20.9min
[CV] END feature_extraction__alpha_W=0.2, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=7; total time=81.2min




[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=5; total time=17.0min
[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=12; total time=23.5min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=5; total time=61.0min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=26; total time=70.4min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extract



[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.4, feature_extraction__n_components=21; total time=57.5min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=17; total time=20.1min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=6; total time=27.8min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.8, feature_extraction__n_components=26; total time=85.1min




[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=5; total time=16.1min
[CV] END feature_extraction__alpha_W=0.8, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.7000000000000001, feature_extraction__n_components=12; total time=14.5min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=17; total time= 1.3min
[CV] END feature_extraction__alpha_W=0.4, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=30; total time=36.7min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature



[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=21; total time=23.2min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=17; total time= 1.6min
[CV] END feature_extraction__alpha_W=0.9, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.2, feature_extraction__n_components=17; total time=15.2min
[CV] END feature_extraction__alpha_W=0.0, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.6000000000000001, feature_extraction__n_components=17; total time=24.2min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.30000000000



[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=frobenius, feature_extraction__init=nndsvda, feature_extraction__l1_ratio=0.9, feature_extraction__n_components=5; total time=22.8min
[CV] END feature_extraction__alpha_W=0.6000000000000001, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=random, feature_extraction__l1_ratio=0.1, feature_extraction__n_components=26; total time=46.9min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=frobenius, feature_extraction__init=random, feature_extraction__l1_ratio=0.30000000000000004, feature_extraction__n_components=6; total time=29.7min
[CV] END feature_extraction__alpha_W=0.30000000000000004, feature_extraction__beta_loss=kullback-leibler, feature_extraction__init=nndsvd, feature_extraction__l1_ratio=0.0, feature_extraction__n_components=18; total time= 9.7min
[CV] END feature_extraction__alpha_W=0.1, feature_extraction__beta_loss=kullback-leib



----------
iter: 1
n_candidates: 5
n_resources: 2000
Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [None]:
print(grid.best_params_)

In [None]:
print(grid.best_score_)

In [None]:
import joblib

#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_512_round1.pkl')

In [None]:
print(grid.best_params_)

In [None]:
print(grid.best_score_)

In [None]:
import joblib

#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_512_round2.pkl')

In [None]:
print(grid.best_params_)

In [None]:
print(grid.best_score_)

In [None]:
#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_512_round3.pkl')

In [None]:
print(grid.best_params_)

In [None]:
print(grid.best_score_)

In [None]:
#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_512_round4.pkl')

## Image Resolution 1024 x 1024

In [None]:
BASEPATH = '/scratch/siads699w23_class_root/siads699w23_class/shared_data/team_mammogram/train_images'

# scitkit-learn parameter search does not support batch learning
# so we'll have to combine data in 1 batch
BATCH_SIZE = 8751 

# from PCA, image resolution does not seem to have any effect on no. of component
# so we'll use the lowest resolution to minimize training cost
IMG_SIZE = (1024, 1024, 1)

RANDOM_STATE = 42

# define DataGenerators for training split
train_gen = dl.DataGenerator(
    list_IDs = sample_img_ids,
    labels = splitter.labels,
    patient_img_dict = splitter.trainset,
    basepath = BASEPATH,
    batch_size = BATCH_SIZE,
    img_size = IMG_SIZE,
    n_classes = 2,
    shuffle = True,
    normalize = (0, 1),
    feature_extractor = None,
    verbose = True
    )

# check no. of batch
print(len(train_gen))

In [None]:
# get X and y
for X, y in train_gen:
  X = X.reshape(BATCH_SIZE, -1)
  y = y[:, 1]
  print(X.shape, y.shape)

In [None]:
with open('/scratch/siads699w23_class_root/siads699w23_class/irinb/sample_flatten_X_1024.npy','wb') as f:
    np.save(f, X)
    
with open('/scratch/siads699w23_class_root/siads699w23_class/irinb/sample_flatten_y_1024.npy','wb') as f:
    np.save(f, y)

In [None]:
Counter(y)

In [None]:
nmf = MiniBatchNMF(
    max_iter = 100000,
    tol = 1e-4, #control early stopping based on changes in H
    max_no_improvement = 3, #control early stopping based on smoothed cost function
    forget_factor = 1, #rescaling factor of past
    fresh_restarts = False, #completely solve for W at each step 
    random_state = 42
    )

logit = LogisticRegression(
    warm_start = True,
    solver = 'saga',
    max_iter = 20,
    l1_ratio = 0.5,
    random_state = 42
    )

pipe = Pipeline([
    ('feature_extraction', nmf),
    ('classify', logit)
])

param_grid = {
    'feature_extraction__n_components': np.arange(2, 31, step=1),
    'feature_extraction__init': ['random', 'nndsvd', 'nndsvda'],
    'feature_extraction__beta_loss': ['frobenius', 'kullback-leibler'],
    'feature_extraction__alpha_W': np.arange(0, 1, step=0.1), #np.logspace(0, -4, num=5, base=10, endpoint=True),
    'feature_extraction__l1_ratio': np.arange(0, 1, step=0.1)
}

In [None]:
RANDOM_STATE = 42

grid = HalvingRandomSearchCV(
    pipe,
    cv = StratifiedKFold(n_splits=5, random_state=RANDOM_STATE, shuffle=True),
    param_distributions = param_grid,
    
    factor = 5, # 1/5 candidates selected in each iteration
    aggressive_elimination = True,
    n_candidates = 'exhaust',
    scoring = make_scorer(average_precision_score, needs_proba=True),
    refit = False,
    
    n_jobs = -1,
    random_state = 42,
    verbose = 2
)
    
grid.fit(X, y)

In [None]:
print(grid.best_params_)

In [None]:
print(grid.best_score_)

In [None]:
#save your model or results
joblib.dump(grid, 'models/nmf_hp_search_1024.pkl')