In [14]:
from collections import namedtuple
from NMF_Implementation.L21Norm import Algorithm as L21Norm
from NMF_Implementation.L2Norm import Algorithm as L2Norm
from NMF_Implementation.base import load_data, Implementation
from NMF_Implementation.Noise import salt_and_pepper
import numpy as np
from typing import Dict
OUTPUT_FILENAME = 'gridsearch/gridsearch.csv'
OUTPUT_LOSS_PREFIX = 'losses/loss'
OUTPUT_RESIDUE_PREFIX = 'losses/residue'

observation = namedtuple('Observation', ['p', 'r', 'k', 'modeltype', 'dataset'])

ordered_grid = []
for dataset in 'orl', 'yale':
    for r in 0.5, 0.2, 0.8:
        for p in 0.1, 0.2, 0.4:
            for k in 1, 10, 100:
                for modeltype in 'l21', 'l2':
                    ordered_grid.append(observation(
                        p=p, r=r, k=k, modeltype=modeltype,
                        dataset=dataset
                    ))
for dataset in 'orl', 'yale':
    for r in 0.5, 0.2, 0.8:
        for p in 0.1, 0.2, 0.4:
            for k in 20, 200, 500:
                for modeltype in 'l21', 'l2':
                    ordered_grid.append(observation(
                        p=p, r=r, k=k, modeltype=modeltype,
                        dataset=dataset
                    ))
for dataset in 'orl', 'yale':
    for r in 0.5, 0.2, 0.8:
        for p in 0.1, 0.2, 0.4:
            for k in [1000]:
                for modeltype in ['l21', 'l2']:
                    ordered_grid.append(observation(
                        p=p, r=r, k=k, modeltype=modeltype,
                        dataset=dataset
                    ))

In [15]:
Xo, Yo = load_data('../data/ORL', (30, 37))
Xy, Yy = load_data('../data/CroppedYaleB', (30, 37))
Xo = Xo
Xy = Xy

model_classes : Dict[str, Implementation] = dict(
    l21=L21Norm,
    l2=L2Norm
)
datasets = dict(
    orl=Xo,
    yale=Xy,
)

In [20]:

#with open(OUTPUT_FILENAME, 'w') as f: print('components,proportion_noise,ratio_white,dataset,model_type,result_filenumber,reconstruction_error,train_time', file=f) 
lossnum = 1000
def run_model(obs_param):
    global lossnum
    # unpack tuple
    k = obs_param.k
    p = obs_param.p
    r = obs_param.r
    ds = obs_param.dataset
    md = obs_param.modeltype
    
    if ds == 'orl' and True:  # skip orls
        lossnum += 1
        return
    
    dataset = datasets[ds]
    
    # create model
    model = model_classes[md](k, stop_threshold=-np.inf, max_iter=1000)
    
    def noise(X):
        return salt_and_pepper(X, p, r)

    # generate noisy data
    noisy = noise(dataset)
    
    # train model with noisy data
    start_t = datetime.now()
    model.fit(noisy)
    time_taken = (datetime.now() - start_t).total_seconds()
    
    # extract loss and residue
    loss = model.get_metavalues()['training_loss']
    residue = model.get_metavalues()['training_residue']
    
    reconstruction_error = model.reconstruction_error(noisy, dataset)
    
    # save the data!
    # save reconstruction error
    with open(OUTPUT_FILENAME, 'a') as f:
        print(f'{k},{p},{r},{ds},{md},{lossnum},{reconstruction_error},{time_taken}', file=f)
    # save losses
    with open(f'{OUTPUT_LOSS_PREFIX}{lossnum}.csv', 'w') as f:
        print(*loss, sep=',\n', end='', file=f)
    # save residues
    with open(f'{OUTPUT_RESIDUE_PREFIX}{lossnum}.csv', 'w') as f:
        print(*loss, sep=',\n', end='', file=f)
    lossnum += 1
    return time_taken
    
from datetime import datetime
for i, obs in enumerate(ordered_grid):
    print(f'running {obs}. {lossnum} / {len(ordered_grid)} ({lossnum / len(ordered_grid):.2f}%) ', end='')
    time_taken = run_model(obs)
    print(f'{time_taken}s')


running Observation(p=0.1, r=0.5, k=1, modeltype='l21', dataset='orl'). 1000 / 252 (3.97%) Nones
running Observation(p=0.1, r=0.5, k=1, modeltype='l2', dataset='orl'). 1001 / 252 (3.97%) Nones
running Observation(p=0.1, r=0.5, k=10, modeltype='l21', dataset='orl'). 1002 / 252 (3.98%) Nones
running Observation(p=0.1, r=0.5, k=10, modeltype='l2', dataset='orl'). 1003 / 252 (3.98%) Nones
running Observation(p=0.1, r=0.5, k=100, modeltype='l21', dataset='orl'). 1004 / 252 (3.98%) Nones
running Observation(p=0.1, r=0.5, k=100, modeltype='l2', dataset='orl'). 1005 / 252 (3.99%) Nones
running Observation(p=0.2, r=0.5, k=1, modeltype='l21', dataset='orl'). 1006 / 252 (3.99%) Nones
running Observation(p=0.2, r=0.5, k=1, modeltype='l2', dataset='orl'). 1007 / 252 (4.00%) Nones
running Observation(p=0.2, r=0.5, k=10, modeltype='l21', dataset='orl'). 1008 / 252 (4.00%) Nones
running Observation(p=0.2, r=0.5, k=10, modeltype='l2', dataset='orl'). 1009 / 252 (4.00%) Nones
running Observation(p=0.2, 

running Observation(p=0.2, r=0.2, k=100, modeltype='l21', dataset='yale'). 1082 / 252 (4.29%) 150.950153s
running Observation(p=0.2, r=0.2, k=100, modeltype='l2', dataset='yale'). 1083 / 252 (4.30%) 82.926923s
running Observation(p=0.4, r=0.2, k=1, modeltype='l21', dataset='yale'). 1084 / 252 (4.30%) 100.729892s
running Observation(p=0.4, r=0.2, k=1, modeltype='l2', dataset='yale'). 1085 / 252 (4.31%) 71.219069s
running Observation(p=0.4, r=0.2, k=10, modeltype='l21', dataset='yale'). 1086 / 252 (4.31%) 105.781389s
running Observation(p=0.4, r=0.2, k=10, modeltype='l2', dataset='yale'). 1087 / 252 (4.31%) 61.784406s
running Observation(p=0.4, r=0.2, k=100, modeltype='l21', dataset='yale'). 1088 / 252 (4.32%) 154.330884s
running Observation(p=0.4, r=0.2, k=100, modeltype='l2', dataset='yale'). 1089 / 252 (4.32%) 93.898413s
running Observation(p=0.1, r=0.8, k=1, modeltype='l21', dataset='yale'). 1090 / 252 (4.33%) 120.759727s
running Observation(p=0.1, r=0.8, k=1, modeltype='l2', dataset

running Observation(p=0.1, r=0.5, k=20, modeltype='l2', dataset='yale'). 1163 / 252 (4.62%) 129.311839s
running Observation(p=0.1, r=0.5, k=200, modeltype='l21', dataset='yale'). 1164 / 252 (4.62%) 

KeyboardInterrupt: 