In [20]:
import numpy as np

import src
import src.data.datasets as datasets
import src.quality_measures as qm
from src.paths import processed_data_path

In [4]:
## Specify the (dataset, algorithm, quality measure) triples to train on

In [6]:
# Show the dataset names that src.data.datasets reports as available.
datasets.available_datasets()

['ball',
 'broken-swiss-roll',
 'coil-100',
 'coil-20',
 'difficult',
 'f-mnist',
 'frey-faces',
 'gaussian-blobs',
 'helix',
 'hiva',
 'lvq-pak',
 'mnist',
 'orl-faces',
 's-curve',
 'shuttle-statlog',
 'sphere',
 'swiss-roll',
 'twinpeaks',
 'unit-cube']

In [9]:
from sklearn.decomposition import PCA
from sklearn.manifold import MDS, Isomap, LocallyLinearEmbedding

from MulticoreTSNE import MulticoreTSNE as TSNE
from umap import UMAP

# Maps each dimension-reduction algorithm name accepted in a training dict to
# the estimator class that implements it.
# "autoencoder" maps to None: no class is registered for it here, so any code
# that instantiates the mapped value (as the grid-search cell does) fails for
# that key.
DR_ALGORITHMS = {
    "autoencoder": None,
    "isomap": Isomap,
    "MDS": MDS,
    "PCA": PCA,
    "t-SNE":TSNE,
    "UMAP": UMAP,
}

def available_algorithms():
    """Valid algorithms for dimension reduction applications.

    Returns the dictionary of known dimension reduction algorithms,
    keyed by algorithm name.

    It exists to allow for a description of the mapping for
    each of the valid strings.

    The valid algorithm names, and the class they map to, are:

    ============     ====================================
    Algorithm        Class
    ============     ====================================
    autoencoder      None (no implementation mapped)
    isomap           sklearn.manifold.Isomap
    MDS              sklearn.manifold.MDS
    PCA              sklearn.decomposition.PCA
    t-SNE            MulticoreTSNE.MulticoreTSNE
    UMAP             umap.UMAP
    ============     ====================================

    Returns
    -------
    dict
        The module-level ``DR_ALGORITHMS`` mapping itself (not a copy),
        so changes made to the returned dict are visible to later calls.
    """
    return DR_ALGORITHMS

In [10]:
# Display the name -> estimator class mapping returned by available_algorithms().
available_algorithms()

{'autoencoder': None,
 'isomap': sklearn.manifold.isomap.Isomap,
 'MDS': sklearn.manifold.mds.MDS,
 'PCA': sklearn.decomposition.pca.PCA,
 't-SNE': MulticoreTSNE.MulticoreTSNE,
 'UMAP': umap.umap_.UMAP}

In [11]:
# Display the name -> function mapping of the quality measures exposed by
# src.quality_measures.
qm.available_quality_measures()

{'1nn-error': <function src.quality_measures.generalized_1nn_error>,
 'continuity': <function src.quality_measures.continuity>,
 'stress': <function src.quality_measures.stress>,
 'strain': <function src.quality_measures.strain>,
 'trustworthiness': <function src.quality_measures.trustworthiness>}

In [None]:
# Scratch cell (not executed in the saved notebook): builds a UMAP instance with
# default arguments. The result is not assigned, so it only serves to display
# the estimator's repr.
UMAP()

In [16]:
# One entry per training run. Each dict names a (dataset, algorithm, score)
# triple plus the options the grid-search cell reads from it.
training_dicts = [
    {
        # Must be one of datasets.available_datasets().
        'dataset': 'coil-20',
        # Key into available_algorithms().
        'algorithm': 'UMAP',
        # Key into qm.available_quality_measures().
        'score': 'trustworthiness',
        # Only entries with 'grid_search' are picked up by the grid-search cell.
        'meta': 'grid_search',
        # Parameter grid handed to GridSearchCV for the algorithm.
        'algorithm_params': {
            'n_components': [2],
            'n_neighbors': np.arange(2, 100, 3),
        },
        # Keyword arguments forwarded to qm.make_hi_lo_scorer.
        'score_params': {
            'n_neighbors': 12,
            'metric': 'euclidean',
        },
    },
]

In [17]:
# Check for valid parameters

In [18]:
# Validate every training dict up front so that a misspelled dataset, algorithm
# or score name fails here, with a message naming the offending value, instead
# of surfacing as a lookup error inside a later cell.
for td in training_dicts:
    assert td['dataset'] in datasets.available_datasets(), \
        "Unknown dataset: {!r}".format(td['dataset'])
    assert td['algorithm'] in available_algorithms(), \
        "Unknown algorithm: {!r}".format(td['algorithm'])
    assert td['score'] in qm.available_quality_measures(), \
        "Unknown quality measure: {!r}".format(td['score'])

In [24]:
from sklearn.model_selection import GridSearchCV

In [29]:
# Grid search for best parameters and models.
#
# For every training dict whose 'meta' is 'grid_search', this cell:
#   1. loads the named dataset,
#   2. instantiates the named algorithm with default arguments,
#   3. builds a scorer from the named quality measure and 'score_params',
#   4. runs GridSearchCV over 'algorithm_params' and keeps the fitted search.
# Fitted GridSearchCV objects are collected in `results`, in input order.
#
# NOTE(review): the saved output of this cell is `KeyError: 0`, and the %debug
# session that follows stops in sklearn's clone() at estimator.get_params().
# The cause is not visible from this notebook alone and is NOT addressed here;
# investigate before relying on `results`.
results = []
for td in training_dicts:
    meta = td.get('meta', None)
    if meta != 'grid_search':
        # Entries that do not request a grid search are skipped by this cell.
        continue

    # Algorithm names may map to None (e.g. 'autoencoder'). Fail with a clear
    # message rather than the opaque "'NoneType' object is not callable";
    # TypeError is kept because that is what the bare call would raise.
    alg_class = available_algorithms()[td['algorithm']]
    if alg_class is None:
        raise TypeError(
            "No implementation is registered for algorithm {!r}".format(td['algorithm'])
        )

    ds = datasets.load_dataset(td['dataset'])
    alg = alg_class()

    quality_measure = qm.available_quality_measures()[td['score']]
    score = qm.make_hi_lo_scorer(quality_measure, **td['score_params'])

    grid_search = GridSearchCV(alg, td['algorithm_params'], scoring=score)
    grid_search.fit(ds.data, y=ds.target)
    results.append(grid_search)
            

KeyError: 0

In [28]:
# Leftover post-mortem debugging of the exception raised by the grid-search
# cell: %debug opens an interactive ipdb prompt. Delete this cell once the
# failure is understood.
%debug

> [0;32m/opt/software/anaconda3/envs/dimension_reduction/lib/python3.6/site-packages/sklearn/base.py[0m(60)[0;36mclone[0;34m()[0m
[0;32m     58 [0;31m                            % (repr(estimator), type(estimator)))
[0m[0;32m     59 [0;31m    [0mklass[0m [0;34m=[0m [0mestimator[0m[0;34m.[0m[0m__class__[0m[0;34m[0m[0m
[0m[0;32m---> 60 [0;31m    [0mnew_object_params[0m [0;34m=[0m [0mestimator[0m[0;34m.[0m[0mget_params[0m[0;34m([0m[0mdeep[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     61 [0;31m    [0;32mfor[0m [0mname[0m[0;34m,[0m [0mparam[0m [0;32min[0m [0msix[0m[0;34m.[0m[0miteritems[0m[0;34m([0m[0mnew_object_params[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m     62 [0;31m        [0mnew_object_params[0m[0;34m[[0m[0mname[0m[0;34m][0m [0;34m=[0m [0mclone[0m[0;34m([0m[0mparam[0m[0;34m,[0m [0msafe[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0m
[0m
ipdb> self
***