In [35]:
import numpy as np
from sklearn.preprocessing import KernelCenterer
import os
from sklearn.manifold import Isomap, LocallyLinearEmbedding, SpectralEmbedding
from numpy.linalg import multi_dot
from tqdm import tqdm_notebook

In [36]:
# paths for the pivot tables
root = './separate_datasets_data'
paths = [os.path.join(root,path) for path in os.listdir(root)]

In [37]:
intrinsic_dims = np.load('./intrinsic_dims.npy',allow_pickle=True).item()

In [38]:
intrinsic_dims

{'./separate_datasets_data/ptb_proj_f.csv': array([5, 6, 7]),
 './separate_datasets_data/AGP_proj_o.csv': array([4, 5, 6]),
 './separate_datasets_data/t2d_proj_f.csv': array([4, 5, 6]),
 './separate_datasets_data/ibd_proj_o.csv': array([2, 3]),
 './separate_datasets_data/t2d_proj_g.csv': array([4, 5, 6]),
 './separate_datasets_data/ptb_proj_g.csv': array([5, 6, 7]),
 './separate_datasets_data/ptb_proj_o.csv': array([4, 5]),
 './separate_datasets_data/AGP_proj_f.csv': array([6, 7, 8, 9]),
 './separate_datasets_data/AGP_proj_g.csv': array([7, 8, 9]),
 './separate_datasets_data/ibd_proj_f.csv': array([3, 4]),
 './separate_datasets_data/t2d_proj_o.csv': array([2, 3, 4]),
 './separate_datasets_data/ibd_proj_g.csv': array([2, 3, 4, 5])}

In [42]:
# calculate isomap projections for the different dimensions for O level and store to the csv files
mf_models = {'isomap':{'class':Isomap, 'params':{}},
            'lle':{'class':LocallyLinearEmbedding, 'params':{'method':"modified",
                                                         'n_neighbors':10}}}
for path in tqdm_notebook(paths):
    
    d = np.genfromtxt(path, delimiter=';')
    k = path.split("/")[-1].split(".")[0]
    dims = intrinsic_dims[path]
    
    for mf_type, model in mf_models.items():
        errs = []
        datasets_trans = []
        # choose better dim
        for i in dims:
            model_class, model_params = model['class'], model['params']
            model_inst = model_class(n_components=i, **model_params)
            d_trans = model_inst.fit_transform(d)
            re = model_inst.reconstruction_error_ if hasattr(model_inst, 'reconstruction_error_') else model_inst.reconstruction_error()

            errs.append(re)
            datasets_trans.append(d_trans)
        
        argmin_err = np.argmin(errs)
        min_err = errs[argmin_err]
        d_trans = datasets_trans[argmin_err]
        dim = dims[argmin_err]
        name = f"./separate_datasets_data/{k}_{mf_type}_dim_{dim}_err_{str(round(min_err, 4)).replace('.','-')}.csv"
        np.savetxt(name, d_trans, delimiter=";")
        print(f"For {k}, error={re} in dim={i}")

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

For ptb_proj_f, error=0.16562106705139243 in dim=7


  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)


For ptb_proj_f, error=0.0041863376550308334 in dim=7
For AGP_proj_o, error=0.024573691546237068 in dim=6
For AGP_proj_o, error=0.003170976475598393 in dim=6
For t2d_proj_f, error=0.02312341605934981 in dim=6
For t2d_proj_f, error=0.018389031837088406 in dim=6
For ibd_proj_o, error=0.0555115756132815 in dim=3
For ibd_proj_o, error=0.012547181158668055 in dim=3
For t2d_proj_g, error=0.019646910813791708 in dim=6
For t2d_proj_g, error=0.026466949975904146 in dim=6
For ptb_proj_g, error=0.10588733805285427 in dim=7


  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)


For ptb_proj_g, error=0.004943457715440745 in dim=7
For ptb_proj_o, error=0.13624929448609538 in dim=5
For ptb_proj_o, error=0.005526263623023174 in dim=5
For AGP_proj_f, error=0.04475978060510081 in dim=9


  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)


For AGP_proj_f, error=-1.5478913159652612e-13 in dim=9
For AGP_proj_g, error=0.0420931467084964 in dim=9


  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
  alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)


For AGP_proj_g, error=-1.8722848233514589e-13 in dim=9
For ibd_proj_f, error=0.06639933174767507 in dim=4
For ibd_proj_f, error=0.028967864555859812 in dim=4
For t2d_proj_o, error=0.00972263565498953 in dim=4
For t2d_proj_o, error=0.007330093712709308 in dim=4
For ibd_proj_g, error=0.0447947161359915 in dim=5
For ibd_proj_g, error=0.02077309541024736 in dim=5

