# 다양체 학습

In [None]:
from sklearn import datasets
from sklearn import manifold
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import cross_val_score

In [None]:
# Classifiers that the cross-validation cells below iterate over (via `clf`).
# The tree-based learners draw random numbers while fitting, so their seed is
# pinned to 0 -- the same seed the manifold learners in this notebook use --
# to make the reported cross-validation scores repeatable between runs.
knn_clf = KNeighborsClassifier()
svc = SVC()
decision_tree_clf = DecisionTreeClassifier(random_state=0)
random_forest_clf = RandomForestClassifier(random_state=0)

# Regression counterparts (collected in `reg`), seeded the same way.
knn_reg = KNeighborsRegressor()
svr = SVR()
decision_tree_reg = DecisionTreeRegressor(random_state=0)
random_forest_reg = RandomForestRegressor(random_state=0)

In [None]:
# Estimators grouped by task. `clf`, `reg` and `model_name` are parallel
# lists: position k in each refers to the same model family.
clf = [knn_clf, svc, decision_tree_clf, random_forest_clf]
reg = [knn_reg, svr, decision_tree_reg, random_forest_reg]
# Display labels printed in the result headers ('Decision Tree' was
# previously misspelled as 'Deicision Tree').
model_name = ['KNN', 'SVM', 'Decision Tree', 'Random Forest']
# Labels for the manifold embeddings; the order must match the order of the
# per-dataset `*_manifold` lists built later.
manifold_name = ['LLE', 'LTSA', 'HLLE', 'MLLE', 'ISOMAP', 'MDS', 'SE', 'TSNE']

## 와인 데이터

In [None]:
# Load the wine dataset as a (data, target) pair.
# NOTE: `target` is a shared name that the iris, Boston and diabetes load
# cells further down rebind, so the wine scoring cells must run before those
# loads (or after re-running this cell) to score against the wine labels.
raw_wine, target = datasets.load_wine(return_X_y=True)

In [None]:
# Embed the wine features with eight manifold learners. Every learner is
# fitted on, and transforms, the complete `raw_wine` matrix.

# The four LocallyLinearEmbedding variants share n_neighbors=30 and
# random_state=0; only `method` and the output size vary.
# NOTE(review): 'hessian' is embedded into 3 components while every other
# learner here uses 2 -- confirm this asymmetry is intentional.
wine_lle, wine_ltsa, wine_hlle, wine_mlle = (
    manifold.LocallyLinearEmbedding(
        n_neighbors=30, n_components=dims, method=variant, random_state=0,
    ).fit_transform(raw_wine)
    for variant, dims in (
        ('standard', 2), ('ltsa', 2), ('hessian', 3), ('modified', 2),
    )
)

# Remaining learners, each reduced to two components.
wine_isomap = manifold.Isomap(
    n_components=2, n_neighbors=30,
).fit_transform(raw_wine)
wine_mds = manifold.MDS(
    random_state=0, n_components=2,
).fit_transform(raw_wine)
wine_se = manifold.SpectralEmbedding(
    random_state=0, n_components=2,
).fit_transform(raw_wine)
wine_tsne = manifold.TSNE(
    random_state=0, n_components=2,
).fit_transform(raw_wine)

In [None]:
# Wine embeddings, listed in the same order as the labels in `manifold_name`
# so the two lists can be walked in lockstep.
wine_manifold = [
    wine_lle, wine_ltsa, wine_hlle, wine_mlle,
    wine_isomap, wine_mds, wine_se, wine_tsne,
]

### 원본 데이터를 사용할 때

In [None]:
# Baseline: 5-fold cross-validation of every classifier on the raw wine
# features. `zip` pairs each estimator with its display label, replacing the
# hand-maintained index counter; the printed text is unchanged.
for name, estimator in zip(model_name, clf):
    score = cross_val_score(
        estimator=estimator,
        X=raw_wine, y=target,
        cv=5
    )
    print('Model: <{}>'.format(name))
    print('\tcross validation score: {}'.format(score))
    print('\tmean cross validation score: {} (+/- {})\n'.format(score.mean(), score.std()))

Model: <KNN>
	cross validation score: [0.72222222 0.66666667 0.63888889 0.65714286 0.77142857]
	mean cross validation score: 0.6912698412698413 (+/- 0.04877951071049148)

Model: <SVM>
	cross validation score: [0.63888889 0.61111111 0.63888889 0.68571429 0.74285714]
	mean cross validation score: 0.6634920634920635 (+/- 0.04636170738133653)

Model: <Deicision Tree>
	cross validation score: [0.94444444 0.77777778 0.86111111 0.91428571 0.85714286]
	mean cross validation score: 0.8709523809523809 (+/- 0.05700512501657816)

Model: <Random Forest>
	cross validation score: [0.97222222 0.94444444 1.         0.97142857 0.97142857]
	mean cross validation score: 0.9719047619047618 (+/- 0.017572511115045146)



### 정제된 데이터를 사용할 때

In [None]:
# 5-fold cross-validation of every classifier on every wine embedding.
# Both loops use `zip` to pair items with their labels instead of manually
# incremented counters; the printed text is unchanged.
# NOTE(review): the embeddings were fitted on the complete dataset before the
# folds are split, so held-out samples already influenced the features --
# confirm this is acceptable when comparing against the raw-data scores.
for name, estimator in zip(model_name, clf):
    print('Model: <{}>'.format(name))
    for embedding_name, embedded in zip(manifold_name, wine_manifold):
        score = cross_val_score(
            estimator=estimator,
            X=embedded, y=target,
            cv=5
        )
        print('\t"{}" cross validation score: {}'.format(embedding_name, score))
        print('\t"{}" mean cross validation score: {} (+/- {})\n'.format(embedding_name, score.mean(), score.std()))

Model: <KNN>
	"LLE" cross validation score: [0.75       0.69444444 0.72222222 0.85714286 0.82857143]
	"LLE" mean cross validation score: 0.7704761904761905 (+/- 0.06231326348982479)

	"LTSA" cross validation score: [0.72222222 0.63888889 0.66666667 0.8        0.77142857]
	"LTSA" mean cross validation score: 0.7198412698412697 (+/- 0.06078349618867878)

	"HLLE" cross validation score: [0.72222222 0.63888889 0.63888889 0.77142857 0.8       ]
	"HLLE" mean cross validation score: 0.7142857142857142 (+/- 0.0663996921920941)

	"MLLE" cross validation score: [0.75       0.63888889 0.61111111 0.57142857 0.74285714]
	"MLLE" mean cross validation score: 0.6628571428571428 (+/- 0.07156142847126215)

	"ISOMAP" cross validation score: [0.69444444 0.63888889 0.66666667 0.6        0.8       ]
	"ISOMAP" mean cross validation score: 0.68 (+/- 0.06764102780099747)

	"MDS" cross validation score: [0.72222222 0.69444444 0.63888889 0.68571429 0.74285714]
	"MDS" mean cross validation score: 0.69682539682539

## 붓꽃 데이터

In [None]:
# Load the iris dataset as a (data, target) pair.
# NOTE: this rebinds the shared name `target`; from here on it holds the iris
# labels until the next dataset load cell overwrites it again.
raw_iris, target = datasets.load_iris(return_X_y=True)

In [None]:
# Embed the iris features with eight manifold learners. Every learner is
# fitted on, and transforms, the complete `raw_iris` matrix.

# The four LocallyLinearEmbedding variants share n_neighbors=30 and
# random_state=0; only `method` and the output size vary.
# NOTE(review): 'hessian' is embedded into 3 components while every other
# learner here uses 2 -- confirm this asymmetry is intentional.
iris_lle, iris_ltsa, iris_hlle, iris_mlle = (
    manifold.LocallyLinearEmbedding(
        n_neighbors=30, n_components=dims, method=variant, random_state=0,
    ).fit_transform(raw_iris)
    for variant, dims in (
        ('standard', 2), ('ltsa', 2), ('hessian', 3), ('modified', 2),
    )
)

# Remaining learners, each reduced to two components.
iris_isomap = manifold.Isomap(
    n_components=2, n_neighbors=30,
).fit_transform(raw_iris)
iris_mds = manifold.MDS(
    random_state=0, n_components=2,
).fit_transform(raw_iris)
iris_se = manifold.SpectralEmbedding(
    random_state=0, n_components=2,
).fit_transform(raw_iris)
iris_tsne = manifold.TSNE(
    random_state=0, n_components=2,
).fit_transform(raw_iris)



In [None]:
# Iris embeddings, listed in the same order as the labels declared below so
# the two lists can be walked in lockstep.
iris_manifold = [
    iris_lle, iris_ltsa, iris_hlle, iris_mlle,
    iris_isomap, iris_mds, iris_se, iris_tsne,
]
# Display labels for the embeddings (same values as the earlier definition).
manifold_name = [
    'LLE', 'LTSA', 'HLLE', 'MLLE',
    'ISOMAP', 'MDS', 'SE', 'TSNE',
]

### 원본 데이터를 사용할 때

In [None]:
# Baseline: 5-fold cross-validation of every classifier on the raw iris
# features. `zip` pairs each estimator with its display label, replacing the
# hand-maintained index counter; the printed text is unchanged.
for name, estimator in zip(model_name, clf):
    score = cross_val_score(
        estimator=estimator,
        X=raw_iris, y=target,
        cv=5
    )
    print('Model: <{}>'.format(name))
    print('\tcross validation score: {}'.format(score))
    print('\tmean cross validation score: {} (+/- {})\n'.format(score.mean(), score.std()))

Model: <KNN>
	cross validation score: [0.96666667 1.         0.93333333 0.96666667 1.        ]
	mean cross validation score: 0.9733333333333334 (+/- 0.02494438257849294)

Model: <SVM>
	cross validation score: [0.96666667 0.96666667 0.96666667 0.93333333 1.        ]
	mean cross validation score: 0.9666666666666666 (+/- 0.02108185106778919)

Model: <Deicision Tree>
	cross validation score: [0.96666667 0.96666667 0.9        1.         1.        ]
	mean cross validation score: 0.9666666666666668 (+/- 0.036514837167011066)

Model: <Random Forest>
	cross validation score: [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]
	mean cross validation score: 0.9666666666666668 (+/- 0.02108185106778919)



### 정제된 데이터를 사용할 때

In [None]:
# 5-fold cross-validation of every classifier on every iris embedding.
# Both loops use `zip` to pair items with their labels instead of manually
# incremented counters; the printed text is unchanged.
# NOTE(review): the embeddings were fitted on the complete dataset before the
# folds are split, so held-out samples already influenced the features --
# confirm this is acceptable when comparing against the raw-data scores.
for name, estimator in zip(model_name, clf):
    print('Model: <{}>'.format(name))
    for embedding_name, embedded in zip(manifold_name, iris_manifold):
        score = cross_val_score(
            estimator=estimator,
            X=embedded, y=target,
            cv=5
        )
        print('\t"{}" cross validation score: {}'.format(embedding_name, score))
        print('\t"{}" mean cross validation score: {} (+/- {})\n'.format(embedding_name, score.mean(), score.std()))

Model: <KNN>
	"LLE" cross validation score: [0.96666667 1.         0.9        0.96666667 1.        ]
	"LLE" mean cross validation score: 0.9666666666666668 (+/- 0.036514837167011066)

	"LTSA" cross validation score: [0.86666667 0.9        0.86666667 0.86666667 0.96666667]
	"LTSA" mean cross validation score: 0.8933333333333333 (+/- 0.038873012632301994)

	"HLLE" cross validation score: [0.73333333 0.9        0.86666667 0.9        0.96666667]
	"HLLE" mean cross validation score: 0.8733333333333333 (+/- 0.07717224601860152)

	"MLLE" cross validation score: [0.83333333 0.9        0.86666667 0.86666667 0.96666667]
	"MLLE" mean cross validation score: 0.8866666666666667 (+/- 0.04521553322083511)

	"ISOMAP" cross validation score: [0.93333333 1.         0.93333333 0.96666667 0.96666667]
	"ISOMAP" mean cross validation score: 0.96 (+/- 0.02494438257849294)

	"MDS" cross validation score: [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]
	"MDS" mean cross validation score: 0.96666666666

## 보스턴 주택 가격 데이터

In [None]:
# Load the Boston housing data as a (data, target) pair.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# looking it up on `datasets` raises ImportError. When it is unavailable,
# rebuild the same arrays from the original StatLib file, following the
# replacement snippet given in scikit-learn's removal notice (needs network
# access).
# NOTE: this rebinds the shared name `target`; from here on it holds the
# Boston prices until the next dataset load cell overwrites it again.
try:
    raw_boston, target = datasets.load_boston(return_X_y=True)
except (ImportError, AttributeError):
    import numpy as np
    import pandas as pd

    # After a 22-line header the file spreads each record over two physical
    # lines: the even rows carry the first 11 features, the odd rows carry the
    # last 2 features followed by the target value.
    boston_rows = pd.read_csv(
        'http://lib.stat.cmu.edu/datasets/boston',
        sep=r'\s+', skiprows=22, header=None,
    ).values
    raw_boston = np.hstack([boston_rows[::2, :], boston_rows[1::2, :2]])
    target = boston_rows[1::2, 2]

In [None]:
# Embed the Boston features with eight manifold learners. Every learner is
# fitted on, and transforms, the complete `raw_boston` matrix.

# The four LocallyLinearEmbedding variants share n_neighbors=30 and
# random_state=0; only `method` and the output size vary.
# NOTE(review): 'hessian' is embedded into 3 components while every other
# learner here uses 2 -- confirm this asymmetry is intentional.
boston_lle, boston_ltsa, boston_hlle, boston_mlle = (
    manifold.LocallyLinearEmbedding(
        n_neighbors=30, n_components=dims, method=variant, random_state=0,
    ).fit_transform(raw_boston)
    for variant, dims in (
        ('standard', 2), ('ltsa', 2), ('hessian', 3), ('modified', 2),
    )
)

# Remaining learners, each reduced to two components.
boston_isomap = manifold.Isomap(
    n_components=2, n_neighbors=30,
).fit_transform(raw_boston)
boston_mds = manifold.MDS(
    random_state=0, n_components=2,
).fit_transform(raw_boston)
boston_se = manifold.SpectralEmbedding(
    random_state=0, n_components=2,
).fit_transform(raw_boston)
boston_tsne = manifold.TSNE(
    random_state=0, n_components=2,
).fit_transform(raw_boston)

In [None]:
# Boston embeddings, listed in the same order as the labels declared below so
# the two lists can be walked in lockstep.
boston_manifold = [
    boston_lle, boston_ltsa, boston_hlle, boston_mlle,
    boston_isomap, boston_mds, boston_se, boston_tsne,
]
# Display labels for the embeddings (same values as the earlier definition).
manifold_name = [
    'LLE', 'LTSA', 'HLLE', 'MLLE',
    'ISOMAP', 'MDS', 'SE', 'TSNE',
]

### 원본 데이터를 사용할 때

In [None]:
# Baseline: 5-fold cross-validation of every regressor on the raw Boston
# features. `zip` pairs each estimator with its display label, replacing the
# hand-maintained index counter; the printed text is unchanged.
# NOTE(review): no `scoring` argument is passed, so each estimator's default
# scorer is used -- presumably R^2 for these regressors, which would explain
# negative scores; confirm against the cross_val_score documentation.
for name, estimator in zip(model_name, reg):
    score = cross_val_score(
        estimator=estimator,
        X=raw_boston, y=target,
        cv=5
    )
    print('Model: <{}>'.format(name))
    print('\tcross validation score: {}'.format(score))
    print('\tmean cross validation score: {} (+/- {})\n'.format(score.mean(), score.std()))

Model: <KNN>
	cross validation score: [-1.10921186  0.14934963 -0.4259195  -0.01474393 -0.17455668]
	mean cross validation score: -0.31501646812514134 (+/- 0.4401304871369981)

Model: <SVM>
	cross validation score: [ 0.13724306  0.03198807 -0.64089321  0.0623277  -0.0422011 ]
	mean cross validation score: -0.09030709431523076 (+/- 0.2812443133954511)

Model: <Deicision Tree>
	cross validation score: [ 0.61574863  0.4236546   0.65186834  0.37788644 -1.38989432]
	mean cross validation score: 0.13585273743576307 (+/- 0.7701800399340308)

Model: <Random Forest>
	cross validation score: [0.76527122 0.86516139 0.77264733 0.47446146 0.3246172 ]
	mean cross validation score: 0.6404317209980646 (+/- 0.20535541236360355)



### 정제된 데이터를 사용할 때

In [None]:
# 5-fold cross-validation of every regressor on every Boston embedding.
# Both loops use `zip` to pair items with their labels instead of manually
# incremented counters; the printed text is unchanged.
# NOTE(review): the embeddings were fitted on the complete dataset before the
# folds are split, so held-out samples already influenced the features --
# confirm this is acceptable when comparing against the raw-data scores.
for name, estimator in zip(model_name, reg):
    print('Model: <{}>'.format(name))
    for embedding_name, embedded in zip(manifold_name, boston_manifold):
        score = cross_val_score(
            estimator=estimator,
            X=embedded, y=target,
            cv=5
        )
        print('\t"{}" cross validation score: {}'.format(embedding_name, score))
        print('\t"{}" mean cross validation score: {} (+/- {})\n'.format(embedding_name, score.mean(), score.std()))

Model: <KNN>
	"LLE" cross validation score: [-1.33881304 -0.33397386 -0.67191675 -0.04599274 -0.94195321]
	"LLE" mean cross validation score: -0.6665299225457146 (+/- 0.4524403877544172)

	"LTSA" cross validation score: [-0.74761053 -0.09704964 -0.42123471 -0.20145846 -1.50494999]
	"LTSA" mean cross validation score: -0.5944606660192052 (+/- 0.506817793527769)

	"HLLE" cross validation score: [-1.39277357 -0.17275698 -0.28470042 -0.15244171 -1.32658683]
	"HLLE" mean cross validation score: -0.6658518998653019 (+/- 0.5686817794091726)

	"MLLE" cross validation score: [-0.98937446 -0.36775715 -0.53000288 -0.15969615 -1.4852893 ]
	"MLLE" mean cross validation score: -0.7064239900222219 (+/- 0.4756938324782842)

	"ISOMAP" cross validation score: [-1.11293162 -0.17282115 -0.44728592 -0.16630727 -1.09683238]
	"ISOMAP" mean cross validation score: -0.599235666834885 (+/- 0.42516610531405896)

	"MDS" cross validation score: [-1.06990306 -0.11963214 -0.63416493 -0.14534954 -1.14649463]
	"MDS" m

## 당뇨병 데이터

In [None]:
# Load the diabetes dataset as a (data, target) pair.
# NOTE: this rebinds the shared name `target`; from here on it holds the
# diabetes regression target rather than the values of an earlier dataset.
raw_diabetes, target = datasets.load_diabetes(return_X_y=True)

In [None]:
# Embed the diabetes features with eight manifold learners. Every learner is
# fitted on, and transforms, the complete `raw_diabetes` matrix.

# The four LocallyLinearEmbedding variants share n_neighbors=30 and
# random_state=0; only `method` and the output size vary.
# NOTE(review): 'hessian' is embedded into 3 components while every other
# learner here uses 2 -- confirm this asymmetry is intentional.
diabetes_lle, diabetes_ltsa, diabetes_hlle, diabetes_mlle = (
    manifold.LocallyLinearEmbedding(
        n_neighbors=30, n_components=dims, method=variant, random_state=0,
    ).fit_transform(raw_diabetes)
    for variant, dims in (
        ('standard', 2), ('ltsa', 2), ('hessian', 3), ('modified', 2),
    )
)

# Remaining learners, each reduced to two components.
diabetes_isomap = manifold.Isomap(
    n_components=2, n_neighbors=30,
).fit_transform(raw_diabetes)
diabetes_mds = manifold.MDS(
    random_state=0, n_components=2,
).fit_transform(raw_diabetes)
diabetes_se = manifold.SpectralEmbedding(
    random_state=0, n_components=2,
).fit_transform(raw_diabetes)
diabetes_tsne = manifold.TSNE(
    random_state=0, n_components=2,
).fit_transform(raw_diabetes)

In [None]:
# Diabetes embeddings, listed in the same order as the labels declared below
# so the two lists can be walked in lockstep.
diabetes_manifold = [
    diabetes_lle, diabetes_ltsa, diabetes_hlle, diabetes_mlle,
    diabetes_isomap, diabetes_mds, diabetes_se, diabetes_tsne,
]
# Display labels for the embeddings (same values as the earlier definition).
manifold_name = [
    'LLE', 'LTSA', 'HLLE', 'MLLE',
    'ISOMAP', 'MDS', 'SE', 'TSNE',
]

### 원본 데이터를 사용할 때

In [None]:
# Baseline: 5-fold cross-validation of every regressor on the raw diabetes
# features. `zip` pairs each estimator with its display label, replacing the
# hand-maintained index counter; the printed text is unchanged.
# NOTE(review): no `scoring` argument is passed, so each estimator's default
# scorer is used -- presumably R^2 for these regressors, which would explain
# negative scores; confirm against the cross_val_score documentation.
for name, estimator in zip(model_name, reg):
    score = cross_val_score(
        estimator=estimator,
        X=raw_diabetes, y=target,
        cv=5
    )
    print('Model: <{}>'.format(name))
    print('\tcross validation score: {}'.format(score))
    print('\tmean cross validation score: {} (+/- {})\n'.format(score.mean(), score.std()))

Model: <KNN>
	cross validation score: [0.35008817 0.36602995 0.43172985 0.3231243  0.41156846]
	mean cross validation score: 0.37650814581101094 (+/- 0.03984044198258091)

Model: <SVM>
	cross validation score: [0.14739166 0.12560752 0.18203803 0.12242334 0.15658678]
	mean cross validation score: 0.14680946577304238 (+/- 0.021822887398936373)

Model: <Deicision Tree>
	cross validation score: [-0.25749649 -0.10437255 -0.16869476  0.07708638 -0.20128903]
	mean cross validation score: -0.13095329013801638 (+/- 0.11521436991400158)

Model: <Random Forest>
	cross validation score: [0.37144264 0.50076574 0.42056336 0.36995856 0.41943073]
	mean cross validation score: 0.4164322067979077 (+/- 0.04758584250214636)



### 정제된 데이터를 사용할 때

In [None]:
# 5-fold cross-validation of every regressor on every diabetes embedding.
# Both loops use `zip` to pair items with their labels instead of manually
# incremented counters; the printed text is unchanged.
# NOTE(review): the embeddings were fitted on the complete dataset before the
# folds are split, so held-out samples already influenced the features --
# confirm this is acceptable when comparing against the raw-data scores.
for name, estimator in zip(model_name, reg):
    print('Model: <{}>'.format(name))
    for embedding_name, embedded in zip(manifold_name, diabetes_manifold):
        score = cross_val_score(
            estimator=estimator,
            X=embedded, y=target,
            cv=5
        )
        print('\t"{}" cross validation score: {}'.format(embedding_name, score))
        print('\t"{}" mean cross validation score: {} (+/- {})\n'.format(embedding_name, score.mean(), score.std()))

Model: <KNN>
	"LLE" cross validation score: [0.12113113 0.46765251 0.38218372 0.41437627 0.31859728]
	"LLE" mean cross validation score: 0.34078818130085214 (+/- 0.11997083792822473)

	"LTSA" cross validation score: [-0.07886348  0.13720927  0.15650032  0.18060403  0.28470378]
	"LTSA" mean cross validation score: 0.13603078657060633 (+/- 0.11890363216190929)

	"HLLE" cross validation score: [-0.15261761  0.35882038  0.25926034  0.2493607   0.33063481]
	"HLLE" mean cross validation score: 0.20909172269433193 (+/- 0.1855605098190248)

	"MLLE" cross validation score: [0.17675756 0.23721264 0.20312755 0.28434557 0.11342527]
	"MLLE" mean cross validation score: 0.2029737171533541 (+/- 0.057445483274801645)

	"ISOMAP" cross validation score: [0.12679091 0.343113   0.35926966 0.27281668 0.13077716]
	"ISOMAP" mean cross validation score: 0.24655348345297412 (+/- 0.10046458536676843)

	"MDS" cross validation score: [-0.00626013  0.28584868  0.28430492  0.25653428  0.32580058]
	"MDS" mean cross 