In [1]:
import time

import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# IPython autoreload extension, mode 2: re-import modules before each cell runs.
%load_ext autoreload
%autoreload 2
# Seed kept in one place for reproducibility; used as `random_state` by the PCA step.
random_state = 42

# Обучим модели SVC, DecisionTreeClassifier, LogisticRegression

In [2]:
# Load the scikit-learn digits dataset and unpack the feature matrix (X)
# and the target labels (y) in one step.
data = datasets.load_digits()
X, y = data.data, data.target

In [3]:
# Classifiers compared in this notebook. Each one takes the shared
# `random_state` seed instead of a repeated literal 42, so the seed is
# defined in exactly one place for all of them.
Svc = SVC(random_state=random_state)
DecisionTree = DecisionTreeClassifier(random_state=random_state)
# NOTE(review): max_iter is raised to 10000, presumably so the solver
# converges on the unscaled features - confirm.
LogisticReg = LogisticRegression(random_state=random_state, max_iter=10000)

# Dimensionality reducers.
# A float n_components in (0, 1) makes PCA keep as many components as are
# needed to explain that share of the variance (here 90%).
pca = PCA(n_components=0.90, random_state=random_state)
# t-SNE embedding into 2 dimensions.
tsne = TSNE(n_components=2, random_state=random_state)

In [4]:
# Hold out 30% of the samples for testing. The split is seeded with the
# shared `random_state` constant rather than a hard-coded literal.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_state
)

In [5]:
# Fit every classifier on the raw (untransformed) training features.
for estimator in (Svc, DecisionTree, LogisticReg):
    estimator.fit(X_train, y_train)

In [6]:
# Predict test labels with each fitted classifier, in the same order as the
# names on the left-hand side.
svc_pred, tree_pred, log_reg_pred = (
    estimator.predict(X_test) for estimator in (Svc, DecisionTree, LogisticReg)
)

# Посмотрим на accuracy:

In [7]:
# Test-set accuracy of each classifier trained on the raw features.
# `accuracy_score` is imported in the notebook's top import cell.
for label, predictions in (
    ('SVC:', svc_pred),
    ('DecisionTree:', tree_pred),
    ('LogisticReg:', log_reg_pred),
):
    print(label, accuracy_score(y_true=y_test, y_pred=predictions))

SVC: 0.987037037037037
DecisionTree: 0.8425925925925926
LogisticReg: 0.9722222222222222


# Обучим PCA алгоритм и посмотрим на accuracy:

In [8]:
# Fit PCA on the training split only and project the training data.
# time.perf_counter() is used for the measurement: unlike time.time() it is
# a monotonic, high-resolution clock meant for timing short intervals.
start = time.perf_counter()
pca.fit(X_train)
X_train_ = pca.transform(X_train)
print('PCA работал для X_Train:', time.perf_counter() - start)

# Project the test split with the components learned from the training
# split; timed separately from the fit.
start = time.perf_counter()
X_test_ = pca.transform(X_test)
print('PCA работал для X_Test:', time.perf_counter() - start)


PCA работал для X_Train: 0.024466753005981445
PCA работал для X_Test: 0.0009989738464355469


In [9]:
# Number of components the fitted PCA actually kept for the requested
# variance share.
n_components_for_var = pca.n_components_
print('Для 90% дисперсии PCA нужно {}  компонент'.format(n_components_for_var))

Для 90% дисперсии PCA нужно 21  компонент


In [10]:
# Refit all three classifiers on the reduced training features ...
for estimator in (Svc, DecisionTree, LogisticReg):
    estimator.fit(X_train_, y_train)

# ... then predict on the reduced test features, keeping the same order as
# the names on the left-hand side.
svc_pred, tree_pred, log_reg_pred = (
    estimator.predict(X_test_) for estimator in (Svc, DecisionTree, LogisticReg)
)

In [11]:
# Test-set accuracy of each classifier for the most recent predictions.
for label, predictions in (
    ('SVC:', svc_pred),
    ('DecisionTree:', tree_pred),
    ('LogisticReg:', log_reg_pred),
):
    print(label, accuracy_score(y_true=y_test, y_pred=predictions))

SVC: 0.9888888888888889
DecisionTree: 0.8574074074074074
LogisticReg: 0.9537037037037037


# Обучим TSNE и посмотрим на accuracy:

In [12]:
# Embed the FULL dataset with t-SNE and split afterwards: scikit-learn's TSNE
# offers fit_transform only, so a held-out set cannot be projected later.
# NOTE(review): the test samples therefore take part in building the
# embedding, so the accuracies obtained on it are not directly comparable
# with the PCA run, where PCA was fitted on the training split only.
start = time.perf_counter()
X_ = tsne.fit_transform(X)
# The message used to say "PCA" although this cell times TSNE.
print('TSNE работал для X_train и X_Test:', time.perf_counter() - start)

# Re-split the embedded data with the same test_size and seed as the first
# split. This overwrites X_train_/X_test_ (previously the PCA projections)
# and reassigns y_train/y_test.
X_train_, X_test_, y_train, y_test = train_test_split(
    X_, y, test_size=0.3, random_state=random_state
)

PCA работал для X_train и X_Test: 10.662317752838135


In [13]:
# Refit all three classifiers on the current reduced training features ...
for estimator in (Svc, DecisionTree, LogisticReg):
    estimator.fit(X_train_, y_train)

# ... and predict on the matching reduced test features, in the same order
# as the names on the left-hand side.
svc_pred, tree_pred, log_reg_pred = (
    estimator.predict(X_test_) for estimator in (Svc, DecisionTree, LogisticReg)
)

In [14]:
# Test-set accuracy of each classifier for the most recent predictions.
for label, predictions in (
    ('SVC:', svc_pred),
    ('DecisionTree:', tree_pred),
    ('LogisticReg:', log_reg_pred),
):
    print(label, accuracy_score(y_true=y_test, y_pred=predictions))

SVC: 0.975925925925926
DecisionTree: 0.9777777777777777
LogisticReg: 0.9277777777777778


# Результаты:
## Accuracy без понижения размерности:

SVC: 0.987037037037037

DecisionTree: 0.8425925925925926

LogisticReg: 0.9722222222222222

## Accuracy с PCA:

SVC: 0.9888888888888889

DecisionTree: 0.8574074074074074

LogisticReg: 0.9537037037037037

## Accuracy с TSNE:

SVC: 0.975925925925926

DecisionTree: 0.9777777777777777

LogisticReg: 0.9277777777777778

# Резюмируем:
1) Лучше всего сработал SVC с PCA.
2) Для решающего дерева TSNE дал очень сильный прирост метрики.
3) Для Логистической регрессии оба преобразования дали ухудшение метрик

# Для сохранения 90% дисперсии PCA алгоритму необходимо:
21 компонента 

# Время работы алгоритмов уменьшения размерности:
TSNE: 10.662317752838135 сек

PCA: ≈0.0255 сек (0.0245 сек на fit + transform для X_train и 0.0010 сек на transform для X_test)