In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from mlxtend.plotting import plot_decision_regions
from sklearn import datasets

In [0]:
# Load the wine dataset and pull out the feature matrix and the class labels.
wine = datasets.load_wine()
X, y = wine.data, wine.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Standardization: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Display the shapes of the standardized train / test matrices.
(X_train_std.shape, X_test_std.shape)

In [0]:
from sklearn.decomposition import PCA
# 削除後の次元を2に指定し、主成分分析を実行
PCA = PCA(n_components=2)
# 訓練データで主成分分析のモデル作成
X_train_pca = PCA.fit_transform(X_train_std)
# 訓練データで作成したモデルでテストデータを主成分分析
X_test_pca = PCA.transform(X_test_std)

In [0]:
# 第2主成分までの主成分分析の結果
print("固有値")
print(PCA.explained_variance_) 
print("因子寄与率")
print(PCA.explained_variance_ratio_)

In [0]:
print("固有ベクトルの形状")
print(PCA.components_.shape)
print("固有ベクトル")
print(PCA.components_)

In [0]:
# Display the shapes of the PCA-projected train / test matrices.
(X_train_pca.shape, X_test_pca.shape)

In [0]:
# Print a heading followed by the first five rows of the reduced training features.
print('先頭5件の削除後の特徴量', X_train_pca[:5], sep='\n')

In [0]:
# Create the logistic regression model (one-vs-rest, liblinear solver, L2 penalty).
# NOTE(review): recent scikit-learn releases appear to deprecate `multi_class`;
# confirm against the installed version before upgrading.
model = LogisticRegression(
    penalty='l2',
    solver='liblinear',
    multi_class='ovr',
    max_iter=100,
    random_state=0,
)

# Train on the PCA-projected training features.
model.fit(X_train_pca, y_train)

# Predict on the PCA-projected test features and report the accuracy.
y_test_pred = model.predict(X_test_pca)
ac_score = accuracy_score(y_test, y_test_pred)
print('accuracy = %.2f' % ac_score)

In [0]:
# Plot the training data together with the classifier's decision regions.
plt.figure(figsize=(8, 4))
plot_decision_regions(X=X_train_pca, y=y_train, clf=model)

In [0]:
# Plot the test data together with the classifier's decision regions.
plt.figure(figsize=(8, 4))
plot_decision_regions(X=X_test_pca, y=y_test, clf=model)

In [0]:
# Importing here guarantees that the name `PCA` refers to the scikit-learn
# class when this cell runs.
from sklearn.decomposition import PCA

# Run PCA without specifying the reduced dimensionality (n_components=None).
PCA2 = PCA(n_components=None)
X_train_pca2 = PCA2.fit_transform(X_train_std)

In [0]:
# PCA results for all components: eigenvalues first, then the contribution
# ratio of each component, each under its own heading line.
print(
    "固有値",
    PCA2.explained_variance_,
    "因子寄与率",
    PCA2.explained_variance_ratio_,
    sep="\n",
)

In [0]:
# Number of dimensions vs. cumulative contribution rate.
# A leading 0 is prepended so that index k on the x-axis corresponds to the
# cumulative ratio of the first k components.
ratio = np.concatenate(([0.0], np.cumsum(PCA2.explained_variance_ratio_)))

fig, ax = plt.subplots(figsize=(8, 4))  # plot size
ax.plot(ratio)
ax.set_ylabel('Cumulative contribution rate')
ax.set_xlabel('Principal component index k')
ax.set_title('Wine dataset')

plt.show()

In [0]:
# Importing here guarantees that the name `PCA` refers to the scikit-learn
# class when this cell runs.
from sklearn.decomposition import PCA

# Run PCA by specifying a cumulative contribution rate (0.8) instead of a
# fixed number of components.
PCA3 = PCA(n_components=0.8)
X_train_pca3 = PCA3.fit_transform(X_train_std)

# Results of the PCA chosen so that the specified cumulative contribution
# rate is exceeded: eigenvalues, then per-component contribution ratios.
print(
    "固有値",
    PCA3.explained_variance_,
    "因子寄与率",
    PCA3.explained_variance_ratio_,
    sep="\n",
)