# scikit-learn付属のデータセットの読み込み（wine）
<https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html#sklearn.datasets.load_wine>

In [0]:
import numpy as np
import sklearn
from sklearn.datasets import load_wine
# Load the wine dataset bundled with scikit-learn, using the directly
# imported loader rather than going through the package attribute.
wine_data = load_wine()

In [15]:
# Inspect the loaded dataset.
wine_data

 'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'feature_names': ['alcohol',
  'malic_acid',
  'ash',
  'alcalinity_of_ash',
  'magnesium',
  'total_phenols',
  'flavanoids',
  'nonflavanoid_phenols',
  'proanthocyanins',
  'color_intensity',
  'hue',
  'od280/od315_of_diluted_wines',
  'proline'],
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 

In [0]:
# Split the dataset into a training subset and a test subset.
N = len(wine_data.data)
np.random.seed(2019)  # fixed seed so the shuffle below is reproducible
index = np.arange(N)
np.random.shuffle(index)

# The first 80% of the shuffled indices select the training samples;
# the remaining 20% select the test samples.
n_train = int(0.8 * N)
train_idx, test_idx = index[:n_train], index[n_train:]

# Training data
X_train = wine_data.data[train_idx]
t_train = wine_data.target[train_idx]
# Test data
X_test = wine_data.data[test_idx]
t_test = wine_data.target[test_idx]

# ロジスティック回帰による分類

In [6]:
# Train the model: fit a logistic-regression classifier, constructed with
# its default hyperparameters, on the training split.
from sklearn.linear_model import LogisticRegression
sk_logit = LogisticRegression()
sk_logit.fit(X_train, t_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [0]:
# Classify the test data with the fitted logistic-regression model.
predict = sk_logit.predict(X_test)

In [9]:
# Show the labels predicted for the test samples.
predict

array([2, 0, 2, 1, 1, 2, 1, 0, 1, 2, 1, 1, 0, 1, 2, 0, 1, 2, 1, 0, 0, 2,
       1, 2, 0, 2, 0, 1, 2, 0, 0, 1, 0, 2, 1, 1])

In [10]:
# Classification accuracy on the test split.
# accuracy_score is documented as accuracy_score(y_true, y_pred): the
# ground-truth labels go first. Accuracy itself is symmetric, so the value
# is unchanged, but keeping the documented order avoids wrong results if
# this call is adapted to an asymmetric metric (precision, recall, ...).
from sklearn.metrics import accuracy_score
accuracy_score(t_test, predict)

0.9166666666666666

# 演習
いくつかの機械学習アルゴリズムを適用してみましょう

In [0]:
# SVM

In [18]:
# Fit a linear-kernel support vector classifier on the training split.
# The original call also passed gamma=2, but gamma is the coefficient of the
# rbf/poly/sigmoid kernels only and has no effect when kernel='linear', so it
# is dropped here; the fitted model is the same.
from sklearn.svm import SVC
svm_clf = SVC(kernel='linear', C=0.025)
svm_clf.fit(X_train, t_train)

SVC(C=0.025, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [0]:
# Classify the test data with the fitted SVM.
predict = svm_clf.predict(X_test)

In [23]:
# Show the labels predicted for the test samples.
predict

array([2, 0, 2, 1, 1, 2, 1, 0, 1, 1, 1, 1, 0, 1, 2, 0, 1, 2, 1, 0, 0, 2,
       1, 2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 2, 1, 1])

In [24]:
# Classification accuracy on the test split.
# accuracy_score is documented as accuracy_score(y_true, y_pred): the
# ground-truth labels go first. Accuracy itself is symmetric, so the value
# is unchanged, but keeping the documented order avoids wrong results if
# this call is adapted to an asymmetric metric (precision, recall, ...).
from sklearn.metrics import accuracy_score
accuracy_score(t_test, predict)

0.9722222222222222