# scikit-learn

## 機械学習モデル

### 試験データ（iris）をロードする

***

In [55]:
from sklearn.datasets import load_iris

iris = load_iris()

print(iris.data.shape)
print(iris.target_names)
print(iris.target[:2])
print(iris.data[:2])

(150, 4)
['setosa' 'versicolor' 'virginica']
[0 0]
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]]


### 学習用及び予測用データの準備

***

In [56]:
from sklearn.model_selection import train_test_split

(train_X, test_X, train_Y, test_Y) = train_test_split(iris.data, iris.target, test_size=0.2)

print(iris.target_names[test_Y])
print(test_Y)
print(test_X)

['versicolor' 'virginica' 'versicolor' 'setosa' 'versicolor' 'versicolor'
 'virginica' 'setosa' 'setosa' 'versicolor' 'virginica' 'setosa' 'setosa'
 'versicolor' 'virginica' 'setosa' 'virginica' 'versicolor' 'setosa'
 'virginica' 'virginica' 'versicolor' 'versicolor' 'setosa' 'virginica'
 'virginica' 'setosa' 'setosa' 'versicolor' 'versicolor']
[1 2 1 0 1 1 2 0 0 1 2 0 0 1 2 0 2 1 0 2 2 1 1 0 2 2 0 0 1 1]
[[5.6 2.5 3.9 1.1]
 [5.9 3.  5.1 1.8]
 [6.2 2.9 4.3 1.3]
 [5.5 4.2 1.4 0.2]
 [5.  2.  3.5 1. ]
 [5.8 2.7 3.9 1.2]
 [5.7 2.5 5.  2. ]
 [5.1 3.8 1.9 0.4]
 [4.6 3.6 1.  0.2]
 [5.6 2.9 3.6 1.3]
 [6.4 3.2 5.3 2.3]
 [5.3 3.7 1.5 0.2]
 [5.5 3.5 1.3 0.2]
 [6.6 3.  4.4 1.4]
 [6.3 3.4 5.6 2.4]
 [5.7 3.8 1.7 0.3]
 [4.9 2.5 4.5 1.7]
 [6.  2.9 4.5 1.5]
 [4.4 3.  1.3 0.2]
 [7.2 3.  5.8 1.6]
 [6.8 3.  5.5 2.1]
 [6.3 2.3 4.4 1.3]
 [7.  3.2 4.7 1.4]
 [5.  3.5 1.3 0.3]
 [5.8 2.7 5.1 1.9]
 [6.7 2.5 5.8 1.8]
 [4.3 3.  1.1 0.1]
 [5.  3.4 1.6 0.4]
 [5.5 2.4 3.7 1. ]
 [6.1 3.  4.6 1.4]]


### K近傍法

***

In [57]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = KNeighborsClassifier()
model.fit(train_X, iris.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(iris.target_names[test_Y], pred)))
print(classification_report(iris.target_names[test_Y], pred))
print(confusion_matrix(iris.target_names[test_Y], pred))

score:0.9666666666666667
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.92      1.00      0.96        11
   virginica       1.00      0.89      0.94         9

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

[[10  0  0]
 [ 0 11  0]
 [ 0  1  8]]


### ロジスティック回帰

***

In [58]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = LogisticRegression()
model.fit(train_X, iris.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(iris.target_names[test_Y], pred)))
print(classification_report(iris.target_names[test_Y], pred))
print(confusion_matrix(iris.target_names[test_Y], pred))

score:0.9666666666666667
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.92      1.00      0.96        11
   virginica       1.00      0.89      0.94         9

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

[[10  0  0]
 [ 0 11  0]
 [ 0  1  8]]


### 単純パーセプトロン

***

In [59]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = Perceptron(max_iter=100)
model.fit(train_X, iris.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(iris.target_names[test_Y], pred)))
print(classification_report(iris.target_names[test_Y], pred))
print(confusion_matrix(iris.target_names[test_Y], pred))

score:0.6333333333333333
              precision    recall  f1-score   support

      setosa       0.56      1.00      0.71        10
  versicolor       0.00      0.00      0.00        11
   virginica       0.75      1.00      0.86         9

    accuracy                           0.63        30
   macro avg       0.44      0.67      0.52        30
weighted avg       0.41      0.63      0.50        30

[[10  0  0]
 [ 8  0  3]
 [ 0  0  9]]


  _warn_prf(average, modifier, msg_start, len(result))


### 多層パーセプトロン（MLP）

***

In [60]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = MLPClassifier(max_iter=700)
model.fit(train_X, iris.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(iris.target_names[test_Y], pred)))
print(classification_report(iris.target_names[test_Y], pred))
print(confusion_matrix(iris.target_names[test_Y], pred))

score:1.0
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00        11
   virginica       1.00      1.00      1.00         9

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

[[10  0  0]
 [ 0 11  0]
 [ 0  0  9]]


### SVM（Support Vector Machine）

***

In [61]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = SVC(C=100., gamma=0.001)
model.fit(train_X, iris.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(iris.target_names[test_Y], pred)))
print(classification_report(iris.target_names[test_Y], pred))
print(confusion_matrix(iris.target_names[test_Y], pred))

score:0.9666666666666667
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.92      1.00      0.96        11
   virginica       1.00      0.89      0.94         9

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

[[10  0  0]
 [ 0 11  0]
 [ 0  1  8]]


### 教師なし学習（K平均法）

***

In [62]:
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = KMeans(n_clusters=3)
model.fit(train_X, iris.target_names[train_Y])

pred = model.fit_predict(iris.data)
print(iris.target)
print(pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 2 0 2 2 2 2
 2 2 0 0 2 2 2 2 0 2 0 2 0 2 2 0 0 2 2 2 2 2 0 2 2 2 2 0 2 2 2 0 2 2 2 0 2
 2 0]


## 様々な試験データの利用

### 試験データ（digits）をロードする

***

In [63]:
from sklearn.datasets import load_digits

digits = load_digits()

print(digits.data.shape)
print(digits.target_names)
print(digits.target[:2])
print(digits.data[:2])

(1797, 64)
[0 1 2 3 4 5 6 7 8 9]
[0 1]
[[ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
  15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
   0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
   0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]
 [ 0.  0.  0. 12. 13.  5.  0.  0.  0.  0.  0. 11. 16.  9.  0.  0.  0.  0.
   3. 15. 16.  6.  0.  0.  0.  7. 15. 16. 16.  2.  0.  0.  0.  0.  1. 16.
  16.  3.  0.  0.  0.  0.  1. 16. 16.  6.  0.  0.  0.  0.  1. 16. 16.  6.
   0.  0.  0.  0.  0. 11. 16. 10.  0.  0.]]


### 学習用及び予測用データの準備

***

In [64]:
from sklearn.model_selection import train_test_split

(train_X, test_X, train_Y, test_Y) = train_test_split(digits.data, digits.target, test_size=0.2, random_state=0)

print(digits.target_names[test_Y])
print(test_Y)
print(test_X)

[2 8 2 6 6 7 1 9 8 5 2 8 6 6 6 6 1 0 5 8 8 7 8 4 7 5 4 9 2 9 4 7 6 8 9 4 3
 1 0 1 8 6 7 7 1 0 7 6 2 1 9 6 7 9 0 0 5 1 6 3 0 2 3 4 1 9 2 6 9 1 8 3 5 1
 2 8 2 2 9 7 2 3 6 0 5 3 7 5 1 2 9 9 3 1 7 7 4 8 5 8 5 5 2 5 9 0 7 1 4 7 3
 4 8 9 7 9 8 2 6 5 2 5 8 4 8 7 0 6 1 5 9 9 9 5 9 9 5 7 5 6 2 8 6 9 6 1 5 1
 5 9 9 1 5 3 6 1 8 9 8 7 6 7 6 5 6 0 8 8 9 8 6 1 0 4 1 6 3 8 6 7 4 5 6 3 0
 3 3 3 0 7 7 5 7 8 0 7 8 9 6 4 5 0 1 4 6 4 3 3 0 9 5 9 2 1 4 2 1 6 8 9 2 4
 9 3 7 6 2 3 3 1 6 9 3 6 3 2 2 0 7 6 1 1 9 7 2 7 8 5 5 7 5 2 3 7 2 7 5 5 7
 0 9 1 6 5 9 7 4 3 8 0 3 6 4 6 3 2 6 8 8 8 4 6 7 5 2 4 5 3 2 4 6 9 4 5 4 3
 4 6 2 9 0 1 7 2 0 9 6 0 4 2 0 7 9 8 5 4 8 2 8 4 3 7 2 6 9 1 5 1 0 8 2 1 9
 5 6 8 2 7 2 1 5 1 6 4 5 0 9 4 1 1 7 0 8 9 0 5 4 3 8 8]
[2 8 2 6 6 7 1 9 8 5 2 8 6 6 6 6 1 0 5 8 8 7 8 4 7 5 4 9 2 9 4 7 6 8 9 4 3
 1 0 1 8 6 7 7 1 0 7 6 2 1 9 6 7 9 0 0 5 1 6 3 0 2 3 4 1 9 2 6 9 1 8 3 5 1
 2 8 2 2 9 7 2 3 6 0 5 3 7 5 1 2 9 9 3 1 7 7 4 8 5 8 5 5 2 5 9 0 7 1 4 7 3
 4 8 9 7 9 8 2 6 5 2 5 8 4 8 7 0 6 1 5 9 9 9

### K近傍法

***

In [65]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = KNeighborsClassifier()
model.fit(train_X, digits.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(digits.target_names[test_Y], pred)))
print(classification_report(digits.target_names[test_Y], pred))
print(confusion_matrix(digits.target_names[test_Y], pred))

score:0.975
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       0.97      0.97      0.97        35
           2       1.00      0.97      0.99        36
           3       0.91      1.00      0.95        29
           4       1.00      0.97      0.98        30
           5       0.95      0.97      0.96        40
           6       1.00      1.00      1.00        44
           7       0.95      1.00      0.97        39
           8       1.00      0.90      0.95        39
           9       0.98      0.98      0.98        41

    accuracy                           0.97       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.97      0.97       360

[[27  0  0  0  0  0  0  0  0  0]
 [ 0 34  0  0  0  1  0  0  0  0]
 [ 0  0 35  1  0  0  0  0  0  0]
 [ 0  0  0 29  0  0  0  0  0  0]
 [ 0  0  0  0 29  0  0  1  0  0]
 [ 0  0  0  0  0 39  0  0  0  1]
 [ 0  0  0  0  0  0 44  0  0  0

### ロジスティック回帰

***

In [66]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = LogisticRegression()
model.fit(train_X, digits.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(digits.target_names[test_Y], pred)))
print(classification_report(digits.target_names[test_Y], pred))
print(confusion_matrix(digits.target_names[test_Y], pred))

score:0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       0.92      0.97      0.94        35
           2       0.97      0.97      0.97        36
           3       0.97      1.00      0.98        29
           4       0.97      0.97      0.97        30
           5       0.97      0.93      0.95        40
           6       1.00      0.98      0.99        44
           7       0.97      0.97      0.97        39
           8       0.97      0.92      0.95        39
           9       0.93      0.98      0.95        41

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360

[[27  0  0  0  0  0  0  0  0  0]
 [ 0 34  0  0  0  0  0  0  1  0]
 [ 0  0 35  1  0  0  0  0  0  0]
 [ 0  0  0 29  0  0  0  0  0  0]
 [ 0  0  0  0 29  0  0  1  0  0]
 [ 0  0  0  0  0 37  0  0  0  3]
 [ 0  1  0  0  0  

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


### 単純パーセプトロン

***

In [67]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = Perceptron(max_iter=100)
model.fit(train_X, digits.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(digits.target_names[test_Y], pred)))
print(classification_report(digits.target_names[test_Y], pred))
print(confusion_matrix(digits.target_names[test_Y], pred))

score:0.9388888888888889
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       0.86      0.91      0.89        35
           2       0.97      0.97      0.97        36
           3       1.00      0.93      0.96        29
           4       0.94      1.00      0.97        30
           5       0.95      0.97      0.96        40
           6       0.96      1.00      0.98        44
           7       1.00      0.95      0.97        39
           8       0.78      0.90      0.83        39
           9       1.00      0.78      0.88        41

    accuracy                           0.94       360
   macro avg       0.95      0.94      0.94       360
weighted avg       0.94      0.94      0.94       360

[[27  0  0  0  0  0  0  0  0  0]
 [ 0 32  0  0  0  0  1  0  2  0]
 [ 0  0 35  0  0  0  0  0  1  0]
 [ 0  0  0 27  0  1  0  0  1  0]
 [ 0  0  0  0 30  0  0  0  0  0]
 [ 0  1  0  0  0 39  0  0  0  0]
 [ 0  0  0  0  0  

### 多層パーセプトロン（MLP）

***

In [68]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = MLPClassifier(max_iter=700)
model.fit(train_X, digits.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(digits.target_names[test_Y], pred)))
print(classification_report(digits.target_names[test_Y], pred))
print(confusion_matrix(digits.target_names[test_Y], pred))

score:0.9722222222222222
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       0.94      0.94      0.94        35
           2       1.00      1.00      1.00        36
           3       0.97      1.00      0.98        29
           4       0.94      1.00      0.97        30
           5       0.97      0.97      0.97        40
           6       0.98      0.98      0.98        44
           7       1.00      0.97      0.99        39
           8       0.95      0.90      0.92        39
           9       0.98      0.98      0.98        41

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360

[[27  0  0  0  0  0  0  0  0  0]
 [ 0 33  0  0  0  0  0  0  2  0]
 [ 0  0 36  0  0  0  0  0  0  0]
 [ 0  0  0 29  0  0  0  0  0  0]
 [ 0  0  0  0 30  0  0  0  0  0]
 [ 0  0  0  0  0 39  0  0  0  1]
 [ 0  1  0  0  0  

### SVM（Support Vector Machine）

***

In [69]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = SVC(C=100., gamma=0.001)
model.fit(train_X, digits.target_names[train_Y])

pred = model.predict(test_X)
print('score:{0}'.format(accuracy_score(digits.target_names[test_Y], pred)))
print(classification_report(digits.target_names[test_Y], pred))
print(confusion_matrix(digits.target_names[test_Y], pred))

score:0.9916666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       0.97      1.00      0.99        35
           2       1.00      1.00      1.00        36
           3       1.00      1.00      1.00        29
           4       1.00      1.00      1.00        30
           5       0.97      0.97      0.97        40
           6       1.00      1.00      1.00        44
           7       1.00      1.00      1.00        39
           8       1.00      0.97      0.99        39
           9       0.98      0.98      0.98        41

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360

[[27  0  0  0  0  0  0  0  0  0]
 [ 0 35  0  0  0  0  0  0  0  0]
 [ 0  0 36  0  0  0  0  0  0  0]
 [ 0  0  0 29  0  0  0  0  0  0]
 [ 0  0  0  0 30  0  0  0  0  0]
 [ 0  0  0  0  0 39  0  0  0  1]
 [ 0  0  0  0  0  

### 教師なし学習（K平均法）

***

In [71]:
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

model = KMeans(n_clusters=3)
model.fit(train_X, digits.target_names[train_Y])

pred = model.fit_predict(digits.data)
print(digits.target[:100].reshape(10, 10))
print(pred[:100].reshape(10, 10))

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]
 [0 9 5 5 6 5 0 9 8 9]
 [8 4 1 7 7 3 5 1 0 0]
 [2 2 7 8 2 0 1 2 6 3]
 [3 7 3 3 4 6 6 6 4 9]
 [1 5 0 9 5 2 8 2 0 0]
 [1 7 6 3 2 1 7 4 6 3]
 [1 3 9 1 7 6 8 4 3 1]]
[[2 0 0 1 2 1 2 0 1 1]
 [2 0 1 1 2 0 2 0 0 1]
 [2 0 1 1 2 0 2 0 0 1]
 [2 1 0 0 2 0 2 1 0 1]
 [0 2 0 0 0 1 0 0 2 2]
 [0 0 0 0 0 2 0 0 2 1]
 [1 0 1 1 2 2 2 2 0 0]
 [0 0 2 1 1 0 0 0 2 2]
 [0 0 2 1 1 0 0 0 2 1]
 [0 1 1 0 0 2 0 2 1 0]]
