In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import random
from decimal import Decimal, ROUND_HALF_UP
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
def scratch_train_test_split(X, y, train_size):
    """
    検証データを分割する。

    Parameters
    ----------
    X : 次の形のndarray, shape (n_samples, n_features)
      訓練データ
    y : 次の形のndarray, shape (n_samples, )
      正解値
    train_size : float (0<train_size<1)
      何割をtrainとするか指定

    Returns
    ----------
    X_train : 次の形のndarray, shape (n_samples, n_features)
      訓練データ
    X_test : 次の形のndarray, shape (n_samples, n_features)
      検証データ
    y_train : 次の形のndarray, shape (n_samples, )
      訓練データの正解値
    y_test : 次の形のndarray, shape (n_samples, )
      検証データの正解値
    """
    #ここにコードを書く

    # 乱数を初期化
    random.seed(0)
    
    #trainデータの数を計算
    n_X_train = int(Decimal(str(len(X) * train_size)).quantize(Decimal('0'), rounding=ROUND_HALF_UP))
    
    #trainデータの数だけランダムにデータを抽出
    list_index_train = []
    for i in np.random.choice((len(X) - 1), n_X_train, replace=False):
        list_index_train.append(i)
        
    #testデータを抽出
    list_index_test = list(set(range(len(X))) - set(list_index_train))
    
    X_train = X[list_index_train]
    X_test = X[list_index_test]
    y_train = y[list_index_train]
    y_test = y[list_index_test]
    
    pass
    return X_train, X_test, y_train, y_test

In [3]:
#学習から検証までを関数化
def learning_to_verification(X, y, train_size, model_criteria):
    # 訓練データと検証データの分割。訓練データ75%、検証データ25%として分割する。
    X_train, X_test, y_train, y_test = scratch_train_test_split(X, y, train_size)
    
    #標準化
    scaler = StandardScaler()
    scaler.fit(X_train) #訓練用のデータでfit
    X_train_std = scaler.transform(X_train) #訓練用データをtransform
    X_test_std = scaler.transform(X_test) #検証用データをtransform

    #学習〜予測
    clf = model_criteria
    clf.fit(X_train_std, y_train) # 学習
    pred = clf.predict(X_test_std) #クラスの予測

    # 評価
    print("正解率：{:.2f}".format(accuracy_score(y_test, pred)))
    print("適合率：{:.2f}".format(precision_score(y_test, pred, pos_label=1)))
    print("再現率：{:.2f}".format(recall_score(y_test, pred, pos_label=1)))
    print("F値：{:.2f}".format(f1_score(y_test, pred, pos_label=1)))
    print("混同行列：")
    print(confusion_matrix(y_test, pred))
    print("classification_report：") #評価をまとめて出力するやつ
    print(classification_report(y_test, pred))

In [4]:
#irisデータセットを取得
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names) #説明変数
y = pd.DataFrame(iris.target, columns=["Species"]) #目的変数
df = pd.concat([X, y], axis=1)#説明変数と目的変数を結合
df = df.replace({"Species": dict(enumerate(iris.target_names))}) #Speciesの要素をtarget_namesに変更
df_versicolor_virginica = df.query('Species == ["versicolor",  "virginica"]')# versicolorとvirginicaを抽出
df_versicolor_virginica = df_versicolor_virginica.replace({"Species": {'versicolor': 1,  'virginica': 2}})

#ndarrayに変換
X_data = np.array(df_versicolor_virginica.iloc[:, :4])
y_data = np.array(df_versicolor_virginica["Species"])

model_criteria = SGDClassifier(loss="log")

learning_to_verification(X_data, y_data, 0.75, model_criteria) #学習から検証までを関数で実行

正解率：1.00
適合率：1.00
再現率：1.00
F値：1.00
混同行列：
[[12  0]
 [ 0 13]]
classification_report：
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        13

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25



In [12]:
X_data

array([[7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3

In [14]:
X_data[y_data==2][:, 0]

array([6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8,
       5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2,
       6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. ,
       6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])

In [20]:
y_data==2

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [5]:
#グラフ化の関数
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
def decision_region(X, y, model, step=0.01, title='decision region', xlabel='xlabel', ylabel='ylabel', target_names=['versicolor', 'virginica']):
    # setting
    scatter_color = ['red', 'blue']
    contourf_color = ['pink', 'skyblue']
    n_class = 2
    # pred
    mesh_f0, mesh_f1  = np.meshgrid(np.arange(np.min(X[:,0])-0.5, np.max(X[:,0])+0.5, step), np.arange(np.min(X[:,1])-0.5, np.max(X[:,1])+0.5, step))
    mesh = np.c_[np.ravel(mesh_f0),np.ravel(mesh_f1)]
    y_pred = model.predict(mesh).reshape(mesh_f0.shape)
    # plot
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.contourf(mesh_f0, mesh_f1, y_pred, n_class-1, cmap=ListedColormap(contourf_color))
    plt.contour(mesh_f0, mesh_f1, y_pred, n_class-1, colors='y', linewidths=3, alpha=0.5)
    for i, target in enumerate(set(y)):
        plt.scatter(X[y==target][:, 0], X[y==target][:, 1], s=80, color=scatter_color[i], label=target_names[i], marker='o')
    patches = [mpatches.Patch(color=scatter_color[i], label=target_names[i]) for i in range(n_class)]
    plt.legend(handles=patches)
    plt.legend()
    plt.show()

In [25]:

mesh = [mesh_f0, mesh_f1, mesh_f2, mesh_f3]
for i in range(X_n_columns):
    mesh_f0 = np.meshgrid(np.arange(np.min(X[:,0])-0.5, np.max(X[:,0])+0.5, step), np.arange(np.min(X[:,1])-0.5, np.max(X[:,1])+0.5, step))
mesh = np.c_[np.ravel(mesh_f0),np.ravel(mesh_f1)]
y_pred = clf.predict(mesh).reshape(mesh_f0.shape)

NameError: name 'c' is not defined

In [40]:
mesh = {0 : "mesh_f0", 1 : "mesh_f1", 2 : "mesh_f2", 3 :"mesh_f3"}
mesh(key:0)

SyntaxError: invalid syntax (<ipython-input-40-8a8f617f4eba>, line 2)