### 2024/05/20 グリッドサーチを行う

##### ▶ライブラリを読み込む

In [1]:
import pandas as pd
import numpy as np
import requests
import io
import math
import copy
from scipy.stats import gmean
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import patches
from pipeline_functions import PipelineFunctions
pf = PipelineFunctions()
from view_functions import ViewFunctions
vf = ViewFunctions()

import umap
from sklearn.manifold import TSNE
import seaborn as sns
palette = ['#CC521D', '#4F4AD7', '#39AE3D']

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

##### ▶データの読み込み

In [2]:
# Selects which point-count pickle to load (file name is ptcnt_<PT_CNT>.pkl).
PT_CNT = 5
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
data = pd.read_pickle(f'../data/data_unify_ptcnt/ptcnt_{PT_CNT}.pkl')

##### ▶特徴量の導出用関数を定義

In [5]:
# Join the original coordinates into one flat feature vector
def calc_org_coord(x, y):
    """Return the x coordinates followed by the y coordinates as one list.

    Both inputs are coerced to ``list`` first so that array-like inputs
    (e.g. ``numpy.ndarray`` or ``pandas.Series``) are joined end to end
    rather than added element-wise, which is what ``x + y`` does for them.

    Args:
        x: Iterable of x coordinates.
        y: Iterable of y coordinates.

    Returns:
        A new list ``[x0, x1, ..., y0, y1, ...]``.
    """
    return list(x) + list(y)

# Join the segment vectors (x differences followed by y differences)
def calc_segment_vec(x, y):
    """Return consecutive differences of x followed by those of y.

    The number of differences for both axes is driven by ``len(x)``.
    """
    point_count = len(x)
    delta_x = []
    delta_y = []
    for k in range(1, point_count):
        delta_x.append(x[k] - x[k - 1])
        delta_y.append(y[k] - y[k - 1])
    return delta_x + delta_y

# Cosine similarity between consecutive segment vectors
def calc_segment_cossim(x, y):
    """Return the cosine similarity of each segment with the one before it.

    For every index ``i >= 2`` the segment ``(i-1 -> i)`` is compared with
    the segment ``(i-2 -> i-1)``. When either segment has zero length a
    marker line is printed and ``1`` is used as the similarity.
    """
    def calc_cos_sim(v1, v2):
        norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
        if norm_product == 0:
            print('------------ exception segment -------------')
            return 1
        return np.dot(v1, v2) / norm_product

    similarities = []
    for i in range(2, len(x)):
        current_seg = np.array([x[i] - x[i - 1], y[i] - y[i - 1]])
        previous_seg = np.array([x[i - 1] - x[i - 2], y[i - 1] - y[i - 2]])
        similarities.append(calc_cos_sim(current_seg, previous_seg))
    return similarities

# Cosine similarity between each segment vector and the start-to-end vector
def calc_startend_seg_cossim(x, y):
    """Return the cosine similarity of every segment with the start-to-end vector.

    The start-to-end vector runs from the first point to the last point.
    When either vector has zero length a marker line is printed and ``1``
    is used as the similarity.
    """
    def calc_cos_sim(v1, v2):
        norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
        if norm_product == 0:
            print('------------ exception startend -------------')
            return 1
        return np.dot(v1, v2) / norm_product

    chord = np.array([x[-1] - x[0], y[-1] - y[0]])
    delta_x = [x[i] - x[i - 1] for i in range(1, len(x))]
    delta_y = [y[i] - y[i - 1] for i in range(1, len(y))]

    similarities = []
    for dx, dy in zip(delta_x, delta_y):
        similarities.append(calc_cos_sim(chord, np.array([dx, dy])))
    return similarities

# Cosine similarity between the gradient vector at each point and the start-to-end vector
def calc_startend_grad_cossim(x, y, grad_x, grad_y):
    """Return the cosine similarity of every gradient vector with the start-to-end vector.

    ``grad_x`` and ``grad_y`` are paired element by element to form the
    gradient vectors. When either vector has zero length a marker line is
    printed and ``1`` is used as the similarity.
    """
    def calc_cos_sim(v1, v2):
        norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
        if norm_product == 0:
            print('------------ exception gradient-------------')
            return 1
        return np.dot(v1, v2) / norm_product

    chord = np.array([x[-1] - x[0], y[-1] - y[0]])
    similarities = []
    for gx, gy in zip(grad_x, grad_y):
        similarities.append(calc_cos_sim(chord, np.array([gx, gy])))
    return similarities

def calc_segment_grad_cossim(grad_x, grad_y):
    """Return the cosine similarity between each pair of consecutive gradient vectors.

    Gradient vector ``i-1`` is compared with gradient vector ``i`` for every
    ``i >= 1``. When either vector has zero length a marker line is printed
    and ``1`` is used as the similarity.
    """
    def calc_cos_sim(v1, v2):
        norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
        if norm_product == 0:
            print('------------ exception segment -------------')
            return 1
        return np.dot(v1, v2) / norm_product

    similarities = []
    for i in range(1, len(grad_x)):
        previous_grad = np.array([grad_x[i - 1], grad_y[i - 1]])
        current_grad = np.array([grad_x[i], grad_y[i]])
        similarities.append(calc_cos_sim(previous_grad, current_grad))
    return similarities

##### ▶ 座標数の揃え方ごとで特徴量を導出する

In [4]:
# Column-name pairs, one pair per way of unifying the number of points.
# The strings are looked up as DataFrame column names, so their spelling
# (including 'intervel') must match the stored data exactly.
PTCNT_COORD_INTERVAL = ['equal_coord_intervel_x', 'equal_coord_intervel_y']
PTCNT_SMALLER_COSSIM = ['smaller_cossim_x', 'smaller_cossim_y']
PTCNT_LEN_INTERVAL = ['equal_interval_x', 'equal_interval_y']
PTCNT_LEN_INTERVAL_GRAD = ['equal_interval_vecx', 'equal_interval_vecy']
# Indexed as PTCNT[ptcnt_type][0] / PTCNT[ptcnt_type][1] for the first / second column of a pair.
PTCNT = [PTCNT_COORD_INTERVAL, PTCNT_SMALLER_COSSIM, PTCNT_LEN_INTERVAL, PTCNT_LEN_INTERVAL_GRAD]

##### ▶　座標数（3~83個）と座標数の揃え方（3種類）の組み合わせでファイルを作成する

In [6]:
# Build the gradient-based cosine-similarity features for every point count
# (3..83) and store them next to the original columns.
#
# The earlier version of this cell also computed org_coord / segment_vec /
# segment_cossim / startend_seg_cossim and put them in a `feature` frame that
# was overwritten before it was ever used. That dead work is removed; the
# pickles written below contain exactly the same columns as before.
ptcnt_type = 2  # constant for the whole run; only used in the output file name and the log line
for pt_cnt in range(3, 84):
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    data = pd.read_pickle(f'../data/data_unify_ptcnt/ptcnt_{pt_cnt}.pkl')

    # Gradient at each point vs. the start-to-end vector of the stroke.
    startend_grad_cossim = [
        calc_startend_grad_cossim(x, y, grad_x, grad_y)
        for x, y, grad_x, grad_y in zip(
            data['equal_interval_x'], data['equal_interval_y'],
            data['equal_interval_vecx'], data['equal_interval_vecy'])
    ]

    # Gradient at each point vs. the gradient at the previous point.
    segment_grad_cossim = [
        calc_segment_grad_cossim(grad_x, grad_y)
        for grad_x, grad_y in zip(data['equal_interval_vecx'], data['equal_interval_vecy'])
    ]

    feature = pd.DataFrame({
        'segment_grad_cossim': segment_grad_cossim,
        'startend_grad_cossim': startend_grad_cossim
    })

    # Column-wise concat aligns on the index; `feature` has a default 0..n-1
    # index, so this presumably relies on `data` having one too -- TODO confirm.
    data_ptcnt_feature = pd.concat([data.copy(), feature], axis=1)
    data_ptcnt_feature.to_pickle(f'../temp/grad_feature/{ptcnt_type}_{pt_cnt}.pkl')
    print(pt_cnt, ' | ', ptcnt_type)

3  |  2
4  |  2
5  |  2
6  |  2
7  |  2
8  |  2
9  |  2
10  |  2
11  |  2
12  |  2
13  |  2
14  |  2
15  |  2
16  |  2
17  |  2
18  |  2
19  |  2
20  |  2
21  |  2
22  |  2
23  |  2
24  |  2
25  |  2
26  |  2
27  |  2
28  |  2
29  |  2
30  |  2
31  |  2
32  |  2
33  |  2
34  |  2
35  |  2
36  |  2
37  |  2
38  |  2
39  |  2
40  |  2
41  |  2
42  |  2
43  |  2
44  |  2
45  |  2
46  |  2
47  |  2
48  |  2
49  |  2
50  |  2
51  |  2
52  |  2
53  |  2
54  |  2
55  |  2
56  |  2
57  |  2
58  |  2
59  |  2
60  |  2
61  |  2
62  |  2
63  |  2
64  |  2
65  |  2
66  |  2
67  |  2
68  |  2
69  |  2
70  |  2
71  |  2
72  |  2
73  |  2
74  |  2
75  |  2
76  |  2
77  |  2
78  |  2
79  |  2
80  |  2
81  |  2
82  |  2
83  |  2


##### ▶特徴点ごとに列を分けたcsvファイルを作成する

In [11]:
# X_cols = ['segment_grad_cossim', 'startend_grad_cossim']
# for pt_cnt in range(3, 84):
#     ptcnt_type = 2
#     data = pd.read_pickle(f'../temp/grad_feature/{ptcnt_type}_{pt_cnt}.pkl')
#     for X_col in X_cols:
#         eachpt_feature = {}
#         eachpt_feature['drawing_id'] = data['drawing_id']
#         eachpt_feature['stroke_id'] = data['stroke_id']
#         eachpt_feature['saito_label'] = data['saito_label']
#         eachpt_feature['is_good_saito'] = data['is_good_saito']
#         eachpt_feature['is_good_rulebase'] = data['is_good_rulebase']
#         for i in range(len(data[X_col].iloc[-1])):
#             eachpt_feature[f'feature_{i}'] = [vals[i] for vals in data[X_col]]
#         df_eachpt_feature = pd.DataFrame(eachpt_feature)
#         file_name = f'../temp/eachpt_feature_grad/{ptcnt_type}_{pt_cnt}_{X_col}.csv'
#         df_eachpt_feature.to_csv(file_name)
#         print(file_name)

##### ▶検証を行う

In [6]:
from sklearn.svm import SVC
def _expand_feature_columns(frame, X_col):
    """Return a copy of *frame* with list-valued column X_col spread into feature_<i> columns."""
    per_point = {}
    # The last row decides how many feature columns are created; every row
    # must hold a sequence with at least that many entries.
    for i in range(len(frame[X_col].iloc[-1])):
        per_point[f'feature_{i}'] = [vals[i] for vals in frame[X_col]]
    # Reuse the frame's own index so the column-wise concat lines rows up
    # even after the frame has been shuffled by a train/test split.
    expanded = pd.DataFrame(per_point, index=frame.index)
    return pd.concat([frame.copy(), expanded], axis=1)


def _fit_and_score_svm(fit_frame, eval_frame, feature_cols, y_col, kernel, c, gamma, degree):
    """Fit an SVC on fit_frame (min-max scaled) and return its metrics on eval_frame."""
    # The scaler learns min/max from the fitting rows only and applies them
    # to the evaluation rows, so no evaluation statistics leak into training.
    scaler = MinMaxScaler()
    fit_X = scaler.fit_transform(fit_frame[feature_cols])
    eval_X = scaler.transform(eval_frame[feature_cols])
    fit_y = fit_frame[y_col].to_numpy()
    eval_y = eval_frame[y_col].to_numpy()

    model = SVC(kernel=kernel, C=c, gamma=gamma, degree=degree)
    model.fit(fit_X, fit_y)
    pred = model.predict(eval_X)

    # average=None yields one score per label in sorted label order;
    # [1] picks the second label's score, as the original code did.
    return {
        'recall': recall_score(eval_y, pred, average=None)[1],
        'precision': precision_score(eval_y, pred, average=None)[1],
        'f1': f1_score(eval_y, pred, average=None)[1],
        'accuracy': accuracy_score(eval_y, pred),
    }


def grid_search_svm(train_valid, test, X_col, kernel, c, gamma, degree):
    """Evaluate one SVC configuration with stratified 5-fold CV and on the test set.

    Args:
        train_valid: DataFrame used for cross-validation and for the final
            fit. Must contain 'saito_label' and the list-valued column
            named by ``X_col``.
        test: DataFrame with the same columns, used only for the final score.
        X_col: Name of the column whose per-row sequence becomes the features.
        kernel, c, gamma, degree: Passed to ``SVC`` as ``kernel``, ``C``,
            ``gamma`` and ``degree``.

    Returns:
        ``{'valid': [metrics per fold], 'test': metrics}`` where each
        ``metrics`` is a dict with 'recall', 'precision', 'f1' and 'accuracy'.
        (Earlier versions computed these values but returned ``None``.)

    Side effects:
        Prints an empty line after each fold and the test recall at the end,
        matching the previous output.
    """
    K = 5
    KFOLD_SEED = 1
    SHUFFLE_LABEL = 'saito_label'
    y_col = 'saito_label'

    # Spread the list-valued feature into one scalar column per point.
    train_valid = _expand_feature_columns(train_valid, X_col)
    test = _expand_feature_columns(test, X_col)
    X_pt_col = [col for col in train_valid if 'feature_' in col]

    # K-fold validation on the train+validation rows.
    valid_scores = []
    skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=KFOLD_SEED)
    for train_index, valid_index in skf.split(train_valid, train_valid[SHUFFLE_LABEL]):
        train, valid = train_valid.iloc[train_index], train_valid.iloc[valid_index]
        valid_scores.append(
            _fit_and_score_svm(train, valid, X_pt_col, y_col, kernel, c, gamma, degree))
        print()

    # Final model: fit on all train+validation rows, score on the test rows.
    test_scores = _fit_and_score_svm(train_valid, test, X_pt_col, y_col, kernel, c, gamma, degree)
    print(test_scores['recall'])

    return {'valid': valid_scores, 'test': test_scores}

##### ▶実行

In [12]:
# Fixed parameters
TRAIN_TEST_SPLIT_SEED = 1
Y_COL = 'saito_label'

# Parameters to sweep
PTCNT = list(range(3, 84))
PTCNT_TYPE = [1, 2]
X_COL = ['org_coord', 'segment_vec', 'segment_cossim', 'startend_seg_cossim']
SVM_PARAMETER = pd.read_csv('../temp/svm_parameter.csv', delimiter=',', index_col=0)

# Build and validate a model for every combination
for ptcnt_type in PTCNT_TYPE:
    for ptcnt in PTCNT:
        data = pd.read_pickle(f'../data/data_ptcnt_feature_combo/{ptcnt_type}_{ptcnt}.pkl')
        X = data.copy()
        y = data[Y_COL]
        # Stratified split: 725 rows go to train(+validation), the rest to test
        # (725:310 according to the original note).
        train, test, y_train, y_test = train_test_split(
            X,
            y,
            train_size=725,
            shuffle=True,
            random_state=TRAIN_TEST_SPLIT_SEED,
            stratify=y,
        )

        for X_col in X_COL:
            # One run per row of the hyperparameter table.
            for i, row in SVM_PARAMETER.iterrows():
                grid_search_svm(
                    train, test, X_col,
                    row['kernel'], row['c'], row['gamma'], row['degree'],
                )
                
                

##### ▶▶SVM

In [None]:
from sklearn.svm import SVC

# Hyperparameter grid (defined here but not consumed by the single rbf run below)
svm_parameter = {
    'kernel': ['linear', 'poly', 'rbf'],
    'c': [1, 5, 20, 100],
    'gamma': ['auto', 'scale', 0.1, 0.5, 1.0, 5.0, 20.0],
    'degree': [1, 3, 5, 10],
}


INPUT_COL = 'equal_interval_vecx'
K = 5
SHUFFLE_LABEL = 'saito_label'
SHUFFLE_SEED = 1
Y_COL = 'is_good_saito'
SVM_SEED = 1
###############################################

# Spread the list-valued input column into one scalar column per point.
# NOTE(review): `features` is not defined anywhere in the visible notebook;
# it must already exist in the kernel for this cell to run -- confirm its source.
dict_feature_pt_col = {}
for i in range(len(features[INPUT_COL][0])):
    dict_feature_pt_col[f'feature_{i}'] = [vals[i] for vals in features[INPUT_COL]]
input_feature = pd.DataFrame(dict_feature_pt_col)

data_featurepts = pd.concat([data.copy(), input_feature.copy()], axis=1)

###############################################

# Fixed: the seed constant defined above is SHUFFLE_SEED; `SEED` did not exist.
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=SHUFFLE_SEED)
for i, (train_index, test_index) in enumerate(skf.split(data_featurepts, data_featurepts[SHUFFLE_LABEL])):
    # Split into training and test rows for this fold
    train, test = data_featurepts.iloc[train_index], data_featurepts.iloc[test_index]

    # Split into explanatory and target variables
    X_COL = [col for col in data_featurepts if 'feature_' in col]
    train_y = train[Y_COL].to_numpy()
    test_y = test[Y_COL].to_numpy()

    # Scaling. Fixed: previously only the training features were min-max
    # scaled while the test features were passed to the model unscaled.
    # The scaler is fitted on the training rows and reused for the test rows.
    scaler = MinMaxScaler()
    train_X = scaler.fit_transform(train[X_COL])
    test_X = scaler.transform(test[X_COL])

    # Fit the model and report the fold's results
    svm_model = SVC(kernel='rbf', gamma='auto', random_state=SVM_SEED)
    svm_result = svm_model.fit(train_X, train_y)
    svm_pred = svm_model.predict(test_X)
    print(confusion_matrix(test_y, svm_pred))
    print(classification_report(test_y, svm_pred))

    print('##########################################')

##### ▶ハイパーパラメータを組み合わせたdfを作成

##### ▶▶SVM

In [16]:
from sklearn.svm import SVC

# Hyperparameters #
svm_parameter = {
    'kernel': ['rbf'],
    'c': [1, 5, 20, 100],
    'gamma': [0.01, 0.1, 0.5, 1.0, 5.0, 20.0],
    'degree': [1],
}
#####################
# Enumerate every (kernel, c, gamma, degree) combination, with kernel
# varying slowest and degree fastest.
combos = []
for kernel in svm_parameter['kernel']:
    for c in svm_parameter['c']:
        for gamma in svm_parameter['gamma']:
            for degree in svm_parameter['degree']:
                combos.append((kernel, c, gamma, degree))

# One list per output column, in enumeration order.
kernels = [combo[0] for combo in combos]
cs = [combo[1] for combo in combos]
gammas = [combo[2] for combo in combos]
degrees = [combo[3] for combo in combos]

svm_parameter_combo = pd.DataFrame({
    'kernel': kernels,
    'c': cs,
    'gamma': gammas,
    'degree': degrees
})

svm_parameter_combo.to_csv('../temp/svm_parameter.csv')

In [13]:
# Reload the saved SVM hyperparameter table; the first CSV column becomes the index.
df = pd.read_csv('../temp/svm_parameter.csv', delimiter=',', index_col=0)

In [14]:
df

Unnamed: 0,kernel,c,gamma,degree
0,rbf,1,0.5,1
1,rbf,1,1.0,1
2,rbf,1,5.0,1
3,rbf,1,20.0,1
4,rbf,5,0.5,1
5,rbf,5,1.0,1
6,rbf,5,5.0,1
7,rbf,5,20.0,1
8,rbf,20,0.5,1
9,rbf,20,1.0,1


##### ▶svm 1回行う

In [2]:
### Parameters to vary ###
PTCNT_TYPE = [1, 2]
PTCNT = list(range(3, 84))
X_COL = ['org_coord', 'segment_vec', 'segment_cossim', 'startend_seg_cossim']

# Every (ptcnt_type, ptcnt, X_col) combination, ptcnt_type varying slowest.
# (A disabled draft at this point loaded the per-point CSV for each
# combination and made a 725-row stratified train/test split.)
combos = []
for ptcnt_type in PTCNT_TYPE:
    for ptcnt in PTCNT:
        for X_col in X_COL:
            combos.append((ptcnt_type, ptcnt, X_col))

ptcnt_types = [combo[0] for combo in combos]
ptcnts = [combo[1] for combo in combos]
X_cols = [combo[2] for combo in combos]

param_combo = pd.DataFrame({
    'ptcnt_type': ptcnt_types,
    'ptcnt': ptcnts,
    'X_col': X_cols
})
param_combo.to_csv('../temp/param_conbo.csv')


# Same table for the gradient features, which only exist for ptcnt_type 2.
X_COL_GRAD = ['segment_grad_cossim', 'startend_grad_cossim']
grad_combos = []
for ptcnt in PTCNT:
    for X_col in X_COL_GRAD:
        grad_combos.append((2, ptcnt, X_col))

ptcnt_types = [combo[0] for combo in grad_combos]
ptcnts = [combo[1] for combo in grad_combos]
X_cols = [combo[2] for combo in grad_combos]

param_combo_grad = pd.DataFrame({
    'ptcnt_type': ptcnt_types,
    'ptcnt': ptcnts,
    'X_col': X_cols
})
param_combo_grad.to_csv('../temp/param_conbo_grad.csv')

In [43]:
from sklearn.svm import SVC

### Fixed parameters ###
TRAIN_TEST_SPLIT_SEED = 1
KFOLD_SHUFFLE_SEED = 1
KFOLD_SHUFFLE_LABEL = 'saito_label'
Y_COL = 'is_good_saito'
K = 5

### Parameters to vary ###
PTCNT_TYPE = [1, 2]
PTCNT = list(range(3, 84))
X_COL = ['org_coord', 'segment_vec', 'segment_cossim', 'startend_seg_cossim']

### Hyperparameters ###
kernel = 'rbf'
c = 1
gamma = 0.1
degree = 3
#########################
num = 0

# Every (ptcnt_type, ptcnt, X_col) combination, ptcnt_type varying slowest.
# (A disabled draft at this point loaded the per-point CSV for each
# combination and made a 725-row stratified train/test split.)
combos = []
for ptcnt_type in PTCNT_TYPE:
    for ptcnt in PTCNT:
        for X_col in X_COL:
            combos.append((ptcnt_type, ptcnt, X_col))

ptcnt_types = [combo[0] for combo in combos]
ptcnts = [combo[1] for combo in combos]
X_cols = [combo[2] for combo in combos]

param_combo = pd.DataFrame({
    'ptcnt_type': ptcnt_types,
    'ptcnt': ptcnts,
    'X_col': X_cols
})
param_combo.to_csv('../temp/param_conbo.csv')


# Same table for the gradient features, which only exist for ptcnt_type 2.
X_COL_GRAD = ['segment_grad_cossim', 'startend_grad_cossim']
grad_combos = []
for ptcnt in PTCNT:
    for X_col in X_COL_GRAD:
        grad_combos.append((2, ptcnt, X_col))

ptcnt_types = [combo[0] for combo in grad_combos]
ptcnts = [combo[1] for combo in grad_combos]
X_cols = [combo[2] for combo in grad_combos]

param_combo_grad = pd.DataFrame({
    'ptcnt_type': ptcnt_types,
    'ptcnt': ptcnts,
    'X_col': X_cols
})
param_combo_grad.to_csv('../temp/param_conbo_grad.csv')

# K-Fold validation of a single hyperparameter setting.
# NOTE(review): `train_valid` is not assigned in this cell (the code that
# created it is commented out above); it must already exist in the kernel.
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=KFOLD_SHUFFLE_SEED)
for i, (train_index, valid_index) in enumerate(skf.split(train_valid, train_valid[KFOLD_SHUFFLE_LABEL])):
    
    ### Split into training and validation rows ###
    train, valid = train_valid.iloc[train_index], train_valid.iloc[valid_index]
    
    ### Split into explanatory and target variables ###
    # Note: this rebinds X_col (a loop variable above) to the list of feature_<i> columns.
    X_col = [col for col in train_valid if 'feature_' in col]
    y_col = 'is_good_saito'
    train_X = train.copy()[X_col]
    train_y = train.copy()[y_col]
    valid_X = valid.copy()[X_col]
    valid_y = valid.copy()[y_col]
    
    ### Min-max scaling: fit on the training rows, apply the same min/max to the validation rows ###
    scaler = MinMaxScaler()
    for col in train_X:
        # The scaler is refitted for every column, then the raw column is
        # replaced by its normalised 'norm_<col>' counterpart.
        train_minmax = scaler.fit(train_X[[col]])
        train_X[f'norm_{col}'] = scaler.transform(train_X[[col]])
        valid_X[f'norm_{col}'] = scaler.transform(valid_X[[col]])
        del train_X[col]
        del valid_X[col]
    
    ### Convert to the array form passed to the model ###
    train_X = train_X.to_numpy()
    train_y = train_y.to_numpy()
    valid_X = valid_X.to_numpy()
    valid_y = valid_y.to_numpy()
    
    ### Create the model ###
    model = SVC(kernel=kernel, C=c, gamma=gamma, degree=degree)
    ### Train the model ###
    result = model.fit(train_X, train_y)
    ### Predict on the validation rows ###
    valid_pred = model.predict(valid_X)
    ### Metrics on the validation rows ###
    # average=None returns one score per label; [1] selects the second label's score.
    recall = recall_score(valid_y, valid_pred, average=None)[1]
    precision = precision_score(valid_y, valid_pred, average=None)[1]
    f1 = f1_score(valid_y, valid_pred, average=None)[1]
    accuracy = accuracy_score(valid_y, valid_pred)
    print(precision)

### Final model: train on all train+validation rows, evaluate on the test rows ###
# NOTE(review): `train_valid` and `test` are not assigned in this cell; they
# must already exist in the kernel.

### Split into explanatory and target variables ###
X_col = [col for col in train_valid if 'feature_' in col]
y_col = 'is_good_saito'
train_valid_X = train_valid.copy()[X_col]
train_valid_y = train_valid.copy()[y_col]
test_X = test.copy()[X_col]
test_y = test.copy()[y_col]

### Min-max scaling: fit on the train+validation rows, apply the same min/max to the test rows ###
scaler = MinMaxScaler()
for col in train_valid_X:
    scaler.fit(train_valid_X[[col]])
    train_valid_X[f'norm_{col}'] = scaler.transform(train_valid_X[[col]])
    test_X[f'norm_{col}'] = scaler.transform(test_X[[col]])
    del train_valid_X[col]
    del test_X[col]

### Convert to the array form passed to the model ###
train_valid_X = train_valid_X.to_numpy()
train_valid_y = train_valid_y.to_numpy()
test_X = test_X.to_numpy()
test_y = test_y.to_numpy()

### Create the model ###
model = SVC(kernel=kernel, C=c, gamma=gamma, degree=degree)
### Train the model ###
# Fixed: this used to fit on train_X / train_y, i.e. whatever the last K-fold
# iteration left behind (one fold's training subset with that fold's scaling),
# instead of the train+validation arrays prepared just above.
result = model.fit(train_valid_X, train_valid_y)
### Predict on the test rows ###
test_pred = model.predict(test_X)
### Metrics on the test rows ###
# average=None returns one score per label; [1] selects the second label's score.
recall = recall_score(test_y, test_pred, average=None)[1]
precision = precision_score(test_y, test_pred, average=None)[1]
f1 = f1_score(test_y, test_pred, average=None)[1]
accuracy = accuracy_score(test_y, test_pred)


0.6275862068965518
0.6275862068965518
0.6206896551724138
0.6206896551724138
0.6275862068965518
0.6225806451612903


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
