In [2]:
import pandas as pd
import numpy as np
import requests
import io
import math
import copy
from scipy.stats import gmean
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import patches
from pipeline_functions import PipelineFunctions
pf = PipelineFunctions()
from view_functions import ViewFunctions
vf = ViewFunctions()

import umap
from sklearn.manifold import TSNE
import seaborn as sns
palette = ['#CC521D', '#4F4AD7', '#39AE3D']

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
# Evaluate the rule-based prediction column against the reference label
# (Y_COL) on the held-out test split and print the confusion-matrix counts
# together with recall, precision and F-measure.
Y_COL = 'is_good_saito'
RULEBASE_COL = 'is_good_rulebase'
TRAIN_TEST_SPLIT_SEED = 1

data = pd.read_csv('../temp/eachpt_feature/1_3_org_coord.csv', delimiter=',', index_col=0)
X = data.copy()
y = data[Y_COL]
# Split the data into training and test sets at 725:310 (stratified on the label).
train_valid, test, y_train_valid, y_test = train_test_split(
    X, y, train_size=725, shuffle=True, stratify=y, random_state=TRAIN_TEST_SPLIT_SEED
)

# Boolean masks for each side of the confusion matrix. Positive and negative
# are tested explicitly (== 1 / == 0) so that any other value is counted in
# none of the four cells, exactly as an element-wise comparison would do.
pred_pos, pred_neg = test[RULEBASE_COL] == 1, test[RULEBASE_COL] == 0
ans_pos, ans_neg = test[Y_COL] == 1, test[Y_COL] == 0

tp = int((pred_pos & ans_pos).sum())
tn = int((pred_neg & ans_neg).sum())
fp = int((pred_pos & ans_neg).sum())
fn = int((pred_neg & ans_pos).sum())

# Guard every ratio: a split without positives (or without positive
# predictions) yields 0.0 instead of raising ZeroDivisionError.
recall = tp / (tp + fn) if (tp + fn) else 0.0
precision = tp / (tp + fp) if (tp + fp) else 0.0
f_measure = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
print('TP : ', tp)
print('TN : ', tn)  # true negatives (previously mislabelled as "TF")
print('FP : ', fp)
print('FN : ', fn)
print('recall : ', round(recall, 4))
print('precision : ', round(precision, 4))
print('f_measure : ', round(f_measure, 4))

TP :  192
TF :  102
FP :  15
FN :  1
recall :  0.9948
precision :  0.9275
f_measure :  0.96


In [4]:
# False positives of the rule base on the test split: rows predicted good (1)
# whose reference label is not good (0), listed as (drawing_id, stroke_id).
false_positive_rows = test[(test['is_good_rulebase'] == 1) & (test['is_good_saito'] == 0)]
list(zip(false_positive_rows['drawing_id'], false_positive_rows['stroke_id']))

[(960, 1801),
 (3, 615),
 (958, 2276),
 (21, 568),
 (50, 93),
 (956, 2232),
 (138, 15),
 (352, 1443),
 (324, 803),
 (3, 1035),
 (324, 1788),
 (959, 2148),
 (142, 1636),
 (3, 901),
 (100, 2244)]

In [6]:
# K-Fold: score the rule-based prediction column on each validation fold of
# the training/validation split and collect the per-fold metrics.
TRAIN_TEST_SPLIT_SEED = 1
KFOLD_SHUFFLE_SEED = 1
# NOTE(review): folds are stratified on 'saito_label' while the train/test
# split is stratified on Y_COL -- confirm that this difference is intended.
KFOLD_SHUFFLE_LABEL = 'saito_label'
RULEBASE_COL = 'is_good_rulebase'
K = 5

data = pd.read_csv('../temp/eachpt_feature/1_3_org_coord.csv', delimiter=',', index_col=0)
X = data.copy()
y = data[Y_COL]
# Split the data into training and test sets at 725:310 (stratified on the label).
train_valid, test, y_train_valid, y_test = train_test_split(
    X, y, train_size=725, shuffle=True, stratify=y, random_state=TRAIN_TEST_SPLIT_SEED
)

# Per-fold scores; re-created on every run so the cell is idempotent.
recalls = []
precisions = []
f1s = []
accuracies = []


skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=KFOLD_SHUFFLE_SEED)
for k, (train_index, valid_index) in enumerate(skf.split(train_valid, train_valid[KFOLD_SHUFFLE_LABEL])):

    ### Split into training and validation data ###
    # The rule base needs no fitting, so only `valid` is scored; `train` is
    # still assigned so the names left behind after the loop are unchanged.
    train, valid = train_valid.iloc[train_index], train_valid.iloc[valid_index]

    # Positive and negative are tested explicitly (== 1 / == 0) so that any
    # other value is counted in none of the four confusion-matrix cells.
    pred_pos, pred_neg = valid[RULEBASE_COL] == 1, valid[RULEBASE_COL] == 0
    ans_pos, ans_neg = valid[Y_COL] == 1, valid[Y_COL] == 0

    tp = int((pred_pos & ans_pos).sum())
    tn = int((pred_neg & ans_neg).sum())
    fp = int((pred_pos & ans_neg).sum())
    fn = int((pred_neg & ans_pos).sum())
    counted = tp + tn + fp + fn

    # Guard every ratio so a degenerate fold yields 0.0 instead of raising
    # ZeroDivisionError.
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    f_measure = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
    accuracy = (tp + tn) / counted if counted else 0.0
    print('TP : ', tp)
    print('TN : ', tn)  # true negatives (previously mislabelled as "TF")
    print('FP : ', fp)
    print('FN : ', fn)
    print('recall : ', round(recall, 4))
    print('precision : ', round(precision, 4))
    print('f_measure : ', round(f_measure, 4))

    recalls.append(recall)
    precisions.append(precision)
    f1s.append(f_measure)
    # `accuracies` was initialised above but never filled in before.
    accuracies.append(accuracy)
    

TP :  91
TF :  46
FP :  8
FN :  0
recall :  1.0
precision :  0.9192
f_measure :  0.9579
TP :  90
TF :  48
FP :  6
FN :  1
recall :  0.989
precision :  0.9375
f_measure :  0.9626
TP :  90
TF :  50
FP :  5
FN :  0
recall :  1.0
precision :  0.9474
f_measure :  0.973
TP :  89
TF :  47
FP :  8
FN :  1
recall :  0.9889
precision :  0.9175
f_measure :  0.9519
TP :  90
TF :  48
FP :  6
FN :  1
recall :  0.989
precision :  0.9375
f_measure :  0.9626


In [8]:
# Rule-based K-fold averages, printed in the order: recall, precision, F1.
for fold_scores in (recalls, precisions, f1s):
    print(sum(fold_scores) / len(fold_scores))

0.9933821733821734
0.9318172226880854
0.9615746114817323
