# 必要なライブラリを読み込む

In [20]:
from qore_sdk.client import WebQoreClient
import qore_sdk.utils
from sklearn import model_selection
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
import time
import numpy as np
import os

# データの読み込み

In [65]:
def load_xyz(str_dir):
    with open(os.path.join(str_dir, 'x.txt'), 'r') as f:
        x = np.loadtxt(f, delimiter=',', usecols=1)
    with open(os.path.join(str_dir, 'y.txt'), 'r') as f:
        y = np.loadtxt(f, delimiter=',', usecols=1)
    with open(os.path.join(str_dir, 'z.txt'), 'r') as f:
        z = np.loadtxt(f, delimiter=',', usecols=1)
    return np.stack([x, y, z], 1)  # 2D-array

In [117]:
list_data = [
    './data/udetate', 
    './data/hukkin', 
    './data/squat', 
    './data/roller']

list_X = []
list_y = []
j_label = 0  # incremental label for each data
for i_data in list_data:
    print(i_data)
    array_loaded = load_xyz(i_data)
    list_X.append(array_loaded)
    array_label = np.repeat(j_label, array_loaded.shape[0])
    list_y.append(array_label)
    j_label += 1
X_all = np.concatenate(list_X, 0)
y_all = np.concatenate(list_y, 0)

./data/udetate
./data/hukkin
./data/squat
./data/roller


# 時系列を複数の小時系列に分割する。

https://qcore-info.github.io/advent-calendar-2019/index.html#qore_sdk.utils.sliding_window

In [118]:
print(X_all.shape)
X_all, y_all = qore_sdk.utils.sliding_window(X_all, width=100, stepsize=1, axis=0, y=y_all, y_def='mode')
print(X_all.shape, y_all.shape)

(2116, 3)
(2017, 100, 3) (2017, 1)


# n_samples_per_classでクラス当たりのサンプル数を揃える。

https://qcore-info.github.io/advent-calendar-2019/index.html#qore_sdk.utils.under_sample

In [120]:
_, counts = np.unique(y_all, return_counts=True)
print(counts)

# サンプル数が一番少ないデータの数に合わせる
X, y = qore_sdk.utils.under_sample(X_all, y_all.flatten(), n_samples_per_class=counts.min())

_, counts = np.unique(y, return_counts=True)
print(counts)

[349 661 506 501]
[349 349 349 349]


このままのデータで学習しても構わないが、  
ラベルが順番に並んでいるなどの偏りが見られるため、データ全体を結合しシャッフルする。

In [0]:
data = np.concatenate((X_train, X_test), axis=0)
target = np.concatenate((y_train, y_test), axis=0)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.2, random_state=1
)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(512, 29, 12)
(512, 1)
(128, 29, 12)
(128, 1)


# Qoreクライアントを準備する
事前に発行されたユーザーネーム、パスワード、Endpointが必要  
詳しくは[Advent Calenderの公式Github](https://github.com/qcore-info/advent-calendar-2019)を参照

In [0]:
client = WebQoreClient(username="", 
                       password="", 
                       endpoint="")

学習を行う

In [0]:
start = time.time()
res = client.classifier_train(X=X_train, Y=y_train)
print(res)

{'res': 'ok', 'train_time': 1.2235112190246582}


`
classifier_test
`を用いると、精度が簡単に求められて便利




In [0]:
res = client.classifier_test(X=X_test, Y=y_test)
print(res)

{'accuracy': 0.9921875, 'f1': 0.9922253787878788, 'res': 'ok'}


最後には推論もしてみる

In [0]:
res = client.classifier_predict(X=X_test)
print("acc=", accuracy_score(y_test.tolist(), res["Y"]))
print("f1=", f1_score(y_test.tolist(), res["Y"], average="weighted"))
elapsed_time = time.time() - start
print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
print(res['Y'])


acc= 0.9921875
f1= 0.9921496212121212
elapsed_time:3.022348165512085[sec]
[5, 9, 7, 1, 3, 3, 4, 9, 9, 3, 8, 2, 1, 6, 9, 7, 3, 4, 6, 9, 1, 4, 8, 1, 8, 3, 7, 7, 8, 4, 8, 4, 7, 2, 6, 7, 3, 9, 4, 2, 8, 3, 7, 6, 5, 4, 2, 1, 8, 7, 2, 7, 3, 6, 5, 2, 5, 7, 1, 4, 2, 4, 8, 2, 7, 1, 8, 9, 3, 7, 4, 6, 8, 8, 3, 7, 3, 1, 6, 2, 3, 8, 7, 9, 8, 3, 7, 2, 4, 5, 3, 2, 6, 3, 5, 8, 3, 8, 6, 9, 8, 3, 6, 1, 9, 2, 3, 7, 6, 3, 4, 9, 5, 8, 8, 3, 3, 3, 1, 8, 5, 3, 9, 4, 7, 4, 1, 8]


ちなみに、サーバーはAWSのMediumで動いている  
メモリサイズは1GB

# 参考
単純な線形回帰、簡単な深層学習と比較する

In [0]:
X_train = X_train.reshape(len(X_train), -1).astype(np.float64)
X_test = X_test.reshape(len(X_test), -1).astype(np.float64)
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

print("===LogisticRegression(Using Sklearn)===")
start = time.time()
lr_cls = LogisticRegression(C=9.0)
lr_cls.fit(X_train, y_train)
elapsed_time = time.time() - start
print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
res = lr_cls.predict(X=X_test)
print("acc=", accuracy_score(y_test.tolist(), res))
print("f1=", f1_score(y_test.tolist(), res, average="weighted"))

print("===MLP(Using Sklearn)===")
start = time.time()
mlp_cls = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 10))
mlp_cls.fit(X_train, y_train)
elapsed_time = time.time() - start
print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
res = mlp_cls.predict(X=X_test)
print("acc=", accuracy_score(y_test.tolist(), res))
print("f1=", f1_score(y_test.tolist(), res, average="weighted"))

===LogisticRegression(Using Sklearn)===




elapsed_time:0.21763110160827637[sec]
acc= 0.9765625
f1= 0.9761245153216563
===MLP(Using Sklearn)===
elapsed_time:1.273435354232788[sec]
acc= 0.9609375
f1= 0.9602474709896586
