In [120]:
#ライブラリをインポート
import os #OSに依存する様々な機能を利用するためのモジュール(ファイルやディレクトリ操作など)
import re #正規表現を利用するためのモジュール
import csv  #csvファイルを扱うためのモジュール
import math #数学的計算のためのモジュール
import matplotlib.pyplot as plt #グラフ描画のためのモジュール
import numpy as np  #多次元配列計算のためのモジュール
import pandas as pd #データフレームを扱うためのモジュール
from scipy.stats import kurtosis  #尖度を調べるためのモジュール
from sklearn.model_selection import train_test_split  #データをトレーニング用とテスト用に分けるためのモジュール
from sklearn.linear_model import LinearRegression #線型回帰
from sklearn.svm import SVC #サポートベクターマシン
from sklearn.ensemble import RandomForestClassifier #ランダムフォレスト
from sklearn.neighbors import KNeighborsClassifier  #k-近傍法
from sklearn.metrics import accuracy_score  #機械学習モデルの性能評価のためのモジュール

In [121]:
#定数を定義
BINS = 4000  #ヒストグラムのビンの数
EPSILON = .00001  #スムージングパラメータ
UPPER_LIMIT = 1.1 #静止区間の上限
LOWER_LIMIT = 0.9 #静止区間の加減
STATIONARY_INTERVALS = 5  #静止区間除去のサンプルの間隔(静止区間が何サンプル連続したら除去するか)
TRAIN_SIZE = 0.8  #ランダムフォレストのトレーニングデータの割合

In [122]:
#ディレクトリ内のデータセットのファイル名と周波数を取得する関数
def get_Hz_and_filename(path: str) -> list[int, str]:
    filename = os.listdir(path) #引数のパスのディレクトリの中のファイル名一覧を取得
    Hz_and_filename=[]  #ファイル名と周波数を格納するリストを宣言

    for file in filename:
        Hz = re.search(r'\d+', file)    #正規表現を用いてファイル名の中で一番最初に出てくる数字(周波数)を取得
        if Hz:  #数字の入っていないファイル名があるとエラーを吐くので、このif文でチェックする
            Hz_and_filename.append([int(Hz.group(0)), file])    #ファイル名と周波数を格納

    return Hz_and_filename

In [123]:
#ファイル名と周波数を分けて出力する関数
def divide_Hz_and_filename(Hz_and_filename: list[int, str]) -> tuple[list[int], list[str]]:
    Hz = []
    filename = []
    for row in Hz_and_filename:
      Hz.append(row[0])
      filename.append(row[1])

    return Hz, filename

In [124]:
#加速度データのCSVファイルから3軸加速度を取得する関数
def get_acceleration(filename: str) -> tuple[list[float], list[float], list[float]]:
    AccX, AccY, AccZ = [], [], []
    with open(filename) as f:
        reader = csv.reader(f)
        for row in reader:
            AccX.append(float(row[2]))
            AccY.append(float(row[3]))
            AccZ.append(float(row[4]))

    return AccX, AccY, AccZ

In [125]:
#静止区間を除去する関数
def remove_stationary_intervals(AccX: list[float], AccY: list[float], AccZ: list[float]) -> list[float]:
    #各軸の加速度の平均を求める
    AvgAccX = sum(AccX) / len(AccX)
    AvgAccY = sum(AccY) / len(AccY)
    AvgAccZ = sum(AccZ) / len(AccZ)

    AvgResultantAcc = math.sqrt(AvgAccX ** 2 + AvgAccY ** 2 + AvgAccZ ** 2) #重力加速度の推定値=合成加速度の平均を求める

    ResultantAcc = [math.sqrt(x ** 2 + y ** 2 + z ** 2) for x, y, z in zip(AccX, AccY, AccZ)]   #各時刻の合成加速度を求める

    #各時刻の合成加速度から静止区間(重力加速度の推定値に近い値が一定以上以上連続している区間)を除去する
    i = 0 #ループ変数
    counter = 0 #静止区間がSTATIONARY_INTERVALS分続いているかをカウントする変数
    while i < len(ResultantAcc):
        if AvgResultantAcc * LOWER_LIMIT < ResultantAcc[i] < AvgResultantAcc * UPPER_LIMIT:   #平均のLOWER_LIMIT倍~UPPER_LIMIT倍の範囲を調べる
            counter += 1    #範囲内ならカウントを増やす
            if counter == STATIONARY_INTERVALS: #カウントがSTATIONARY_INTERVALSに達したらその区間を削除
                del ResultantAcc[i+1-STATIONARY_INTERVALS:i+1]    #スライスでは選択範囲の開始位置startと終了位置stopを[start:stop]のように書くとstart <= x < stopの範囲が選択される #start番目の値は含まれるがstop番目の値は含まれない
                counter = 0 #カウンターをリセット
                i -= STATIONARY_INTERVALS   #削除した分インデックスがズレるので補正する
        else:
            counter = 0 #カウンターをリセット
        i += 1

    return ResultantAcc  #静止区間を除去した後のリストを返す

In [126]:
#連続する2サンプルの差分を取る関数
def calculate_differences_of_acceleration(ResultantAcc: list[float]) -> list[float]:
    DifferenceAcc = [math.fabs(ResultantAcc[i + 1] * 100000 - ResultantAcc[i] * 100000) for i in range(len(ResultantAcc) - 1)]  #100000倍して誤差を取る
    return DifferenceAcc

In [127]:
#KLダイバージェンス関数 #引数として与える2つの分布は非負の値の集合でなければならないことに注意
def KL_divergence(a: list[float], b: list[float]) -> float:
    min_value = min(min(a), min(b)) #a,bの最小値の小さい方
    max_value = max(max(a), max(b)) #a,bの最大値の大きい方

    #a,bのヒストグラムを作成し、同じ数のビンで区切る
    a_hist, _ = np.histogram(a, bins=BINS, range=(min_value, max_value))
    b_hist, _ = np.histogram(b, bins=BINS, range=(min_value, max_value))

    #正規化する(確率分布に変換する、合計を1にする)ために全合計で割る
    a_hist = (a_hist + EPSILON) / a_hist.sum()
    b_hist = (b_hist + EPSILON) / b_hist.sum()

    #KLダイバージェンスの値を返す
    return np.sum([ai * np.log(ai / bi) for ai, bi in zip(a_hist, b_hist)])

In [128]:
#JSダイバージェンス関数 #引数として与える2つの分布は非負の値の集合でなければならないことに注意
def JS_divergence(a: list[float], b: list[float]) -> float:
    min_value = min(min(a), min(b)) #a,bの最小値の小さい方
    max_value = max(max(a), max(b)) #a,bの最大値の大きい方

    #a,bのヒストグラムを作成し、同じ数のビンで区切る
    a_hist, _ = np.histogram(a, bins=BINS, range=(min_value, max_value))
    b_hist, _ = np.histogram(b, bins=BINS, range=(min_value, max_value))

    #正規化する(確率分布に変換する、合計を1にする)ために全合計で割る
    a_hist = (a_hist + EPSILON) / a_hist.sum()
    b_hist = (b_hist + EPSILON) / b_hist.sum()

    #2つの分布の平均値を求める
    mean_hist = (a_hist + b_hist) / 2.0

    #平均とそれぞれの分布のKLダイバージェンスを算出
    kl_a = np.sum([ai * np.log(ai / bi) for ai, bi in zip(a_hist, mean_hist)])
    kl_b = np.sum([ai * np.log(ai / bi) for ai, bi in zip(b_hist, mean_hist)])

    #JSダイバージェンスの値を返す
    return (kl_a + kl_b) / 2.0

In [129]:
#データフレームの各行の中で2番目に小さい値が格納されている場所を調べる関数(最小値は同じ確率分布同士の0.0)
def get_index_and_columns_of_second_smallest(df: pd.DataFrame) -> list[str, str]:
    index_and_columns_of_second_smallest = []  #データフレームの中で2番目に小さい値が格納されている場所のインデックス名とカラム名を格納する変数
    for i in range(len(df)):
        sorted_row = df.iloc[i].sort_values()   #.ilocでデータフレームの要素を行、列の番号の添字で指定する    #各行の要素を昇順に並び替える
        second_smallest_columns = sorted_row.index[1] #各行の2番目に小さい値が格納されているカラム[1]の名前を取得
        #second_smallest_label = df.columns.get_loc(second_smallest_index)
        index_and_columns_of_second_smallest.append((df.index[i], second_smallest_columns))    #インデックスとカラムのラベル名の組を二次元配列に追加
    return index_and_columns_of_second_smallest

In [130]:
#推定精度を算出する関数
def calculate_accuracy(index_and_columns_of_second_smallest: list[str, str]) -> tuple[float, list[int]]:
    counter = 0
    error_index_list = []
    for i in range(len(index_and_columns_of_second_smallest)):
        #インデックスとカラムのラベル名が同じならばカウンターを1増やす
        if index_and_columns_of_second_smallest[i][0] == index_and_columns_of_second_smallest[i][1]:
            counter += 1
        else:
            error_index_list.append(i)
            print(f"間違ってるやつは{i}番目の{index_and_columns_of_second_smallest[i][0]}と{index_and_columns_of_second_smallest[i][1]}です")

    return (counter / len(index_and_columns_of_second_smallest)) * 100, error_index_list  #精度を100分率で返す

In [131]:
#入力された加速度の差分のリストからヒストグラムを作る関数（ビンの範囲が全加速度の差分データの最小値〜最大値）
def create_histogram(DifferenceAcc_list: list[float]) -> np.histogram:
    min_value = min(map(lambda x:max(x), DifferenceAcc_list))   #入力されたリストの中で最も小さい数
    max_value = max(map(lambda x:max(x), DifferenceAcc_list))   #入力されたリストの中で最も大きい数

    DifferenceAcc_hist = np.zeros((len(DifferenceAcc_list), BINS), dtype=float)

    for i in range(len(DifferenceAcc_list)):
        DifferenceAcc_hist[i], _ = np.histogram(DifferenceAcc_list[i], bins=BINS, range=(min_value, max_value)) #ヒストグラムを作成し、同じ数のビンで区切る
    return DifferenceAcc_hist

In [132]:
#入力された加速度の差分のリストからヒストグラムを作る関数（ビンの範囲が各加速度の差分データの最小値〜最大値）
def create_histogram2(DifferenceAcc_list: list[float]) -> np.histogram:
    DifferenceAcc_hist = np.zeros((len(DifferenceAcc_list), BINS), dtype=float)
    for i in range(len(DifferenceAcc_list)):
        min_value = min(DifferenceAcc_list[i])
        max_value = max(DifferenceAcc_list[i])
        DifferenceAcc_hist[i], _ = np.histogram(DifferenceAcc_list[i], bins=BINS, range=(min_value, max_value)) #ヒストグラムを作成し、同じ数のビンで区切る
    return DifferenceAcc_hist

In [133]:
#KLダイバージェンスとJSダイバージェンス算出の一連の流れを自動化した関数
def KL_and_JS(path: str):
    Hz_and_filename = get_Hz_and_filename(path)
    Hz_and_filename.sort(reverse=True)  #周波数の大きい順にソート
    Hz, filename = divide_Hz_and_filename(Hz_and_filename)
    Hz = [str(hz) + "Hz" for hz in Hz]  #周波数の値+"Hz"のリストを作りデータフレームのラベルに用いる

    #使う変数を宣言
    AccX, AccY, AccZ = [], [], []
    ResultantAcc = []
    DifferenceAcc_list = []
    resultKLD = [[0.0 for j in range(len(filename))] for i in range(len(filename))]  # resultKLDの要素を0.0で初期化
    resultJSD = [[0.0 for j in range(len(filename))] for i in range(len(filename))]  # resultKLDの要素を0.0で初期化
    error_index_list = []

    #各データセットからデータを読み込み静止区間を除去したものを二次元配列に格納
    for i in filename:
        AccX, AccY, AccZ = get_acceleration(path+i)
        ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
        DifferenceAcc_list.append(calculate_differences_of_acceleration(ResultantAcc))

    #KLダイバージェンスの値を格納
    for i in range(len(filename)):
        for j in range(len(filename)):
            resultKLD[i][j] = KL_divergence(DifferenceAcc_list[i], DifferenceAcc_list[j])

    #JSダイバージェンスの値を格納
    for i in range(len(filename)):
        for j in range(len(filename)):
            resultJSD[i][j] = JS_divergence(DifferenceAcc_list[i], DifferenceAcc_list[j])

    #結果を出力
    df_KLD = pd.DataFrame(resultKLD, index=Hz, columns=Hz)
    display(df_KLD)
    accuracyKLD, error_index_list = calculate_accuracy(get_index_and_columns_of_second_smallest(df_KLD))
    for i in range(len(error_index_list)):
        print(filename[error_index_list[i]])
    print(f"KLダイバージェンスによる推定精度は{accuracyKLD}%です")

    df_JSD = pd.DataFrame(resultJSD, index=Hz, columns=Hz)
    display(df_JSD)
    accuracyJSD, error_index_list = calculate_accuracy(get_index_and_columns_of_second_smallest(df_JSD))
    for i in range(len(error_index_list)):
        print(filename[error_index_list[i]])
    print(f"JSダイバージェンスによる推定精度は{accuracyJSD}%です")

In [134]:
#ランダムフォレストによる機械学習モデル構築と性能評価までを自動化した関数
def random_forest(path: str):
    Hz_and_filename = get_Hz_and_filename(path)
    Hz_and_filename.sort(reverse=True)  #周波数の大きい順にソート
    Hz, filename = divide_Hz_and_filename(Hz_and_filename)

    #使う変数を宣言
    AccX, AccY, AccZ = [], [], []
    ResultantAcc = []
    DifferenceAcc_list = []

    #各データセットからデータを読み込み静止区間を除去したものを二次元配列に格納
    for i in filename:
        AccX, AccY, AccZ = get_acceleration(path+i)
        ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
        DifferenceAcc_list.append(calculate_differences_of_acceleration(ResultantAcc))

    DifferenceAcc_hist = create_histogram2(DifferenceAcc_list)

    x_train, x_test, y_train, y_test = train_test_split(DifferenceAcc_hist, Hz, train_size = TRAIN_SIZE, shuffle = True)

    # 学習する
    clf = RandomForestClassifier(random_state=1234)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print("正解率 = ", accuracy_score(y_test, y_pred))

In [135]:
#ランダムフォレストによる機械学習モデル構築と性能評価までを自動化した関数
def k_neighbors(path: str):
    Hz_and_filename = get_Hz_and_filename(path)
    Hz_and_filename.sort(reverse=True)  #周波数の大きい順にソート
    Hz, filename = divide_Hz_and_filename(Hz_and_filename)

    #使う変数を宣言
    AccX, AccY, AccZ = [], [], []
    ResultantAcc = []
    DifferenceAcc_list = []

    #各データセットからデータを読み込み静止区間を除去したものを二次元配列に格納
    for i in filename:
        AccX, AccY, AccZ = get_acceleration(path+i)
        ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
        DifferenceAcc_list.append(calculate_differences_of_acceleration(ResultantAcc))

    DifferenceAcc_hist = create_histogram2(DifferenceAcc_list)

    x_train, x_test, y_train, y_test = train_test_split(DifferenceAcc_hist, Hz, train_size = TRAIN_SIZE, shuffle = True)

    # 学習する
    clf = KNeighborsClassifier()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print("正解率 = ", accuracy_score(y_test, y_pred))

In [136]:
path = "all_walk_data/"

In [18]:
KL_and_JS(path)

Unnamed: 0,100Hz,100Hz.1,100Hz.2,100Hz.3,100Hz.4,100Hz.5,100Hz.6,100Hz.7,100Hz.8,100Hz.9,...,10Hz,10Hz.1,10Hz.2,10Hz.3,10Hz.4,10Hz.5,10Hz.6,10Hz.7,10Hz.8,10Hz.9
100Hz,0.0,0.279831,0.101499,0.637612,1.220173,0.751915,0.676784,0.635931,0.96965,1.177161,...,0.426561,1.035662,0.8428,0.931305,1.100209,0.475882,0.458733,1.195379,1.087133,0.982135
100Hz,0.157241,0.0,0.189593,0.176998,0.473445,0.285519,0.397539,0.251406,0.339924,0.59177,...,0.212865,1.345868,1.149535,1.045169,1.54228,0.479027,0.249697,1.525866,1.439181,1.246279
100Hz,0.123109,0.335159,0.0,0.705917,1.274617,0.771737,0.675536,0.655018,1.04702,1.203014,...,0.458373,1.02653,0.859847,0.959807,1.118735,0.503986,0.502052,1.188557,1.084822,0.983996
100Hz,0.325478,0.055052,0.363968,0.0,0.12525,0.087316,0.293633,0.093125,0.070683,0.303027,...,0.170248,1.603945,1.431404,1.202794,1.884113,0.564115,0.210906,1.778152,1.672574,1.471561
100Hz,0.654218,0.207939,0.707252,0.075459,0.0,0.204732,0.561957,0.239071,0.01639,0.431546,...,0.35229,2.093393,1.887118,1.4848,2.401826,0.838184,0.358395,2.302211,2.171276,1.931322
100Hz,0.211491,0.045866,0.239739,0.048034,0.219589,0.0,0.128324,0.039812,0.152426,0.193934,...,0.143338,1.470968,1.305216,1.207405,1.724063,0.509588,0.217744,1.578059,1.521834,1.318563
100Hz,0.104034,0.140089,0.106483,0.235812,0.579464,0.122794,0.0,0.098418,0.449476,0.217736,...,0.163438,1.117954,1.003363,1.057797,1.358448,0.413777,0.240882,1.184499,1.09995,0.990794
100Hz,0.172481,0.042428,0.196346,0.068491,0.284254,0.067041,0.126542,0.0,0.197061,0.253758,...,0.141243,1.372271,1.217949,1.141483,1.631394,0.467753,0.20018,1.489936,1.431383,1.234702
100Hz,0.55849,0.156351,0.606842,0.048423,0.03004,0.163185,0.478313,0.189673,0.0,0.376733,...,0.293864,1.952861,1.759122,1.396168,2.256851,0.754926,0.304384,2.160083,2.040851,1.803532
100Hz,0.246893,0.163994,0.272374,0.192055,0.420583,0.100331,0.101695,0.099242,0.33386,0.0,...,0.282572,1.527143,1.374596,1.387711,1.77854,0.64574,0.384565,1.64474,1.543022,1.388966


間違ってるやつは1番目の100Hzと50Hzです
間違ってるやつは3番目の100Hzと50Hzです
間違ってるやつは6番目の100Hzと50Hzです
間違ってるやつは19番目の50Hzと100Hzです
間違ってるやつは20番目の50Hzと100Hzです
間違ってるやつは21番目の50Hzと100Hzです
間違ってるやつは22番目の50Hzと100Hzです
間違ってるやつは23番目の50Hzと100Hzです
間違ってるやつは25番目の50Hzと100Hzです
間違ってるやつは26番目の50Hzと100Hzです
間違ってるやつは34番目の50Hzと100Hzです
間違ってるやつは35番目の50Hzと100Hzです
間違ってるやつは36番目の10Hzと50Hzです
間違ってるやつは37番目の10Hzと50Hzです
間違ってるやつは39番目の10Hzと100Hzです
間違ってるやつは40番目の10Hzと50Hzです
間違ってるやつは41番目の10Hzと50Hzです
間違ってるやつは43番目の10Hzと100Hzです
間違ってるやつは44番目の10Hzと50Hzです
間違ってるやつは49番目の10Hzと50Hzです
間違ってるやつは50番目の10Hzと50Hzです
walk100Hz-20230312-123621260.csv
walk100Hz-20230310-173457927.csv
walk100Hz-20230303-111623870.csv
walk50Hz-20230312-123621730.csv
walk50Hz-20230312-120723109.csv
walk50Hz-20230310-173507115.csv
walk50Hz-20230309-180539425.csv
walk50Hz-20230309-171159109.csv
walk50Hz-20230302-165451196.csv
walk50Hz-20230302-115947707.csv
walk50Hz-0803-1229.csv
walk50Hz-0803-1126.csv
walk10Hz-20230312-164255614.csv
walk10Hz-20230312-123610993.csv
walk10Hz-20230310-173449164.csv

Unnamed: 0,100Hz,100Hz.1,100Hz.2,100Hz.3,100Hz.4,100Hz.5,100Hz.6,100Hz.7,100Hz.8,100Hz.9,...,10Hz,10Hz.1,10Hz.2,10Hz.3,10Hz.4,10Hz.5,10Hz.6,10Hz.7,10Hz.8,10Hz.9
100Hz,0.0,0.036565,0.009214,0.08014,0.157998,0.058525,0.032646,0.047729,0.133952,0.075769,...,0.02859,0.188286,0.164055,0.175936,0.216866,0.060722,0.042236,0.19966,0.185247,0.167281
100Hz,0.036565,0.0,0.043653,0.014419,0.055275,0.013882,0.037279,0.012212,0.040904,0.045715,...,0.022891,0.275634,0.249624,0.238348,0.310077,0.103602,0.034555,0.292306,0.278995,0.254539
100Hz,0.009214,0.043653,0.0,0.088878,0.169406,0.065049,0.03311,0.053351,0.144406,0.082642,...,0.033224,0.187511,0.165289,0.179644,0.219514,0.063366,0.046813,0.19669,0.184242,0.16666
100Hz,0.08014,0.014419,0.088878,0.0,0.019274,0.011479,0.058547,0.014672,0.011016,0.04805,...,0.042569,0.340128,0.3169,0.294818,0.379774,0.148954,0.05368,0.354975,0.339331,0.317766
100Hz,0.157998,0.055275,0.169406,0.019274,0.0,0.04848,0.129811,0.05784,0.002858,0.096738,...,0.095132,0.418201,0.394835,0.35755,0.454182,0.218312,0.100065,0.435848,0.4207,0.39838
100Hz,0.058525,0.013882,0.065049,0.011479,0.04848,0.0,0.027663,0.005109,0.035652,0.024387,...,0.039949,0.332486,0.310456,0.299747,0.373804,0.141858,0.058744,0.341447,0.329217,0.306013
100Hz,0.032646,0.037279,0.03311,0.058547,0.129811,0.027663,0.0,0.021898,0.106764,0.028098,...,0.04388,0.281697,0.261853,0.272265,0.328002,0.118394,0.065006,0.284217,0.26878,0.253218
100Hz,0.047729,0.012212,0.053351,0.014672,0.05784,0.005109,0.021898,0.0,0.043235,0.02565,...,0.03442,0.313216,0.291156,0.283192,0.356307,0.128264,0.050924,0.322395,0.30944,0.286711
100Hz,0.133952,0.040904,0.144406,0.011016,0.002858,0.035652,0.106764,0.043235,0.0,0.079975,...,0.078168,0.396308,0.372568,0.339145,0.433471,0.197304,0.083743,0.413339,0.398557,0.375497
100Hz,0.075769,0.045715,0.082642,0.04805,0.096738,0.024387,0.028098,0.02565,0.079975,0.0,...,0.08124,0.374469,0.349618,0.348779,0.416196,0.185509,0.107451,0.390373,0.370943,0.349595


間違ってるやつは1番目の100Hzと50Hzです
間違ってるやつは3番目の100Hzと50Hzです
間違ってるやつは6番目の100Hzと50Hzです
間違ってるやつは10番目の100Hzと50Hzです
間違ってるやつは21番目の50Hzと100Hzです
間違ってるやつは22番目の50Hzと100Hzです
間違ってるやつは26番目の50Hzと100Hzです
間違ってるやつは37番目の10Hzと50Hzです
間違ってるやつは40番目の10Hzと50Hzです
間違ってるやつは41番目の10Hzと50Hzです
間違ってるやつは43番目の10Hzと50Hzです
間違ってるやつは44番目の10Hzと50Hzです
walk100Hz-20230312-123621260.csv
walk100Hz-20230310-173457927.csv
walk100Hz-20230303-111623870.csv
walk100Hz-20230228-195310844.csv
walk50Hz-20230310-173507115.csv
walk50Hz-20230309-180539425.csv
walk50Hz-20230302-115947707.csv
walk10Hz-20230312-123610993.csv
walk10Hz-20230309-180534022.csv
walk10Hz-20230309-171156758.csv
walk10Hz-20230302-165445244.csv
walk10Hz-20230302-115939846.csv
JSダイバージェンスによる推定精度は77.77777777777779%です


In [19]:
k_neighbors(path)

正解率 =  0.5454545454545454


In [137]:
random_forest(path)

正解率 =  0.9090909090909091


In [138]:
#個々実験用
Hz_and_filename = get_Hz_and_filename(path)
Hz_and_filename.sort(reverse=True)  #周波数の大きい順にソート
Hz, filename = divide_Hz_and_filename(Hz_and_filename)

#使う変数を宣言
AccX, AccY, AccZ = [], [], []
ResultantAcc = []
DifferenceAcc_list = []

    #各データセットからデータを読み込み静止区間を除去したものを二次元配列に格納
for i in filename:
    AccX, AccY, AccZ = get_acceleration(path+i)
    ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
    DifferenceAcc_list.append(calculate_differences_of_acceleration(ResultantAcc))

DifferenceAcc_hist = create_histogram2(DifferenceAcc_list)

In [139]:
DifferenceAcc_kurtosis_list = np.zeros(len(DifferenceAcc_list))
for i in range(len(DifferenceAcc_hist)):
    DifferenceAcc_kurtosis_list[i] = kurtosis(DifferenceAcc_hist[i])

In [140]:
histogram_var = np.zeros(len(DifferenceAcc_list))
for i in range(len(DifferenceAcc_hist)):
    histogram_var[i] = np.var(DifferenceAcc_list[i])


In [141]:
len(histogram_var)

54

In [142]:
X_new = np.concatenate((DifferenceAcc_kurtosis_list.reshape(-1, 1), histogram_var.reshape(-1, 1)), axis=1)

In [143]:
X = np.concatenate((DifferenceAcc_hist, X_new), axis=1)

In [144]:
len(X[3])

4002

In [99]:
#尖度と分散を入れた方
x_train, x_test, y_train, y_test = train_test_split(X, Hz, train_size = TRAIN_SIZE, shuffle = True)
clf = RandomForestClassifier(random_state=1234)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print("正解率 = ", accuracy_score(y_test, y_pred))

正解率 =  0.8181818181818182


In [100]:
#尖度と分散を入れない方
x_train, x_test, y_train, y_test = train_test_split(DifferenceAcc_hist, Hz, train_size = TRAIN_SIZE, shuffle = True)
clf = RandomForestClassifier(random_state=1234)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print("正解率 = ", accuracy_score(y_test, y_pred))

正解率 =  0.6363636363636364


In [145]:
sendotoka = []
sendonasi = []

for i in range(100):
    x_train, x_test, y_train, y_test = train_test_split(X, Hz, train_size = TRAIN_SIZE, shuffle = True)
    clf = RandomForestClassifier(random_state=1234)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    sendotoka.append(accuracy_score(y_test, y_pred))


    x_train, x_test, y_train, y_test = train_test_split(DifferenceAcc_hist, Hz, train_size = TRAIN_SIZE, shuffle = True)
    clf = RandomForestClassifier(random_state=1234)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    sendonasi.append(accuracy_score(y_test, y_pred))

In [146]:
mind = sum(sendonasi) / len(sendonasi)

In [147]:
mind

0.7763636363636359

In [148]:
minh = sum(sendotoka) / len(sendotoka)

In [149]:
minh

0.7681818181818183