In [108]:
#ライブラリをインポート
import os
import re
import sys
import csv
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [109]:
# Tunable constants shared by the analysis functions below.
BINS = 10_000  # number of histogram bins
EPSILON = 1e-5  # smoothing parameter added to every histogram bin
UPPER_LIMIT = 1.1  # upper bound of the stationary band (multiplier of the mean resultant acceleration)
LOWER_LIMIT = 0.9  # lower bound of the stationary band (multiplier of the mean resultant acceleration)
STATIONARY_INTERVALS = 5  # number of consecutive in-band samples that triggers removal of that run

In [110]:
# Collect the dataset file names in a directory together with their frequency.
def get_filename_and_Hz(path: str) -> list[int, str]:
    """Return ``[frequency, filename]`` pairs for the entries of *path*.

    The frequency is the first run of decimal digits found in the entry
    name, converted to ``int``. Entries whose name contains no digit are
    skipped. Pairs are returned in ``os.listdir`` order.
    """
    pairs = []
    for entry in os.listdir(path):
        digits = re.search(r'\d+', entry)
        if digits is None:
            # No number in the name, so no frequency can be derived from it.
            continue
        pairs.append([int(digits.group(0)), entry])
    return pairs

In [111]:
# Split [frequency, filename] pairs into two parallel lists.
def divide_filename_and_Hz(filename_and_Hz: list[int, str]) -> tuple[list[int], list[str]]:
    """Return ``(frequencies, filenames)`` taken from ``[frequency, filename]`` pairs.

    Element 0 of every pair goes to the first list and element 1 to the
    second, keeping the input order.
    """
    frequencies = [pair[0] for pair in filename_and_Hz]
    names = [pair[1] for pair in filename_and_Hz]
    return frequencies, names

In [112]:
# Read the three acceleration axes from an acceleration CSV file.
def get_acceleration(filename: str) -> tuple[list[float], list[float], list[float]]:
    """Load the X, Y and Z acceleration columns from a CSV file.

    Columns 2, 3 and 4 (0-based) of every row are parsed as floats and
    returned as three parallel lists. Blank lines are skipped.

    Raises:
        IndexError: a non-blank row has fewer than five columns.
        ValueError: one of the three columns cannot be parsed as a float.
    """
    AccX, AccY, AccZ = [], [], []
    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires for files passed to csv.reader.
    with open(filename, newline="") as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields an empty list for a blank line
                # (e.g. a trailing one); there is nothing to parse.
                continue
            AccX.append(float(row[2]))
            AccY.append(float(row[3]))
            AccZ.append(float(row[4]))

    return AccX, AccY, AccZ

In [113]:
# Remove stationary intervals from the resultant acceleration.
def remove_stationary_intervals(AccX: list[float], AccY: list[float], AccZ: list[float]) -> list[float]:
    """Return the resultant acceleration with stationary runs removed.

    Gravity is estimated as the norm of the per-axis means. A sample is
    "in band" when its resultant acceleration lies strictly between
    ``LOWER_LIMIT`` and ``UPPER_LIMIT`` times that estimate. Whenever
    ``STATIONARY_INTERVALS`` consecutive in-band samples are seen, those
    samples are dropped and counting restarts. A run of ``L`` in-band
    samples therefore loses its first ``L - L % STATIONARY_INTERVALS``
    samples and keeps the rest. Out-of-band samples are always kept.

    Returns an empty list when any axis is empty (the means are undefined).
    """
    if min(len(AccX), len(AccY), len(AccZ)) == 0:
        return []

    # Per-axis means; their norm is the gravity estimate.
    mean_x = sum(AccX) / len(AccX)
    mean_y = sum(AccY) / len(AccY)
    mean_z = sum(AccZ) / len(AccZ)
    gravity = math.sqrt(mean_x ** 2 + mean_y ** 2 + mean_z ** 2)

    lower = gravity * LOWER_LIMIT
    upper = gravity * UPPER_LIMIT

    kept = []
    # In-band samples of the current run that have not yet reached
    # STATIONARY_INTERVALS. Buffering them gives a single O(n) pass instead
    # of repeatedly deleting slices from the middle of a list.
    pending = []
    for x, y, z in zip(AccX, AccY, AccZ):
        magnitude = math.sqrt(x ** 2 + y ** 2 + z ** 2)
        if lower < magnitude < upper:
            pending.append(magnitude)
            if len(pending) == STATIONARY_INTERVALS:
                pending.clear()  # a full stationary chunk: drop it
        else:
            # The run ended before filling a chunk: keep what is left of it.
            kept.extend(pending)
            pending.clear()
            kept.append(magnitude)
    kept.extend(pending)

    return kept

In [114]:
# Take the difference between each pair of consecutive samples.
def differences_of_acceleration(ResultantAcc: list[float]) -> list[float]:
    """Return the absolute differences of consecutive samples, scaled by 100000.

    Both samples are multiplied by 100000 before subtracting. The result has
    one element fewer than the input and is empty for inputs shorter than
    two samples.
    """
    return [
        math.fabs(following * 100000 - current * 100000)
        for current, following in zip(ResultantAcc, ResultantAcc[1:])
    ]

In [115]:
# Kullback-Leibler divergence between two sample sets.
# NOTE(review): the original author's note says both inputs must be collections
# of non-negative values; nothing in this function enforces that.
def KL_divergence(a: list[float], b: list[float]) -> float:
    """Estimate KL(a || b) from two sample sets using smoothed histograms.

    Both sets are binned into ``BINS`` equal-width bins over their common
    value range. ``EPSILON`` is added to every bin so that no bin is empty,
    and each histogram is then divided by its own smoothed total so that it
    sums to 1.

    Raises:
        ValueError: either input is empty (``min``/``max`` of an empty sequence).
    """
    min_value = min(min(a), min(b))  # smaller of the two minima
    max_value = max(max(a), max(b))  # larger of the two maxima
    shared_range = (min_value, max_value)

    # Histogram both sample sets over the same bins.
    a_hist, _ = np.histogram(a, bins=BINS, range=shared_range)
    b_hist, _ = np.histogram(b, bins=BINS, range=shared_range)

    # Smooth first, then normalise by the smoothed total. Dividing by the
    # unsmoothed total would leave each distribution summing to more than 1,
    # which can make the divergence negative.
    a_prob = a_hist + EPSILON
    a_prob = a_prob / a_prob.sum()
    b_prob = b_hist + EPSILON
    b_prob = b_prob / b_prob.sum()

    return np.sum(a_prob * np.log(a_prob / b_prob))

In [116]:
# Jensen-Shannon divergence between two sample sets.
# NOTE(review): the original author's note says both inputs must be collections
# of non-negative values; nothing in this function enforces that.
def JS_divergence(a: list[float], b: list[float]) -> float:
    """Estimate the Jensen-Shannon divergence of two sample sets.

    Both sets are binned into ``BINS`` equal-width bins over their common
    value range. ``EPSILON`` is added to every bin, and each histogram is
    divided by its own smoothed total so that it sums to 1. The result is
    the average of the KL divergences of each distribution from their mean.

    Raises:
        ValueError: either input is empty (``min``/``max`` of an empty sequence).
    """
    min_value = min(min(a), min(b))  # smaller of the two minima
    max_value = max(max(a), max(b))  # larger of the two maxima
    shared_range = (min_value, max_value)

    # Histogram both sample sets over the same bins.
    a_hist, _ = np.histogram(a, bins=BINS, range=shared_range)
    b_hist, _ = np.histogram(b, bins=BINS, range=shared_range)

    # Smooth first, then normalise by the smoothed total so that each
    # distribution really sums to 1.
    a_prob = a_hist + EPSILON
    a_prob = a_prob / a_prob.sum()
    b_prob = b_hist + EPSILON
    b_prob = b_prob / b_prob.sum()

    # Mean of the two distributions.
    mean_prob = (a_prob + b_prob) / 2.0

    # KL divergence of each distribution from the mean.
    kl_a = np.sum(a_prob * np.log(a_prob / mean_prob))
    kl_b = np.sum(b_prob * np.log(b_prob / mean_prob))

    return (kl_a + kl_b) / 2.0

In [117]:
# Find, for every row of a data frame, where its second-smallest value is stored.
# (The smallest value is expected to be the 0.0 of a distribution compared with itself.)
def get_index_and_columns_of_second_smallest(df: pd.DataFrame) -> list[str, str]:
    """Return ``(row label, column label)`` for each row's second-smallest value.

    Each row is sorted in ascending order with ``Series.sort_values`` and the
    column label at position 1 of the sorted row is paired with the row's
    index label. One tuple is produced per row, in row order.
    """
    return [
        (row_label, df.iloc[position].sort_values().index[1])
        for position, row_label in enumerate(df.index)
    ]

In [118]:
# Compute the estimation accuracy.
def calculate_accuracy(index_and_columns_of_second_smallest: list[str, str]) -> float:
    """Return the percentage of pairs whose two labels are equal.

    Each element is an ``(index label, column label)`` pair; a pair counts
    as a hit when both labels compare equal.

    Raises:
        ZeroDivisionError: the input is empty.
    """
    hits = sum(1 for pair in index_and_columns_of_second_smallest if pair[0] == pair[1])
    total = len(index_and_columns_of_second_smallest)
    return (hits / total) * 100  # accuracy as a percentage

In [119]:
# Run the whole pipeline for the dataset directory given on the command line.
def main():
    """Compute and report the KL/JS divergence matrices for a dataset directory.

    The directory is read from ``sys.argv[1]``. Files are ordered by
    frequency (highest first). Each file is reduced to the consecutive
    differences of its resultant acceleration after stationary intervals are
    removed. Every pair of files is then compared with both divergences, and
    each matrix is shown together with the accuracy derived from it.

    Raises:
        IndexError: no directory argument was given.
    """
    path = sys.argv[1]
    filename_and_Hz = get_filename_and_Hz(path)
    filename_and_Hz.sort(reverse=True)  # highest frequency first
    Hz, filename = divide_filename_and_Hz(filename_and_Hz)
    Hz = [str(hz) + "Hz" for hz in Hz]  # "<frequency>Hz" labels for the data frames

    # Load every dataset, strip stationary intervals and keep the differences.
    DifferenceAcc_list = []
    for name in filename:
        # os.path.join works whether or not `path` ends with a separator.
        AccX, AccY, AccZ = get_acceleration(os.path.join(path, name))
        ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
        DifferenceAcc_list.append(differences_of_acceleration(ResultantAcc))

    # Pairwise divergences: row i, column j compares dataset i against dataset j.
    resultKLD = [[KL_divergence(p, q) for q in DifferenceAcc_list] for p in DifferenceAcc_list]
    resultJSD = [[JS_divergence(p, q) for q in DifferenceAcc_list] for p in DifferenceAcc_list]

    # `display` is not defined in this file (presumably the IPython/Jupyter
    # built-in); fall back to print when the name is unavailable.
    try:
        show = display
    except NameError:
        show = print

    # Report the results.
    df_KLD = pd.DataFrame(resultKLD, index=Hz, columns=Hz)
    show(df_KLD)
    print(f"KLダイバージェンスによる推定精度は{calculate_accuracy(get_index_and_columns_of_second_smallest(df_KLD))}%です")

    df_JSD = pd.DataFrame(resultJSD, index=Hz, columns=Hz)
    show(df_JSD)
    print(f"JSダイバージェンスによる推定精度は{calculate_accuracy(get_index_and_columns_of_second_smallest(df_JSD))}%です")

In [121]:
# Load the dataset file names and their frequencies.
path = "previous_research/data/"  # path to the directory holding the datasets (must end with "/" because it is concatenated below)
filename_and_Hz = get_filename_and_Hz(path)
filename_and_Hz.sort(reverse=True)  # sort by frequency, highest first
Hz, filename = divide_filename_and_Hz(filename_and_Hz)

# Declare the variables used by the following cells.
AccX, AccY, AccZ = [], [], []
ResultantAcc = []
DifferenceAcc_list = []
resultKLD = [[0.0 for j in range(len(filename))] for i in range(len(filename))]  # initialise every element of resultKLD to 0.0
resultJSD = [[0.0 for j in range(len(filename))] for i in range(len(filename))]  # initialise every element of resultJSD to 0.0

# Read each dataset, remove its stationary intervals and store the
# consecutive-sample differences, one list per file.
for i in filename:
    AccX, AccY, AccZ = get_acceleration(path+i)
    ResultantAcc = remove_stationary_intervals(AccX, AccY, AccZ)
    DifferenceAcc_list.append(differences_of_acceleration(ResultantAcc))

In [122]:
# Store the KL divergence of every ordered pair of datasets
# (row i, column j compares dataset i against dataset j).
for i in range(len(filename)):
    for j in range(len(filename)):
        resultKLD[i][j] = KL_divergence(DifferenceAcc_list[i], DifferenceAcc_list[j])

# Store the JS divergence of every ordered pair of datasets.
for i in range(len(filename)):
    for j in range(len(filename)):
        resultJSD[i][j] = JS_divergence(DifferenceAcc_list[i], DifferenceAcc_list[j])

In [123]:
# Test code for the index and column label names.
#Zh = ["a","b","c","d","e","f","g","h","i","j","k","l","m"]
#df = pd.DataFrame(resultKLD, index=Hz, columns=Zh) # kept for now

# Define the data frame that holds the KL divergence results.
# NOTE: Hz is rebound to its own labelled form, so re-running this cell on its
# own appends "Hz" to every label a second time.
Hz = [str(hz) + "Hz" for hz in Hz]
df = pd.DataFrame(resultKLD, index=Hz, columns=Hz)

In [124]:
# Show the KL divergence matrix and the accuracy derived from it.
# `display` is not defined in this file; presumably the IPython/Jupyter built-in.
df_KLD = pd.DataFrame(resultKLD, index=Hz, columns=Hz)
display(df_KLD)
print(f"KLダイバージェンスによる推定精度は{calculate_accuracy(get_index_and_columns_of_second_smallest(df_KLD))}%です")

# Show the JS divergence matrix and the accuracy derived from it.
df_JSD = pd.DataFrame(resultJSD, index=Hz, columns=Hz)
display(df_JSD)
print(f"JSダイバージェンスによる推定精度は{calculate_accuracy(get_index_and_columns_of_second_smallest(df_JSD))}%です")

Unnamed: 0,100Hz,100Hz.1,100Hz.2,100Hz.3,100Hz.4,100Hz.5,100Hz.6,90Hz,90Hz.1,80Hz,...,40Hz,30Hz,30Hz.1,20Hz,20Hz.1,10Hz,10Hz.1,10Hz.2,10Hz.3,10Hz.4
100Hz,0.0,0.142298,0.027826,0.063785,0.056752,0.235415,0.206078,0.156743,0.082879,0.235045,...,0.370525,0.499206,0.549405,0.794533,0.861224,1.866039,6.22117,1.632231,2.673686,2.862089
100Hz,0.060716,0.0,0.035654,0.086962,0.077647,0.20186,0.131934,0.134539,0.100702,0.13523,...,0.37654,0.508652,0.580222,0.82511,0.890984,1.89032,5.816405,1.653716,2.31703,2.29185
100Hz,0.031389,0.052569,0.0,0.080518,0.068941,0.110806,0.049514,0.054115,0.034581,0.046691,...,0.351025,0.48862,0.561307,0.81587,0.894324,1.904839,3.469898,1.629254,1.682153,1.761272
100Hz,0.120977,0.2021,0.146167,0.0,0.083106,0.325057,0.223967,0.229125,0.138384,0.234982,...,0.214296,0.312183,0.36439,0.586276,0.654615,1.599983,2.443566,1.34868,1.396633,1.41219
100Hz,0.105659,0.164988,0.116378,0.066816,0.0,0.242996,0.160415,0.166023,0.111166,0.179587,...,0.30682,0.412543,0.477245,0.705388,0.773808,1.74589,3.054139,1.471423,1.593416,1.553274
100Hz,0.087331,0.147358,0.073227,0.152487,0.111132,0.0,0.09713,0.087926,0.109369,0.115555,...,0.571308,0.709155,0.791247,1.034842,1.105832,2.141988,6.49228,1.880663,3.014492,3.145932
100Hz,0.072193,0.11432,0.033356,0.096926,0.068114,0.119367,0.0,0.085204,0.075036,0.124553,...,0.479631,0.61854,0.677289,0.92466,0.992284,2.011062,6.399775,1.759259,2.832786,3.015695
90Hz,0.058387,0.118997,0.036419,0.106787,0.076669,0.111789,0.083626,0.0,0.079618,0.095624,...,0.47602,0.609937,0.689708,0.933975,1.002442,2.027272,6.405958,1.773621,2.832981,2.818799
90Hz,0.048694,0.128671,0.029188,0.064415,0.054644,0.189734,0.111071,0.12383,0.0,0.116407,...,0.369494,0.502003,0.572426,0.821859,0.887613,1.89473,5.887527,1.651151,2.424511,2.335355
80Hz,0.073684,0.098964,0.023929,0.096071,0.074735,0.128216,0.113148,0.084089,0.065091,0.0,...,0.458911,0.597908,0.653376,0.897687,0.969607,1.988894,6.341861,1.740581,2.838141,2.988468


KLダイバージェンスによる推定精度は60.60606060606061%です


Unnamed: 0,100Hz,100Hz.1,100Hz.2,100Hz.3,100Hz.4,100Hz.5,100Hz.6,90Hz,90Hz.1,80Hz,...,40Hz,30Hz,30Hz.1,20Hz,20Hz.1,10Hz,10Hz.1,10Hz.2,10Hz.3,10Hz.4
100Hz,0.0,0.010637,0.005026,0.016503,0.01271,0.021567,0.014451,0.012517,0.008056,0.014737,...,0.094791,0.130806,0.146561,0.209693,0.226425,0.41898,0.459662,0.382939,0.395895,0.405479
100Hz,0.010637,0.0,0.005303,0.021798,0.017137,0.025282,0.014748,0.015169,0.011878,0.012484,...,0.101,0.139127,0.158841,0.222656,0.238832,0.428494,0.46187,0.393578,0.402004,0.409578
100Hz,0.005026,0.005303,0.0,0.02058,0.015461,0.018595,0.006975,0.007605,0.003919,0.004773,...,0.096103,0.13526,0.155011,0.222106,0.240997,0.432537,0.432942,0.391988,0.392028,0.400019
100Hz,0.016503,0.021798,0.02058,0.0,0.008273,0.044666,0.027603,0.030571,0.017053,0.027753,...,0.045708,0.072725,0.089076,0.150694,0.168557,0.367968,0.367073,0.329927,0.32505,0.331982
100Hz,0.01271,0.017137,0.015461,0.008273,0.0,0.030567,0.017699,0.019985,0.011808,0.019268,...,0.068686,0.098393,0.116699,0.178898,0.196232,0.390891,0.390101,0.351341,0.35071,0.35672
100Hz,0.021567,0.025282,0.018595,0.044666,0.030567,0.0,0.012352,0.010672,0.020653,0.01555,...,0.157117,0.19413,0.214653,0.272113,0.288371,0.464906,0.496027,0.4292,0.445522,0.452256
100Hz,0.014451,0.014748,0.006975,0.027603,0.017699,0.012352,0.0,0.008015,0.010264,0.011452,...,0.129087,0.167956,0.184361,0.245845,0.262014,0.446308,0.482365,0.409866,0.424827,0.43357
90Hz,0.012517,0.015169,0.007605,0.030571,0.019985,0.010672,0.008015,0.0,0.011176,0.008879,...,0.129868,0.16756,0.188202,0.248701,0.265153,0.44891,0.483694,0.412545,0.427938,0.43344
90Hz,0.008056,0.011878,0.003919,0.017053,0.011808,0.020653,0.010264,0.011176,0.0,0.009293,...,0.098294,0.136304,0.155751,0.22044,0.236819,0.427665,0.463063,0.39134,0.401843,0.407903
80Hz,0.014737,0.012484,0.004773,0.027753,0.019268,0.01555,0.011452,0.008879,0.009293,0.0,...,0.125542,0.164713,0.180882,0.242916,0.260225,0.446615,0.480932,0.410379,0.42442,0.433366


JSダイバージェンスによる推定精度は51.515151515151516%です
