In [1]:
import turicreate as tc
from glob import glob

# Load labels.txt (space-delimited, no header row) and give the
# auto-generated X1..X5 columns descriptive names in one chained expression.
labels = tc.SFrame.read_csv(
    './HAPT Data Set/RawData/labels.txt',
    delimiter=' ',
    header=False,
    verbose=False,
).rename({
    'X1': 'exp_id',
    'X2': 'user_id',
    'X3': 'activity_id',
    'X4': 'start',
    'X5': 'end',
})

# Inspect the loaded labels interactively
labels.explore()

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Collect the acc_*.txt and gyro_*.txt recordings, each list in sorted order
acc_files = glob('./HAPT Data Set/RawData/acc_*.txt')
acc_files.sort()
gyro_files = glob('./HAPT Data Set/RawData/gyro_*.txt')
gyro_files.sort()

# Show both file lists
for file_list in (acc_files, gyro_files):
    print(file_list)

['./HAPT Data Set/RawData/acc_exp01_user01.txt', './HAPT Data Set/RawData/acc_exp02_user01.txt', './HAPT Data Set/RawData/acc_exp03_user02.txt', './HAPT Data Set/RawData/acc_exp04_user02.txt', './HAPT Data Set/RawData/acc_exp05_user03.txt', './HAPT Data Set/RawData/acc_exp06_user03.txt', './HAPT Data Set/RawData/acc_exp07_user04.txt', './HAPT Data Set/RawData/acc_exp08_user04.txt', './HAPT Data Set/RawData/acc_exp09_user05.txt', './HAPT Data Set/RawData/acc_exp10_user05.txt', './HAPT Data Set/RawData/acc_exp11_user06.txt', './HAPT Data Set/RawData/acc_exp12_user06.txt', './HAPT Data Set/RawData/acc_exp13_user07.txt', './HAPT Data Set/RawData/acc_exp14_user07.txt', './HAPT Data Set/RawData/acc_exp15_user08.txt', './HAPT Data Set/RawData/acc_exp16_user08.txt', './HAPT Data Set/RawData/acc_exp17_user09.txt', './HAPT Data Set/RawData/acc_exp18_user09.txt', './HAPT Data Set/RawData/acc_exp19_user10.txt', './HAPT Data Set/RawData/acc_exp20_user10.txt', './HAPT Data Set/RawData/acc_exp21_user

In [3]:
# Look up the activity ID for a given row number of an acc file
def find_label_for_containing_interval(intervals, index):
    """Return the label of the single interval that contains ``index``.

    Args:
        intervals: 2-D array indexed as ``intervals[:, k]``; column 0 holds
            the label, column 1 the interval start and column 2 the
            interval end.
        index: Position to look up. Both interval bounds are inclusive.

    Returns:
        The label from column 0 when exactly one interval satisfies
        ``start <= index <= end``; ``None`` when no interval or more than
        one interval matches.
    """
    interval_labels = intervals[:, 0]
    starts = intervals[:, 1]
    ends = intervals[:, 2]

    inside = (starts <= index) & (ends >= index)
    matches = interval_labels[inside]

    # Anything other than a unique match is treated as "unlabeled"
    if len(matches) != 1:
        return None
    return matches[0]

In [4]:
# Build the final dataset: one row per sensor sample across all experiments.
# The acc/gyro file lists are paired by position, so they must line up.
if len(acc_files) != len(gyro_files):
    raise ValueError(
        'Found %d acc files but %d gyro files; they must pair one-to-one'
        % (len(acc_files), len(gyro_files)))

data = tc.SFrame()
for acc_file, gyro_file in zip(acc_files, gyro_files):
    # Extract the experiment ID. File names look like "acc_exp01_user01.txt";
    # the "expNN" token is taken counting from the end of the path so that an
    # underscore in a parent directory name cannot shift its position.
    exp_token = acc_file.split('_')[-2]
    exp_id = int(exp_token[-2:])

    # Fail loudly rather than silently joining recordings that belong to
    # different experiments.
    if gyro_file.split('_')[-2] != exp_token:
        raise ValueError(
            'File mismatch: %s and %s belong to different experiments'
            % (acc_file, gyro_file))

    # Load the accelerometer samples and name the three axis columns
    sf = tc.SFrame.read_csv(acc_file, delimiter=' ', header=False, verbose=False)
    sf = sf.rename({'X1': 'acc_x', 'X2': 'acc_y', 'X3': 'acc_z'})

    # Load the gyroscope samples and attach them as extra columns
    gyro_sf = tc.SFrame.read_csv(gyro_file, delimiter=' ', header=False, verbose=False)
    gyro_sf = gyro_sf.rename({'X1': 'gyro_x', 'X2': 'gyro_y', 'X3': 'gyro_z'})
    sf = sf.add_columns(gyro_sf)

    # Tag every row with the experiment ID
    sf['exp_id'] = exp_id

    # Add the row number as a temporary 'id' column
    sf = sf.add_row_number()

    # Labels for this experiment as an array of (activity_id, start, end) rows
    exp_labels = labels[labels['exp_id'] == exp_id][['activity_id', 'start', 'end']].to_numpy()

    # Assign each row the activity whose [start, end] range contains its row
    # number; rows outside every labeled range get None.
    sf['activity_id'] = sf['id'].apply(lambda index: find_label_for_containing_interval(exp_labels, index))

    # Drop the temporary row-number column
    sf = sf.remove_columns(['id'])

    # Append this experiment to the final dataset
    data = data.append(sf)

# Inspect the final dataset
data.explore()

In [5]:
# Keep only the rows labeled with one of these six activity IDs.
# NOTE(review): the keys are floats, presumably because the activity_id
# column ends up float-typed - confirm against the column dtype.
target_map = {
    1.0: 'walking',
    2.0: 'climbing_upstairs',
    3.0: 'climbing_downstairs',
    4.0: 'sitting',
    5.0: 'standing',
    6.0: 'laying',
}
wanted_activity_ids = list(target_map)
data = data.filter_by(wanted_activity_ids, 'activity_id')

# Add the human-readable activity name as the 'activity' column
data['activity'] = data['activity_id'].apply(
    lambda activity_id: target_map[activity_id])

# The numeric ID is no longer needed once the name column exists
data = data.remove_column('activity_id')

# Inspect the filtered dataset
data.explore()

In [6]:
# Split into training and evaluation data by session (experiment), so all
# samples of one experiment land on the same side. A fixed seed keeps the
# split reproducible across kernel restarts.
train_data, test_data = tc.activity_classifier.util.random_split_by_session(
    data, session_id='exp_id', fraction=0.8, seed=42)

The dataset has less than the minimum of 100 sessions required for train-validation split. Continuing without validation set


In [7]:
# Train the activity classifier on the training split, treating each
# experiment (exp_id) as one session and predicting the 'activity' column.
model = tc.activity_classifier.create(
    train_data,
    session_id='exp_id',
    target='activity',
    prediction_window=50,
)

The dataset has less than the minimum of 100 sessions required for train-validation split. Continuing without validation set


Using CPU to create model
+----------------+----------------+----------------+----------------+
| Iteration      | Train Accuracy | Train Loss     | Elapsed Time   |
+----------------+----------------+----------------+----------------+
| 1              | 0.655          | 0.900          | 1.0            |
| 2              | 0.816          | 0.499          | 2.1            |
| 3              | 0.862          | 0.380          | 3.2            |
| 4              | 0.881          | 0.319          | 4.2            |
| 5              | 0.894          | 0.278          | 5.3            |
| 6              | 0.907          | 0.249          | 6.4            |
| 7              | 0.918          | 0.222          | 7.6            |
| 8              | 0.920          | 0.212          | 8.6            |
| 9              | 0.923          | 0.200          | 9.7            |
| 10             | 0.927          | 0.184          | 10.8           |
+----------------+----------------+----------------+------------

In [None]:
# Evaluate on the held-out split when the split produced one. When the split
# step returned None for test_data, fall back to the training data; the
# reported number is then training accuracy, not a generalization estimate.
eval_data = test_data if test_data is not None else train_data
metrics = model.evaluate(eval_data)

# Accuracy on the data selected above
print(metrics['accuracy'])

In [None]:
# Output locations for the two serialized forms of the trained model
turi_model_path = './ActivityClassification.model'
coreml_model_path = './ActivityClassification.mlmodel'

# Save in Turi Create's own model format
model.save(turi_model_path)

# Export in Core ML format
model.export_coreml(coreml_model_path)