In [None]:
import pandas as pd
import numpy as np
import random
import pickle
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tqdm import tqdm
from collections import Counter
from collections import OrderedDict
import matplotlib.pyplot as plt

In [None]:
# Numeric sensor columns that are standardised further down in this notebook.
# 'acc' and 'acc_g' do not come from the CSV files; handle_features() creates them.
src_names = [
    'acc_x', 'acc_y', 'acc_z',
    'acc_xg', 'acc_yg', 'acc_zg',
    'acc', 'acc_g',
]

In [None]:
def handle_features(data):
    """Return a new frame with ``time_point`` removed and magnitude columns added.

    Two columns are appended to the result:

    * ``acc``   -- Euclidean norm of ``acc_x``, ``acc_y``, ``acc_z``
    * ``acc_g`` -- Euclidean norm of ``acc_xg``, ``acc_yg``, ``acc_zg``

    The input frame is not modified, so the function can be called repeatedly
    on the same raw frame (e.g. when a notebook cell is re-run) and always
    yields the same result.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``time_point`` and the six ``acc_*`` columns listed
        above.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``data`` itself is left untouched.

    Raises
    ------
    KeyError
        If ``data`` has no ``time_point`` column.
    """
    # drop() without inplace=True returns a new frame, so the column
    # assignments below never touch the caller's object.
    features = data.drop(columns=['time_point'])

    features['acc'] = (features.acc_x ** 2 + features.acc_y ** 2 + features.acc_z ** 2) ** 0.5
    features['acc_g'] = (features.acc_xg ** 2 + features.acc_yg ** 2 + features.acc_zg ** 2) ** 0.5

    return features

In [None]:
# Conventional zero padding
def handle_mats(grouped_data, max_len=60):
    """Stack per-fragment frames into one array of fixed sequence length.

    Each element of ``grouped_data`` is converted with ``.values``. Sequences
    longer than ``max_len`` rows keep only their first ``max_len`` rows;
    shorter ones are extended with rows of zeros at the end.

    Parameters
    ----------
    grouped_data : iterable
        Objects exposing a 2-D ``.values`` array (e.g. pandas DataFrames),
        all with the same number of columns.
    max_len : int, optional
        Number of rows every sequence is padded / truncated to. Defaults to
        60, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of sequences, max_len, number of columns)``.

    Raises
    ------
    ValueError
        If ``max_len`` is negative, if ``grouped_data`` is empty, or if the
        sequences do not share the same number of columns.
    """
    if max_len < 0:
        raise ValueError('max_len must be non-negative')

    fixed_length = []
    for frame in grouped_data:
        # Slicing is a no-op for sequences that are already short enough.
        mat = frame.values[:max_len]
        missing = max_len - len(mat)
        if missing > 0:
            # np.zeros defaults to float64, so padded sequences are promoted
            # the same way as with the former np.r_ concatenation.
            filler = np.zeros((missing, mat.shape[-1]))
            mat = np.concatenate([mat, filler], axis=0)
        fixed_length.append(mat)

    # np.stack adds the leading "sequence" axis and joins along it.
    return np.stack(fixed_length, axis=0)

In [None]:
# Locations of the raw sensor recordings, relative to the notebook's working directory.
train_csv_path = '../dataset/sensor_train_final.csv'
test_csv_path = '../dataset/sensor_test_final.csv'

# Load both splits as DataFrames.
sensor_train = pd.read_csv(train_csv_path)
sensor_test = pd.read_csv(test_csv_path)

In [None]:
# Add the magnitude features ('acc', 'acc_g') to both splits.
train_data = handle_features(sensor_train)
test_data = handle_features(sensor_test)

# Standardise the sensor columns. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
train_data[src_names] = scaler.fit_transform(train_data[src_names])
test_data[src_names] = scaler.transform(test_data[src_names])

# Split the training frame into one frame per fragment_id. Each fragment's
# label is read from its first row before the id / label columns are removed.
train_data_grouped = []
fragment_labels = []
for _, fragment in train_data.groupby('fragment_id'):
    fragment_labels.append(int(fragment['behavior_id'].iloc[0]))
    train_data_grouped.append(fragment.drop(columns=['fragment_id', 'behavior_id']))
train_labels = np.array(fragment_labels)

# Same per-fragment split for the test frame (only fragment_id is dropped here).
test_data_grouped = [
    fragment.drop(columns='fragment_id')
    for _, fragment in test_data.groupby('fragment_id')
]

In [None]:
# Turn the per-fragment frames of each split into one fixed-length 3-D array.
# NOTE: from here on train_data / test_data are NumPy arrays, no longer DataFrames.
train_data = handle_mats(grouped_data=train_data_grouped)
test_data = handle_mats(grouped_data=test_data_grouped)

# Cell output: shapes of the padded training data, the labels and the padded test data.
(train_data.shape, train_labels.shape, test_data.shape)

In [None]:
def get_length_data(data_grouped):
    """Return ``len()`` of every element of ``data_grouped`` as a 1-D NumPy array."""
    return np.array([len(fragment) for fragment in data_grouped])

# Row count of every fragment, taken from the un-padded per-fragment frames.
# NOTE(review): these counts are not capped, while handle_mats() truncates to
# 60 rows -- confirm that downstream consumers expect the raw lengths.
train_length_data = get_length_data(data_grouped=train_data_grouped)
test_length_data = get_length_data(data_grouped=test_data_grouped)

In [None]:
# Collect every array that should be persisted, keyed by its name in the archive.
arrays_to_save = {
    'train_data': train_data,
    'test_data': test_data,
    'train_labels': train_labels,
    'train_length_data': train_length_data,
    'test_length_data': test_length_data,
}

# Write a single compressed archive; NumPy appends the ".npz" extension to 'data'.
np.savez_compressed('data', **arrays_to_save)