In [1]:
import tempfile

# Wildcard import kept ahead of the explicit imports so the explicit names
# below always take precedence if the helper module exports the same
# identifiers. read_s3_file, write_df_to_csv_on_s3 and boto3 are not imported
# anywhere else in this notebook, so they are expected to come from here.
from boto3_utilities import *

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# S3 bucket used for every read and write in this notebook: the raw
# recordings are loaded from it and the derived CSV / .npy files are stored in it.
BUCKET_NAME = "sensor-data-keisuke-nakata"

In [3]:
# Window length: used as the window axis when the feature matrix is reshaped
# for training; each "grp" in the raw files is expected to hold this many rows.
SAMPLE = 64
# Number of sensor channels per reading (read_df concatenates eight columns:
# emg0, emg1, acc_x/y/z, gyro_x/y/z).
DIM = 8

In [4]:
def sensor_reorder(sample=None, dim=None):
    """Return the column permutation from channel-major to sample-major order.

    The input layout is ``dim`` consecutive blocks of ``sample`` values (all
    readings of channel 0, then all readings of channel 1, ...). Output
    position ``i`` takes the value of channel ``i % dim`` at time step
    ``i // dim``, i.e. source column ``(i % dim) * sample + i // dim``.

    Args:
        sample: Readings per channel in one window. Defaults to the
            notebook-level ``SAMPLE``.
        dim: Number of channels. Defaults to the notebook-level ``DIM``.

    Returns:
        list[int]: A permutation of ``range(sample * dim)``.
    """
    if sample is None:
        sample = SAMPLE
    if dim is None:
        dim = DIM
    # The modulus must use the same channel count as the floor division;
    # a literal 8 here only yields a valid permutation while dim == 8.
    return [(i % dim) * sample + i // dim for i in range(sample * dim)]

In [5]:
def read_df(key_name, label):
    """Load one raw recording from S3 and flatten it to one feature row per group.

    All rows sharing a ``grp`` value are collapsed into a single output row:
    the readings of each sensor column are collected as a list, the eight
    lists are concatenated channel after channel, and the columns are then
    permuted with ``sensor_reorder()`` so that the readings of all channels
    for one time step sit next to each other.

    Args:
        key_name: Object key of the raw file inside ``BUCKET_NAME``.
        label: Value written to the ``label`` column of every returned row.
            The ``label`` column of the raw file itself is not carried over.

    Returns:
        pandas.DataFrame: feature columns named ``0 .. n-1`` plus ``label``.
    """
    sensor_columns = ['emg0', 'emg1', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    # Load from S3
    # NOTE(review): read_s3_file is assumed to return a DataFrame whose index
    # plus columns total the 15 names assigned below -- confirm in the helper.
    raw = read_s3_file(BUCKET_NAME, key_name).reset_index()
    raw.columns = ['dt', 'grp', 'cnt', 'sample', '0', 'emg0', '1', 'emg1', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'label']
    raw = raw.drop(['0', '1'], axis=1)

    # Pre-Process
    # Group once and reuse it for every channel instead of re-grouping the
    # frame eight times.
    grouped = raw.groupby("grp")
    per_group = None
    for column in sensor_columns:
        readings = grouped[column].apply(list)
        # Adding two Series of lists concatenates the lists group by group.
        per_group = readings if per_group is None else per_group + readings

    windows = pd.DataFrame(per_group.tolist())
    windows = windows.reindex(sensor_reorder(), axis='columns')
    # The reindex leaves the permuted source labels as column names; renumber
    # them so the columns are named by their new position.
    windows.columns = range(windows.shape[1])

    windows['label'] = label

    return windows

# None (label 0)

In [6]:
# Two "none" recordings are combined and tagged with class id 0.
# (An earlier revision paired dt=2022/02/19/16-08-49.csv with
# dt=2022/02/19/19-13-35.csv instead.)
df_none1, df_none2 = (
    read_df(key_name=key, label=0)
    for key in (
        "sample=64/label=none/dt=2022/02/19/19-13-35.csv",
        "sample=64/label=none/dt=2022/02/23/11-55-00.csv",
    )
)
# Row-wise stack; each file keeps its own 0-based index labels.
df_none = pd.concat((df_none1, df_none2))

print(df_none.shape[0])
df_none.head()

2027


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.012397,0.08059,0.040756,0.025315,0.016159,0.063413,25.428345,13.831261,9.642627,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.773739,0.063294,0.243524,0.006403,0.012579,0.037954,5.211552,0.731048,0.433987,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.077371,0.324197,0.130279,0.022728,0.014271,0.027002,1.193998,4.043405,3.474844,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.435667,0.402925,0.542318,0.029584,0.035069,0.03209,3.757797,1.206698,2.36699,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.117059,0.2657,0.229145,0.036241,0.127177,0.098117,24.174219,6.995932,3.091979,0


In [7]:
# Upload the combined label-0 frame under the "none" label prefix.
# (write_df_to_csv_on_s3 is a helper whose implementation is not shown here.)
write_df_to_csv_on_s3(BUCKET_NAME, df_none, "sample=64/label=none/data.csv")

# Grab (label 1)

In [8]:
# Single "grab" recording, tagged with class id 1.
df_grab = read_df(
    key_name="sample=64/label=grab/dt=2022/02/19/17-34-50.csv",
    label=1,
)

print(df_grab.shape[0])
df_grab.head()

1004


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.51485,0.06031,0.451891,0.087562,0.047981,0.045199,2.236968,1.859136,2.0736,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.096322,0.126851,0.272404,0.039693,0.028119,0.089124,11.86024,6.608422,2.337377,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.23231,0.124255,0.497477,0.044399,0.130649,0.063458,10.210983,15.1022,7.0531,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.496189,0.051641,0.086383,0.006725,0.075905,0.063669,11.971022,1.154183,4.446785,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.790484,0.093335,0.233175,0.044683,0.057064,0.024375,26.851717,12.549749,2.662844,1


In [9]:
# Upload the label-1 frame under the "grab" label prefix.
write_df_to_csv_on_s3(BUCKET_NAME, df_grab, "sample=64/label=grab/data.csv")

# Handle (label 2)

In [10]:
# Single "handle" recording, tagged with class id 2.
df_handle = read_df(
    key_name="sample=64/label=handle/dt=2022/02/19/16-51-42.csv",
    label=2,
)

print(df_handle.shape[0])
df_handle.head()

1006


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.429079,0.021672,0.109331,0.014774,0.016119,0.008815,0.897211,0.963814,0.500124,2
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.564194,0.057501,0.272598,0.032933,0.013745,0.026177,1.65194,0.87944,0.448017,2
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.929246,0.157881,0.087566,0.159987,0.110169,0.040757,9.286782,0.527131,2.480802,2
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,13.090975,0.079541,0.549131,0.01425,0.103749,0.06047,3.428333,1.849184,15.144716,2
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.670585,0.048521,0.05171,0.095814,0.097795,0.231317,9.511294,4.600858,3.380875,2


In [11]:
# Upload the label-2 frame under the "handle" label prefix.
write_df_to_csv_on_s3(BUCKET_NAME, df_handle, "sample=64/label=handle/data.csv")

# Lock (label 3)

In [12]:
# Single "lock" recording, tagged with class id 3.
df_lock = read_df(
    key_name="sample=64/label=lock/dt=2022/02/21/09-20-24.csv",
    label=3,
)

print(df_lock.shape[0])
df_lock.head()

1004


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.538862,0.132694,0.021055,0.034184,0.022378,0.015991,1.783718,2.726354,1.506741,3
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.078501,0.026402,0.12794,0.009382,0.049275,0.019509,4.535282,0.225915,0.65956,3
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,15.528671,0.308789,0.119219,0.032985,0.05863,0.086929,11.462497,1.918011,16.151207,3
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.654578,0.390068,0.088091,0.079491,0.108783,0.04703,17.628497,3.368139,3.81772,3
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.992326,0.042696,0.112864,0.004066,0.085362,0.072171,11.105656,3.307806,0.834401,3


In [13]:
# Upload the label-3 frame under the "lock" label prefix.
write_df_to_csv_on_s3(BUCKET_NAME, df_lock, "sample=64/label=lock/data.csv")

# Door (label 4)

In [14]:
# Single "door" recording, tagged with class id 4.
df_door = read_df(
    key_name="sample=64/label=door/dt=2022/02/20/19-56-32.csv",
    label=4,
)

print(df_door.shape[0])
df_door.head()

1003


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.848761,0.032495,0.142261,0.050034,0.013785,0.013714,4.573199,2.612709,1.245714,4
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.905711,0.245502,0.251235,0.024869,0.036746,0.031317,3.703894,0.945003,0.395315,4
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.52889,0.172396,0.342532,0.091032,0.132922,0.074418,15.417963,1.754968,5.152563,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.62786,0.231849,0.336724,0.023492,0.020519,0.042313,5.038674,0.362818,10.747716,4
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.044365,0.102196,0.336066,0.129451,0.206295,0.008039,5.254551,4.712443,9.086397,4


In [15]:
# Upload the label-4 frame under its own "door" label prefix, using the same
# "label=<name>/data.csv" layout as the other classes. The previous key
# ("sample=64/label=lock/door.csv") placed the door data inside the lock
# prefix; anything that still reads that old key must be pointed here.
write_df_to_csv_on_s3(BUCKET_NAME, df_door, "sample=64/label=door/data.csv")

# Merge Data

In [16]:
# Stack the per-class frames row-wise in class-id order (0 through 4).
# Index labels are kept as-is, so they repeat across the source frames.
df_merged = pd.concat((df_none, df_grab, df_handle, df_lock, df_door))
df_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.012397,0.08059,0.040756,0.025315,0.016159,0.063413,25.428345,13.831261,9.642627,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.773739,0.063294,0.243524,0.006403,0.012579,0.037954,5.211552,0.731048,0.433987,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.077371,0.324197,0.130279,0.022728,0.014271,0.027002,1.193998,4.043405,3.474844,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.435667,0.402925,0.542318,0.029584,0.035069,0.03209,3.757797,1.206698,2.36699,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.117059,0.2657,0.229145,0.036241,0.127177,0.098117,24.174219,6.995932,3.091979,0


In [17]:
# Summary statistics per column, as a quick sanity check on value ranges and
# on the row count of the merged frame.
df_merged.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
count,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,...,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0,6044.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.792002,0.131739,0.205323,0.060266,0.07199,0.071838,8.004973,3.903381,3.757607,1.661152
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.203839,0.100099,0.168152,0.075971,0.083162,0.097157,7.969183,4.071467,4.211343,1.490998
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.008577,0.002469,0.001791,0.000214,8.4e-05,0.000148,0.003966,0.019478,0.025372,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.960917,0.065077,0.087414,0.012709,0.016468,0.016157,1.943458,1.020482,0.932205,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.251963,0.105947,0.150753,0.034805,0.045505,0.041132,5.783857,2.526123,2.245849,1.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.087025,0.166028,0.275008,0.077802,0.096078,0.086711,11.274474,5.129936,4.975392,3.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,44.827453,0.83254,1.397792,0.914485,0.862436,1.16945,66.209615,36.451797,44.039687,4.0


In [18]:
# Split the merged frame into the feature columns and the target column.
df_merged_without_label = df_merged.drop(columns="label")
df_merged_label = df_merged["label"]

df_merged_without_label.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,502,503,504,505,506,507,508,509,510,511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14.934162,10.012397,0.08059,0.040756,0.025315,0.016159,0.063413,25.428345,13.831261,9.642627
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.102531,0.773739,0.063294,0.243524,0.006403,0.012579,0.037954,5.211552,0.731048,0.433987
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.193474,3.077371,0.324197,0.130279,0.022728,0.014271,0.027002,1.193998,4.043405,3.474844
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.232094,2.435667,0.402925,0.542318,0.029584,0.035069,0.03209,3.757797,1.206698,2.36699
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.974184,2.117059,0.2657,0.229145,0.036241,0.127177,0.098117,24.174219,6.995932,3.091979


In [19]:
# Feature matrix as a plain NumPy array: one row per merged-frame row.
arr_merged_data = df_merged_without_label.values
print(arr_merged_data.shape[0])
print(arr_merged_data)

6044
[[ 0.        0.        0.       ... 25.428345 13.831261  9.642627]
 [ 0.        0.        0.       ...  5.211552  0.731048  0.433987]
 [ 0.        0.        0.       ...  1.193998  4.043405  3.474844]
 ...
 [ 0.        0.        0.       ... 14.593639  4.085609  7.912927]
 [ 0.        0.        0.       ...  2.353552  2.745436  2.79104 ]
 [ 0.        0.        0.       ...  2.481248 13.256127  1.147522]]


In [20]:
# category transform
# LabelEncoder maps the class ids onto consecutive integers starting at 0.
le = LabelEncoder()
# Store the encoded labels as an (n_samples, 1) integer column. A vectorised
# cast + reshape gives the same array as wrapping every element in its own
# one-element array, without a Python-level loop over all samples.
arr_merged_label = le.fit_transform(df_merged_label).astype(int).reshape(-1, 1)
print(len(arr_merged_label))

6044


In [21]:
windowsize = SAMPLE
# Fixed seed so the train/eval partition (and therefore the arrays uploaded
# from it) is identical on every Restart & Run All.
SPLIT_SEED = 42

arr_merged_data = arr_merged_data.astype('float32')
# Stratify on the class labels so both partitions keep the class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    arr_merged_data,
    arr_merged_label,
    stratify=df_merged_label,
    random_state=SPLIT_SEED,
)

# Unfold every flat feature row into (windowsize, DIM) and append a trailing
# singleton axis. The labels are left as (n, 1) columns: they hold a single
# value per row and cannot be reshaped to the feature layout.
x_train = x_train.reshape(x_train.shape[0], windowsize, DIM, 1)
x_test = x_test.reshape(x_test.shape[0], windowsize, DIM, 1)

print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

(4533, 64, 8, 1) (1511, 64, 8, 1)
(4533, 1) (1511, 1)


# Save numpy array

In [22]:
def nparr_save(nparr, key, bucket_name=None):
    """Serialize an array in ``.npy`` format and upload it to S3.

    Args:
        nparr: Array to store; written with ``np.save``.
        key: Destination object key inside the bucket.
        bucket_name: Target bucket. Defaults to the notebook-level
            ``BUCKET_NAME`` when omitted.

    Returns:
        None. Any error raised by the upload propagates to the caller.
    """
    if bucket_name is None:
        bucket_name = BUCKET_NAME
    # boto3 is not imported explicitly in this notebook; it is expected to be
    # in scope through the wildcard import at the top.
    s3 = boto3.resource('s3')
    # Spool through an anonymous temporary file, which is removed
    # automatically when the with-block exits.
    with tempfile.TemporaryFile() as temp:
        np.save(temp, nparr)
        # Rewind so the upload reads the file from its first byte.
        temp.seek(0)
        s3.Object(bucket_name=bucket_name, key=key).upload_fileobj(temp)

In [23]:
# Upload the training partition (features, then labels) ...
nparr_save(nparr=x_train, key="sample=64/train_data.npy")
nparr_save(nparr=y_train, key="sample=64/train_labels.npy")
# ... followed by the held-out partition, stored under the "eval_" names.
nparr_save(nparr=x_test, key="sample=64/eval_data.npy")
nparr_save(nparr=y_test, key="sample=64/eval_labels.npy")

# Save CSV

In [24]:
# Upload the full merged frame (features plus the "label" column) and show
# its first rows as a final visual check.
write_df_to_csv_on_s3(BUCKET_NAME, df_merged, 'sample=64/merged.csv')
df_merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.012397,0.08059,0.040756,0.025315,0.016159,0.063413,25.428345,13.831261,9.642627,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.773739,0.063294,0.243524,0.006403,0.012579,0.037954,5.211552,0.731048,0.433987,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.077371,0.324197,0.130279,0.022728,0.014271,0.027002,1.193998,4.043405,3.474844,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.435667,0.402925,0.542318,0.029584,0.035069,0.03209,3.757797,1.206698,2.36699,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.117059,0.2657,0.229145,0.036241,0.127177,0.098117,24.174219,6.995932,3.091979,0
