In [1]:
import os
import pandas as pd
import numpy as np
import datetime
import scipy
from scipy.stats import skew, kurtosis

In [2]:
import tensorflow as tf

In [3]:
# Read Accelerometer Data
acc_data = pd.read_csv('../data/all_accelerometer_data_pids_13.csv')

def get_time_value(x):
  # x is ms. it is divided by 1000 to get microservond
  t = datetime.datetime.fromtimestamp(x/1000.0)
  t = t.replace(microsecond = 0)
  return int(t.timestamp())

acc_data['window10'] = acc_data['time'].apply(get_time_value)
acc_data = acc_data.drop(columns="time")
acc_data = acc_data.rename(columns = {"window10": "time"})

acc_data.head()

Unnamed: 0,pid,x,y,z,time
0,JB3156,0.0,0.0,0.0,0
1,CC6740,0.0,0.0,0.0,0
2,SA0297,0.0758,0.0273,-0.0102,1493733882
3,SA0297,-0.0359,0.0794,0.0037,1493733882
4,SA0297,-0.2427,-0.0861,-0.0163,1493733882


In [4]:

# Read clean tac data for pid = BK7610
clean_tac_data = pd.read_csv('../data/clean_tac/BK7610_clean_TAC.csv')
clean_tac_data["tac"] = np.where(clean_tac_data["TAC_Reading"] > 0.08, 1, 0)
clean_tac_data = clean_tac_data.drop(columns="TAC_Reading")
clean_tac_data = clean_tac_data.rename(columns={"tac": "TAC_Reading"})
clean_tac_data.describe()


Unnamed: 0,timestamp,TAC_Reading
count,57.0,57.0
mean,1493758000.0,0.22807
std,28415.95,0.423318
min,1493719000.0,0.0
25%,1493729000.0,0.0
50%,1493756000.0,0.0
75%,1493782000.0,0.0
max,1493808000.0,1.0


In [5]:
# Filtering for specific PID (temps)
acc_data_pid = acc_data[acc_data.pid == "BK7610"]
acc_data_pid

Unnamed: 0,pid,x,y,z,time
47136,BK7610,0.1261,-0.0078,-0.0243,1493735870
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870
47142,BK7610,0.1255,-0.0038,0.0111,1493735870
47144,BK7610,0.1076,0.0032,0.0276,1493735870
...,...,...,...,...,...
6071104,BK7610,-0.0784,-0.0161,0.1719,1493767770
6071108,BK7610,-0.0395,-0.0816,0.1634,1493767770
6071112,BK7610,0.0160,-0.0853,0.0906,1493767770
6071117,BK7610,0.0901,-0.0767,0.0162,1493767770


In [6]:
# Up sampling tac data to match acc data
clean_ts = clean_tac_data['timestamp'] 
acc_ts = acc_data_pid['time']
all_labels = list()
offset_tac, offset_acc = 0, 0
# print(acc_ts.iloc[0])
# print(clean_ts.loc[0])
# print(clean_tac_data.loc[0]['TAC_Reading'])
# # acc_ts.iloc[0] #1493735870653
while offset_tac < len(clean_ts) and offset_acc < len(acc_ts):
  
  while acc_ts.iloc[offset_acc] < clean_ts.iloc[offset_tac]:
    all_labels.append([clean_tac_data.iloc[offset_tac]['TAC_Reading'], acc_ts.iloc[offset_acc]])
    offset_acc += 1
    if offset_acc >= len(acc_ts):
      break

  offset_tac += 1

all_labels


[[0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735872],
 [0, 14937

In [7]:
all_labels_df = pd.DataFrame(all_labels, columns = ["tac", "time"])
all_labels_df.shape, acc_data_pid.shape

# merged = merged.drop_duplicates().reset_index(drop=True)
# merged.to_csv("../data/BK7610_final_final.csv")

((1225727, 2), (1225727, 5))

In [8]:
# merged = acc_data_pid.head(10).merge(all_labels_df.head(10), on = 'time', how='inner')
# merged['time'].value_counts()

In [9]:
# len(all_labels_df['time'].unique()), len(acc_data_pid['time'].unique())

In [10]:
# acc_data_pid['tac_reading'] = 
clean_tac_data["from"] = clean_tac_data["timestamp"].shift(1, fill_value=-1) + 1


In [11]:
clean_tac_data.index = pd.IntervalIndex.from_arrays(clean_tac_data["from"], clean_tac_data["timestamp"], closed = "both")


In [12]:
acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])
acc_data_pid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])


Unnamed: 0,pid,x,y,z,time,tac
47136,BK7610,0.1261,-0.0078,-0.0243,1493735870,0
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870,0
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870,0
47142,BK7610,0.1255,-0.0038,0.0111,1493735870,0
47144,BK7610,0.1076,0.0032,0.0276,1493735870,0
...,...,...,...,...,...,...
6071104,BK7610,-0.0784,-0.0161,0.1719,1493767770,1
6071108,BK7610,-0.0395,-0.0816,0.1634,1493767770,1
6071112,BK7610,0.0160,-0.0853,0.0906,1493767770,1
6071117,BK7610,0.0901,-0.0767,0.0162,1493767770,1


In [13]:
# print(acc_data_pid["tac"].unique().sort())
# clean_tac_data
# min: 1,493,718,714
# max: 1,493,807,899
# 
# acc_data_pid
# min: 1,493,735,870
# max: 1,493,767,770
# acc_data_pid["time"].max()


In [14]:
# TODO: Make n = 10 after either removing one record which has 7 records for a second or by adding 3 dummy values to it (latter is better)
# frame_temp.groupby([ "pid", "window10"]).count().describe()
# We are sampling with replacement, which should be okay since it is within a second
acc_data_pid_20s = acc_data_pid.groupby([ "pid", "time"]).sample(n = 20, replace=True)


In [15]:
acc_data_pid_20s

Unnamed: 0,pid,x,y,z,time,tac
47160,BK7610,0.0918,-0.0076,-0.0020,1493735870,0
47148,BK7610,0.1085,-0.0282,-0.0035,1493735870,0
47154,BK7610,0.1137,0.0097,-0.0038,1493735870,0
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870,0
47142,BK7610,0.1255,-0.0038,0.0111,1493735870,0
...,...,...,...,...,...,...
6071071,BK7610,-0.0477,0.0720,-0.0905,1493767770,1
6071050,BK7610,0.0102,-0.2743,-0.0269,1493767770,1
6071104,BK7610,-0.0784,-0.0161,0.1719,1493767770,1
6071066,BK7610,-0.0465,0.0261,-0.3331,1493767770,1


In [16]:
# # x_sliding_window = np.lib.stride_tricks.sliding_window_view(data[:, 1], window_shape = 10)
# # x_sliding_window.shape
pids = ["BK7610"]
final = []
labels = []
for pid in pids:
  temptemp = acc_data_pid_20s[acc_data_pid_20s['pid'] == pid]
  times = temptemp.time.unique()
  final_temp =[]
  labels_temp = []
  for time in times:
    # x = np.lib.stride_tricks.sliding_window_view(frame_temp2[frame_temp2.pid == pid and frame_temp2.window10 == time], window_shape = 10)
    temptemptemp = temptemp[temptemp['time'] == time]
    # TODO: Create x y z sliding windows
    # x = np.lib.stride_tricks.sliding_window_view(temptemp["x"], window_shape = 200)
    # y = np.lib.stride_tricks.sliding_window_view(temptemp["y"], window_shape = 200)
    # z = np.lib.stride_tricks.sliding_window_view(temptemp["z"], window_shape = 200)
    a = np.vstack((temptemptemp["x"], temptemptemp["y"], temptemptemp["z"]))
    final_temp.append(a)
    labels_temp.append(temptemptemp.head(1)["tac"])
  final.append(np.array(final_temp))
  labels.append(np.array(labels_temp))
  # print(final)
  
  # break
# print(np.array(final,dtype=object).shape)


In [17]:
final_arr = np.asarray(final).astype('float32')
labels_arr = np.asarray(labels).astype('float32')
print(final_arr.shape)
print(labels_arr.shape)



(1, 30735, 3, 20)
(1, 30735, 1)


In [21]:
final_arr_reshape = np.reshape(final_arr, (30735, 3, 20))
labels_arr_reshape = np.reshape(labels_arr, (30735))
final_arr_reshape.shape, labels_arr_reshape.shape 

((30735, 3, 20), (30735,))

In [77]:
batch_size = 30735
epochs = 1 
# prep_data = pd.read_csv('../data/good_again_bhas.csv')
flatten = tf.keras.layers.Flatten()
conv_layer1 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3,padding='SAME')
conv_layer2 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3, padding='SAME')
dropout = tf.keras.layers.Dropout(0.5)
max_pooling = tf.keras.layers.MaxPool1D(pool_size=2)
# fc - fully connected layer
fc_layer = tf.keras.layers.Dense(units=128, activation = 'leaky_relu')
base_model = tf.keras.Sequential([
    # flatten, 
                                  conv_layer1, 
                                  conv_layer2, 
                                  dropout, 
                                  max_pooling, 
                                  fc_layer])

base_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), \
                   optimizer=tf.keras.optimizers.Adam(), \
                   metrics=[tf.keras.metrics.Accuracy()])

In [78]:
base_model.build((30735, 3, 20))

In [79]:
base_model.summary()

Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_38 (Conv1D)          (30735, 3, 64)            3904      
                                                                 
 conv1d_39 (Conv1D)          (30735, 3, 64)            12352     
                                                                 
 dropout_19 (Dropout)        (30735, 3, 64)            0         
                                                                 
 dense_19 (Dense)            (30735, 3, 128)           8320      
                                                                 
Total params: 24,576
Trainable params: 24,576
Non-trainable params: 0
_________________________________________________________________


In [61]:
base_model.fit(final_arr_reshape, labels_arr_reshape, epochs = epochs,
            #    batch_size = batch_size, 
               verbose=1)
loss, accuracy = base_model.evaluate(final_arr_reshape, labels_arr_reshape,
                                    #  batch_size = batch_size, 
                                     verbose=0)

loss, accuracy 

ValueError: in user code:

    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/training.py", line 1028, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/training.py", line 1122, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 605, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/utils/metrics_utils.py", line 77, in decorated
        update_op = update_state_fn(*args, **kwargs)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 691, in update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/sarah_prakriti_peters/miniconda3/envs/DL/lib/python3.10/site-packages/keras/metrics/metrics.py", line 3571, in accuracy  **
        y_true.shape.assert_is_compatible_with(y_pred.shape)

    ValueError: Shapes (None, 1) and (None, 128) are incompatible
