In [74]:
import os
import pandas as pd
import numpy as np
import datetime
import scipy
from scipy.stats import skew, kurtosis
import tensorflow as tf

In [199]:
# Read Accelerometer Data
acc_data = pd.read_csv('../data/all_accelerometer_data_pids_13.csv')


def get_time_value(x):
  # x is ms. it is divided by 1000 to get microsecond
  t = datetime.datetime.fromtimestamp(x/1000.0)
  t = t.replace(microsecond = 0)
  return int(t.timestamp())

acc_data['window10'] = acc_data['time'].apply(get_time_value)
acc_data = acc_data.drop(columns="time")
acc_data = acc_data.rename(columns = {"window10": "time"})

acc_data.head()

Unnamed: 0,pid,x,y,z,time
0,JB3156,0.0,0.0,0.0,0
1,CC6740,0.0,0.0,0.0,0
2,SA0297,0.0758,0.0273,-0.0102,1493733882
3,SA0297,-0.0359,0.0794,0.0037,1493733882
4,SA0297,-0.2427,-0.0861,-0.0163,1493733882


In [470]:
pids = acc_data['pid'].unique()
pids

array(['JB3156', 'CC6740', 'SA0297', 'PC6771', 'BK7610', 'DC6359',
       'MC7070', 'MJ8002', 'BU4707', 'JR8022', 'HV0618', 'SF3079',
       'DK3500'], dtype=object)

In [201]:
# Read clean tac data for pid = BK7610
clean_tac_data = pd.DataFrame()
for pid in pids:
    temp = pd.read_csv('../data/clean_tac/' + pid + '_clean_TAC.csv')
    # clean_tac_data = pd.read_csv('../data/clean_tac/JB3156_clean_TAC.csv')
    clean_tac_data.append(temp)
    
clean_tac_data["tac"] = np.where(clean_tac_data["TAC_Reading"] > 0.08, 1, 0)
clean_tac_data = clean_tac_data.drop(columns="TAC_Reading")
clean_tac_data = clean_tac_data.rename(columns={"tac": "TAC_Reading"})
clean_tac_data.describe()


Unnamed: 0,timestamp,TAC_Reading
count,57.0,57.0
mean,1493758000.0,0.22807
std,28415.95,0.423318
min,1493719000.0,0.0
25%,1493729000.0,0.0
50%,1493756000.0,0.0
75%,1493782000.0,0.0
max,1493808000.0,1.0


In [202]:
# Filtering for specific PID (temps)
acc_data_pid = acc_data[acc_data.pid == "BK7610"]
acc_data_pid['pid'].unique()

array(['BK7610'], dtype=object)

In [203]:
acc_data_pid.describe()

Unnamed: 0,x,y,z,time
count,1225727.0,1225727.0,1225727.0,1225727.0
mean,-0.006497703,0.007507374,0.002747567,1493752000.0
std,0.1380473,0.1387602,0.1279124,9276.766
min,-4.2748,-6.9489,-5.2772,1493736000.0
25%,-0.0094,-0.006,-0.0073,1493744000.0
50%,0.0001,0.0001,0.0057,1493752000.0
75%,0.0083,0.0094,0.0114,1493760000.0
max,6.4503,5.3441,4.6565,1493768000.0


In [204]:
acc_data_pid.shape

(1225727, 5)

In [205]:
# Up sampling tac data to match acc data
clean_ts = clean_tac_data['timestamp'] 
acc_ts = acc_data_pid['time']
all_labels = list()
offset_tac, offset_acc = 0, 0
# print(acc_ts.iloc[0])
# print(clean_ts.loc[0])
# print(clean_tac_data.loc[0]['TAC_Reading'])
# # acc_ts.iloc[0] #1493735870653
while offset_tac < len(clean_ts) and offset_acc < len(acc_ts):
  
  while acc_ts.iloc[offset_acc] < clean_ts.iloc[offset_tac]:
    all_labels.append([clean_tac_data.iloc[offset_tac]['TAC_Reading'], acc_ts.iloc[offset_acc]])
    offset_acc += 1
    if offset_acc >= len(acc_ts):
      break

  offset_tac += 1

all_labels


[[0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735870],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735871],
 [0, 1493735872],
 [0, 14937

In [206]:
all_labels_df = pd.DataFrame(all_labels, columns = ["tac", "time"])
all_labels_df.shape, acc_data_pid.shape

# merged = merged.drop_duplicates().reset_index(drop=True)
# merged.to_csv("../data/BK7610_final_final.csv")

((1225727, 2), (1225727, 5))

In [207]:
# merged = acc_data_pid.head(10).merge(all_labels_df.head(10), on = 'time', how='inner')
# merged['time'].value_counts()

In [208]:
# len(all_labels_df['time'].unique()), len(acc_data_pid['time'].unique())
clean_tac_data["timestamp"].is_monotonic_increasing

True

In [209]:
# acc_data_pid['tac_reading'] = 
# TODO: Make sure tac data is sorted on timestamp
clean_tac_data["from"] = clean_tac_data["timestamp"].shift(1, fill_value=-1) + 1

In [210]:
clean_tac_data.index = pd.IntervalIndex.from_arrays(clean_tac_data["from"], clean_tac_data["timestamp"], closed = "both")

In [211]:
acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])
acc_data_pid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])


Unnamed: 0,pid,x,y,z,time,tac
47136,BK7610,0.1261,-0.0078,-0.0243,1493735870,0
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870,0
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870,0
47142,BK7610,0.1255,-0.0038,0.0111,1493735870,0
47144,BK7610,0.1076,0.0032,0.0276,1493735870,0
...,...,...,...,...,...,...
6071104,BK7610,-0.0784,-0.0161,0.1719,1493767770,1
6071108,BK7610,-0.0395,-0.0816,0.1634,1493767770,1
6071112,BK7610,0.0160,-0.0853,0.0906,1493767770,1
6071117,BK7610,0.0901,-0.0767,0.0162,1493767770,1


In [212]:
# print(acc_data_pid["tac"].unique().sort())
# clean_tac_data
# min: 1,493,718,714
# max: 1,493,807,899
# 
# acc_data_pid
# min: 1,493,735,870
# max: 1,493,767,770
# acc_data_pid["time"].max()
# groups = acc_data_pid.groupby(["time"])
# print(groups.apply(lambda x: x["tac"]<20))
# time_stamps = groups.apply(lambda x: x["tac"]<20)
# time_stamps 

min(acc_data_pid.index), max(acc_data_pid.index)

(47136, 6071121)

In [213]:
# TODO: Make n = 10 after either removing one record which has 7 records for a second or by adding 3 dummy values to it (latter is better)
# frame_temp.groupby([ "pid", "window10"]).count().describe()
# We are sampling with replacement, which should be okay since it is within a second
acc_data_pid_20s = acc_data_pid.groupby([ "pid", "time"]).sample(n = 20, replace=True)

In [214]:
# TODO: Make this as an assert statement in the begininng for both tac and accelerometer data
acc_data_pid_20s["time"].is_monotonic_increasing

True

In [218]:
# SLIDING WINDOW
acc_data_sliding = acc_data_pid_20s.copy()
# cols = ["x", "y", "z"]
# window_size = 10 # including current

# for col in cols:
#     cols_to_append = []
#     for i in range(0, window_size):
#         shifted_col_name = str(col) + "_" + str(i)
#         acc_data_sliding[shifted_col_name] = acc_data_sliding[col].shift(i, fill_value = 0)
#         cols_to_append.append(shifted_col_name)
    
#     # we have (windpw_size ) columsn for col
#     # Uncomment to keep original columns
#     acc_data_sliding = acc_data_sliding.drop(columns=[col])    
#     acc_data_sliding[str(col)] = acc_data_sliding[cols_to_append].values.tolist()
    
#     # acc_data_sliding = acc_data_sliding.drop(columns=cols_to_append)

In [262]:

# # x_sliding_window.shape
pids = ["BK7610"]
final = []
labels = []
for pid in pids:
  # temptemp = acc_data_pid_20s[acc_data_pid_20s['pid'] == pid]
  temptemp = acc_data_sliding[acc_data_sliding['pid'] == pid]
  times = temptemp.time.unique()
  final_temp =[]
  labels_temp = []
  for i in range(len(times)):
    # x = np.lib.stride_tricks.sliding_window_view(frame_temp2[frame_temp2.pid == pid and frame_temp2.window10 == time], window_shape = 10)
    # temptemptemp = temptemp[temptemp['time'] == time]
    time_to_filter = [times[j] if j >= 0 else -1 for j in range(i-9, i+1)]
    # print(time_to_filter)
    # if i == 10:
    #   break
    temptemptemp = temptemp[temptemp['time'].isin(time_to_filter)]
    # TODO: Create x y z sliding windows
    # x = np.lib.stride_tricks.sliding_window_view(temptemptemp["x"], window_shape = 10)
    # y = np.lib.stride_tricks.sliding_window_view(temptemptemp["y"], window_shape = 10)
    # z = np.lib.stride_tricks.sliding_window_view(temptemptemp["z"], window_shape = 10)

    # x_dash = np.array(temptemptemp[["x_"+str(i) for i in range(window_size)]]).flatten()
    # y_dash = np.array(temptemptemp[["y_"+str(i) for i in range(window_size)]]).flatten()
    # z_dash = np.array(temptemptemp[["z_"+str(i) for i in range(window_size)]]).flatten()

    x_dash = np.array(temptemptemp["x"])
    y_dash = np.array(temptemptemp["y"])
    z_dash = np.array(temptemptemp["z"])

    x_dash = np.pad(x_dash, (200 - len(x_dash), 0), "constant")
    y_dash = np.pad(y_dash, (200 - len(y_dash), 0), "constant")
    z_dash = np.pad(z_dash, (200 - len(z_dash), 0), "constant")

    # a = np.vstack((temptemptemp["x"].apply(lambda x: np.array(x, dtype="float32")), temptemptemp["y"].apply(lambda x: np.array(x, dtype="float32")), temptemptemp["z"].apply(lambda x: np.array(x, dtype="float32"))))
    a = np.transpose(np.vstack((x_dash, y_dash, z_dash)))
    final_temp.append(a)
    labels_temp.append(temptemptemp.head(1)["tac"])
  final.append(np.array(final_temp))
  labels.append(np.array(labels_temp))
  # print(final)
  
  # break
# print(np.array(final,dtype=object).shape)


In [260]:
# temptemptemp


Unnamed: 0,pid,x,y,z,time,tac
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870,0
47152,BK7610,0.1133,0.0080,-0.0172,1493735870,0
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870,0
47146,BK7610,0.1155,-0.0284,0.0000,1493735870,0
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870,0
...,...,...,...,...,...,...
47856,BK7610,-0.0535,-0.0496,-0.0077,1493735879,0
47804,BK7610,-0.0108,-0.0559,0.0268,1493735879,0
47852,BK7610,-0.0672,-0.0969,0.0055,1493735879,0
47854,BK7610,-0.0636,-0.0894,0.0330,1493735879,0


In [263]:
# [-0.1331,  0.026 ,  0.1165, -0.0039, -0.1326, -0.1095,  0.1165,
        #  0.1467, -0.1494, -0.0233]
a.shape

(200, 3)

In [264]:
final_arr = np.asarray(final).astype('float32')

# final_arr = np.reshape(final_arr, (final_arr.shape[0], final_arr.shape[1], final_arr.shape[3], final_arr.shape[2]))

labels_arr = np.asarray(labels).astype('float32')
print(final_arr.shape)
print(labels_arr.shape)




(1, 30735, 200, 3)
(1, 30735, 1)


In [347]:
# final_arr[0][9][0:19] == final_arr[0][0][180:199]
# np.unique(labels_arr)

array([0., 1.], dtype=float32)

In [265]:
final_arr_reshape = np.reshape(final_arr, (30735, 200, 3)) 
labels_arr_reshape = np.reshape(labels_arr, (30735,1))
final_arr_reshape.shape, labels_arr_reshape.shape 



((30735, 200, 3), (30735, 1))

In [429]:
indices = range(len(final_arr_reshape))
indices = tf.random.shuffle(indices)

final_arr_reshape = tf.gather(final_arr_reshape, indices)
labels_arr_reshape = tf.gather(labels_arr_reshape, indices)



In [137]:
# from TF_Model import Drunk
# drunk = Drunk()

# drunk.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), \
#                    optimizer=tf.keras.optimizers.Adam(0.03), \
#                    metrics=[tf.keras.metrics.Accuracy()]) 

# drunk.build((30735, 600))

## Baseline Model 
#### 3 hidden layers (32 nodes, RELU, 32 nodes, RELU, 16 nodes, RELU)  
#### output layer uses sigmoid activation

In [454]:
base_model_data = np.reshape(final_arr, (30735, 200, 3)) 
base_model_labels = np.reshape(labels_arr, (30735,1))

batch_size = 32
epochs = 50
flatten = tf.keras.layers.Flatten()
fc_layer1 = tf.keras.layers.Dense(units=1024, activation = 'relu')
fc_layer2 = tf.keras.layers.Dense(units=768, activation = 'relu')
fc_layer3 = tf.keras.layers.Dense(units=512, activation = 'relu')
fc_layer4 = tf.keras.layers.Dense(units=256, activation = 'relu')
fc_layer5 = tf.keras.layers.Dense(units=128, activation = 'relu')
fc_layer6 = tf.keras.layers.Dense(units=64, activation = 'relu')
fc_layer7 = tf.keras.layers.Dense(units=32, activation = 'relu')
fc_layer8 = tf.keras.layers.Dense(units=1, activation = 'sigmoid')
# Cant use softmax at the end since it will normalize and give 1 
base_model = tf.keras.Sequential([
    flatten,
 fc_layer1, fc_layer2, 
fc_layer3, 
fc_layer4,
fc_layer5,
fc_layer6,
fc_layer7,
fc_layer8
])

base_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits = False), \
                   optimizer=tf.keras.optimizers.Adam(0.001, beta_1=0.9, beta_2= 0.999), \
                   metrics=[tf.keras.metrics.Accuracy()]) 

base_model.build((30735, 200, 3))
base_model.summary()

Model: "sequential_78"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_76 (Flatten)        (30735, 600)              0         
                                                                 
 dense_250 (Dense)           (30735, 1024)             615424    
                                                                 
 dense_251 (Dense)           (30735, 768)              787200    
                                                                 
 dense_252 (Dense)           (30735, 512)              393728    
                                                                 
 dense_253 (Dense)           (30735, 256)              131328    
                                                                 
 dense_254 (Dense)           (30735, 128)              32896     
                                                                 
 dense_255 (Dense)           (30735, 64)             

In [455]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=100)
base_model.fit(base_model_data, base_model_labels, epochs = 1000,
               batch_size = 256, 
               verbose=1, 
               callbacks=[callback]) 

# Paper baseline model as-it-is
#                Epoch 500/500
# 961/961 [==============================] - 1s 579us/step - loss: 0.0989 - accuracy: 0.4819

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x3149ef250>

In [410]:
# base_model_data[0:32]
base_model(base_model_data[0:1])
# base_model_labels[0]

array([0.], dtype=float32)

In [468]:
loss, accuracy = base_model.evaluate(base_model_data, base_model_labels,
                                    #  batch_size = batch_size, 
                                     verbose=0)

loss,accuracy

(3.6636794931155237e-08, 0.7729299068450928)

In [459]:
base_model.save('../code/base_model_dense_layers_only.h5')

In [458]:
base_model.save('../code/base_model_dense_layers.pkl')



INFO:tensorflow:Assets written to: ../code/base_model_dense_layers.pkl/assets


INFO:tensorflow:Assets written to: ../code/base_model_dense_layers.pkl/assets


In [462]:
# Read clean tac data for pid = BK7610
clean_tac_data = pd.read_csv('../data/clean_tac/JB3156_clean_TAC.csv')
# clean_tac_data = pd.read_csv('../data/clean_tac/JB3156_clean_TAC.csv')
clean_tac_data["tac"] = np.where(clean_tac_data["TAC_Reading"] > 0.08, 1, 0)
clean_tac_data = clean_tac_data.drop(columns="TAC_Reading")
clean_tac_data = clean_tac_data.rename(columns={"tac": "TAC_Reading"})
# clean_tac_data.describe()


# Filtering for specific PID (temps)
acc_data_pid = acc_data[acc_data.pid == "JB3156"]
# acc_data_pid['pid'].unique()

# Up sampling tac data to match acc data
clean_ts = clean_tac_data['timestamp'] 
acc_ts = acc_data_pid['time']
all_labels = list()
offset_tac, offset_acc = 0, 0
# print(acc_ts.iloc[0])
# print(clean_ts.loc[0])
# print(clean_tac_data.loc[0]['TAC_Reading'])
# # acc_ts.iloc[0] #1493735870653
while offset_tac < len(clean_ts) and offset_acc < len(acc_ts):
  
  while acc_ts.iloc[offset_acc] < clean_ts.iloc[offset_tac]:
    all_labels.append([clean_tac_data.iloc[offset_tac]['TAC_Reading'], acc_ts.iloc[offset_acc]])
    offset_acc += 1
    if offset_acc >= len(acc_ts):
      break

  offset_tac += 1

# all_labels

all_labels_df = pd.DataFrame(all_labels, columns = ["tac", "time"])
# all_labels_df.shape, acc_data_pid.shape

# acc_data_pid['tac_reading'] = 
# TODO: Make sure tac data is sorted on timestamp
clean_tac_data["from"] = clean_tac_data["timestamp"].shift(1, fill_value=-1) + 1

clean_tac_data.index = pd.IntervalIndex.from_arrays(clean_tac_data["from"], clean_tac_data["timestamp"], closed = "both")
acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])
# acc_data_pid

# TODO: Make n = 10 after either removing one record which has 7 records for a second or by adding 3 dummy values to it (latter is better)
# frame_temp.groupby([ "pid", "window10"]).count().describe()
# We are sampling with replacement, which should be okay since it is within a second
acc_data_pid_20s = acc_data_pid.groupby([ "pid", "time"]).sample(n = 20, replace=True)

acc_data_sliding = acc_data_pid_20s.copy()

# # x_sliding_window.shape
pids = ["JB3156"]
final = []
labels = []
for pid in pids:
  # temptemp = acc_data_pid_20s[acc_data_pid_20s['pid'] == pid]
  temptemp = acc_data_sliding[acc_data_sliding['pid'] == pid]
  times = temptemp.time.unique()
  final_temp =[]
  labels_temp = []
  for i in range(len(times)):
    # x = np.lib.stride_tricks.sliding_window_view(frame_temp2[frame_temp2.pid == pid and frame_temp2.window10 == time], window_shape = 10)
    # temptemptemp = temptemp[temptemp['time'] == time]
    time_to_filter = [times[j] if j >= 0 else -1 for j in range(i-9, i+1)]
    # print(time_to_filter)
    # if i == 10:
    #   break
    temptemptemp = temptemp[temptemp['time'].isin(time_to_filter)]
    # TODO: Create x y z sliding windows
    # x = np.lib.stride_tricks.sliding_window_view(temptemptemp["x"], window_shape = 10)
    # y = np.lib.stride_tricks.sliding_window_view(temptemptemp["y"], window_shape = 10)
    # z = np.lib.stride_tricks.sliding_window_view(temptemptemp["z"], window_shape = 10)

    # x_dash = np.array(temptemptemp[["x_"+str(i) for i in range(window_size)]]).flatten()
    # y_dash = np.array(temptemptemp[["y_"+str(i) for i in range(window_size)]]).flatten()
    # z_dash = np.array(temptemptemp[["z_"+str(i) for i in range(window_size)]]).flatten()

    x_dash = np.array(temptemptemp["x"])
    y_dash = np.array(temptemptemp["y"])
    z_dash = np.array(temptemptemp["z"])

    x_dash = np.pad(x_dash, (200 - len(x_dash), 0), "constant")
    y_dash = np.pad(y_dash, (200 - len(y_dash), 0), "constant")
    z_dash = np.pad(z_dash, (200 - len(z_dash), 0), "constant")

    # a = np.vstack((temptemptemp["x"].apply(lambda x: np.array(x, dtype="float32")), temptemptemp["y"].apply(lambda x: np.array(x, dtype="float32")), temptemptemp["z"].apply(lambda x: np.array(x, dtype="float32"))))
    a = np.transpose(np.vstack((x_dash, y_dash, z_dash)))
    final_temp.append(a)
    labels_temp.append(temptemptemp.head(1)["tac"])
  final.append(np.array(final_temp))
  labels.append(np.array(labels_temp))
  # print(final)
  
  # break
# print(np.array(final,dtype=object).shape)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])


In [464]:
final_arr = np.asarray(final).astype('float32')

# final_arr = np.reshape(final_arr, (final_arr.shape[0], final_arr.shape[1], final_arr.shape[3], final_arr.shape[2]))

labels_arr = np.asarray(labels).astype('float32')
print(final_arr.shape)
print(labels_arr.shape)

(1, 25559, 200, 3)
(1, 25559, 1)


In [465]:
final_arr_reshape = np.reshape(final_arr, (25559, 200, 3)) 
labels_arr_reshape = np.reshape(labels_arr, (25559,1))
final_arr_reshape.shape, labels_arr_reshape.shape 

((25559, 200, 3), (25559, 1))

In [467]:
loss, accuracy = base_model.evaluate(final_arr_reshape, labels_arr_reshape,
                                    #  batch_size = batch_size, 
                                     verbose=0)

round(loss,4), accuracy

(3410338560.0, 0.5765483975410461)

## CNN Base Model

In [124]:
# import tensorflow as tf

# class MyModel(tf.keras.Model):

#   def __init__(self):
#     super().__init__()
#     flatten = tf.keras.layers.Flatten()
#     conv_layer1 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3)
#     conv_layer2 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3)
#     dropout = tf.keras.layers.Dropout(0.5)
#     max_pooling = tf.keras.layers.MaxPool1D(pool_size=2)
#     # fc - fully connected layer
#     fc_layer = tf.keras.layers.Dense(units=64, activation = 'relu')
#     fc_layer2 = tf.keras.layers.Dense(units=1, activation = 'softmax') 
#     self.base_model = tf.keras.Sequential([
#                                   conv_layer1, 
#                                   conv_layer2, 
#                                   dropout, 
#                                   max_pooling, 
#                                   # flatten, 
#                                   fc_layer, 
#                                   fc_layer2
#                                 ])


#   def call(self, inputs, training=True):
#     return self.base_model(inputs)
    
#     # if training:
#     #   x = self.dropout(x, training=training)
#     # return self.dense2(x)

# model = MyModel()

In [125]:
# model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), \
#                    optimizer=tf.keras.optimizers.Adam(0.001), \
#                    metrics=[tf.keras.metrics.Accuracy()]) 

# model.build((30735, 3, 200))
# model.summary()

In [126]:
# model.fit(final_arr_reshape, labels_arr_reshape, epochs = 100,
#             #    batch_size = batch_size, 
#                verbose=1) 

In [388]:
batch_size = 32
epochs = 50
# prep_data = pd.read_csv('../data/good_again_bhas.csv')
flatten = tf.keras.layers.Flatten()
conv_layer1 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3, padding='SAME')
conv_layer2 = tf.keras.layers.Conv1D(filters = 64, kernel_size = 3, padding='SAME')
dropout = tf.keras.layers.Dropout(0.5)
max_pooling = tf.keras.layers.MaxPool1D(pool_size=2)
# fc - fully connected layer
fc_layer = tf.keras.layers.Dense(units=128, activation = 'relu')
fc_layer2 = tf.keras.layers.Dense(units=1, activation = 'softmax')
base_model = tf.keras.Sequential([
                                  conv_layer1,  
                                  conv_layer2, 
                                  dropout, 
                                  max_pooling, 
                                  flatten, 
                                  fc_layer, 
                                  fc_layer2
                                ])

# base_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0.0001), \
base_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), \
                   optimizer=tf.keras.optimizers.Adam(0.001), \
                   metrics=[tf.keras.metrics.Accuracy()]) 

In [389]:
# loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=False)
# optimizer = tf.keras.optimizers.Adam(0.001)

# for e in range(0, 5000):
#     with tf.GradientTape() as tape:
#         preds = base_model(final_arr_reshape)
#         print(preds)
        
#         # loss = loss_func(y_true=labels_arr_reshape, y_pred=preds)
#         # acc = np.sum(np.equal(labels, preds)) / 30735
#     #     print(acc)
#     # gradients = tape.gradient(loss, base_model.trainable_variables)
#     # optimizer.apply_gradients(zip(gradients, base_model.trainable_variables))
#     # print(f"Epoch: {e} | LOSS : {loss} | acc {acc}")

# # loss
            
# # total_loss += loss
# # gradients = tape.gradient(loss, model.trainable_variables)
# #     optimizer.apply_gradients(zip(gradients, model.trainable_variables))
# #     return total_loss


In [390]:
base_model.build((30735, 200, 3))

In [391]:
base_model.summary()

Model: "sequential_57"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_48 (Conv1D)          (30735, 200, 64)          640       
                                                                 
 conv1d_49 (Conv1D)          (30735, 200, 64)          12352     
                                                                 
 dropout_31 (Dropout)        (30735, 200, 64)          0         
                                                                 
 max_pooling1d_24 (MaxPoolin  (30735, 100, 64)         0         
 g1D)                                                            
                                                                 
 flatten_55 (Flatten)        (30735, 6400)             0         
                                                                 
 dense_140 (Dense)           (30735, 128)              819328    
                                                     

In [392]:
np.unique(labels_arr, return_counts=True)

(array([0., 1.], dtype=float32), array([11238, 19497]))

In [393]:
print(all_labels_df.shape)
all_labels_df['tac'].value_counts()


(1225727, 2)


tac
1    778034
0    447693
Name: count, dtype: int64

In [394]:
base_model.fit(final_arr_reshape, labels_arr_reshape, epochs = epochs,
               batch_size = batch_size, 
               verbose=1) 

# base_model.fit(base_model_data, base_model_labels, epochs = 100,
#             #    batch_size = batch_size, 
#                verbose=1) 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
136/961 [===>..........................] - ETA: 5s - loss: 0.5739 - accuracy: 0.6321

KeyboardInterrupt: 

In [150]:
# loss, accuracy = base_model.evaluate(final_arr_reshape, labels_arr_reshape,
#                                     #  batch_size = batch_size, 
#                                      verbose=0)

loss, accuracy = base_model.evaluate(base_model_data, base_model_labels,
                                    #  batch_size = batch_size, 
                                     verbose=0)

round(loss,4), accuracy

(0.5554, 0.6346510648727417)

## MLP Classifier

In [416]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
clf = MLPClassifier(solver='adam', shuffle=True, random_state=1)

In [417]:
final_arr_mlp = np.reshape(final_arr, (30735, 600))
labels_arr_mlp = np.reshape(labels_arr, (30735,1))

clf.fit(final_arr_mlp, labels_arr_mlp)
clf.get_params()

# print('Accuracy ', accuracy_score(y_test, clf.predict(x_test)))

  y = column_or_1d(y, warn=True)


{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 1,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [278]:
print('Accuracy ', accuracy_score(labels_arr_mlp, clf.predict(final_arr_mlp)))

Accuracy  0.9113714006832602


## LSTM Model 
#### LSTM Layer (128 units), Dropout layer (p=0.5), Dense layer (128 units)

In [195]:
base_model_data = np.reshape(final_arr, (30735, 200, 3)) 
base_model_labels = np.reshape(labels_arr, (30735,1))

batch_size = 32
epochs = 50
# prep_data = pd.read_csv('../data/good_again_bhas.csv')
flatten = tf.keras.layers.Flatten()
lstm_layer = tf.keras.layers.LSTM(units=128)
dropout_layer = tf.keras.layers.Dropout(0.5)
fc_layer = tf.keras.layers.Dense(units=128, activation='relu', kernel_regularizer = 'l2')
fc_layer2 = tf.keras.layers.Dense(units=1, activation='softmax')
lstm_model = tf.keras.Sequential([lstm_layer, dropout_layer,
                                # flatten, 
                                fc_layer, fc_layer2])

lstm_model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), \
                   optimizer=tf.keras.optimizers.Adam(0.03), \
                   metrics=[tf.keras.metrics.Accuracy()]) 

lstm_model.build((30735, 200, 3))
lstm_model.summary()

Model: "sequential_29"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (30735, 128)              67584     
                                                                 
 dropout_23 (Dropout)        (30735, 128)              0         
                                                                 
 dense_66 (Dense)            (30735, 128)              16512     
                                                                 
 dense_67 (Dense)            (30735, 1)                129       
                                                                 
Total params: 84,225
Trainable params: 84,225
Non-trainable params: 0
_________________________________________________________________


In [196]:
lstm_model.fit(base_model_data, base_model_labels, epochs = 100,
            #    batch_size = batch_size, 
               verbose=1) 

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

KeyboardInterrupt: 

Bad pipe message: %s [b'\xb9\xe3\xb2\x16\x96\xe7\xae\xcd\xd3\x02\x08M\xd6\xdc\xa5u\xeb\xfd \x11\t\xdc@ \xa9>\xb3\xbb\x88\x18\x94\xca!B\xab\xbeaS\xb6\x88;\x00\xf5\x15\xc2\x13A\xd5M3@\x00 ::\x13\x01\x13\x02\x13\x03\xc0+\xc0/\xc0,\xc00\xcc\xa9\xcc\xa8\xc0\x13\xc0\x14\x00\x9c\x00\x9d\x00/\x005\x01\x00\x01\x93\xea\xea\x00\x00\x00\x0b\x00\x02\x01\x00\x00\x17\x00\x00\x00\n\x00\n\x00\x08zz\x00\x1d\x00\x17\x00\x18\x003\x00+\x00)zz\x00\x01\x00\x00\x1d\x00 lQ\x8a\x99\x04\xaf\xc1\x96\x83\x0f\xa5,(\xde1\x18\x9d\xb1JEZ"P\xf5\xba\xf7\xfdQy\xe6{W\x00\x10\x00\x0e\x00\x0c\x02h2\x08http/1.1\x00\x12\x00\x00\xff\x01\x00\x01\x00\x00\x05\x00\x05\x01\x00\x00\x00\x00\x00\x1b\x00\x03\x02\x00\x02\x00-\x00\x02\x01\x01\x00+\x00\x07\x06jj\x03\x04\x03\x03\x00#\x00\x00Di\x00\x05\x00\x03\x02']
Bad pipe message: %s [b'\xfa}\xe5\xe9v\r\xc4a\x8c\xa0\x9d\x8d\xb6a\xb1$\xbcG \x12\x9c2\xab\xb0\xe0\xbd3\x00\x8d\xab\xc5\x86\x81b.<\xbb\xaa\xb6\xd7\xb6\xc7\x16+\xb6f\n\x9d\x91a\xe4\x00 zz\x13\x01\x13\x02\x13\x03\xc0+\xc0/\xc0,\xc

In [None]:
loss, accuracy = lstm_model.evaluate(base_model_data, base_model_labels,
                                    #  batch_size = batch_size, 
                                     verbose=1)

round(loss,4),accuracy



(0.5258, 0.6346510648727417)

Bad pipe message: %s [b'\x02P\xcep\xd4\x9d\xebA\x8a7+\x9a+\xc0\xbe\x08\xa8\x9d 2\xd4\xb9\xf5;\xc4\xe0\xde\xdeeKk\x1e\x01\x8e\xc3\x86\xb5\x03\x0e\x8e\x83\x82@\xfe\x06\x863Z\x91V\x87\x00 **\x13\x01\x13\x02\x13\x03\xc0+\xc0/\xc0,\xc00\xcc\xa9\xcc\xa8\xc0\x13\xc0\x14\x00\x9c\x00\x9d\x00/\x005\x01\x00\x01\x93\xba\xba\x00\x00\x00\x05\x00\x05\x01\x00\x00\x00\x00\x00\x1b\x00\x03\x02\x00\x02\x00\x17\x00\x00\x00\n\x00\n\x00\x08\x8a\x8a\x00\x1d\x00\x17\x00\x18\xff\x01\x00\x01\x00Di\x00\x05\x00\x03\x02h2\x00\r\x00\x12\x00\x10\x04\x03\x08\x04\x04\x01\x05\x03\x08\x05\x05\x01\x08\x06\x06\x01\x003\x00+\x00)\x8a\x8a\x00\x01\x00\x00\x1d\x00 \x15\xbb\xcc+\xdf\xd6R\xc2\xf4i\x96\xd2{x^\xe4#\xd4>\x9cq\xb6[']
Bad pipe message: %s [b'\x8e\x16\xea\xe5\x02\x89\x08\x00+\x00\x07\x06\xba\xba\x03\x04\x03\x03\x00\x0b\x00\x02\x01\x00\x00\x10\x00\x0e\x00\x0c\x02h2\x08http/1.1\x00-\x00\x02\x01\x01\x00\x12\x00\x00\x00#\x00\x00\xfa\xfa\x00\x01\x00\x00\x15\x00\xe0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0