In [1]:
import os
import pandas as pd
import numpy as np
import datetime
import scipy
from scipy.stats import skew, kurtosis

In [144]:
# Read Accelerometer Data
acc_data = pd.read_csv('../data/all_accelerometer_data_pids_13.csv')

def get_time_value(x):
  # x is ms. it is divided by 1000 to get microservond
  t = datetime.datetime.fromtimestamp(x/1000.0)
  t = t.replace(microsecond = 0)
  return int(t.timestamp())

acc_data['window10'] = acc_data['time'].apply(get_time_value)
acc_data = acc_data.drop(columns="time")
acc_data = acc_data.rename(columns = {"window10": "time"})

acc_data.head()

Unnamed: 0,time,pid,x,y,z,window10
0,0,JB3156,0.0,0.0,0.0,0
1,0,CC6740,0.0,0.0,0.0,0
2,1493733882409,SA0297,0.0758,0.0273,-0.0102,1493733882
3,1493733882455,SA0297,-0.0359,0.0794,0.0037,1493733882
4,1493733882500,SA0297,-0.2427,-0.0861,-0.0163,1493733882


In [121]:

# Read clean tac data for pid = BK7610
clean_tac_data = pd.read_csv('../data/clean_tac/BK7610_clean_TAC.csv')
clean_tac_data.describe()


Unnamed: 0,timestamp,TAC_Reading
count,57.0,57.0
mean,1493758000.0,0.041313
std,28415.95,0.050705
min,1493719000.0,-0.002732
25%,1493729000.0,0.000744
50%,1493756000.0,0.012099
75%,1493782000.0,0.074544
max,1493808000.0,0.171758


In [146]:
# Filtering for specific PID (temps)
acc_data_pid = acc_data[acc_data.pid == "BK7610"]
acc_data_pid

Unnamed: 0,pid,x,y,z,time
47136,BK7610,0.1261,-0.0078,-0.0243,1493735870
47138,BK7610,0.1336,-0.0697,-0.0446,1493735870
47140,BK7610,0.1443,-0.0474,-0.0447,1493735870
47142,BK7610,0.1255,-0.0038,0.0111,1493735870
47144,BK7610,0.1076,0.0032,0.0276,1493735870
...,...,...,...,...,...
6071104,BK7610,-0.0784,-0.0161,0.1719,1493767770
6071108,BK7610,-0.0395,-0.0816,0.1634,1493767770
6071112,BK7610,0.0160,-0.0853,0.0906,1493767770
6071117,BK7610,0.0901,-0.0767,0.0162,1493767770


In [149]:
# Up sampling tac data to match acc data
clean_ts = clean_tac_data['timestamp'] 
acc_ts = acc_data_pid['time']
all_labels = list()
offset_tac, offset_acc = 0, 0
# print(acc_ts.iloc[0])
# print(clean_ts.loc[0])
# print(clean_tac_data.loc[0]['TAC_Reading'])
# # acc_ts.iloc[0] #1493735870653
while offset_tac < len(clean_ts) and offset_acc < len(acc_ts):
  
  while acc_ts.iloc[offset_acc] < clean_ts.iloc[offset_tac]:
    all_labels.append([clean_tac_data.iloc[offset_tac]['TAC_Reading'], acc_ts.iloc[offset_acc]])
    offset_acc += 1
    if offset_acc >= len(acc_ts):
      break

  offset_tac += 1

all_labels


[[0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735870],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205, 1493735871],
 [0.0653566311750205

In [157]:
all_labels_df = pd.DataFrame(all_labels, columns = ["tac", "time"])
all_labels_df.shape, acc_data_pid.shape

# merged = merged.drop_duplicates().reset_index(drop=True)
# merged.to_csv("../data/BK7610_final_final.csv")

((1225727, 2), (1225727, 5))

In [170]:
# merged = acc_data_pid.head(10).merge(all_labels_df.head(10), on = 'time', how='inner')
# merged['time'].value_counts()

1493735870    100
Name: time, dtype: int64

In [183]:
# len(all_labels_df['time'].unique()), len(acc_data_pid['time'].unique())

(30735, 30735)

In [197]:
# acc_data_pid['tac_reading'] = 
clean_tac_data["from"] = clean_tac_data["timestamp"].shift(1, fill_value=-1) + 1


In [203]:
clean_tac_data.index = pd.IntervalIndex.from_arrays(clean_tac_data["from"], clean_tac_data["timestamp"], closed = "both")


47136    0.065357
47138    0.065357
47140    0.065357
47142    0.065357
47144    0.065357
           ...   
47326    0.065357
47328    0.065357
47330    0.065357
47332    0.065357
47334    0.065357
Name: time, Length: 100, dtype: float64

In [204]:
acc_data_pid['tac'] = acc_data_pid["time"].apply(lambda x: clean_tac_data.iloc[clean_tac_data.index.get_loc(x)]["TAC_Reading"])
acc_data_pid

KeyboardInterrupt: 

In [71]:
# # frame.groupby(by=["pid"])
# # frame.groupby(["pid"]).count()
# def get_time_ignore_second(x):
#     t = datetime.datetime.fromtimestamp(x/1000.0)
#     t = t.replace(microsecond = 0)
#     t = t.replace(second = int(t.second / 10))
#     return t.timestamp()

# 


# frame_temp = frame[frame.pid == "SA0297"]
# frame['window10'] = frame['time'].apply(get_time_value)
# frame_temp = frame.drop(columns="time")

# frame_temp.head


In [105]:
# TODO: Make n = 10 after either removing one record which has 7 records for a second or by adding 3 dummy values to it (latter is better)
# frame_temp.groupby([ "pid", "window10"]).count().describe()
# We are sampling with replacement, which should be okay since it is within a second
# frame_temp2 = frame_temp.groupby([ "pid", "window10"]).sample(n = 20, replace=True)


In [74]:

# frame_temp2
# frame_temp2.groupby([ "pid", "window10"]).describe()

Unnamed: 0,pid,x,y,z,window10
47798,BK7610,-0.0163,-0.0280,0.0027,1.493736e+09
47348,BK7610,-0.0121,-0.0107,0.0280,1.493736e+09
47484,BK7610,-0.0023,0.0300,0.1079,1.493736e+09
47670,BK7610,0.0137,-0.0014,-0.0155,1.493736e+09
47304,BK7610,-0.0052,-0.0018,-0.0083,1.493736e+09
...,...,...,...,...,...
8652227,SF3079,0.0685,0.0441,-0.0442,1.493791e+09
8652105,SF3079,0.0071,0.0139,0.0183,1.493791e+09
8652382,SF3079,0.0619,0.1040,0.1627,1.493791e+09
8652100,SF3079,0.0057,0.0135,0.0006,1.493791e+09


In [130]:
# # x_sliding_window = np.lib.stride_tricks.sliding_window_view(data[:, 1], window_shape = 10)
# # x_sliding_window.shape
# final = []
# for pid in pids:
#   temptemp = frame_temp2.loc[frame_temp2['pid'] == pid]
#   times = temptemp.window10.unique()
#   final_temp =[]
#   for time in times:
#     # x = np.lib.stride_tricks.sliding_window_view(frame_temp2[frame_temp2.pid == pid and frame_temp2.window10 == time], window_shape = 10)
#     temptemptemp = temptemp.loc[ (temptemp['window10'] == time)]
#     # TODO: Create x y z sliding windows
#     # x = np.lib.stride_tricks.sliding_window_view(temptemp["x"], window_shape = 200)
#     # y = np.lib.stride_tricks.sliding_window_view(temptemp["y"], window_shape = 200)
#     # z = np.lib.stride_tricks.sliding_window_view(temptemp["z"], window_shape = 200)
#     a = np.vstack((temptemptemp["x"], temptemptemp["y"], temptemptemp["z"]))
#     # print(temptemp.groupby("window10").count())
#     # print(a)
#     final_temp.append(a)
#     # break
#   final.append(np.array(final_temp))
#   # print(final)
  
#   # break
# # print(np.array(final,dtype=object).shape)


In [73]:
# tac['y'] = np.where(tac['TAC_Reading']>0.08, 1.0, 0.0)
# frame_t = frame_temp[frame_temp.pid == "BK7610"]
# frame_t = frame_t.reset_index(drop=True)



In [111]:

# all_labels_t = pd.DataFrame(atermll_labels, columns = ["tac", "t"])
# all_labels_t.value_counts()


In [9]:
# prep_data = pd.read_csv('../data/good_again_bhas.csv')