In [48]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import StructField, StructType, DoubleType,MapType, StringType,ArrayType, FloatType, TimestampType, IntegerType
from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream.metadata import Metadata, DataDescriptor, \
ModuleMetadata
from scipy import signal
from scipy.interpolate import interp1d
from cerebralcortex import Kernel
from scipy.stats import skew,kurtosis,mode
from collections import Counter
# Shared CerebralCortex kernel for the 'mperf' study; every get_stream/save_stream call
# in this notebook goes through it.
# NOTE(review): the first argument is presumably the CC3 config directory and is an
# absolute, machine-specific path — confirm before running elsewhere.
CC = Kernel("/home/jupyter/cc3_conf/", study_name='mperf')

  self.fs = pa.hdfs.connect(self.hdfs_ip, self.hdfs_port)


In [49]:
def get_data_for_saving(data,
                        data_acl,
                        stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity',
                        acl_stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all',
                        activities = ['Walking'],
                        window_size = 60,
                        base_window_size  = 10):
    """Pair fixed-length activity windows with the accelerometer samples recorded in them.

    Activity predictions are restricted to ``activities`` and grouped into
    ``window_size``-second tumbling windows per user/version/day. Only windows holding
    exactly ``window_size // base_window_size`` predictions are kept, and each one is
    labelled with its most frequent prediction. Accelerometer rows are grouped into the
    same windows, each sample stored as ``[time, aclx, acly, aclz]`` with ``time`` being
    the timestamp cast to double. The two sides are inner-joined on ``(user, window)``.

    Args:
        data: activity-prediction stream exposing the columns localtime, timestamp, day,
            prediction, user and version.
        data_acl: accelerometer stream exposing the columns localtime, timestamp, aclx,
            acly, aclz, user and version.
        stream_name: prefix of the output stream name; the result is named
            ``stream_name + '.' + str(window_size) + '.secs'``.
        acl_stream_name: not used by this function; kept so existing callers keep working.
        activities: prediction labels to keep. The default list is never mutated here.
        window_size: window length in seconds.
        base_window_size: divisor used to derive the required number of predictions per
            window (presumably the spacing, in seconds, between predictions — confirm).

    Returns:
        DataStream wrapping the joined data, with one data descriptor per output column.
        The joined columns are user, window, version, day, prediction and data.
    """
    window_duration = str(window_size)+' seconds'
    expected_predictions = window_size//base_window_size

    # ---- activity side: complete windows with their majority label ----
    data = data.select('localtime','timestamp','day','prediction','user','version')
    data  = data.filter(F.col('prediction').isin(activities))
    groupbycols = ['user','version','day',F.window('timestamp',windowDuration=window_duration, startTime='0 seconds')]
    data_windowed = data.groupBy(groupbycols).agg(F.collect_list('prediction')).withColumnRenamed('collect_list(prediction)','prediction')
    # Discard windows that are missing predictions.
    data_windowed = data_windowed.filter(F.size(F.col('prediction'))==expected_predictions)

    def get_most_frequent(a):
        # most_common(1) yields the same winner as most_common()[0] without ranking every label.
        return Counter(a).most_common(1)[0][0]
    qfunction = F.udf(get_most_frequent,StringType())
    data_windowed = data_windowed.withColumn('prediction',qfunction(data_windowed['prediction']))
    data_windowed = data_windowed.filter(F.col('prediction').isin(activities))

    # ---- accelerometer side: all samples of each window packed into one list ----
    data_acl = data_acl.select('localtime','timestamp','aclx','acly','aclz','user','version')
    data_acl = data_acl.withColumn('time',F.col('timestamp').cast('double'))
    data_acl = data_acl.withColumn('data',F.array('time','aclx','acly','aclz')).drop('time','aclx','acly','aclz')
    groupbycols = ['user','version',F.window('timestamp',windowDuration=window_duration, startTime='0 seconds')]
    # NOTE(review): collect_list gives no ordering guarantee; sort each list on its first
    # element (time) downstream if sample order matters.
    data_acl_windowed = data_acl.groupBy(groupbycols).agg(F.collect_list('data')).withColumnRenamed('collect_list(data)','data')

    # Both sides carry 'version' but the join key is only (user, window). Drop the
    # accelerometer-side copy so the result has a single, unambiguous 'version' column
    # (the activity stream's) instead of two columns with the same name.
    data_acl_windowed = data_acl_windowed.drop('version')
    data_joined = data_windowed.join(data_acl_windowed,on=['user','window'],how='inner')

    # ---- metadata: one descriptor per output column ----
    description = "ACL data saving for REID model, window size = "+str(window_size)+' secs'
    schema = data_joined.schema
    stream_metadata = Metadata()
    stream_metadata.set_name(stream_name+'.'+str(window_size)+'.secs').set_description(description)
    for field in schema.fields:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(str(field.name)).set_type(str(field.dataType))
        )
    stream_metadata.add_module(
        ModuleMetadata().set_name(description) \
        .set_attribute("url", "https://md2k.org").set_author(
            "Md Azim Ullah", "mullah@memphis.edu"))
    ds = DataStream(data=data_joined,metadata=stream_metadata)
    return ds

In [50]:
import pickle

# Run configuration for the export below.
base_window_size = 10
stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity'
activities = ['Walking']
window_size = 20
acl_stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all'
# This user is skipped by the loop (presumably already exported — confirm).
user_id1 = '3ca3dbf5-2390-409e-bd2c-c9f23a255e75'

# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
# The context manager closes the handle instead of leaving it to the garbage collector.
with open('./data/users.p','rb') as users_file:
    users = pickle.load(users_file)

for user_id in users:
    if user_id==user_id1:
        continue
    data = CC.get_stream(stream_name,user_id=user_id)
    data_acl = CC.get_stream(acl_stream_name,user_id=user_id)
    data_final = get_data_for_saving(data,
                            data_acl,
                            stream_name = stream_name,
                            acl_stream_name = acl_stream_name,
                            activities = activities,
                            window_size = window_size,
                            base_window_size  = base_window_size)
    CC.save_stream(data_final,overwrite=False)
    # Read the saved stream back for this user and export it as a pandas pickle.
    data = CC.get_stream(stream_name+'.'+str(window_size)+'.secs',user_id=user_id)
    df = data._data.toPandas()
    # Close (and thereby flush) each output file before moving on to the next user.
    with open('./data/right_wrist/'+user_id+'.p','wb') as out_file:
        pickle.dump(df,out_file)
    print(user_id)

0990887a-6163-4c80-9c9e-468ea2598202
61d1a237-d70f-49b0-89ba-cea4d2526832
8aa1bd02-ee43-4e9d-b7f7-7ddc66b607f9
1babaec3-2adb-4812-a377-c0a759a1f624
87d70bed-3ed0-455c-a144-9fd955229125
db4a2be2-d180-4fa5-b3b8-41b91c2a641c
c7e9149b-94da-4733-92eb-1395c724fc7b
08b3a46a-f926-4a57-8723-b78b53c33729
fd36e160-50a0-4dad-9357-65ea218c8d3c
5cd4f692-3b13-4728-9df3-debc682e42dd
95085684-88ec-4d2a-8eba-a38268018193
d83ac187-97cd-4ee0-a35f-5a1ffe6c7885
3b9ff2e4-dfec-4022-8994-1a0c4db7227a
072c81f7-4410-4301-8fd0-17337c0ac1e8
0c824653-a13b-4a4e-b907-660f1d8f8981
f244a6e2-97bf-4c57-8fb9-ed1ca1774c37
d1392516-4b33-47c0-81b2-066fa7210135
ac48132f-2c65-4762-bb64-ed8f733a540d
a6c16f12-0987-4690-87fe-336710f96398
3c1b90f5-dd19-4872-8175-9dede757c9c6
b7a05945-a70a-4196-a3bf-d49313a5d12a
be4297a8-d763-42e2-a2cb-cab38f64cfe3
ee641ee3-b4b9-4a2f-8394-ebda81037fe4
c2ad5056-c319-4b12-8e04-f6d63f61859f
bbc41a1e-4bbe-4417-a40c-64635cc552e6
b7c267ae-857f-4f5f-9c24-f5236bba1f89
8726f4c1-1507-4a6b-ae29-9b13fd9ce9e2
a

In [33]:
# Fetch the windowed stream (no user_id filter is passed here).
data = CC.get_stream(f"{stream_name}.{window_size}.secs")

In [36]:
# Convert the stream's underlying frame to pandas.
# NOTE(review): reaches into the private `_data` attribute; if this is a Spark DataFrame,
# toPandas() collects every row onto the driver — confirm the stream is small enough.
df = data._data.toPandas()

In [40]:
import pickle

# Write the frame for the current `user_id`; the context manager closes (and flushes)
# the file, which the bare open(...) call never did.
with open('./data/right_wrist/'+user_id+'.p','wb') as out_file:
    pickle.dump(df,out_file)

In [9]:
# Preview one row without the bulky per-window 'data' column.
# NOTE(review): the second argument presumably disables truncation, as in Spark's show() — confirm.
data_final.drop('data').show(1,False)

+------------------------------------+------------------------------------------+-------+--------+----------+
|user                                |window                                    |version|day     |prediction|
+------------------------------------+------------------------------------------+-------+--------+----------+
|3ca3dbf5-2390-409e-bd2c-c9f23a255e75|[2017-10-30 03:25:00, 2017-10-30 03:25:20]|1      |20171029|Walking   |
+------------------------------------+------------------------------------------+-------+--------+----------+
only showing top 1 row



In [None]:
# Load the stream whose name ends in '.60.secs' (no user_id filter is passed).
# Note: this rebinds `data`, which other cells also use.
data = CC.get_stream('org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity.60.secs')

In [47]:
# Inspect the sample list stored in the 'data' column of the row labelled 0.
df.loc[0, 'data']

[[1509346200.031, -0.8089599609375, 1.8641357421875, 0.172607421875],
 [1509346200.070963, -0.8094482421875, 1.8634033203125, 0.1729736328125],
 [1509346200.110927, -0.8094482421875, 1.8634033203125, 0.1729736328125],
 [1509346200.150891, -0.809326171875, 1.8629150390625, 0.171875],
 [1509346200.190855, -0.8095703125, 1.8626708984375, 0.17138671875],
 [1509346200.230818, -0.8095703125, 1.8626708984375, 0.17138671875],
 [1509346200.270782, -0.8087158203125, 1.86279296875, 0.1732177734375],
 [1509346200.310746, -0.8087158203125, 1.86279296875, 0.1732177734375],
 [1509346200.35071, -0.81005859375, 1.863037109375, 0.1729736328125],
 [1509346200.390674, -0.8087158203125, 1.8631591796875, 0.171142578125],
 [1509346200.430637, -0.8087158203125, 1.8631591796875, 0.171142578125],
 [1509346200.470601, -0.80908203125, 1.86328125, 0.17236328125],
 [1509346200.510565, -0.80908203125, 1.86328125, 0.17236328125],
 [1509346200.550528, -0.80859375, 1.86328125, 0.173095703125],
 [1509346200.590492, -0.8

In [42]:
# Display the user ids held in `users`.
users

array(['3ca3dbf5-2390-409e-bd2c-c9f23a255e75',
       '0990887a-6163-4c80-9c9e-468ea2598202',
       '61d1a237-d70f-49b0-89ba-cea4d2526832',
       '8aa1bd02-ee43-4e9d-b7f7-7ddc66b607f9',
       '1babaec3-2adb-4812-a377-c0a759a1f624',
       '87d70bed-3ed0-455c-a144-9fd955229125',
       'db4a2be2-d180-4fa5-b3b8-41b91c2a641c',
       'c7e9149b-94da-4733-92eb-1395c724fc7b',
       '08b3a46a-f926-4a57-8723-b78b53c33729',
       'fd36e160-50a0-4dad-9357-65ea218c8d3c',
       '5cd4f692-3b13-4728-9df3-debc682e42dd',
       '95085684-88ec-4d2a-8eba-a38268018193',
       'd83ac187-97cd-4ee0-a35f-5a1ffe6c7885',
       '3b9ff2e4-dfec-4022-8994-1a0c4db7227a',
       '072c81f7-4410-4301-8fd0-17337c0ac1e8',
       '0c824653-a13b-4a4e-b907-660f1d8f8981',
       'f244a6e2-97bf-4c57-8fb9-ed1ca1774c37',
       'd1392516-4b33-47c0-81b2-066fa7210135',
       'ac48132f-2c65-4762-bb64-ed8f733a540d',
       'a6c16f12-0987-4690-87fe-336710f96398',
       '3c1b90f5-dd19-4872-8175-9dede757c9c6',
       'b7a05

In [51]:
import shutil

In [1]:
import pickle

In [9]:
import os

# Seconds covered by one row of an exported frame.
# NOTE(review): 20 matches the window_size used for the export — confirm if that changes.
WINDOW_SECONDS = 20

# Hours of data in each exported per-user pickle.
hours = []
for f in os.listdir('./data/right_wrist/'):
    # NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
    # The context manager closes each file instead of leaking one handle per iteration.
    with open('./data/right_wrist/'+f,'rb') as pickle_file:
        data = pickle.load(pickle_file)
    hours.append(data.shape[0]*WINDOW_SECONDS/3600)

In [8]:
# Hours covered by the frame currently bound to `data`: rows * 20 / 3600
# (presumably 20 seconds per row — confirm against the export's window size).
data.shape[0]*20/3600

1.4277777777777778

In [10]:
import matplotlib.pyplot as plt

In [12]:
from collections import Counter

In [16]:
import numpy as np
# Count how many entries of `hours` fall into each whole-hour bucket (floor of the value).
Counter(np.floor(hours))

Counter({4.0: 26,
         5.0: 28,
         0.0: 111,
         3.0: 42,
         1.0: 80,
         2.0: 55,
         6.0: 14,
         11.0: 5,
         7.0: 9,
         10.0: 3,
         14.0: 1,
         28.0: 1,
         8.0: 5,
         9.0: 3,
         16.0: 1,
         13.0: 2,
         225.0: 1,
         18.0: 1,
         12.0: 1})