In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.types import StructField, StructType, DoubleType,MapType, StringType,ArrayType, FloatType, TimestampType, IntegerType
from cerebralcortex.core.datatypes import DataStream
from cerebralcortex.core.metadata_manager.stream.metadata import Metadata, DataDescriptor, \
ModuleMetadata
from scipy import signal
from scipy.interpolate import interp1d
from cerebralcortex import Kernel
from scipy.stats import skew,kurtosis,mode
from collections import Counter
# Build the CerebralCortex kernel from the config directory below, scoped to the 'mperf' study.
# NOTE(review): the config path is an absolute, machine-specific path.
CC = Kernel("/home/jupyter/cc3_conf/", study_name='mperf')

In [2]:
def get_data_for_saving(data,
                        data_acl,
                        stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity',
                        acl_stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all',
                        activities = None,
                        window_size = 60,
                        base_window_size  = 10):
    """Pair windowed activity predictions with the accelerometer samples of the same window.

    The prediction stream is filtered to ``activities`` and grouped into
    tumbling windows of ``window_size`` seconds; the accelerometer stream is
    grouped into the same windows, and the two are inner-joined on
    ``user`` and ``window``.

    Parameters
    ----------
    data
        Activity-prediction stream. Must expose the columns ``localtime``,
        ``timestamp``, ``day``, ``prediction``, ``user`` and ``version``.
    data_acl
        Accelerometer stream. Must expose the columns ``localtime``,
        ``timestamp``, ``aclx``, ``acly``, ``aclz``, ``user`` and ``version``.
    stream_name
        Base name of the output stream; the saved name is
        ``<stream_name>.<window_size>.secs``.
    acl_stream_name
        Currently unused; kept so existing callers keep working.
    activities
        Prediction labels to keep. A single string or any iterable of strings
        is accepted. Defaults to ``['Walking']``.
    window_size
        Window length in seconds.
    base_window_size
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    DataStream
        The joined data plus metadata with one data descriptor per column of
        the joined schema. Each row holds the first collected prediction of
        the window and ``data``, a list of ``[time, aclx, acly, aclz]`` arrays.
    """
    # Never share a mutable default between calls, and always hand `isin`
    # a real list (a tuple or bare string is not unpacked the same way).
    if activities is None:
        activities = ['Walking']
    elif isinstance(activities, str):
        activities = [activities]
    else:
        activities = list(activities)

    window_duration = str(window_size)+' seconds'
    description = "ACL data saving for REID model, window size = "+str(window_size)+' secs'

    # --- activity predictions: one label per (user, version, day, window) ---
    data = data.select('localtime','timestamp','day','prediction','user','version')
    data = data.filter(F.col('prediction').isin(activities))
    groupbycols = ['user','version','day',F.window('timestamp',windowDuration=window_duration, startTime='0 seconds')]
    data_windowed = data.groupBy(groupbycols).agg(F.collect_list('prediction').alias('prediction'))
    # Only the first collected label is kept for the window.
    data_windowed = data_windowed.withColumn('prediction',F.col('prediction').getItem(0))

    # --- accelerometer: pack each sample as [time, aclx, acly, aclz] ---
    data_acl = data_acl.select('localtime','timestamp','aclx','acly','aclz','user','version')
    data_acl = data_acl.withColumn('time',F.col('timestamp').cast('double'))
    data_acl = data_acl.withColumn('data',F.array('time','aclx','acly','aclz')).drop('time','aclx','acly','aclz')
    groupbycols = ['user','version',F.window('timestamp',windowDuration=window_duration, startTime='0 seconds')]
    # collect_list gives no ordering guarantee; consumers that need samples in
    # time order should sort on the first element of each array.
    data_acl_windowed = data_acl.groupBy(groupbycols).agg(F.collect_list('data').alias('data'))

    # NOTE(review): both sides carry a `version` column but the join keys are
    # only user/window, so the joined frame has two `version` columns.
    # Confirm that downstream saving tolerates this before changing the keys.
    data_joined = data_windowed.join(data_acl_windowed,on=['user','window'],how='inner')

    stream_metadata = Metadata()
    stream_metadata.set_name(stream_name+'.'+str(window_size)+'.secs').set_description(description)
    for field in data_joined.schema.fields:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(str(field.name)).set_type(str(field.dataType))
        )
    stream_metadata.add_module(
        ModuleMetadata().set_name(description) \
        .set_attribute("url", "https://md2k.org").set_author(
            "Md Azim Ullah", "mullah@memphis.edu"))
    return DataStream(data=data_joined,metadata=stream_metadata)

In [4]:
import pickle

# Export configuration for this run: 10-second windows of 'Walking' predictions.
base_window_size = 10
window_size = 10
stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity'
activities = ['Walking']
acl_stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all'
# This user is skipped by the loop below.
user_id1 = '6676c0e9-c6a6-413e-8355-fea361685385'

# Close the file deterministically instead of leaking the handle.
# pickle.load can execute arbitrary code: only load a users file you trust.
with open('./data/users.p','rb') as users_file:
    users = pickle.load(users_file)

# For every remaining user: load both streams, build the windowed join and save it.
for i,user_id in enumerate(users):
    if user_id==user_id1:
        continue
    data = CC.get_stream(stream_name,user_id=user_id)
    data_acl = CC.get_stream(acl_stream_name,user_id=user_id)
    data_final = get_data_for_saving(data,
                            data_acl,
                            stream_name = stream_name,
                            acl_stream_name = acl_stream_name,
                            activities = activities,
                            window_size = window_size,
                            base_window_size  = base_window_size)
    CC.save_stream(data_final,overwrite=False)
    # Progress indicator: index of the user just processed.
    print(i,end=',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,27

In [None]:
# Ad-hoc check: row count of `data_final`, i.e. whatever the most recent loop iteration assigned.
data_final.count()

In [None]:
# Keep only rows whose `prediction` is one of `activities`.
# `data` here is whatever the kernel currently holds under that name.
filtered_data = data.filter(F.col('prediction').isin(activities))

In [None]:
# Number of rows that survived the activity filter.
filtered_data.count()

In [None]:
# Row count of `data_final` again, for comparison with the filtered prediction count.
data_final.count()

In [None]:
import pickle

# Export configuration for this run: 20-second windows of 'Walking' predictions.
base_window_size = 10
stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity'
activities = ['Walking']
window_size = 20
acl_stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all'
# This user is skipped by the loop below.
user_id1 = '3ca3dbf5-2390-409e-bd2c-c9f23a255e75'

# Close the file deterministically instead of leaking the handle.
# pickle.load can execute arbitrary code: only load a users file you trust.
with open('./data/users.p','rb') as users_file:
    users = pickle.load(users_file)

for user_id in users:
    if user_id==user_id1:
        continue
    # Build and save the windowed join for this user.
    data = CC.get_stream(stream_name,user_id=user_id)
    data_acl = CC.get_stream(acl_stream_name,user_id=user_id)
    data_final = get_data_for_saving(data,
                            data_acl,
                            stream_name = stream_name,
                            acl_stream_name = acl_stream_name,
                            activities = activities,
                            window_size = window_size,
                            base_window_size  = base_window_size)
    CC.save_stream(data_final,overwrite=False)
    # Read the saved stream back, convert it to pandas and pickle it per user.
    data = CC.get_stream(stream_name+'.'+str(window_size)+'.secs',user_id=user_id)
    df = data._data.toPandas()
    # `with` guarantees the pickle is flushed and closed before the next user.
    with open('./data/right_wrist/'+user_id+'.p','wb') as out_file:
        pickle.dump(df,out_file)
    print(user_id)

In [None]:
# Load the exported '<stream_name>.<window_size>.secs' stream.
# No user_id is passed here, unlike in the per-user loop; presumably this
# returns every user's rows — TODO confirm.
data = CC.get_stream(stream_name+'.'+str(window_size)+'.secs')

In [None]:
# Convert the stream's underlying `_data` frame to a pandas DataFrame.
# NOTE(review): `_data` is a private attribute of the stream object.
df = data._data.toPandas()

In [None]:
import pickle

# Persist `df` under the current `user_id` (both come from earlier cells).
# `with` guarantees the file is flushed and closed once the dump completes.
with open('./data/right_wrist/'+user_id+'.p','wb') as out_file:
    pickle.dump(df,out_file)

In [None]:
# Print one row of `data_final` without the `data` column; the second argument
# (False) is passed to show() to turn off value truncation.
data_final.drop('data').show(1,False)

In [None]:
# Load the 60-second export by its full, hard-coded stream name (overwrites `data`).
data = CC.get_stream('org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity.60.secs')

In [None]:
# Re-open the 20-second export: compose the saved stream name first, then fetch it.
stream_name = 'org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity'
window_size = 20
windowed_stream_name = stream_name+'.'+str(window_size)+'.secs'
data = CC.get_stream(windowed_stream_name)

In [None]:
# Row count of the stream currently bound to `data`.
data.count()

In [None]:
# Load the un-windowed activity-prediction stream (overwrites `data`).
data = CC.get_stream('org.md2k.feature.motionsensehrv.decoded.rightwrist.all.activity')

In [None]:
# Drop the `magnitude` column. This rebinds `data`, so the cell depends on the
# load in the previous cell having just been run.
data = data.drop('magnitude')

In [None]:
# Rows per `prediction` label. The result is bound to `ds` but not displayed here.
ds =  data.groupBy('prediction').count()

In [None]:
# Print the column names and types of `data`.
data.printSchema()

In [None]:
# Back-of-envelope conversion: count * 10 / 3600.
# NOTE(review): presumably 825139 is a count of 10-second windows and the
# result is hours — the origin of the number is not shown; TODO confirm.
825139*10/3600

In [None]:
import os

In [None]:
# Number of entries in the per-user export directory.
len(os.listdir('./data/right_wrist/'))

In [None]:
import shutil

In [1]:
import pickle

In [9]:
import os

# One entry per file in the export directory: rows * 20 / 3600.
# NOTE(review): 20 presumably is the window length in seconds of the pickled
# export, which would make each entry a duration in hours — TODO confirm.
hours = []
for f in os.listdir('./data/right_wrist/'):
    # Close each file as soon as it is read instead of leaking one handle per file.
    # pickle.load can execute arbitrary code: only load files you trust.
    with open('./data/right_wrist/'+f,'rb') as in_file:
        data = pickle.load(in_file)
    hours.append(data.shape[0]*20/3600)

In [8]:
# Same rows * 20 / 3600 computation for whatever `data` currently holds
# (after the loop over the export directory, that is the last file loaded).
data.shape[0]*20/3600

1.4277777777777778

In [10]:
import matplotlib.pyplot as plt

In [12]:
from collections import Counter

In [16]:
import numpy as np
# Tally how many entries of `hours` fall into each whole-number bucket (values floored).
Counter(np.floor(hours))

Counter({4.0: 26,
         5.0: 28,
         0.0: 111,
         3.0: 42,
         1.0: 80,
         2.0: 55,
         6.0: 14,
         11.0: 5,
         7.0: 9,
         10.0: 3,
         14.0: 1,
         28.0: 1,
         8.0: 5,
         9.0: 3,
         16.0: 1,
         13.0: 2,
         225.0: 1,
         18.0: 1,
         12.0: 1})

In [None]:
# base_name is the archive path without its extension. With a trailing slash
# the archive was created as './data/right_wrist/.zip', i.e. inside the very
# directory being archived. Dropping the slash writes './data/right_wrist.zip'
# next to the directory instead.
shutil.make_archive('./data/right_wrist','zip','./data/right_wrist/')

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
# Make only the first physical GPU visible to TensorFlow, when there is one.
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        primary_gpu = gpus[0]
        tf.config.experimental.set_visible_devices(primary_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as err:
        # Raised when device visibility is changed after the GPUs were initialised.
        print(err)

# Fix NumPy's global random seed.
np.random.seed(100)

In [None]:
# Display the list of physical GPUs found by the setup cell.
gpus