# Try looking at BEAT-PD data

In [16]:
import pandas as pd
import numpy as np
from glob import glob
import tf_mapper as tfm

In [None]:
# Collect the paths of every smartwatch gyroscope CSV in the real-pd
# training_data directory.
fn = glob("/home/ms994/beat_pd/data/real-pd/training_data/smartwatch_gyroscope/*.csv")

In [None]:
# Scan every collected recording and report the ones whose CSV holds
# samples from more than one device.
for path in fn:
    df = pd.read_csv(path)
    all_ids = df["device_id"].unique()
    if len(all_ids) <= 1:
        # Single-device (or empty) recording: nothing to report.
        continue
    print(path)
    print(all_ids)

In [None]:
# Try read_seq on a single gyroscope recording.
# NOTE: read_seq is defined in the following cell, so that cell has to be
# executed before this one.
read_seq("/home/ms994/beat_pd/data/real-pd/training_data/smartwatch_gyroscope/2371bc61-2556-4ce1-a7c5-ad6c1f83fef3.csv")

In [None]:
def read_seq(fp: str, device_id=None, t_colname='t', xyz_colnames=['x', 'y', 'z'], use_time_index=False, resample=pd.Timedelta(seconds=(1/50))):
    """Read one sensor recording from CSV and return its samples indexed by time.

    Parameters
    ----------
    fp : str
        Path of the CSV file. When the path contains
        ``smartwatch_accelerometer`` or ``smartwatch_gyroscope`` the file is
        expected to have a ``device_id`` column and only the rows of a single
        device are kept.
    device_id : optional
        Device whose rows are kept for smartwatch files. ``None`` selects the
        device of the first row. Ignored for every other file.
    t_colname : str
        Name of the time column in the file.
    xyz_colnames : sequence of str
        Names of the three axis columns in the file, in x, y, z order.
        The sequence is only read, never mutated.
    use_time_index : bool
        If True, the index is converted to a ``TimedeltaIndex`` by
        interpreting the time values as seconds.
    resample : pd.Timedelta or None
        Bin width for mean resampling. Only applied when ``use_time_index``
        is True; ``None`` disables resampling.

    Returns
    -------
    pd.DataFrame
        The selected columns renamed to ``x``, ``y``, ``z`` and indexed by
        ``t``.
    """
    # str() lets path-like objects through the substring checks below.
    path = str(fp)
    df = pd.read_csv(fp)

    if "smartwatch_accelerometer" in path or "smartwatch_gyroscope" in path:
        # A header-only file has no first row to take a default device from;
        # skip the lookup instead of raising IndexError.
        if device_id is None and not df.empty:
            device_id = df.device_id.iloc[0]
        if device_id is not None:
            df = df[df.device_id == device_id]

    source_cols = [t_colname, *xyz_colnames]
    df = df[source_cols]
    df = df.rename(columns=dict(zip(source_cols, ['t', 'x', 'y', 'z'])))
    df = df.set_index('t')

    if use_time_index:
        df = df.set_index(pd.to_timedelta(df.index, unit="s"))
        if resample is not None:
            df = df.resample(resample).mean()
    return df


## Try looking at Yidi's extracted features

In [5]:
# Load the extracted-features table, using the first CSV column as the
# row index.
features_df = pd.read_csv('/home/ms994/beat_pd/extracted_features/tsfeatures.csv', index_col=0)

In [7]:
# Inspect the table dimensions as (rows, columns).
features_df.shape

(1858, 754)

## Try looking at Mark's extracted features

In [11]:
# List the CSV files in the cis-pd training_data_tsf directory.
files = glob("/home/mk596/research/beat_pd/data/cis-pd/training_data_tsf/*.csv")

In [17]:
# Fetch the labels through the project-local tf_mapper module.
# NOTE: the name `labels` is rebound to a plain list by the random-window
# sampling cell further down, so this value is lost once that cell runs.
labels = tfm.get_labels()

# Testing a saved model on randomly sampled windows

In [None]:
# Load the saved Keras model from the parent directory.
# NOTE(review): `tf` is not imported in any cell visible here (only
# `tf_mapper as tfm` is) — presumably `import tensorflow as tf` was run
# elsewhere; confirm before executing this cell in a fresh kernel.
model = tf.keras.models.load_model("../beat_pd.h5")

In [None]:
# Show the architecture summary of the loaded model.
model.summary()

In [None]:
import pickle as pkl

# NOTE(review): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
# The context managers close each file handle as soon as it has been read;
# the bare open() calls used before left both handles open.
with open("/n/scratch2/ms994/beat_pd_frequency_data.pkl", "rb") as f:
    np_x_data = pkl.load(f)
with open("/n/scratch2/ms994/beat_pd_labels_1s.pkl", "rb") as f:
    y = pkl.load(f)

In [None]:
# Compare the shapes of the loaded inputs and labels.
np_x_data.shape, y.shape

In [None]:
import numpy as np

# Run the model on 1000 randomly placed windows of 100 consecutive rows and
# record, for each window, the prediction and the on_off label of the
# window's first row.
# NOTE(review): assumes np_x_data holds at least 2210912 + 99 rows so that
# every slice is a full 100-row window — TODO confirm against the data.
allPred = []
labels = []
for _ in range(1000):
    start = np.random.randint(2210912)
    window = np.array([np_x_data[start:start + 100]])
    pred = model.predict(window.reshape(1, 100, 3, 26))
    labels.append(y.iloc[start].on_off)
    allPred.append(pred)

In [None]:
import pandas as pd

# Flatten the collected predictions, stack them with the labels as two rows
# named "Pred" and "True", then transpose so each sample becomes one row.
flat_pred = np.array(allPred).ravel()
snsObj = pd.DataFrame([flat_pred, labels], index=["Pred", "True"]).transpose()

In [None]:
import seaborn as sns

# Violin plot of the predictions ("Pred") grouped by the recorded label
# ("True").
sns.violinplot(x="True", y="Pred", data=snsObj)

In [None]:
# Count how many sampled windows fall under each label value.
# The variable is passed by keyword, matching the violin plot call: a
# positional Series is deprecated in seaborn 0.11 and is interpreted as
# `data` rather than `x` from 0.12 on.
sns.countplot(x="True", data=snsObj)