### Definitions

In [None]:
# Mount Google Drive at /content/drive so the dataset folders used by the
# following cells are reachable from this Colab runtime.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
cd "/content/drive/MyDrive/raw/"

/content/drive/MyDrive/raw


In [None]:
import os
import statistics
import csv
import itertools
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import *
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.utils import plot_model
import seaborn as sns
from matplotlib.colors import rgb2hex
from matplotlib.cm import get_cmap
import plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler, RobustScaler
plotly.offline.init_notebook_mode()

Output hidden; open in https://colab.research.google.com to view.

### MHEALTH

In [None]:
cd "/content/drive/MyDrive/MHEALTHDATASET/"

/content/drive/MyDrive/MHEALTHDATASET


In [None]:
# Load the pickled frame 'mhealth_cnn.pkl'; the path is relative to the current
# working directory.
# NOTE(review): unpickling can execute arbitrary code — only load pickles that
# come from a trusted source.
data = pd.read_pickle('mhealth_cnn.pkl')

In [None]:
# Switch to the column names the rest of the notebook expects, in a single pass.
data = data.rename(columns={'subject': 'subject_id', 'activity_id': 'activity'})

In [None]:
from sklearn.model_selection import train_test_split

# Per-subject 80/20 split: for every subject the first 80% of its rows (in
# index order) go to the training set and the remaining 20% to the test set.
# NOTE(review): this presumes the index reflects acquisition order — confirm.
grouped_data = data.groupby('subject_id')
train_parts, test_parts = [], []

for _, group in grouped_data:
    # 'subject_id' is only needed for grouping, so it is removed from the splits.
    ordered = group.sort_index().drop(columns=['subject_id'])
    train_size = int(0.8 * len(ordered))

    train_parts.append(ordered.iloc[:train_size])
    test_parts.append(ordered.iloc[train_size:])

# Concatenate once at the end: concatenating inside the loop re-copies every
# row collected so far on each iteration and starts from an empty frame.
train_data = pd.concat(train_parts) if train_parts else pd.DataFrame()
test_data = pd.concat(test_parts) if test_parts else pd.DataFrame()

# NOTE: this rebinding makes the cell non-idempotent — running it a second time
# fails at the groupby above because 'subject_id' is already gone.
data = data.drop(columns=['subject_id'])

In [None]:
def sliding_window(data, window_size, step_size):
    """Cut ``data`` into fixed-length, possibly overlapping row windows.

    Args:
        data: Object supporting ``len()`` and positional ``.iloc`` slicing.
        window_size: Number of rows in every window.
        step_size: Row offset between the starts of consecutive windows.

    Returns:
        List of ``data.iloc`` slices of exactly ``window_size`` rows each, in
        start order. A trailing remainder shorter than ``window_size`` is not
        emitted, so the list is empty when ``data`` has fewer rows than that.
    """
    last_start = len(data) - window_size
    return [
        data.iloc[begin:begin + window_size]
        for begin in range(0, last_start + 1, step_size)
    ]

In [None]:
# Window length and hop between window starts, both counted in rows.
window_size, step_size = 100, 50

# Each split is windowed on its own, so no window mixes training and test rows.
train_windows, test_windows = (
    sliding_window(split, window_size, step_size)
    for split in (train_data, test_data)
)

### PAMAP2

In [None]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

/content/drive/MyDrive/PAMAP2_Dataset


In [None]:
# Load the pickled frame 'pamap2.df'; the path is relative to the current
# working directory. This rebinds `data`, replacing whatever it held before.
# NOTE(review): unpickling can execute arbitrary code — only load pickles that
# come from a trusted source.
data = pd.read_pickle('pamap2.df')

def fix_data(data):
    """Remove activity-0 rows and fill missing values.

    Steps, in order:
      1. Keep only rows whose ``activity_id`` is not 0.
      2. Interpolate the remaining gaps with ``DataFrame.interpolate()``.
      3. Fill whatever is still missing in every non-object column with that
         column's mean.

    Args:
        data: DataFrame with an ``activity_id`` column.

    Returns:
        A new, cleaned DataFrame; the input frame is not modified.
    """
    # Filter with a boolean mask instead of dropping by index label: label-based
    # drop removes *every* row that shares a label with an activity-0 row, which
    # silently deletes valid rows whenever the index contains duplicates.
    data = data.loc[data['activity_id'] != 0].interpolate()
    for colName in data.columns:
        if data[colName].dtype != 'object':
            # Values interpolation could not reach fall back to the column mean.
            data[colName] = data[colName].fillna(data[colName].mean())
    return data

# Rebind `data` to the cleaned frame returned by fix_data (defined above in this cell).
data = fix_data(data)

In [None]:
# Switch to the column names the rest of the notebook expects, in a single pass.
data = data.rename(columns={'subject': 'subject_id', 'activity_id': 'activity'})

In [None]:
from sklearn.model_selection import train_test_split

# Per-subject chronological 80/20 split: for every subject the earliest 80% of
# its rows (by 'time_stamp') go to the training set, the latest 20% to the test set.
grouped_data = data.groupby('subject_id')
train_parts, test_parts = [], []

for _, group in grouped_data:
    # 'subject_id' is only needed for grouping, so it is removed from the splits.
    ordered = group.sort_values(by='time_stamp', ascending=True).drop(columns=['subject_id'])
    train_size = int(0.8 * len(ordered))

    train_parts.append(ordered.iloc[:train_size])
    test_parts.append(ordered.iloc[train_size:])

# Concatenate once at the end: concatenating inside the loop re-copies every
# row collected so far on each iteration and starts from an empty frame.
train_data = pd.concat(train_parts) if train_parts else pd.DataFrame()
test_data = pd.concat(test_parts) if test_parts else pd.DataFrame()

### RF model

In [None]:
# Separate the label column from everything else in the frame.
df_target = data['activity']
df_features = data.drop(columns=['activity'])

In [None]:
# Random row-level 80/20 split of the full frame, seeded for reproducibility.
# NOTE(review): X_train / X_test / y_train / y_test are rebound by the windowed
# pipeline in the cells below, so the models are trained on those, not on these.
X_train, X_test, y_train, y_test = train_test_split(
    df_features, df_target, test_size=0.2, random_state=42
)

# Create the scaler here: its only other definition is in a later cell, so on a
# fresh kernel (Restart & Run All) this cell failed with NameError.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics are fitted on the training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics

In [None]:
# Window length and hop between window starts, both counted in rows.
window_size = 200
stride = 50


def _single_activity_windows(frame, window_size, stride):
    """Collect the fixed-length windows of ``frame`` that contain one activity only.

    Args:
        frame: DataFrame with an ``activity`` column.
        window_size: Number of rows per window.
        stride: Row offset between the starts of consecutive windows.

    Returns:
        List of ``frame.iloc`` slices of ``window_size`` rows whose ``activity``
        column holds a single distinct value; windows that span more than one
        activity are skipped.
    """
    kept = []
    # `+ 1` so the last full-length window (start == len - window_size) is
    # visited too, matching sliding_window() earlier in the notebook.
    for start in tqdm(range(0, len(frame) - window_size + 1, stride)):
        window = frame.iloc[start:start + window_size]
        if window['activity'].nunique() == 1:
            kept.append(window)
    return kept


train_frames = _single_activity_windows(train_data, window_size, stride)
test_frames = _single_activity_windows(test_data, window_size, stride)

In [None]:
# Feature columns: everything in a window frame except 'subject' and 'activity'.
relevant_columns = [
    col for col in train_frames[0].columns if col not in ('subject', 'activity')
]


def _frames_to_xy(frames, feature_columns):
    """Turn window frames into parallel lists of feature matrices and labels.

    Each frame contributes the values of ``feature_columns`` as one matrix and
    the ``activity`` value of its first row as the label.
    """
    features, labels = [], []
    for frame in tqdm(frames):
        features.append(frame[feature_columns].values)
        labels.append(frame.iloc[0]['activity'])
    return features, labels


X_train_list, y_train_list = _frames_to_xy(train_frames, relevant_columns)
X_test_list, y_test_list = _frames_to_xy(test_frames, relevant_columns)

In [None]:
# Stack the per-window matrices along a new leading axis and convert the label
# lists to arrays, one split at a time.
X_train, y_train = np.stack(X_train_list, axis=0), np.array(y_train_list)
X_test, y_test = np.stack(X_test_list, axis=0), np.array(y_test_list)

In [None]:
# Read the dimensions back from the stacked training array; note that this
# rebinds `window_size` to the array's second dimension.
num_samples_train = X_train.shape[0]
window_size, n_features = X_train.shape[1:]
num_samples_test = len(X_test)

In [None]:
# Flatten every window into a single row of window_size * n_features values.
flat_width = window_size * n_features
X_train_reshaped = X_train.reshape(num_samples_train, flat_width)
X_test_reshaped = X_test.reshape(num_samples_test, flat_width)

In [None]:
# Fit the scaler on the flattened training windows only, then apply the same
# fitted transform to both splits.
scaler = StandardScaler().fit(X_train_reshaped)
X_train_scaled = scaler.transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest on the scaled, flattened windows. The seed is fixed (same value
# as the train/test split above) so that re-running the notebook rebuilds the
# same forest; without it every run trains a different model.
RFmodel = RandomForestClassifier(random_state=42)
RFmodel.fit(X_train_scaled, y_train)

### SVM

In [None]:
from sklearn.svm import SVC

# RBF-kernel SVM on the scaled, flattened windows. With probability=True the
# probability calibration shuffles the data internally, so the seed is fixed to
# keep the probability estimates reproducible across runs.
SVCmodel = SVC(kernel='rbf', probability=True, random_state=42)
SVCmodel.fit(X_train_scaled, y_train)

In [None]:
from scipy.stats import entropy


def _js_divergence(train_values, test_values, eps=1e-10):
    """Jensen-Shannon divergence between the histograms of two 1-D samples.

    Args:
        train_values: 1-D numeric sample (e.g. one column of the training split).
        test_values: 1-D numeric sample (the same column of the test split).
        eps: Small constant added to every bin so empty bins do not produce
            infinite terms.

    Returns:
        ``0.5 * (KL(p || m) + KL(q || m))`` with ``m = 0.5 * (p + q)``, in nats.
    """
    train_values = np.asarray(train_values)
    test_values = np.asarray(test_values)

    # NOTE(review): for continuous columns the number of unique values can be
    # very large, which makes these histograms big and sparse.
    n_bins = max(len(np.unique(train_values)), len(np.unique(test_values)))

    # Both histograms must share the same bin edges. With an integer `bins` and
    # no `range`, np.histogram spans each sample's own min..max, so bin k would
    # cover different intervals in the two samples and p/q would not be comparable.
    shared_range = (
        min(train_values.min(), test_values.min()),
        max(train_values.max(), test_values.max()),
    )
    p, _ = np.histogram(train_values, bins=n_bins, range=shared_range, density=True)
    q, _ = np.histogram(test_values, bins=n_bins, range=shared_range, density=True)

    p = p + eps
    q = q + eps
    m = 0.5 * (p + q)
    # scipy's entropy(pk, qk) normalises both arguments and returns KL(pk || qk).
    return 0.5 * (entropy(p, m) + entropy(q, m))


# One divergence per column position, comparing the training and test splits.
js_divergences = np.array(
    [
        _js_divergence(train_data.iloc[:, i], test_data.iloc[:, i])
        for i in range(train_data.shape[1])
    ],
    dtype=float,
)
avg_js_divergence = np.mean(js_divergences)
print("Average Jensen-Shannon divergence:", avg_js_divergence)

Average Jensen-Shannon divergence: 0.4556521285747621


In [None]:
# Remove the label column from both splits. It was renamed from 'activity_id'
# to 'activity' earlier in the notebook, so dropping only 'activity_id' raised
# KeyError. Both names are listed and missing ones are ignored, which also
# makes the cell safe to re-run.
# NOTE(review): confirm that dropping the renamed 'activity' column is the intent.
label_columns = ['activity', 'activity_id']
train_data = train_data.drop(columns=label_columns, errors='ignore')
test_data = test_data.drop(columns=label_columns, errors='ignore')