### Definitions

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount is re-established on every run of this cell.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
cd "/content/drive/MyDrive/raw/"

/content/drive/MyDrive/raw


In [None]:
import os
import statistics
import csv
import itertools
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import *
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.utils import plot_model
import seaborn as sns
from matplotlib.colors import rgb2hex
from matplotlib.cm import get_cmap
import plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler, RobustScaler
# Initialise plotly's offline notebook mode once, right after the imports.
plotly.offline.init_notebook_mode()

Output hidden; open in https://colab.research.google.com to view.

### MHEALTH

In [None]:
cd "/content/drive/MyDrive/MHEALTHDATASET/"

/content/drive/MyDrive/MHEALTHDATASET


In [None]:
# Load the MHEALTH data from a pickle file in the current working directory.
# NOTE(review): unpickling can execute arbitrary code — only load pickles that
# come from a trusted source.
data = pd.read_pickle('mhealth_cnn.pkl')

In [None]:
# Harmonise the column names ('subject' -> 'subject_id', 'activity_id' ->
# 'activity') and then discard the subject identifier column.
data = (
    data
    .rename(columns={'subject': 'subject_id', 'activity_id': 'activity'})
    .drop(columns=['subject_id'])
)

In [None]:
def sliding_window(data, window_size, step_size):
    """Cut ``data`` into fixed-length, possibly overlapping row windows.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.iloc`` slicing (e.g. a DataFrame)
        The rows to split.
    window_size : int
        Number of rows in every window.
    step_size : int
        Row offset between the starts of consecutive windows.

    Returns
    -------
    list
        ``data.iloc[start:start + window_size]`` for each start position
        0, step_size, 2*step_size, ... that still leaves room for a full
        window. Trailing rows that do not fill a whole window are not
        returned; the list is empty when ``data`` is shorter than
        ``window_size``.
    """
    last_start = len(data) - window_size
    return [
        data.iloc[begin:begin + window_size]
        for begin in range(0, last_start + 1, step_size)
    ]

In [None]:
# Window length and stride, both in rows.
window_size, step_size = 100, 50

# Row-level 70/30 split with a fixed seed, followed by windowing of each part.
# NOTE(review): train_test_split is called without shuffle=False, so with
# scikit-learn's defaults the rows are shuffled before windowing and each
# window is a chunk of shuffled rows rather than a contiguous stretch of the
# recording — confirm this is intended.
train_data, test_data = train_test_split(data, random_state=42, test_size=0.3)
train_windows, test_windows = (
    sliding_window(part, window_size, step_size)
    for part in (train_data, test_data)
)

### PAMAP2

In [None]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

/content/drive/MyDrive/PAMAP2_Dataset


In [None]:
# Load the PAMAP2 data from a pickle file in the current working directory;
# this rebinds `data`, replacing whatever was loaded before.
# NOTE(review): unpickling can execute arbitrary code — only load pickles that
# come from a trusted source.
data = pd.read_pickle('pamap2.df')

def fix_data(data):
    """Remove rows labelled with activity 0 and fill in missing values.

    Steps, in order:

    1. Discard every row whose ``activity_id`` equals 0.
    2. Interpolate the remaining gaps with ``DataFrame.interpolate()``
       using its default settings.
    3. For every non-object column, replace any values that are still
       missing with that column's mean.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing an ``activity_id`` column.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame; the frame passed in is left untouched.
    """
    # Filter with a boolean mask instead of dropping by index label. Dropping
    # via `data.drop(<index of matching rows>)` removes *every* row sharing a
    # label with a matching row, which silently deletes valid rows when the
    # index contains duplicates (e.g. several recordings concatenated without
    # resetting the index).
    data = data[data['activity_id'] != 0]
    data = data.interpolate()
    for colName in data.columns:
        if data[colName].dtype != 'object':
            data[colName] = data[colName].fillna(data[colName].mean())
    return data

# Clean the freshly loaded PAMAP2 frame and rebind `data` to the result.
data = fix_data(data)

In [None]:
# Apply the same column naming as for the other dataset ('subject' ->
# 'subject_id', 'activity_id' -> 'activity'), then remove the subject column.
column_renames = {'subject': 'subject_id', 'activity_id': 'activity'}
data = data.rename(columns=column_renames).drop(columns=['subject_id'])

In [None]:
# Window length and stride, both in rows.
window_size, step_size = 100, 50

# Row-level 70/30 split with a fixed seed; each part is then cut into windows.
# NOTE(review): no shuffle=False is passed, so under scikit-learn's defaults
# the rows are shuffled before windowing — confirm that windows of shuffled
# rows are what is wanted here.
train_data, test_data = train_test_split(data, random_state=42, test_size=0.3)
train_windows = sliding_window(train_data, window_size=window_size, step_size=step_size)
test_windows = sliding_window(test_data, window_size=window_size, step_size=step_size)

### RF

In [None]:
from sklearn.ensemble import RandomForestClassifier as rfc
# accuracy_score is needed in this cell; without this import the cell only
# works if the SVM cell further down has already been executed, and raises
# NameError on Restart & Run All.
from sklearn.metrics import accuracy_score

# Fit one random forest per training window and record its out-of-bag score
# and its accuracy on the very rows it was trained on.
# NOTE(review): `scaler` and `model` are re-fitted on every iteration, so after
# the loop only the fit from the last window remains available.
results = []
scaler = StandardScaler()
for i, window in enumerate(train_windows):

    # Features are all columns except the last two.
    # NOTE(review): this assumes the 'activity' label is one of those last two
    # columns; otherwise the label leaks into the features — confirm.
    X_train = window[window.columns[:-2]]
    Y_train = window['activity']
    X_train_scaled = scaler.fit_transform(X_train)

    model = rfc(n_estimators=100, random_state=42, n_jobs=-1, oob_score=True)
    model.fit(X_train_scaled, Y_train)
    # Training-set accuracy (evaluated on the rows used for fitting).
    accuracy = accuracy_score(Y_train, model.predict(X_train_scaled))

    results.append({
        'window': i,
        'oob_score': model.oob_score_,
        'accuracy': accuracy
    })

In [None]:
# Tabulate the per-window training metrics and print them under a heading.
results_df = pd.DataFrame(results)
print("Training Results", results_df, sep="\n")

In [None]:
# Score every test window with the `scaler` and `model` currently in the
# kernel namespace (after the training cell above, those are the ones fitted
# on its final window).
test_results = []
for i, window in enumerate(test_windows):
    # Same feature selection as in training: all columns except the last two.
    X_test = window[window.columns[:-2]]
    # The label column is named 'activity' (it was renamed from 'activity_id'
    # when the data was prepared), so 'activity_id' would raise KeyError here.
    Y_test = window['activity']
    X_test_scaled = scaler.transform(X_test)

    accuracy = accuracy_score(Y_test, model.predict(X_test_scaled))

    test_results.append({
        'window': i,
        'accuracy': accuracy
    })

### SVM

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Fit a linear-kernel SVM on each training window and record its accuracy on
# the rows it was fitted on. `scaler` and `model` are re-fitted per window, so
# only the fit from the last window is left once the loop finishes.
scaler = StandardScaler()
results = []
for i, window in enumerate(train_windows):
    # All columns except the last two serve as features.
    feature_columns = window.columns[:-2]
    X_train, Y_train = window[feature_columns], window['activity']
    X_train_scaled = scaler.fit_transform(X_train)

    model = SVC(kernel='linear', random_state=42)
    model.fit(X_train_scaled, Y_train)
    train_predictions = model.predict(X_train_scaled)
    accuracy = accuracy_score(Y_train, train_predictions)

    results.append(dict(window=i, accuracy=accuracy))

In [None]:
# Score every test window with the `scaler` and `model` currently in the
# kernel namespace (after the SVM training cell, those are the ones fitted on
# its final window).
test_results = []
for i, window in enumerate(test_windows):
    # Same column selection as in training: drop the last two columns.
    X_test, Y_test = window[window.columns[:-2]], window['activity']
    X_test_scaled = scaler.transform(X_test)
    test_predictions = model.predict(X_test_scaled)
    accuracy = accuracy_score(Y_test, test_predictions)
    test_results.append(dict(window=i, accuracy=accuracy))

In [None]:
# Tabulate the per-window test accuracies and print them under a heading.
test_results_df = pd.DataFrame(test_results)
print("Testing Results", test_results_df, sep="\n")

In [None]:
from scipy.stats import entropy

# Per-column Jensen-Shannon divergence between the train and test splits,
# estimated from histograms, then averaged over all columns.
n_columns = train_data.shape[1]
js_divergences = np.zeros(n_columns)

for i in range(n_columns):
    train_col = train_data.iloc[:, i].to_numpy()
    test_col = test_data.iloc[:, i].to_numpy()

    # One bin per distinct value, taking the larger count of the two splits.
    # NOTE(review): for continuous columns this can be a very large number of
    # bins — consider capping it.
    bins = max(len(np.unique(train_col)), len(np.unique(test_col)))

    # Both histograms must share the same bin edges. Letting np.histogram pick
    # the range separately for each split makes bin k of `p` and bin k of `q`
    # cover different intervals, so the two vectors are not comparable.
    edges = np.histogram_bin_edges(np.concatenate([train_col, test_col]), bins=bins)
    p, _ = np.histogram(train_col, bins=edges)
    q, _ = np.histogram(test_col, bins=edges)

    # Convert counts to bin probabilities; the small constant keeps empty bins
    # from producing log(0) terms.
    p = p / p.sum() + 1e-10
    q = q / q.sum() + 1e-10
    m = 0.5 * (p + q)
    js_divergences[i] = 0.5 * (entropy(p, m) + entropy(q, m))
avg_js_divergence = np.mean(js_divergences)
print("Average Jensen-Shannon divergence:", avg_js_divergence)

Average Jensen-Shannon divergence: 0.30211115684041095


In [None]:
# Remove the 'activity' label column from both splits.
train_data, test_data = (
    split.drop(columns=['activity']) for split in (train_data, test_data)
)