### Definitions

In [1]:
from google.colab import drive
drive.mount('/content/drive' , force_remount = True)

Mounted at /content/drive


In [2]:
cd "/content/drive/MyDrive/raw/"

/content/drive/MyDrive/raw


In [3]:
import os
import statistics
import csv
import itertools
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import *
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.utils import plot_model
import seaborn as sns
from matplotlib.colors import rgb2hex
from matplotlib.cm import get_cmap
import plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler, RobustScaler
plotly.offline.init_notebook_mode()

Output hidden; open in https://colab.research.google.com to view.

### MHEALTH

In [4]:
cd "/content/drive/MyDrive/MHEALTHDATASET/"

/content/drive/MyDrive/MHEALTHDATASET


In [5]:
data = pd.read_pickle('mhealth_cnn.pkl')

In [6]:
data = data.rename(columns={'subject': 'subject_id'})
data = data.rename(columns={'activity_id': 'activity'})

In [7]:
from sklearn.model_selection import train_test_split

grouped_data = data.groupby('subject_id')
train_data = pd.DataFrame()
test_data = pd.DataFrame()

for _, group in grouped_data:
    sorted_group = group.sort_index()

    total_samples = len(sorted_group)
    train_size = int(0.8 * total_samples)

    train_group = sorted_group.iloc[:train_size].drop(columns=['subject_id'])
    test_group = sorted_group.iloc[train_size:].drop(columns=['subject_id'])

    train_data = pd.concat([train_data, train_group])
    test_data = pd.concat([test_data, test_group])

data = data.drop(columns=['subject_id'])

### PAMAP2

In [None]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

/content/drive/MyDrive/PAMAP2_Dataset


In [None]:
data = pd.read_pickle('pamap2.df')

def fix_data(data):
    data = data.drop(data[data['activity_id']==0].index)
    data = data.interpolate()
    for colName in data.columns:
        if data[colName].dtype != 'object':
            data[colName] = data[colName].fillna(data[colName].mean())
    return data

data = fix_data(data)

In [None]:
data = data.drop(columns=['subject_id'])
df_features = data.drop(['activity_id'],axis = 1)#features dataframe
df_target = data['activity_id']

In [None]:
from sklearn.model_selection import train_test_split

grouped_data = data.groupby('subject_id')
train_data = pd.DataFrame()
test_data = pd.DataFrame()

for _, group in grouped_data:
    sorted_group = group.sort_values(by='time_stamp', ascending=True)

    total_samples = len(sorted_group)
    train_size = int(0.8 * total_samples)

    train_group = sorted_group.iloc[:train_size].drop(columns=['subject_id'])
    test_group = sorted_group.iloc[train_size:].drop(columns=['subject_id'])

    train_data = pd.concat([train_data, train_group])
    test_data = pd.concat([test_data, test_group])

### RF model

In [11]:
window_size = 200
stride = 50
frames = []
for i in tqdm(range(0, len(data)-window_size, stride)):
    window = data.iloc[i:i+window_size]
    if window['activity'].nunique() == 1:
      frames.append(window)


This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`



  0%|          | 0/7475 [00:00<?, ?it/s]

In [13]:
X_list = []
y_list = []
relevant_columns = [col for col in frames[0].columns if col not in ['subject_id', 'activity_id', 'time_ste[]']]

#replace label with activity
for frame in tqdm(frames):
    X_list.append(frame[relevant_columns].values)
    y_list.append(frame.iloc[0]['activity'])


This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`



  0%|          | 0/7014 [00:00<?, ?it/s]

In [14]:
X = np.stack(X_list)
y = np.array(y_list)

In [15]:
num_samples_train, window_size, n_features = X.shape

In [16]:
X_train_reshaped = X.reshape((num_samples_train, window_size * n_features))

In [17]:
X_train, X_valtest, y_train, y_valtest = train_test_split(X_train_reshaped, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_valtest, y_valtest, test_size=0.5, random_state=42)

In [19]:
#fit scaler on train
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_train_scaled = scaler.fit_transform(train_reshaped)
X_train = X_train_scaled.reshape(X_train.shape)

val_reshaped = X_val.reshape(-1, X_val.shape[-1])
X_val_scaled = scaler.transform(val_reshaped)
X_val = X_val_scaled.reshape(X_val.shape)

test_reshaped = X_test.reshape(-1, X_test.shape[-1])
X_test_scaled = scaler.transform(test_reshaped)
X_test = X_test_scaled.reshape(X_test.shape)

In [21]:
X_test.shape

(1403, 4800)

In [23]:
from sklearn.ensemble import RandomForestClassifier
RFmodel = RandomForestClassifier()#creating the model object
RFmodel.fit(X_train,y_train)

### SVM

In [None]:
from sklearn.svm import SVC

SVCmodel = SVC(kernel = 'rbf', probability=True)
SVCmodel.fit(X_train, y_train)

### Show Results

In [24]:
def show_Confusion_matrix(y_true,y_predicted):
  cm = confusion_matrix(y_true,y_predicted)
  plt.rcParams['figure.figsize'] = (18,10)#setting the scale to get bigger display
  disp = ConfusionMatrixDisplay(confusion_matrix= cm, display_labels = list_activities)
  disp.plot(xticks_rotation= 'vertical',cmap = 'binary' )
  plt.show()

In [25]:
def get_metrics (y_true,y_pred):# function to get accuracy,precision,Recall and F1 score of the data
    acc = accuracy_score(y_true, y_pred)

    p = precision_score(y_true, y_pred,average=None).mean()#average precision
    r = recall_score(y_true, y_pred, average=None).mean()#average recall
    f1 = f1_score(y_true, y_pred, average=None).mean()#average f1 score

    print("Accuracy:  ",acc)

    print("Precision: ", p)
    print("Recall:    ", r)
    print("F1:        ", f1)

In [27]:
RFmodel_y_pred = RFmodel.predict(X_test)#predictions

In [29]:
from sklearn.metrics import precision_score,recall_score, f1_score, confusion_matrix, accuracy_score,ConfusionMatrixDisplay

In [30]:
get_metrics(y_test,RFmodel_y_pred)

Accuracy:   1.0
Precision:  1.0
Recall:     1.0
F1:         1.0
