In [1]:
from google.colab import drive
drive.mount('/content/drive' , force_remount = True)

Mounted at /content/drive


In [2]:
cd "/content/drive/MyDrive/raw/"

/content/drive/MyDrive/raw


In [3]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

/content/drive/MyDrive/PAMAP2_Dataset


In [4]:
import os
import statistics
import csv
import itertools
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import *
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.utils import plot_model
import seaborn as sns
from matplotlib.colors import rgb2hex
from matplotlib.cm import get_cmap
import plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from sklearn.preprocessing import StandardScaler, RobustScaler
plotly.offline.init_notebook_mode()

Output hidden; open in https://colab.research.google.com to view.

### MHEALTH

In [None]:
cd "/content/drive/MyDrive/MHEALTHDATASET/"

In [None]:
data = pd.read_pickle('mhealth_cnn.pkl')

In [None]:
data = data.rename(columns={'subject': 'subject_id'})
data = data.rename(columns={'activity_id': 'activity'})

In [None]:
test_data = data[(data["subject_id"] == 102)]
train_data = data[(data["subject_id"] != 102) & (data["subject_id"] != 103)]
val_data = data[(data["subject_id"] == 103)]

### PAMAP2

In [None]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

In [None]:
data = pd.read_pickle('pamap2.df')

def fix_data(data):
    data = data.drop(data[data['activity_id']==0].index)
    data = data.interpolate()
    for colName in data.columns:
        if data[colName].dtype != 'object':
            data[colName] = data[colName].fillna(data[colName].mean())
    return data

data = fix_data(data)

In [None]:
test_data = data[(data["subject_id"] == 102)]
train_data = data[(data["subject_id"] != 102) & (data["subject_id"] != 103)]
val_data = data[(data["subject_id"] == 103)]

### RF model

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
window_size = 200
stride = 50

train_frames = []
for i in tqdm(range(0, len(train_data)-window_size, stride)):
    window = train_data.iloc[i:i+window_size]
    if window['activity'].nunique() == 1:
      train_frames.append(window)

test_frames = []
for i in tqdm(range(0, len(test_data)-window_size, stride)):
    window = test_data.iloc[i:i+window_size]
    if window['activity'].nunique() == 1:
      test_frames.append(window)

In [None]:
window_size = 200
stride = 50

train_frames = []
for i in tqdm(range(0, len(train_data)-window_size, stride)):
    window = train_data.iloc[i:i+window_size]
    if window['activity'].nunique() == 1:
      train_frames.append(window)

test_frames = []
for i in tqdm(range(0, len(test_data)-window_size, stride)):
    window = test_data.iloc[i:i+window_size]
    if window['activity'].nunique() == 1:
      test_frames.append(window)

In [None]:
X_train_list = []
y_train_list = []
relevant_columns = [col for col in train_frames[0].columns if col not in ['subject', 'activity']]

#for each frame replace label with activity
for frame in tqdm(train_frames):
    X_train_list.append(frame[relevant_columns].values)
    y_train_list.append(frame.iloc[0]['activity'])

X_test_list = []
y_test_list = []
for frame in tqdm(test_frames):
    X_test_list.append(frame[relevant_columns].values)
    y_test_list.append(frame.iloc[0]['activity'])

In [None]:
X_train = np.stack(X_train_list)
X_test = np.stack(X_test_list)

y_train = np.array(y_train_list)
y_test = np.array(y_test_list)

In [None]:
num_samples_train, window_size, n_features = X_train.shape
num_samples_test = X_test.shape[0]

In [None]:
X_train_reshaped = X_train.reshape((num_samples_train, window_size * n_features))
X_test_reshaped = X_test.reshape((num_samples_test, window_size * n_features))

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

In [None]:
from sklearn.ensemble import RandomForestClassifier
RFmodel = RandomForestClassifier()#creating the model object
RFmodel.fit(X_train_scaled,y_train)

### SVM

In [None]:
from sklearn.svm import SVC

SVCmodel = SVC(kernel = 'rbf', probability=True)
SVCmodel.fit(X_train_scaled, y_train)

### Show results

In [None]:
def show_Confusion_matrix(y_true,y_predicted):
  cm = confusion_matrix(y_true,y_predicted)
  plt.rcParams['figure.figsize'] = (18,10)#setting the scale to get bigger display
  disp = ConfusionMatrixDisplay(confusion_matrix= cm, display_labels = list_activities)
  disp.plot(xticks_rotation= 'vertical',cmap = 'binary' )
  plt.show()

In [None]:
def get_metrics (y_true,y_pred):# function to get accuracy,precision,Recall and F1 score of the data
    acc = accuracy_score(y_true, y_pred)

    p = precision_score(y_true, y_pred,average=None).mean()#average precision
    r = recall_score(y_true, y_pred, average=None).mean()#average recall
    f1 = f1_score(y_true, y_pred, average=None).mean()#average f1 score

    print("Accuracy:  ",acc)

    print("Precision: ", p)
    print("Recall:    ", r)
    print("F1:        ", f1)

In [None]:
RFmodel_y_pred = RFmodel.predict(X_test_scaled)#predictions

In [None]:
from sklearn.metrics import precision_score,recall_score, f1_score, confusion_matrix, accuracy_score,ConfusionMatrixDisplay

In [None]:
get_metrics(y_test,RFmodel_y_pred)

Accuracy:   0.4286511008233906
Precision:  0.3828595075631681
Recall:     0.5406529064650761
F1:         0.40653880360731137



Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

