# <span style='color:Blue'> SEQUENCE CLASSIFICATION </span>

This Notebook is based on the analysis of a dataset called “Smartphone-Based Recognition of Human Activities and Postural Transitions Data Set” which is built from the recordings of 30 subjects performing activities of daily living (ADL) while carrying a waist-mounted smartphone with embedded inertial sensors. Link to dataset: http://archive.ics.uci.edu/ml/datasets/Smartphone-Based+Recognition+of+Human+Activities+and+Postural+Transitions

### Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import time
import pmdarima as pm
import statsmodels.api as sm
import tsfresh
import sklearn
from scipy import signal 
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters, EfficientFCParameters, MinimalFCParameters
from sklearn.ensemble import GradientBoostingClassifier,AdaBoostClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.metrics import confusion_matrix
from pandas import read_csv

# Notebook-wide plotting setup: inline figures, seaborn grid style, SVG output.
%matplotlib inline
sns.set_style('whitegrid')
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')


import warnings
# Silence every warning for the rest of the notebook (this also hides
# deprecation and data-conversion warnings raised by the libraries).
warnings.filterwarnings("ignore")
from matplotlib.patches import Rectangle
# Default font sizes, text colour and figure size for all following plots.
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['text.color'] = 'k'
matplotlib.rcParams['figure.figsize'] = 13,8

### Read and plot one measurement

To get to know the dataset, we will start by visualising a single measurement from the first experiment done on user id 1. We highlight the actions that we would like to classify.

In [None]:
def load_file(filepath):
    """Load a header-less, whitespace-delimited text file as a 2-D array.

    Parameters
    ----------
    filepath : str, path-like or file-like
        Source handed straight to ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray
        The parsed table (``DataFrame.values``), one row per line.
    """
    # sep=r"\s+" is the documented replacement for delim_whitespace=True,
    # which pandas has deprecated.
    dataframe = read_csv(filepath, header=None, sep=r"\s+")
    return dataframe.values
 
# Accelerometer recording of experiment 1 / user 1; the columns are plotted
# below as the X, Y and Z axes. NOTE: the path is specific to the author's
# machine and has to be adapted before running.
exp01 = load_file('/Users/TNM1BET/Downloads/Dataset/acc_exp01_user01.txt')

In [None]:
# Signal traces as (column of exp01, line colour, legend label).
signal_traces = (
    (0, 'blue', 'acc X-axis'),
    (1, 'green', 'acc Y-axis'),
    (2, 'red', 'acc Z-axis'),
)

# Highlighted activity spans as (first sample, width in samples, colour).
# Every span is drawn from y = -2 with a height of 6.
activity_spans = (
    (250, 982, 'g'),
    (1393, 801, 'm'),
    (2360, 1014, 'g'),
    (4736, 931, 'm'),
    (7496, 582, 'b'),
    (8356, 894, 'b'),
    (9657, 910, 'b'),
    (10750, 964, 'b'),
    (13191, 655, 'c'),
    (14069, 630, 'r'),
    (14869, 623, 'c'),
    (15712, 665, 'r'),
    (16530, 623, 'c'),
    (17298, 672, 'r'),
)

# Colour key below the axes as (horizontal figure position, caption, colour).
activity_captions = (
    (0.25, "Standing", "green"),
    (0.35, "Sitting", "magenta"),
    (0.45, "Walking", "blue"),
    (0.75, "Walking_upstairs", "red"),
    (0.60, "Walking_downstairs", "cyan"),
)

fig, ax = plt.subplots(figsize=(20, 8))
plt.title('Triaxial acceleration signal (experiment id: 1 , user id: 1)', fontsize=20)
for column, line_colour, legend_label in signal_traces:
    ax.plot(exp01[:, column], color=line_colour, label=legend_label)
plt.ylabel('g (9.8 m/s^2)', fontsize=15)
plt.legend(fancybox=True, framealpha=1, shadow=True, borderpad=1)

current_axes = fig.gca()
for span_start, span_width, span_colour in activity_spans:
    current_axes.add_patch(
        Rectangle((span_start, -2), span_width, 6, fill=True, color=span_colour,
                  alpha=0.2, zorder=100, figure=fig)
    )

for x_position, caption, box_colour in activity_captions:
    plt.figtext(x_position, 0.06, caption, ha="center", fontsize=18,
                bbox={"facecolor": box_colour, "alpha": 0.2, "pad": 5})

plt.show()

Let's now go deeper: we pick one segment from the measurement above that represents the walking action.

In [None]:
# Sample range of the last walking span highlighted in the overview plot.
walk_start, walk_stop = 10750, 11714
segment_walk = exp01[walk_start:walk_stop, :]

plt.figure(figsize=(10, 5))
for column, line_colour, axis_label in (
    (0, "blue", "X-axis"),
    (1, "green", "Y-axis"),
    (2, "red", "Z-axis"),
):
    plt.plot(segment_walk[:, column], color=line_colour, label=axis_label)
plt.title('One segment from the walking action', fontsize=15)
plt.legend(loc="upper right")
plt.xlabel('datapoints', fontsize=15)
plt.ylabel('g (9.8 m/s^2)', fontsize=15)
plt.show()

### Preprocessing

The first and major step in time series classification is the pre-processing step. We will now go through the different steps using the snippet from walking activity. 
We will:
1. downsample the data to smooth the signal
2. roll a sliding window over the signal and cut it into equally sized windows  
3. normalize each window so that its values are bounded within [-1, 1]

### Downsampling

In [None]:
# Name the decimation settings once so the call and the message cannot drift.
decimation_factor = 2
filter_type = "fir"
filter_order = 0
# NOTE(review): an order-0 FIR presumably has a single tap, i.e. little or no
# low-pass filtering happens before samples are dropped - confirm intended.
segment_walk_down = signal.decimate(
    segment_walk,
    q=decimation_factor,
    n=filter_order,
    ftype=filter_type,
    axis=0,
)
print('Downsampled with Factor: {}, filter: type: {}, of order {}'.format(
    decimation_factor, filter_type, filter_order))

### windowing

In [None]:
def sliding_window_2d(arr, window_size, shifting_distance, slicing_axis=0, window_axis=1):
    """Cut an array into fixed-length, possibly overlapping windows.

    A new axis of length ``window_size`` is inserted at ``window_axis`` and
    ``slicing_axis`` then enumerates the windows. With the default axes an
    input of shape ``(n_samples, n_channels)`` becomes
    ``(n_windows, window_size, n_channels)``.

    Parameters
    ----------
    arr : numpy.ndarray
        Signal to cut. The progress messages index the first three axes of
        the result, so a 2-D input is expected.
    window_size : int
        Number of consecutive samples per window.
    shifting_distance : int
        Step, in samples, between the starts of neighbouring windows.
    slicing_axis : int, optional
        Axis of the expanded array along which windows are taken.
    window_axis : int, optional
        Position at which the within-window axis is inserted.

    Returns
    -------
    numpy.ndarray
        Strided result of ``np.lib.stride_tricks.as_strided``. No windows are
        returned (length 0 along ``slicing_axis``) when the input is shorter
        than ``window_size``.
    """
    array = np.expand_dims(arr, window_axis)  # e.g. (n, c) -> (n, 1, c)

    # Integer arithmetic instead of float division + floor, clamped at zero so
    # a too-short input consistently yields an empty result rather than a
    # negative dimension.
    n_samples = array.shape[slicing_axis]
    n_windows = max(0, (n_samples - window_size) // shifting_distance + 1)

    mod_shape = list(array.shape)
    mod_shape[slicing_axis] = n_windows
    mod_shape[window_axis] = window_size

    mod_strides = list(array.strides)
    # One step along the window index jumps shifting_distance samples ...
    mod_strides[slicing_axis] *= shifting_distance
    # ... while one step inside a window advances by a single sample.
    mod_strides[window_axis] = array.strides[slicing_axis]

    # NOTE: overlapping windows share memory with each other and with the
    # input, so writing into the result is unsafe.
    row_window_array = np.lib.stride_tricks.as_strided(array, shape=mod_shape, strides=mod_strides)

    print("number of windows:",row_window_array.shape[0])
    print("number of observation per window:",row_window_array.shape[1])
    print("number of axis:",row_window_array.shape[2])

    return row_window_array

In [None]:
# Window length and hop size, both counted in samples of the downsampled signal.
window_size = 128
step_size = 64   # if equal to window_size, then the windows do not overlap

# Cut the downsampled walking segment into windows (window index on axis 0).
X_windowed = sliding_window_2d(segment_walk_down, window_size, step_size)

### scaling

In [None]:
def scale_data_vibration(data_array):
    """Scale every window independently with ``maxabs_scale``.

    Parameters
    ----------
    data_array : numpy.ndarray
        3-D array; each ``data_array[i]`` is one 2-D window and is scaled on
        its own with scikit-learn's ``maxabs_scale`` defaults.

    Returns
    -------
    numpy.ndarray
        New float array with the same shape as ``data_array``.
    """
    # Import the function explicitly: a bare ``import sklearn`` does not
    # guarantee that ``sklearn.preprocessing`` is reachable as an attribute.
    from sklearn.preprocessing import maxabs_scale

    X = np.empty(data_array.shape)
    for index, window in enumerate(data_array):
        X[index, :, :] = maxabs_scale(window)

    return X

In [None]:
# Scale each window of the walking segment; X_scl has the same shape as X_windowed.
X_scl= scale_data_vibration(X_windowed)

We now plot the results from the pre-processing steps

In [None]:
# Top row: the first two raw windows; bottom row: the same windows after scaling.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 8), sharey=False)
# A space was missing after "length", so the title used to read "length128".
fig.suptitle(f'Window snipping with length {window_size} and step size of {step_size}')
window_panels = (
    (ax1, X_windowed[0], "window number: 1"),
    (ax2, X_windowed[1], "window number: 2"),
    (ax3, X_scl[0], "scaled window number: 1"),
    (ax4, X_scl[1], "scaled window number: 2"),
)
for axis, window_data, caption in window_panels:
    axis.plot(window_data)
    axis.set_title(caption)
fig.show()

### feature extraction

Now that we have standardized, equally sized windows, we can start processing them. The next step is feature extraction. For that we can use tsfresh, which allows us to derive a comprehensive set of characteristics from the raw data. For more details, see the documentation: https://tsfresh.readthedocs.io/en/latest/

First, of course, we need to reshape our data into the format required by tsfresh.

In [None]:
# Long-format table for tsfresh: one row per (window, time step) with the
# columns id, time, acc_0 ... acc_<n_channels - 1>.
n_channels = X_scl.shape[2]
n_obs = X_scl.shape[1]
windows = X_scl.shape[0]

# Collect the per-window frames and concatenate once; calling pd.concat inside
# the loop re-copies all previously collected rows on every iteration.
window_frames = []
for window in range(windows):
    df_temp = pd.DataFrame(
        {'acc_' + str(channel): X_scl[window, :, channel] for channel in range(n_channels)}
    )
    df_temp.insert(0, 'id', window)  # the window index doubles as the tsfresh id
    df_temp.insert(1, 'time', range(n_obs))
    window_frames.append(df_temp)

# pd.concat rejects an empty list, so keep the empty frame for zero windows.
df = pd.concat(window_frames, ignore_index=True) if window_frames else pd.DataFrame()

For the sake of simplicity and time, we will calculate only the minimal set of features. Using EfficientFCParameters would allow you to extract a larger number of features. 
An overview of the features that can be calculated is available here: https://tsfresh.readthedocs.io/en/latest/text/list_of_features.html

In [None]:
# Hand a copy to tsfresh (presumably so that df itself stays untouched).
df_copy = df.copy()
# Minimal feature set; swap in the line below for the larger "efficient" set.
extraction_settings = MinimalFCParameters()
#extraction_settings = EfficientFCParameters()

# 'id' identifies the window and 'time' orders the samples within it.
X_features = extract_features(df_copy, column_id='id', column_sort='time',
         default_fc_parameters=extraction_settings)

In [None]:
# Number of columns in the extracted feature table.
print("Number of features extracted:",X_features.shape[1])

In [None]:
# Display the extracted feature table.
X_features

In [None]:
# Four of the extracted features for channel acc_0, one panel each; the
# column name doubles as the panel title.
plotted_features = (
    "acc_0__sum_values",
    "acc_0__standard_deviation",
    "acc_0__mean",
    "acc_0__variance",
)

fig, feature_axes = plt.subplots(2, 2, figsize=(12, 8))
fig.suptitle('Some Features extracted')
for axis, feature_name in zip(feature_axes.flat, plotted_features):
    axis.plot(X_features[feature_name])
    axis.set_title(feature_name)
fig.show()

### Loading the ML Dataset

We will now load the train and test sets. Each set contains:
- A 561-feature vector with time and frequency domain variables. 
- Its associated activity label. 

In [None]:
# Feature matrices and label vectors of the train and test splits, loaded in
# one pass. NOTE: the paths are specific to the author's machine.
X_train, y_train, X_test, y_test = (
    load_file(dataset_path)
    for dataset_path in (
        "/Users/TNM1BET/Downloads/Dataset/X_train_reduced.txt",
        "/Users/TNM1BET/Downloads/Dataset/y_train_reduced.txt",
        "/Users/TNM1BET/Downloads/Dataset/X_test_reduced.txt",
        "/Users/TNM1BET/Downloads/Dataset/y_test_reduced.txt",
    )
)

In [None]:
def class_balance_analysis(data, bins):
    """Count how often each distinct value occurs in the first column of ``data``.

    Parameters
    ----------
    data : array-like
        Anything ``pandas.DataFrame`` accepts; column 0 holds the class labels.
    bins : sequence
        Accepted for interface compatibility only; the counts are derived
        from ``data`` alone.

    Returns
    -------
    numpy.ndarray
        Occurrence counts, ordered by ascending label value.
    """
    return pd.DataFrame(data).groupby(0).size().values

In [None]:
# Class names in the order used for the bars (counts come back sorted by label value).
bins = ["Walking","Walking_upstairs","Walking_downstairs","Sitting","Standing"]
np_samples_train, np_samples_test = (
    class_balance_analysis(label_array, bins) for label_array in (y_train, y_test)
)

# One bar chart per split, side by side.
fig, balance_axes = plt.subplots(1, 2, figsize=(12, 3))
balance_panels = (
    (np_samples_train, 'samples balance training set'),
    (np_samples_test, 'samples balance test set'),
)
for axis, (sample_counts, caption) in zip(balance_axes, balance_panels):
    axis.bar(bins, sample_counts)
    axis.set_title(caption)
    axis.xaxis.set_tick_params(rotation=50)
fig.show()

### Ada boost

In [None]:
# AdaBoost parameters
ada_params = {
    'n_estimators': 70,
    'learning_rate' : 0.5,
}

ada = AdaBoostClassifier(**ada_params)

ada.fit(X_train, y_train)

y_pred_train = ada.predict(X_train)
y_pred_test = ada.predict(X_test)

acc_train = accuracy_score(y_train, y_pred_train)

print(f'Mean accuracy train score: {acc_train:.3}')

acc_test = accuracy_score(y_test, y_pred_test)

print(f'Mean accuracy test score: {acc_test:.3}')

In [None]:
# Absolute confusion matrices (rows: actual label, columns: predicted label).
conf_train = confusion_matrix(y_train, y_pred_train)
conf_test = confusion_matrix(y_test, y_pred_test)

# Row-normalised versions: each row is divided by its number of actual samples.
conf_train_norm = conf_train.astype('float') / conf_train.sum(axis=1, keepdims=True)
conf_test_norm = conf_test.astype('float') / conf_test.sum(axis=1, keepdims=True)

# Top row: counts (integer annotations); bottom row: normalised values.
# Tick labels are left at seaborn's defaults, i.e. matrix positions.
heatmap_panels = (
    (conf_train, {"fmt": "d"}, "Confusion Matrix for training set"),
    (conf_test, {"fmt": "d"}, "Confusion Matrix for test set"),
    (conf_train_norm, {}, "Normalized Confusion Matrix for training set"),
    (conf_test_norm, {}, "Normalized Confusion Matrix for test set"),
)

fg, heatmap_axes = plt.subplots(2, 2, figsize=(12, 10))
for axis, (matrix, annotation_kwargs, caption) in zip(heatmap_axes.flat, heatmap_panels):
    sns.heatmap(matrix, annot=True, ax=axis, **annotation_kwargs)
    axis.set(xlabel="predicted label", ylabel="actual label", title=caption)

### Gradient boost

In [None]:
# Gradient Boost parameters
grad_params = {
    'n_estimators': 40,
    'learning_rate' : 0.5,
    'max_depth' : 1,
    'random_state' : 0
}

grad = GradientBoostingClassifier(**grad_params)

grad.fit(X_train, y_train)

y_pred_train = grad.predict(X_train)
y_pred_test = grad.predict(X_test)

acc_train = accuracy_score(y_train, y_pred_train)

print(f'Mean accuracy train score: {acc_train:.3}')

acc_test = accuracy_score(y_test, y_pred_test)

print(f'Mean accuracy test score: {acc_test:.3}')

In [None]:
# Absolute confusion matrices (rows: actual label, columns: predicted label).
conf_train = confusion_matrix(y_train, y_pred_train)
conf_test = confusion_matrix(y_test, y_pred_test)

# Row-normalised versions: each row is divided by its number of actual samples.
conf_train_norm = conf_train.astype('float') / conf_train.sum(axis=1, keepdims=True)
conf_test_norm = conf_test.astype('float') / conf_test.sum(axis=1, keepdims=True)

# Top row: counts (integer annotations); bottom row: normalised values.
# Tick labels are left at seaborn's defaults, i.e. matrix positions.
heatmap_panels = (
    (conf_train, {"fmt": "d"}, "Confusion Matrix for training set"),
    (conf_test, {"fmt": "d"}, "Confusion Matrix for test set"),
    (conf_train_norm, {}, "Normalized Confusion Matrix for training set"),
    (conf_test_norm, {}, "Normalized Confusion Matrix for test set"),
)

fg, heatmap_axes = plt.subplots(2, 2, figsize=(12, 10))
for axis, (matrix, annotation_kwargs, caption) in zip(heatmap_axes.flat, heatmap_panels):
    sns.heatmap(matrix, annot=True, ax=axis, **annotation_kwargs)
    axis.set(xlabel="predicted label", ylabel="actual label", title=caption)