In [None]:
# This is activities classification of accelerometer data recorded at left ankle using CNN
# Author: Richard Gupta(Dinesh Kumar Gupta)

import os
# Print the full path of every file found under the Kaggle input directory
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)



In [None]:
# import the libraries
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split 
import sklearn
# Reached only if every import above succeeded
print("Above mentioned library has been imported")

In [None]:
# Load one accelerometer recording from CSV and preview its first rows
csv_path = "../input/adultaccelerometerdata/id00b70b13.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Visualizing the left-ankle data using plt

# Distinct activity labels, ordered from most to least frequent
activity_counts = df["activity"].value_counts()
activities = activity_counts.index

sample_frequency = 100

def plot_activity(activity, df):
    """Show the la_x, la_y and la_z columns of ``df`` against ``time_s``.

    Three stacked panels sharing one x-axis are drawn, one per column, and
    ``activity`` is used as the figure title.
    """
    fig, axes = plt.subplots(nrows=3, figsize=(16,9), sharex=True)
    panels = (("la_x", "X-Axis"), ("la_y", "Y-Axis"), ("la_z", "Z-Axis"))
    timestamps = df["time_s"]
    for axis, (column, label) in zip(axes, panels):
        plot_axis(axis, timestamps, df[column], label)
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    # Leave room above the top panel for the figure title
    plt.subplots_adjust(top=0.9)
    plt.show()

def plot_axis(ax, la_x, la_y, title):
    """Draw ``la_y`` against ``la_x`` on ``ax`` as a green line.

    The x-axis is hidden, the x-limits span the data exactly, and the
    y-limits are padded by one standard deviation of ``la_y`` on each side.
    """
    ax.plot(la_x, la_y, "g")
    ax.set_title(title)
    ax.xaxis.set_visible(False)

    margin = np.std(la_y)
    lower, upper = min(la_y) - margin, max(la_y) + margin
    ax.set_ylim([lower, upper])

    ax.set_xlim([min(la_x), max(la_x)])
    ax.grid(True)

# Plot at most the first sample_frequency * 300 rows of every activity
rows_per_plot = sample_frequency * 300
for activity in activities:
    data_plot = df.loc[df["activity"] == activity].iloc[:rows_per_plot]
    plot_activity(activity, data_plot)

In [None]:
# Keep the left-ankle (la_*) columns and the activity label; drop the
# timestamp and the lw_*, lh_* and ra_* sensor columns
other_sensor_columns = [
    f"{site}_{axis}" for site in ("lw", "lh", "ra") for axis in "xyz"
]
data = df.drop(columns=["time_s", *other_sensor_columns])
data.head()

In [None]:
# Look at how many rows fall under each activity label
data["activity"].value_counts()

In [None]:
# Remove the rows whose activity label is 77 or 99, then recount the labels
excluded_labels = [77, 99]
keep_rows = ~data['activity'].isin(excluded_labels)
data1 = data[keep_rows]
data1["activity"].value_counts()

In [None]:
# Cap every activity at its first 9125 rows so no label dominates
rows_per_activity = 9125
data2 = data1.groupby("activity").head(rows_per_activity)
data2["activity"].value_counts()

In [None]:
# Encoding the label using LabelEncoder
from sklearn.preprocessing import LabelEncoder
tags = LabelEncoder()
# data2 was obtained by slicing data1, so writing a new column into it in
# place makes pandas emit SettingWithCopyWarning. `.assign` returns a fresh
# frame instead, and re-running this cell simply overwrites the column.
data2 = data2.assign(tags=tags.fit_transform(data2["activity"]))
data2.head()

In [None]:
# Inputs are the three la_* columns; targets are the encoded tags
feature_columns = ['la_x', 'la_y', 'la_z']
X, y = data2[feature_columns], data2["tags"]

In [None]:
# Standardize each la_* column, then re-attach the tags
# NOTE(review): the scaler is fit on every row before the train/test split
# made later in the notebook, so test rows influence the scaling statistics.
from sklearn.preprocessing import StandardScaler, LabelEncoder

axis_columns = ['la_x', 'la_y', 'la_z']
scaler = StandardScaler()
X = scaler.fit_transform(X)

scaled_X = pd.DataFrame(data=X, columns=axis_columns).assign(tags=y.values)

scaled_X

In [None]:
# Creating a get_frames function
import scipy.stats as stats

# Frame geometry expressed in rows: each frame spans 9 * Fs rows and
# consecutive frames start 2 * Fs rows apart
Fs = 100
frame_size, hop_size = 9 * Fs, 2 * Fs

def get_frames(data1, frame_size, hop_size):
    """
    Cut a table of consecutive samples into fixed-length frames.

    Parameters
    ----------
    data1 : pandas.DataFrame
        Must contain the columns "la_x", "la_y", "la_z" and "tags".
    frame_size : int
        Number of consecutive rows in each frame.
    hop_size : int
        Number of rows between the starts of two consecutive frames.

    Returns
    -------
    features : numpy.ndarray, shape (n_frames, frame_size, 3)
        features[k, t] is the (la_x, la_y, la_z) triple of row
        k * hop_size + t.
    tags_ : numpy.ndarray, shape (n_frames,)
        The most frequent "tags" value inside each frame; on a tie the
        smallest value wins.
    """
    N_FEATURES = 3

    # Work on plain arrays so slicing is purely positional.
    samples = data1[["la_x", "la_y", "la_z"]].to_numpy()
    labels = data1["tags"].to_numpy()

    features = []
    tags_ = []
    for start in range(0, len(data1) - frame_size + 1, hop_size):
        stop = start + frame_size

        # Each frame is already laid out as (frame_size, 3). The previous
        # code collected (3, frame_size) blocks and then *reshaped* them to
        # (frame_size, 3), which interleaves the axes instead of transposing.
        features.append(samples[start:stop])

        # Majority label of the frame. np.unique returns sorted values, so
        # argmax picks the smallest label on ties; this also avoids indexing
        # the result of scipy.stats.mode, whose shape differs across versions.
        values, counts = np.unique(labels[start:stop], return_counts=True)
        tags_.append(values[np.argmax(counts)])

    # The reshape only matters when no frame fits: it gives the empty result
    # the expected (0, frame_size, 3) shape.
    features = np.asarray(features).reshape(-1, frame_size, N_FEATURES)
    tags_ = np.asarray(tags_)

    return features, tags_

In [None]:
# Cut the scaled samples into frames: X receives the frames, y the per-frame tags
X, y = get_frames(scaled_X, frame_size, hop_size)

In [None]:
# Examine the dimensions of the framed X and y arrays
X.shape, y.shape

In [None]:
# Hold out 20% of the frames for testing, keeping the tag proportions equal
# in both parts (stratify) and the shuffle reproducible (random_state)
# NOTE(review): frames are 9 * Fs rows long but start only 2 * Fs rows apart,
# so neighbouring frames share rows; a random split can place overlapping
# frames on both sides of the train/test boundary.
from sklearn.model_selection import train_test_split

split = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)
X_train, X_test, y_train, y_test = split

In [None]:
# Check the dimensions of the train and test arrays
X_train.shape, X_test.shape

In [None]:
# Examine the dimensions of a single train frame and a single test frame
X_train[0].shape, X_test[0].shape

In [None]:
# Append a trailing axis of size 1 so every frame becomes (rows, 3, 1);
# the arrays are 4-D afterwards
X_train = X_train.reshape(*X_train.shape[:2], 3, 1)
X_test = X_test.reshape(*X_test.shape[:2], 3, 1)

In [None]:
# See the per-frame shape of X_train and X_test after the reshape
X_train[0].shape, X_test[0].shape

In [None]:
# import the important libraries
import tensorflow as tf 
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D 
from tensorflow.keras.optimizers import Adam
# Record the TensorFlow version the notebook ran with
print(tf.__version__)

In [None]:
# Creating a CNN model
# The output layer was hard-coded to 6 units although only four activity
# names are used when the results are plotted. Size it from the fitted
# LabelEncoder so the softmax has exactly one unit per encoded class.
n_classes = len(tags.classes_)

model = Sequential()
model.add(Conv2D(16, (2, 2), activation="relu", input_shape = X_train[0].shape))
model.add(Dropout(0.1))

model.add(Conv2D(32, (2, 2), activation="relu"))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))

model.add(Dense(n_classes, activation="softmax"))

In [None]:
# Compile the CNN: Adam optimizer, sparse categorical cross-entropy loss,
# accuracy as the reported metric
adam = Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 32 epochs and evaluate on (X_test, y_test) after every epoch
# NOTE(review): the test split doubles as validation data here, so the
# validation curves are not an independent check of the final test scores.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=32,
    validation_data=(X_test, y_test),
    verbose=1,
)


In [None]:
# A function for plot learning curve
def plot_learning_curve(history, epochs=None):
    """Plot training and validation accuracy, then training and validation loss.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys "accuracy",
        "val_accuracy", "loss" and "val_loss", each a per-epoch sequence.
    epochs : int, optional
        Number of epochs to place on the x-axis. When omitted, it is taken
        from the length of the recorded "loss" series, so the caller does not
        have to repeat the value passed to training.
    """
    if epochs is None:
        epochs = len(history.history["loss"])
    epoch_range = range(1, epochs + 1)

    # (training key, validation key, figure title, y-axis label) per figure
    panels = (
        ("accuracy", "val_accuracy", "left-ankle_frame_size_9_Model accuracy", "Accuracy"),
        ("loss", "val_loss", "left-ankle_frame_size_9_Model loss", "Loss"),
    )
    for train_key, val_key, title, ylabel in panels:
        plt.plot(epoch_range, history.history[train_key])
        plt.plot(epoch_range, history.history[val_key])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel("Epoch")
        plt.legend(["Train", "validation"], loc="upper left")
        plt.show()

In [None]:
# Display the graph of model accuracy and model loss.
# The epoch count is read from the recorded history instead of repeating the
# literal given to model.fit, so the x-axis always matches the curves.
plot_learning_curve(history, len(history.history["loss"]))

In [None]:
# importing libraries and modules
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Reached only if the plotting and metrics imports above succeeded
print("imported")

In [None]:
# Predicted class of each test frame = index of its largest model output
class_scores = model.predict(X_test)
y_pred = class_scores.argmax(axis=-1)

In [None]:
# Plot the confusion matrix (counts plus row-normalised values) and print it
# NOTE(review): the names below are assumed to follow the order of the
# encoded labels — confirm against tags.classes_.
class_name = ["walking", "descending_stairs","ascending_stairs", "driving"]
confusion_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
plot_confusion_matrix(
    conf_mat=confusion_mat,
    show_normed=True,
    class_names=class_name,
    figsize=(7,7),
)
print(confusion_mat)

In [None]:
# computing classification report (Precision, Recall, f1 score) and accuracy score
from sklearn.metrics import classification_report, accuracy_score

# y_test and y_pred hold LabelEncoder codes (0 .. k-1), while tags.classes_
# holds the original activity ids. Passing the ids as `labels` made the
# report look up the wrong classes: code 0 was omitted and the largest id
# had no support. Pass the codes as `labels` and use the ids as display names.
class_name = tags.classes_
encoded_labels = np.arange(len(class_name))

cr = classification_report(
    y_test,
    y_pred,
    labels=encoded_labels,
    target_names=[str(name) for name in class_name],
)
accs = accuracy_score(y_test, y_pred)
print(cr)
print(f"Accuracy score of left ankle frame size 9 is : {accs}")