In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy import interpolate, stats
from sklearn import preprocessing
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import load_model
sns.set()

In [None]:
def read_data_test(file_path, user_id):
    """
    Load an unlabelled accelerometer recording used for testing.

    The file's own header row is discarded and the five columns are renamed
    to 'Time', 'timestamp', 'x-axis', 'y-axis' and 'z-axis'. Two extra
    columns are appended: 'activity' (all None, since the recording is not
    labelled yet) and 'user_id'. Fully duplicated rows are dropped and the
    index is renumbered from 0.

    Args:
        file_path: path, URL or buffer accepted by ``pandas.read_csv``
        user_id: value written to the 'user_id' column of every row
    Returns:
        A pandas dataframe
    Raises:
        ValueError: if the file does not have exactly five columns
    """
    column_names = ['Time', 'timestamp', 'x-axis', 'y-axis', 'z-axis']

    recording = pd.read_csv(file_path, header=0)
    recording.columns = column_names
    recording = recording.assign(activity=None, user_id=user_id)

    # Some rows are duplicates, remove them
    return recording.drop_duplicates(ignore_index=True)

In [None]:
# Load the raw recording (user id 0) and shift timestamps so it starts at 0.
df = read_data_test('../data/Accelerometer_mathis.csv', 0)
df['timestamp'] = df['timestamp'] - df['timestamp'].min()

# Regular time grid with a step of 50 (milliseconds, i.e. 20 samples per
# second, which is what the segmentation step further down assumes).
t = np.arange(0, df['timestamp'].max(), 50)
clean_df = pd.DataFrame({'timestamp': t})

# Linearly interpolate each acceleration axis onto the regular grid.
for axis in ['x-axis', 'y-axis', 'z-axis']:
    clean_df[axis] = interpolate.interp1d(df['timestamp'], df[axis])(t)
clean_df

In [None]:
# Preview the first rows of the resampled (regular-grid) signal.
clean_df.head()

In [None]:
def plot_axis(ax, x, y, title):
    """
    Draw one signal on the given matplotlib axes.

    The y-range is padded by one standard deviation of ``y`` on each side,
    the x-range is clamped to the data, and the grid is switched on.

    Args:
        ax: matplotlib axes to draw on
        x: values for the horizontal axis
        y: values for the vertical axis
        title: title shown above the axes
    """
    ax.plot(x, y)
    ax.set_title(title)

    # Pad the vertical range by one standard deviation above and below.
    padding = np.std(y)
    ax.set_ylim([min(y) - padding, max(y) + padding])
    ax.set_xlim([min(x), max(x)])
    ax.grid(True)


def plot_data(data):
    """
    Show the x-axis acceleration of ``data`` over its 'timestamp' column.

    Only the x-axis panel is drawn; the y- and z-axis panels are currently
    disabled.

    Args:
        data: DataFrame with at least 'timestamp' and 'x-axis' columns
    """
    _figure, x_panel = plt.subplots(nrows=1, figsize=(15, 3), sharex=True)
    plot_axis(x_panel, data['timestamp'], data['x-axis'], 'x-axis')
    plt.subplots_adjust(hspace=0.2, top=0.90)
    plt.show()

In [None]:
# Plot the same 30000-50000 timestamp window of the resampled signal and of
# the raw signal, so the two can be compared visually.
plot_data(clean_df.loc[clean_df['timestamp'].gt(30000) &
                       clean_df['timestamp'].lt(50000)])
plot_data(df.loc[df['timestamp'].gt(30000) & df['timestamp'].lt(50000)])

# Last timestamp of the raw recording vs. last point of the regular grid.
print(df['timestamp'].max(), clean_df['timestamp'].max())

In [None]:
# Load the labelled activity intervals and convert their 'start' / 'end'
# wall-clock times into whole milliseconds elapsed since the first 'Time'
# entry of the recording.
# NOTE(review): the signal timestamps were re-based on the minimum
# 'timestamp', while t0 is taken from the first 'Time' value parsed at
# one-second resolution - presumably these refer to the same instant; confirm.
label_time = pd.read_csv('../data/intervals.txt')
t0 = pd.to_datetime(df['Time'].iloc[0], format='%Y-%m-%d %H:%M:%S')

dt_start = pd.to_datetime(label_time['start'], format='%Y-%m-%d %H:%M:%S')
dt_end = pd.to_datetime(label_time['end'], format='%Y-%m-%d %H:%M:%S')

label_time['start'] = (
    (dt_start - t0).dt.total_seconds() * 1e3).round().astype('int64')
label_time['end'] = (
    (dt_end - t0).dt.total_seconds() * 1e3).round().astype('int64')
label_time

In [None]:
# Start with every sample unlabelled, then stamp each labelled interval
# (bounds inclusive) with its activity name.
clean_df.loc[:, 'activity'] = None
for i, entry in label_time.iterrows():
    clean_df.loc[clean_df['timestamp'].between(entry['start'], entry['end']),
                 'activity'] = entry['activity']

# Rows with any missing value are dropped, which removes the samples that
# fall outside every labelled interval.
clean_df = clean_df.dropna()
clean_df

In [None]:
# Encode the activity names as integers in a new column and keep the class
# names (ordered by encoded value) for later reporting.
# NOTE(review): the encoder is fitted on this recording only; the integer
# codes match the ones the model was trained with only if the same set of
# activity names is present here - confirm against the training notebook.
LABEL = 'ActivityEncoded'
le = preprocessing.LabelEncoder().fit(clean_df['activity'].to_numpy())
clean_df[LABEL] = le.transform(clean_df['activity'].to_numpy())
LABELS = le.classes_.astype(str)

In [None]:
# Class names in encoded order (position i is the name of encoded label i).
print(LABELS)

In [None]:
def create_segments_and_labels(clean_df, df,
                               time_interval, time_step,
                               label_name, sampling_rate=20):
    """
    Cut the resampled signal into fixed-length sliding windows and label them.

    A window starting at ``t`` covers timestamps in
    ``[t, t + 1000 * time_interval)``. A window is kept only if

    * the raw recording ``df`` contains more than half of the expected number
      of samples in that span (so spans that are mostly interpolated are
      skipped), and
    * ``clean_df`` contains exactly the expected number of samples in it.

    The label of a window is the most frequent value of ``label_name`` inside
    the window; ties resolve to the smallest value.

    Args:
        clean_df: Cleaned up DataFrame with 'timestamp' (milliseconds),
            'x-axis', 'y-axis', 'z-axis' and ``label_name`` columns
        df: original DataFrame; only its 'timestamp' column is used
        time_interval: Size of the sliding window in seconds
        time_step: step to advance for each iteration in seconds
        label_name: name of the column containing the encoded labels
        sampling_rate: samples per second of ``clean_df`` (default 20, the
            value that used to be hard-coded)
    Returns:
        reshaped_segments: float32 array of shape (n_windows, n_samples, 3)
        labels: array with one label per window
    """

    # x, y, z acceleration as features
    N_FEATURES = 3
    ts = 'timestamp'
    t_interval_ms = 1000 * time_interval
    t_step_ms = 1000 * time_step
    n_samples = int(round(sampling_rate * time_interval))

    # Nothing to segment: return correctly shaped empty outputs.
    if clean_df.empty:
        return (np.empty((0, n_samples, N_FEATURES), dtype=np.float32),
                np.asarray([]))

    # Pull the columns out once instead of re-indexing the frames per window.
    raw_ts = df[ts].to_numpy()
    clean_ts = clean_df[ts].to_numpy()
    features = clean_df[['x-axis', 'y-axis', 'z-axis']].to_numpy()
    label_values = clean_df[label_name].to_numpy()

    segments = []
    labels = []
    for t in np.arange(clean_df[ts].min(), clean_df[ts].max(), t_step_ms):
        window_end = t + t_interval_ms

        # Skip windows that are backed by too few raw measurements.
        n_raw = np.count_nonzero((raw_ts >= t) & (raw_ts < window_end))
        if n_raw <= 0.5 * n_samples:
            continue

        # Skip incomplete windows (gaps left by unlabelled rows, or the tail).
        in_window = (clean_ts >= t) & (clean_ts < window_end)
        if np.count_nonzero(in_window) != n_samples:
            continue

        # Most frequent label in the window. np.unique returns sorted values
        # and argmax picks the first maximum, so ties go to the smallest
        # label. This replaces stats.mode(...)[0][0], which raises IndexError
        # on SciPy versions that return scalars for 1-D input; that error was
        # silently swallowed, discarding every window.
        values, counts = np.unique(label_values[in_window],
                                   return_counts=True)
        labels.append(values[np.argmax(counts)])

        # Stored as rows [xs, ys, zs], i.e. shape (3, n_samples).
        segments.append(features[in_window].T)

    # Bring the segments into the (n_windows, n_samples, N_FEATURES) shape.
    # NOTE(review): this is a plain reshape of (n_windows, 3, n_samples)
    # data, not a transpose, so a row of the result is not one (x, y, z)
    # sample. The layout is kept unchanged on purpose because the saved
    # model was presumably trained on segments built the same way - confirm
    # before changing it.
    reshaped_segments = np.asarray(segments,
                                   dtype=np.float32).reshape(-1,
                                                             n_samples,
                                                             N_FEATURES)
    labels = np.asarray(labels)

    return reshaped_segments, labels

In [None]:
TIME_INTERVAL = 4  # sliding-window length, in seconds
TIME_STEP = 2      # distance between consecutive window starts, in seconds

# Round the three acceleration columns to 6 decimals. The z key used to be
# ' z-axis' (leading space); DataFrame.round ignores keys that are not
# columns, so the z-axis was silently left unrounded.
clean_df = clean_df.round({'x-axis': 6, 'y-axis': 6, 'z-axis': 6})

# Window the labelled signal; `df` (raw recording) is used to skip windows
# that are backed by too few real measurements.
x_test, y_test = create_segments_and_labels(clean_df,
                                            df,
                                            TIME_INTERVAL,
                                            TIME_STEP,
                                            LABEL
                                            )

In [None]:
# Sanity check: (number of windows, samples per window, number of features).
x_test.shape

In [None]:
model_m = load_model('../models/cnn/saved')

# One-hot encode the integer labels with as many columns as the model has
# outputs. pd.get_dummies(y_test) only creates columns for the classes that
# happen to occur in y_test, so its width could differ from the model's
# output width; when every class is present both encodings are identical.
# NOTE(review): this still assumes the integer codes here mean the same
# classes as during training - confirm against the training notebook.
n_classes = model_m.output_shape[-1]
y_test_neural = np.eye(n_classes, dtype=np.float32)[
    np.asarray(y_test, dtype=int)]
score = model_m.evaluate(x_test, y_test_neural, verbose=1)

# score[0] is the loss; score[1] is presumably accuracy (the first compiled
# metric) - verify against how the model was compiled.
print(f"\nAccuracy on test data: {score[1]*100:.1f}%")
print(f"\nLoss on test data: {score[0]:.2f}")

In [None]:
y_pred_test = model_m.predict(x_test)
# Take the class with the highest probability from the test predictions
max_y_pred_test = np.argmax(y_pred_test, axis=1)

# Pin the label set so the matrix always has one row/column per entry of
# LABELS, even when a class is missing from both y_test and the predictions
# (otherwise the DataFrame construction fails on a shape mismatch).
class_ids = np.arange(len(LABELS))
cf_matrix = pd.DataFrame(
    confusion_matrix(y_test, max_y_pred_test, labels=class_ids),
    columns=LABELS, index=LABELS)

# Normalise each column (predicted class) by its total. The axis is explicit
# because np.sum(DataFrame) relies on an implicit-axis behaviour that is not
# stable across pandas versions. A class that is never predicted has a zero
# column total and shows up as NaN.
sns.heatmap(cf_matrix / cf_matrix.sum(axis=0),
            annot=True, fmt='.2%', cmap='Greens')

# Report per-class metrics with readable class names instead of integers.
print(classification_report(y_test, max_y_pred_test,
                            labels=class_ids, target_names=LABELS))