# Imports

In [None]:
import pandas as pd
import numpy as np
from sliding_window import SlidingWindow

# Globals

In [None]:
# Load the gzip-compressed CSV, discard the 'Unnamed: 0' column and index the
# frame by its 'time' column.
df = (
    pd.read_csv('full_data.gz', compression='gzip')
    .drop(columns=['Unnamed: 0'])
    .set_index('time')
)

In [None]:
# Configuration handed to SlidingWindow by the cells below, plus the mapping
# from action codes to integer class labels.
num_experiments, num_participants = 16, 24
exclude = 10
analytic_functions_list = ["mean", "sum", "median", "min", "max", "std"]
labels_dict = {
    "wlk": 0,
    "sit": 1,
    "std": 2,
    "ups": 3,
    "jog": 4,
    "dws": 5,
}

# Helper Functions

In [None]:
def data_allocation(df, *, train_frac=0.7, vald_frac=0.1, labels=None, random_state=None):
    """Shuffle *df* and split it into training, validation and test sets.

    The ``"action"`` column is separated out as the target and its values are
    replaced according to the label mapping; every other column is a feature.

    Args:
        df: DataFrame containing an ``"action"`` column.
        train_frac: Fraction of rows used for training (default 70%).
        vald_frac: Fraction of rows used for validation (default 10%).
            The remaining rows form the test set (20% with the defaults).
        labels: Mapping from action value to integer class label. When
            ``None``, the notebook-level ``labels_dict`` is used.
        random_state: Seed forwarded to ``DataFrame.sample`` so that the
            shuffle (and therefore the split) can be reproduced. ``None``
            keeps the original non-deterministic shuffling.

    Returns:
        Tuple ``(X_train, y_train, X_vald, y_vald, X_test, y_test)``.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    if train_frac < 0 or vald_frac < 0 or train_frac + vald_frac > 1:
        raise ValueError(
            "train_frac and vald_frac must be non-negative and sum to at most 1"
        )
    if labels is None:
        # Fall back to the notebook-wide encoding to stay backward compatible.
        labels = labels_dict

    # Define X, y on a shuffled copy with a fresh 0..n-1 index.
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    X = shuffled.drop(columns=["action"])
    y = shuffled["action"].replace(labels)

    # Row boundaries of the three consecutive partitions.
    num_training = int(len(shuffled) * train_frac)
    num_validation = int(len(shuffled) * vald_frac)
    vald_end = num_training + num_validation

    X_train, y_train = X.iloc[:num_training], y.iloc[:num_training]
    X_vald, y_vald = X.iloc[num_training:vald_end], y.iloc[num_training:vald_end]
    X_test, y_test = X.iloc[vald_end:], y.iloc[vald_end:]

    return X_train, y_train, X_vald, y_vald, X_test, y_test

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

def create_classes(labels_dict):
    """Return the label names of *labels_dict* ordered by their index value."""
    # Iterating a dict yields its keys; sort them by the index each maps to.
    return sorted(labels_dict, key=labels_dict.get)

def evaluate_results(y_true, y_pred, classes):
    """Print a banner followed by the classification report.

    *classes* is forwarded to ``classification_report`` as ``target_names``.
    """
    print("---- Printing classification report ----")
    report = classification_report(y_true, y_pred, target_names=classes)
    print(report)

# Model Evaluation

## Prepare Dataframe to Classify

In [None]:
# Build the sliding-window frame from the raw data. The second argument (10)
# is presumably the window size, since the window-size experiment varies that
# same position — TODO confirm against SlidingWindow.
window = SlidingWindow(df, 10, num_experiments, num_participants, exclude, analytic_functions_list)
sld_df = window.df
# Shuffle and split into training / validation / test partitions.
X_train, y_train, X_vald, y_vald, X_test, y_test = data_allocation(sld_df)
# Class names ordered by integer label, used as headers in the reports.
classes_names = create_classes(labels_dict)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with only n_jobs and verbose set; all other hyperparameters
# are left at the library defaults. Trained on the training partition.
rf = RandomForestClassifier(n_jobs=-1, verbose=1)
rf.fit(X_train, y_train)

### Evaluate Results

In [None]:
# Score the random forest on the validation partition and print the report.
prediction = rf.predict(X_vald)
evaluate_results(y_vald, prediction, classes_names)

In [None]:
# Score the same random forest on the held-out test partition.
prediction_test = rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

## Logistic Regression Classifier

In [None]:
from sklearn.linear_model import LogisticRegression
# Multinomial logistic regression using the lbfgs solver, capped at 300
# iterations, trained on the training partition.
# NOTE(review): `multi_class` is reportedly deprecated in recent scikit-learn
# releases — confirm against the installed version before upgrading.
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs', verbose=1, max_iter=300)
lr.fit(X_train, y_train)

### Evaluate Results

In [None]:
# Score the logistic regression on the validation partition and print the report.
lr_prediction = lr.predict(X_vald)
evaluate_results(y_vald, lr_prediction, classes_names)

## SVM Classifier

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with all library defaults, trained on the
# training partition. No cell in this section evaluates the fitted model.
svm = SVC()
svm.fit(X_train, y_train)

## Perform the same analysis over the history data set

In [None]:
# Load the history data set from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
hist_df = pd.read_pickle("history_10_encoded.pkl")

In [None]:
# Preview the first five rows of the history data set.
hist_df.head(5)

In [None]:
# Shuffle the history rows and renumber the index from zero.
hist_df = hist_df.sample(frac=1).reset_index(drop=True)
labels_dict = {'wlk': 0, 'sit': 1, "std": 2, "ups": 3, "jog": 4, "dws": 5}
# Features are every column except "action"; the target is "action" with its
# values replaced by the integer labels.
X = hist_df.drop(columns=["action"])
y = hist_df["action"].replace(labels_dict)

In [None]:
# Partition sizes: 70% training and 10% validation; the remaining rows are
# the test set.
num_training = int(len(hist_df) * 0.7)
num_validation = int(len(hist_df) * 0.1)

# Slice features and targets positionally into three consecutive partitions.
X_train, X_vald, X_test = (
    X.iloc[:num_training],
    X.iloc[num_training:num_training + num_validation],
    X.iloc[num_training + num_validation:],
)
y_train, y_vald, y_test = (
    y.iloc[:num_training],
    y.iloc[num_training:num_training + num_validation],
    y.iloc[num_training + num_validation:],
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest for the history data set, configured the same way as `rf`
# and trained on the history training partition.
hist_rf = RandomForestClassifier(n_jobs=-1, verbose=1)
hist_rf.fit(X_train, y_train)

In [None]:
# Score the history random forest on the validation partition. This rebinds
# `prediction` and `classes_names` from the earlier cells.
prediction = hist_rf.predict(X_vald)
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, prediction, classes_names)

In [None]:
# Score the history random forest on the held-out test partition.
prediction_test = hist_rf.predict(X_test)
evaluate_results(y_test, prediction_test, classes_names)

Also try a linear model here

In [None]:
from sklearn.linear_model import LogisticRegression
# Multinomial logistic regression for the history data set. This rebinds
# `lr`, replacing the model fitted on the sliding-window data.
# NOTE(review): `multi_class` is reportedly deprecated in recent scikit-learn
# releases — confirm against the installed version before upgrading.
lr = LogisticRegression(multi_class='multinomial', solver='lbfgs', verbose=1, max_iter=300)
lr.fit(X_train, y_train)

In [None]:
# Score the history logistic regression on the validation partition and
# print the report.
lr_prediction = lr.predict(X_vald)
classes_names = create_classes(labels_dict)
evaluate_results(y_vald, lr_prediction, classes_names)

# Window Size Performance
### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fresh, unfitted forest; the following cell refits it once per window size.
rf = RandomForestClassifier(n_jobs=-1, verbose=1)

In [None]:
# Window sizes to compare: the even values 2, 4, ..., 18.
sizes = list(range(2, 20, 2))
for size in sizes:
    # Rebuild the sliding-window frame for this size and draw a fresh
    # shuffled train / validation / test split from it.
    window = SlidingWindow(df, size, num_experiments, num_participants, exclude, analytic_functions_list)
    sld_df = window.df
    X_train, y_train, X_vald, y_vald, X_test, y_test = data_allocation(sld_df)
    rf.fit(X_train, y_train)

    # Label each report with its window size; otherwise the consecutive
    # reports are indistinguishable in the cell output.
    print("==== Window size: {} ====".format(size))
    prediction = rf.predict(X_vald)
    evaluate_results(y_vald, prediction, classes_names)