In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the transformed WISDM data set; "?" entries are parsed as missing values.
df = pd.read_csv(
    "data/wisdm/WISDM_ar_v1.1_transformed.csv",
    na_values="?",
)

# Discard the 'UNIQUE_ID' column together with 'YAVG' and 'ZAVG'
# (the columns noted as containing missing values).
df = df.drop(columns=['UNIQUE_ID', 'YAVG', 'ZAVG'])

# Show how many distinct users appear in the data.
n_users = np.unique(df["user"].values).size
print(n_users)

# Preview the first rows of the remaining columns.
print(df.head())

### Perform traditional k-fold cross validation (mixed model).

In [None]:
from sklearn.model_selection import StratifiedKFold

# Mixed model: folds are built from the class labels only, so rows of the
# same user may end up in both the training and the test part of a split.
X = df.drop(columns=['class','user']).values
y = df["class"].values

# Shuffle before splitting. With the default shuffle=False the folds follow
# the row order of the file; if rows are grouped by user (TODO confirm
# against the CSV) the folds would be partly separated by user and would no
# longer represent a mixed model. The fixed seed keeps the folds repeatable.
kf = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 123)

# One accuracy value per fold.
accuracies = []

for train_indx, test_indx in kf.split(X, y):

    X_train = X[train_indx]
    y_train = y[train_indx]
    X_test = X[test_indx]
    y_test = y[test_indx]

    # A new forest is trained for every fold; the seed makes it repeatable.
    rf = RandomForestClassifier(random_state = 123)
    rf.fit(X_train, y_train)
    predictions = rf.predict(X_test)
    accuracies.append(accuracy_score(y_test, predictions))

# Average accuracy over the 10 folds.
print(np.mean(accuracies))

### Perform leave-one-user-out validation (user-independent model)

In [None]:
from sklearn.model_selection import LeaveOneGroupOut

# User-independent model: each split holds out every row of one user for
# testing and trains on the rows of all remaining users.
users = df["user"].values
y = df["class"].values
X = df.drop(columns=['class', 'user']).values

valscheme = LeaveOneGroupOut()

# One accuracy value per held-out user.
accuracies = []

for train_idx, test_idx in valscheme.split(X, y, groups=users):
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # A new forest is trained for every split; the seed makes it repeatable.
    rf = RandomForestClassifier(random_state=123)
    rf.fit(X_train, y_train)

    predictions = rf.predict(X_test)
    accuracies.append(accuracy_score(y_test, predictions))

# Unweighted mean of the per-user accuracies.
print(np.mean(accuracies))