In [None]:
%%bash
# Download the AReM archive from the UCI repository and unpack it into
# ../datasets/arem. Written so the cell can be re-run safely.

# Stop at the first failing command so a failed cd/wget cannot lead to
# deleting or unpacking files in the wrong place.
set -e

mkdir -p ../datasets/arem
cd ../datasets/arem

# -f: do not fail when there is no previous download (first run).
rm -f AReM.zip
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00366/AReM.zip

# -o: overwrite files from an earlier extraction instead of prompting,
# since the cell has no interactive stdin to answer the prompt.
unzip -o AReM.zip

In [None]:
import os
from random import Random
import pandas as pd

In [None]:
# Directory the download cell unpacks AReM.zip into; the path is relative to
# the notebook's working directory. Each activity lives in its own sub-folder.
dataset_dir = '../datasets/arem/'

def get_activity_files(activity):
    """Return the paths of every entry in one activity folder, sorted.

    `activity` is the name of a sub-directory of `dataset_dir`. Each returned
    path is that directory joined with one of its entries, in ascending
    (lexicographic) order.
    """
    folder = os.path.join(dataset_dir, activity)
    paths = [os.path.join(folder, entry) for entry in os.listdir(folder)]
    # All paths share the same folder prefix, so sorting the full paths gives
    # the same order as sorting the bare entry names.
    paths.sort()
    return paths

# Activities whose recordings are included in the combined dataset.
target_activities = ['cycling', 'lying', 'sitting', 'standing', 'walking']

# One (activity, file path) pair per recording, grouped by activity.
all_activity_files = [
    (activity, path)
    for activity in target_activities
    for path in get_activity_files(activity)
]

# Shuffle a separate list with a fixed seed so the order is reproducible and
# the grouped list above stays untouched.
shuffled_activity_files = list(all_activity_files)
Random(1).shuffle(shuffled_activity_files)

In [None]:
def load_df(activity, file):
    """Read one recording CSV and label every row with its activity.

    Lines starting with '#' are skipped as comments. The seven columns are
    named explicitly, the 'time' column is discarded, and an 'activity'
    column holding `activity` is added as the last column.
    """
    column_names = [
        'time',
        'avg_rss12', 'var_rss12',
        'avg_rss13', 'var_rss13',
        'avg_rss23', 'var_rss23',
    ]
    readings = pd.read_csv(file, names=column_names, comment='#')
    readings = readings.drop(columns=['time'])
    readings['activity'] = activity
    return readings

# One labelled DataFrame per recording, in the shuffled order. Each entry of
# shuffled_activity_files is an (activity, file) pair, unpacked into load_df.
case_dfs = [load_df(*entry) for entry in shuffled_activity_files]

In [None]:
def random_interleave_dfs(dfs, seed=1):
    """Merge several DataFrames by repeatedly taking the next row of a random one.

    At each step one of the frames that still has unread rows is chosen with
    `Random(seed).choice`, and its next row (in its own order) is emitted, so
    the relative order of rows within each input frame is preserved. The
    sequence of random choices is the same as in the earlier row-by-row
    implementation, so a given seed produces the same interleaving.

    Parameters:
        dfs: list of DataFrames, expected to share the same columns.
        seed: seed for the random generator that picks the next frame.

    Returns:
        None if `dfs` is empty. Otherwise a new DataFrame with a fresh
        0..n-1 index containing every row of every input. If all inputs are
        empty, an empty DataFrame with the columns of `dfs[0]`. The inputs
        are not modified and their column dtypes are kept.
    """
    rand = Random(seed)
    if not dfs:
        return None

    frames = [df for df in dfs if not df.empty]
    if not frames:
        # Nothing to interleave: an empty frame with the first input's columns.
        return pd.DataFrame(columns=dfs[0].columns)

    # One [frame number, next unread row] cursor per frame that still has rows.
    # Only the pick order is recorded here; the rows are gathered in a single
    # step afterwards. DataFrame.append was removed in pandas 2.0, and growing
    # a frame row by row is quadratic.
    cursors = [[number, 0] for number in range(len(frames))]
    picks = []
    while cursors:
        cursor = rand.choice(cursors)
        picks.append((cursor[0], cursor[1]))
        cursor[1] += 1
        if cursor[1] >= len(frames[cursor[0]]):
            # Drop the exhausted frame while keeping the others in order, so
            # later random choices map to the same frames as before.
            cursors = [other for other in cursors if other is not cursor]

    # Offset of each frame's first row inside the stacked frame.
    starts = []
    total = 0
    for frame in frames:
        starts.append(total)
        total += len(frame)

    stacked = pd.concat(frames, ignore_index=True)
    positions = [starts[number] + row for number, row in picks]
    return stacked.iloc[positions].reset_index(drop=True)
# Interleave the rows of all loaded recordings into one frame, using the
# function's default seed so the result is reproducible.
all_cases_interleaved_df = random_interleave_dfs(case_dfs)

In [None]:
# Write the interleaved dataset next to the raw data, without the index
# column and with missing values spelled out as 'NaN'.
all_cases_interleaved_df.to_csv(
    os.path.join(dataset_dir, 'arem_all_interleaved.csv'),
    na_rep='NaN',
    index=False,
)