In [19]:
import pandas as pd
from os.path import join
from shutil import copyfile
import os

In [20]:
# Fill in these paths appropriately.
#
# All input locations are relative paths below a common "data" directory:
# one labels directory and one measurements directory for each cohort
# (CIS-PD, REAL-PD) and each split (train, test).
_DATA_ROOT = "data"
_TEST_SET_ROOT = join(_DATA_ROOT, "test_set")

PATH_TO_CISPD_TRAIN_LABELS_DIR = join(_DATA_ROOT, "cis-pd", "data_labels")
PATH_TO_CISPD_TRAIN_MEASUREMENTS_DIR = join(_DATA_ROOT, "cis-pd", "training_data")
PATH_TO_CISPD_TEST_LABELS_DIR = join(_TEST_SET_ROOT, "cis-pd")
# The test measurements live in a sub-folder of the test labels directory.
PATH_TO_CISPD_TEST_MEASUREMENTS_DIR = join(PATH_TO_CISPD_TEST_LABELS_DIR, "testing_data")

PATH_TO_REALPD_TRAIN_LABELS_DIR = join(_DATA_ROOT, "real-pd", "data_labels")
PATH_TO_REALPD_TRAIN_MEASUREMENTS_DIR = join(_DATA_ROOT, "real-pd", "training_data")
PATH_TO_REALPD_TEST_LABELS_DIR = join(_TEST_SET_ROOT, "real-pd")
PATH_TO_REALPD_TEST_MEASUREMENTS_DIR = join(PATH_TO_REALPD_TEST_LABELS_DIR, "testing_data")

# Root of the project that receives the copied files and the manifests.
PATH_TO_BEAT_PD_DIR = join("..", "beat_pd")

In [21]:
# Start from three independent, empty frames; the cells below extend the two
# manifests (one row per copied measurement file) and the training labels.
train_manifest_df, test_manifest_df, train_labels_df = (
    pd.DataFrame() for _ in range(3)
)

In [22]:
# Copy every CIS-PD training measurement listed in the labels file into
# <beat_pd>/data/raw/train under a descriptive file name, and add one manifest
# row per copied file plus the labels themselves to the running tables.
df = pd.read_csv(join(PATH_TO_CISPD_TRAIN_LABELS_DIR, "CIS-PD_Training_Data_IDs_Labels.csv"))
df["subject_id"] = df["subject_id"].astype(str)

cohort = "cispd"
device = "smartwatch"
instrument = "accelerometer"

# copyfile does not create missing directories, so make sure the target exists.
target_dir = join(PATH_TO_BEAT_PD_DIR, "data", "raw", "train")
os.makedirs(target_dir, exist_ok=True)

# Collect the rows in a list and build the frame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every call.
manifest_rows = []
for measurement_id, subject_id in zip(df["measurement_id"], df["subject_id"]):
    source_file_path = join(PATH_TO_CISPD_TRAIN_MEASUREMENTS_DIR, measurement_id + ".csv")
    m_file = f"{cohort}_{device}_{instrument}_{subject_id}_{measurement_id}.csv"
    target_file_path = join(target_dir, m_file)
    copyfile(source_file_path, target_file_path)

    manifest_rows.append({
        "measurement_id": measurement_id,
        "measurement_file": m_file,
        "subject_id": subject_id,
        "cohort": cohort,
        "device": device,
        "instrument": instrument
    })

train_manifest_df = pd.concat(
    [train_manifest_df, pd.DataFrame(manifest_rows)], ignore_index=True
)
train_labels_df = pd.concat([train_labels_df, df], ignore_index=True)

In [23]:
# Copy every CIS-PD test measurement listed in the test id file into
# <beat_pd>/data/raw/test under a descriptive file name, and add one manifest
# row per copied file to the running test manifest.
df = pd.read_csv(join(PATH_TO_CISPD_TEST_LABELS_DIR, "cis-pd.CIS-PD_Test_Data_IDs.csv"))
df["subject_id"] = df["subject_id"].astype(str)

cohort = "cispd"
device = "smartwatch"
instrument = "accelerometer"

# copyfile does not create missing directories, so make sure the target exists.
target_dir = join(PATH_TO_BEAT_PD_DIR, "data", "raw", "test")
os.makedirs(target_dir, exist_ok=True)

# Collect the rows in a list and build the frame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every call.
manifest_rows = []
for measurement_id, subject_id in zip(df["measurement_id"], df["subject_id"]):
    source_file_path = join(PATH_TO_CISPD_TEST_MEASUREMENTS_DIR, measurement_id + ".csv")
    m_file = f"{cohort}_{device}_{instrument}_{subject_id}_{measurement_id}.csv"
    target_file_path = join(target_dir, m_file)
    copyfile(source_file_path, target_file_path)

    manifest_rows.append({
        "measurement_id": measurement_id,
        "measurement_file": m_file,
        "subject_id": subject_id,
        "cohort": cohort,
        "device": device,
        "instrument": instrument
    })

test_manifest_df = pd.concat(
    [test_manifest_df, pd.DataFrame(manifest_rows)], ignore_index=True
)


In [24]:
# Copy the REAL-PD training measurements of every device/instrument folder
# into <beat_pd>/data/raw/train under a descriptive file name, and add one
# manifest row per copied file plus the labels to the running tables.
# The first CSV column becomes the index and is looked up by measurement id.
df = pd.read_csv(join(PATH_TO_REALPD_TRAIN_LABELS_DIR, "REAL-PD_Training_Data_IDs_Labels.csv"), index_col=0)
df["subject_id"] = df["subject_id"].astype(str)

cohort = "realpd"

# copyfile does not create missing directories, so make sure the target exists.
target_dir = join(PATH_TO_BEAT_PD_DIR, "data", "raw", "train")
os.makedirs(target_dir, exist_ok=True)

# Collect the rows in a list and build the frame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every call.
manifest_rows = []
for d_i in ["smartphone_accelerometer", "smartwatch_accelerometer", "smartwatch_gyroscope"]:
    # Folder names follow the "<device>_<instrument>" pattern.
    device, instrument = d_i.split("_")
    source_dir = join(PATH_TO_REALPD_TRAIN_MEASUREMENTS_DIR, d_i)

    # Sorted so the manifest order does not depend on the file system.
    for filename in sorted(os.listdir(source_dir)):
        measurement_id, extension = os.path.splitext(filename)
        if extension != ".csv":
            # Skip stray entries (e.g. .DS_Store, .ipynb_checkpoints) that
            # would otherwise fail the label lookup below.
            continue

        subject_id = df.at[measurement_id, "subject_id"]

        source_file_path = join(source_dir, filename)
        m_file = f"{cohort}_{device}_{instrument}_{subject_id}_{measurement_id}.csv"
        target_file_path = join(target_dir, m_file)
        copyfile(source_file_path, target_file_path)

        manifest_rows.append({
            "measurement_id": measurement_id,
            "measurement_file": m_file,
            "subject_id": subject_id,
            "cohort": cohort,
            "device": device,
            "instrument": instrument
        })

train_manifest_df = pd.concat(
    [train_manifest_df, pd.DataFrame(manifest_rows)], ignore_index=True
)
# reset_index turns the measurement-id index back into a regular column.
train_labels_df = pd.concat([train_labels_df, df.reset_index()], ignore_index=True)

In [25]:
# Copy the REAL-PD test measurements of every device/instrument folder into
# <beat_pd>/data/raw/test under a descriptive file name, and add one manifest
# row per copied file to the running test manifest.
# The first CSV column becomes the index and is looked up by measurement id.
df = pd.read_csv(join(PATH_TO_REALPD_TEST_LABELS_DIR, "real-pd.REAL-PD_Test_Data_IDs.csv"), index_col=0)
df["subject_id"] = df["subject_id"].astype(str)

cohort = "realpd"

# copyfile does not create missing directories, so make sure the target exists.
target_dir = join(PATH_TO_BEAT_PD_DIR, "data", "raw", "test")
os.makedirs(target_dir, exist_ok=True)

# Collect the rows in a list and build the frame once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every call.
manifest_rows = []
for d_i in ["smartphone_accelerometer", "smartwatch_accelerometer", "smartwatch_gyroscope"]:
    # Folder names follow the "<device>_<instrument>" pattern.
    device, instrument = d_i.split("_")
    source_dir = join(PATH_TO_REALPD_TEST_MEASUREMENTS_DIR, d_i)

    # Sorted so the manifest order does not depend on the file system.
    for filename in sorted(os.listdir(source_dir)):
        measurement_id, extension = os.path.splitext(filename)
        if extension != ".csv":
            # Skip stray entries (e.g. .DS_Store, .ipynb_checkpoints) that
            # would otherwise fail the id lookup below.
            continue

        subject_id = df.at[measurement_id, "subject_id"]

        source_file_path = join(source_dir, filename)
        m_file = f"{cohort}_{device}_{instrument}_{subject_id}_{measurement_id}.csv"
        target_file_path = join(target_dir, m_file)
        copyfile(source_file_path, target_file_path)

        manifest_rows.append({
            "measurement_id": measurement_id,
            "measurement_file": m_file,
            "subject_id": subject_id,
            "cohort": cohort,
            "device": device,
            "instrument": instrument
        })

test_manifest_df = pd.concat(
    [test_manifest_df, pd.DataFrame(manifest_rows)], ignore_index=True
)


In [26]:
# Store subject ids as strings in every output table before writing them out.
for _frame in (train_manifest_df, test_manifest_df, train_labels_df):
    _frame["subject_id"] = _frame["subject_id"].astype(str)

In [27]:
# Write the manifests next to the copied measurement files, and the combined
# training labels into the train folder. The frame index is written as well.
_raw_dir = join(PATH_TO_BEAT_PD_DIR, "data", "raw")
_train_dir = join(_raw_dir, "train")
_test_dir = join(_raw_dir, "test")

train_manifest_df.to_csv(join(_train_dir, "manifest.csv"))
test_manifest_df.to_csv(join(_test_dir, "manifest.csv"))

train_labels_df.to_csv(join(_train_dir, "labels.csv"))