In [1]:
import os
from tools import *

In [16]:
base_path = "raw_data"

# Label files are named "<subject>_labeled_sleep.txt". Matching the whole
# suffix (instead of any ".txt") keeps stray text files out of the subject
# list, and stripping it recovers exactly the id that load_subject_data()
# uses to rebuild the filename.
label_suffix = "_labeled_sleep.txt"

# os.listdir() yields entries in arbitrary, platform-dependent order, so the
# ids are sorted to make the processing order and the printed list
# reproducible.
subjects = sorted(
    f[: -len(label_suffix)]
    for f in os.listdir(f"{base_path}/labels")
    if f.endswith(label_suffix)
)
print(len(subjects))
print(subjects)

31
['1066528', '1360686', '1449548', '1455390', '1818471', '2598705', '2638030', '3509524', '3997827', '4018081', '4314139', '4426783', '46343', '5132496', '5383425', '5498603', '5797046', '6220552', '759667', '7749105', '781756', '8000685', '8173033', '8258170', '844359', '8530312', '8686948', '8692923', '9106476', '9618981', '9961348']


In [18]:
def load_subject_data(subject, data_dir=None):
    """Read the raw motion, heart-rate and sleep-label files of one subject.

    Parameters
    ----------
    subject : str
        Subject identifier; it is the filename prefix of all three files.
    data_dir : str or path-like, optional
        Directory that contains the ``motion``, ``heart_rate`` and ``labels``
        sub-directories. When omitted, the notebook-level ``base_path`` is
        used, which keeps existing ``load_subject_data(subject)`` calls
        working unchanged.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(motion, heart, labels)`` with columns ``time, x, y, z``,
        ``time, heart_rate`` and ``time, stage`` respectively. None of the
        files has a header row, so the column names are assigned here.
    """
    root = base_path if data_dir is None else data_dir

    # Acceleration and label files are space-separated; heart rate is
    # comma-separated.
    motion = pd.read_csv(
        f"{root}/motion/{subject}_acceleration.txt",
        sep=" ",
        header=None,
        names=["time", "x", "y", "z"],
    )
    heart = pd.read_csv(
        f"{root}/heart_rate/{subject}_heartrate.txt",
        sep=",",
        header=None,
        names=["time", "heart_rate"],
    )
    labels = pd.read_csv(
        f"{root}/labels/{subject}_labeled_sleep.txt",
        sep=" ",
        header=None,
        names=["time", "stage"],
    )
    return motion, heart, labels

In [26]:
# Build one feature table per subject (one row per labelled epoch) and pickle it.

EPOCH_SECONDS = 30      # epoch length used to bucket raw sample times, in seconds
OUTPUT_DIR = "datasets"

# Collapse the raw stage codes into three classes: 0 -> 0, 1-4 -> 1, 5 -> 2
# (presumably wake / non-REM / REM -- confirm against the dataset docs).
# Loop-invariant, so it is defined once.
mapping = {0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 2}

# DataFrame.to_pickle() does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for subject in subjects:
    print(subject)
    motion_uncropped, heart_uncropped, labels_uncropped = load_subject_data(subject)
    motion, heart, labels = crop_data(motion_uncropped, heart_uncropped, labels_uncropped)

    # how much activity there is in a given second
    activity = build_activity_counts(motion)
    heart_rate = build_heart_rate(heart)

    # Round each timestamp down to the nearest multiple of the epoch length,
    # i.e. the epoch the measurement belongs to (times are in seconds).
    # Vectorized equivalent of applying `x - x % 30` element by element.
    motion_ids = motion["time"] - np.mod(motion["time"], EPOCH_SECONDS)
    heart_ids = heart["time"] - np.mod(heart["time"], EPOCH_SECONDS)

    # Keep only epochs that have both motion and heart-rate samples and whose
    # stage is not -1 (presumably "unscored" -- confirm).
    valid_times = set(motion_ids) & set(heart_ids)
    scored = labels[labels["time"].isin(valid_times) & (labels["stage"] != -1)]
    valid_epochs = scored["time"].tolist()

    if not valid_epochs:
        # An empty row list would produce a frame without a 'second_of_sleep'
        # column, so set_index() would raise and abort the remaining subjects.
        print(f"  no valid epochs for subject {subject}; skipping")
        continue

    first_epoch = valid_epochs[0]

    rows = []
    # Pair each epoch with its own stage instead of re-scanning `labels` for
    # every epoch: linear rather than quadratic, and a duplicated timestamp
    # can no longer pick up a filtered-out -1 row.
    for epoch, stage in zip(valid_epochs, scored["stage"].tolist()):
        elapsed = epoch - first_epoch
        rows.append({
            "second_of_sleep": epoch,
            "label": stage,
            # how much activity over the course of 15 seconds - Gaussian smoothing
            # (original author's note; the window size is set inside get_window)
            "activity_count": smooth_gauss(get_window(activity, "activity", epoch)),
            # standard deviation of the values in the time window
            "heart_rate_std": np.std(get_window(heart_rate, "heart_rate", epoch)),
            # how sleepy this time of day is for the body
            "circadian_cycle": cosine_proxy(elapsed),
            "hour_of_sleep": elapsed / 3600.0
        })

    df = pd.DataFrame(rows).set_index("second_of_sleep")
    df["label"] = df["label"].map(mapping)

    df.to_pickle(f"{OUTPUT_DIR}/data_{subject}.pkl")

1066528
                 label  activity_count  heart_rate_std  circadian_cycle  \
second_of_sleep                                                           
0                    0        0.000000        0.351612        -0.258819   
30                   0        0.000000        0.336348        -0.260926   
60                   0        0.000000        0.322797        -0.263031   
90                   0        0.001274        0.310572        -0.265135   
120                  0        0.009708        0.299694        -0.267238   
...                ...             ...             ...              ...   
25980                0        7.307716        0.640652        -0.836286   
26010                0        6.556160        0.656672        -0.835088   
26040                0        7.720901        0.664027        -0.833886   
26070                0        8.198019        0.680921        -0.832680   
27930                0        0.057462        0.283500        -0.750400   

                