# Load BSON files

In [4]:
import os
import numpy as np
import bson

def getBsonFiles(DIR):
    files = os.listdir(DIR)
    bson_files = []

    for f in files:
        if ".bson" in f:
            bson_files.append(f)

    print(bson_files)
    return bson_files

In [5]:
DATA_DIR = os.getcwd() + "/data/fddg-db14/fddg"

In [6]:
bson_files = getBsonFiles(DATA_DIR)

['users.bson', 'metas.bson', 'chunks.bson']


In [7]:
bson_data = {}

for b in bson_files:
    with open(DATA_DIR + "/" + b, "rb") as f:
        bson_data[b] = bson.decode_all(f.read())

# Load all BSON data into class objects

In [8]:
class Meta:
    def __init__(self, json_doc):
        self._id = json_doc["_id"]
        self.subject_id = json_doc["subject_id"]
        self.phone_placement = json_doc["phone_placement"]
        self.recording_duration = json_doc["recording_duration"]
        self.chunk_ids = json_doc["chunk_ids"]
        
class Chunk:
    def __init__(self, json_doc):
        self._id = json_doc["_id"]
        self.recording_id = json_doc["recording_id"]
        self.chunk_index = json_doc["chunk_index"]
        self.labels = json_doc["labels"]
        self.p_ecg = json_doc["p_ecg"]
        self.p_hr = json_doc["p_hr"]
        self.p_contact = json_doc["p_contact"]
        self.p_acc_x = json_doc["p_acc_x"]
        self.p_acc_y = json_doc["p_acc_y"]
        self.p_acc_z = json_doc["p_acc_z"]
        self.acc_x = json_doc["acc_x"]
        self.acc_y = json_doc["acc_y"]
        self.acc_z = json_doc["acc_z"]
        self.gyr_x = json_doc["gyr_x"]
        self.gyr_y = json_doc["gyr_y"]
        self.gyr_z = json_doc["gyr_z"]
        self.gra_x = json_doc["gra_x"]
        self.gra_y = json_doc["gra_y"]
        self.gra_z = json_doc["gra_z"]
        self.mag_x = json_doc["mag_x"]
        self.mag_y = json_doc["mag_y"]
        self.mag_z = json_doc["mag_z"]
        self.att_roll = json_doc["att_roll"]
        self.att_pitch = json_doc["att_pitch"]
        self.att_yaw = json_doc["att_yaw"]
        self.delta_heading = json_doc["delta_heading"]
        
class User:
    def __init__(self, json_doc):
        self.subject_id = json_doc["subject_id"]
        self.name = json_doc["name"]
        self.yob = json_doc["yob"]
        self.height = json_doc["height"]
        self.weight = json_doc["weight"]
        self.is_female = json_doc["is_female"]
        self.medical_conditions = json_doc["medical_conditions"]

In [9]:
metas = []
rec_chunks = {}
chunks = {}
users = {}

In [10]:
# Metas
for doc in bson_data[bson_files[1]]:
    meta = Meta(doc)
    if meta._id != "B49EB5F5-3FEA-403C-816C-A97BDB1A7776":
        metas.append(meta)
    else:
        print("caught")
    
# Chunks
for doc in bson_data[bson_files[2]]:
    chunk = Chunk(doc)
    print(chunk._id)
    print(chunk.recording_id)
    print()
    
    if chunk.recording_id != "B49EB5F5-3FEA-403C-816C-A97BDB1A7776":
        if chunk.recording_id in rec_chunks.keys():
            rec_chunks[chunk.recording_id].append(chunk)
        else:
            rec_chunks[chunk.recording_id] = [chunk]
    
    chunks[chunk._id] = chunk

# Users
for doc in bson_data[bson_files[0]]:
    user = User(doc)
    users[user.subject_id] = user

caught
97D59978-23A4-4059-9A35-EF617DAAF9A5
40181EF2-1A30-4C0B-81D2-9773E158F5C0

FD63AF73-29F8-49CA-9AE5-A22228D945EB
40181EF2-1A30-4C0B-81D2-9773E158F5C0

B7F3E187-7A4D-46CD-A100-C8B459658B5A
40181EF2-1A30-4C0B-81D2-9773E158F5C0

0E4D56BB-C18F-47CA-8511-E913E0CB3ACB
40181EF2-1A30-4C0B-81D2-9773E158F5C0

7E2F6803-1A54-429D-B897-2F9E4EAEDEA9
40181EF2-1A30-4C0B-81D2-9773E158F5C0

D16D8CB6-0285-4423-A6CA-96F0CB9BC05B
40181EF2-1A30-4C0B-81D2-9773E158F5C0

DEDB48D7-4D00-40F4-9F0C-382D4BF2E908
40181EF2-1A30-4C0B-81D2-9773E158F5C0

B996F060-92AD-48E8-B7A7-7B8DF829C9DA
40181EF2-1A30-4C0B-81D2-9773E158F5C0

7B56F921-080F-40FA-87EF-D5C658FD7E25
40181EF2-1A30-4C0B-81D2-9773E158F5C0

8A7DD7B4-23C4-4B30-AC86-A17DC4E09413
40181EF2-1A30-4C0B-81D2-9773E158F5C0

F9C5C39C-0B2D-4F1F-BB0D-CFF1E8F20B97
40181EF2-1A30-4C0B-81D2-9773E158F5C0

B551DE55-976E-4742-82DD-DF80687FBF6A
40181EF2-1A30-4C0B-81D2-9773E158F5C0

453327E8-BE03-4E01-AC8F-BDFD51DF15E0
AEA56528-8D55-49B1-B0DC-A5CCB861850E

C5886B47-983D-402B

In [20]:
for c in rec_chunks["EBDF23BC-1923-4240-9AA4-0222E0A44161"]:
    print(c.chunk_index)
    print(len(c.labels))
    print(len(c.p_ecg))
    print(len(c.p_acc_x))
    print(len(c.acc_x))
    print(len(c.p_hr))
    print()

KeyError: 'EBDF23BC-1923-4240-9AA4-0222E0A44161'

In [9]:
for c in rec_chunks["3D6EB8A7-6255-4A7F-ACBA-797CF023A0ED"]:
    print(c.chunk_index)
    print(len(c.labels))
    print(len(c.p_ecg))
    print(len(c.p_acc_x))
    print(len(c.acc_x))
    print(len(c.p_hr))
    print()

KeyError: '3D6EB8A7-6255-4A7F-ACBA-797CF023A0ED'

# Merge all chunks and meta data into a single object for each recording

In [18]:
class Recording:
    def __init__(self, meta):
        self._id = meta._id
        self.subject_id = meta.subject_id
        self.phone_placement = meta.phone_placement
        self.recording_duration = meta.recording_duration
        self.chunk_ids = meta.chunk_ids
        self.labels = []
        self.p_ecg = []
        self.p_hr = []
        self.p_contact = []
        self.p_acc_x = []
        self.p_acc_y = []
        self.p_acc_z = []
        self.acc_x = []
        self.acc_y = []
        self.acc_z = []
        self.gyr_x = []
        self.gyr_y = []
        self.gyr_z = []
        self.gra_x = []
        self.gra_y = []
        self.gra_z = []
        self.mag_x = []
        self.mag_y = []
        self.mag_z = []
        self.att_roll = []
        self.att_pitch = []
        self.att_yaw = []
        self.delta_heading = []
        self.height = users[self.subject_id].height
        print(self.height)
        self.weight = users[self.subject_id].weight
        
        print(self._id)
        r_chunks = rec_chunks[self._id]
        for c in r_chunks:
            print(c._id)
        print()
        ordered_chunk_ids = {}
        
        for chunk in r_chunks:
            ordered_chunk_ids[chunk.chunk_index] = chunk
            
        for i in range(0,len(r_chunks)-1):
            chunk = chunks[ordered_chunk_ids[i]._id]
            
            assert(i == chunk.chunk_index)
            
            self.labels += chunk.labels
            self.p_ecg += chunk.p_ecg
            self.p_hr += chunk.p_hr
            self.p_contact += chunk.p_contact
            self.p_acc_x += chunk.p_acc_x
            self.p_acc_y += chunk.p_acc_y
            self.p_acc_z += chunk.p_acc_z
            self.acc_x += chunk.acc_x
            self.acc_y += chunk.acc_y
            self.acc_z += chunk.acc_z
            self.gyr_x += chunk.gyr_x
            self.gyr_y += chunk.gyr_y
            self.gyr_z += chunk.gyr_z
            self.gra_x += chunk.gra_x
            self.gra_y += chunk.gra_y
            self.gra_z += chunk.gra_z
            self.mag_x += chunk.mag_x
            self.mag_y += chunk.mag_y
            self.mag_z += chunk.mag_z
            self.att_roll += chunk.att_roll
            self.att_pitch += chunk.att_pitch
            self.att_yaw += chunk.att_yaw
            self.delta_heading += chunk.delta_heading

            print(len(chunk.labels))
            print(len(chunk.p_ecg))
            print(len(chunk.p_hr))
            print(len(chunk.p_acc_x))
            print(len(chunk.p_contact))
            print(len(chunk.acc_x))
            print(len(chunk.delta_heading))
            print()
            
        print(len(self.labels))
        print(len(self.p_ecg))
        print(len(self.p_hr))
        print(len(self.p_acc_x))
        print(len(self.p_contact))
        print(len(self.acc_x))
        print()
        
    def isEqual(self, rec):
        return self.labels == rec.labels and self.p_ecg == rec.p_ecg and self.p_hr == rec.p_hr \
        and self.p_acc_x == rec.p_acc_x and self.p_acc_y == rec.p_acc_y and self.p_acc_z == rec.p_acc_z \
        and self.p_contact == rec.p_contact and self.acc_x == rec.acc_x and self.acc_y == rec.acc_y and \
        self.acc_z == rec.acc_z and self.gyr_x == rec.gyr_x and self.gyr_y == rec.gyr_y and \
        self.gyr_z == rec.gyr_z and self.gra_x == rec.gra_x and self.gra_y == rec.gra_y and \
        self.gra_z == rec.gra_z and self.mag_x == rec.mag_x and self.mag_y == rec.mag_y and \
        self.mag_z == rec.mag_z and self.att_pitch == rec.att_pitch and self.att_roll == rec.att_roll and \
        self.att_yaw == rec.att_yaw and self.delta_heading == rec.delta_heading

In [19]:
sizes = {labels: 1, p_ecg: 13, p_acc: 1000, cm: 1}

NameError: name 'labels' is not defined

In [20]:
recordings = []

In [21]:
for meta in metas:
    recordings.append(Recording(meta))

180
40181EF2-1A30-4C0B-81D2-9773E158F5C0
97D59978-23A4-4059-9A35-EF617DAAF9A5
FD63AF73-29F8-49CA-9AE5-A22228D945EB
B7F3E187-7A4D-46CD-A100-C8B459658B5A
0E4D56BB-C18F-47CA-8511-E913E0CB3ACB
7E2F6803-1A54-429D-B897-2F9E4EAEDEA9
D16D8CB6-0285-4423-A6CA-96F0CB9BC05B
DEDB48D7-4D00-40F4-9F0C-382D4BF2E908
B996F060-92AD-48E8-B7A7-7B8DF829C9DA
7B56F921-080F-40FA-87EF-D5C658FD7E25
8A7DD7B4-23C4-4B30-AC86-A17DC4E09413
F9C5C39C-0B2D-4F1F-BB0D-CFF1E8F20B97
B551DE55-976E-4742-82DD-DF80687FBF6A

50
292
5
540
5
48
48

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

550
6792
55
10540
55
548

180
AEA56528-8D55-49B1-B0DC-A5CCB861850E
453327E8-BE03-4E01-AC8F-BDFD51DF15E0
C5886B47-983D-402B-8471-43A9670C6C8A
529C8992-FAD5-4CFC-9798-ADA4F14B4579
E0D1F703-B95C-4902-95E7-78D5E3A7F99D
31D80692-B51F-4130-A3D8-158641AF0246
55A1426F-B

50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

50
650
5
1000
5
50
50

1900
24690
190
38000
190
1899

170
64A2056B-BC1C-4E6F-A641-43DF216EA153
20D6E181-44A2-4F0E-A4A9-1B41B4ED1BA3
93219AA7-A7AF-419C-B53E-FE47ED802A6C
68657810-639E-4CA2-AD8C-97CD6D8B8F33
A7D36BFD-43D9-4E10-8C12-FB747A69249B
2A6065A2-B1D7-491B-B6BF-FA49E446EAEA
3D65DDF0-6535-4F36-9388-C903CFB7B69B
B22516EB-DB39-49E4-B618-0F888D4AEE4F
3FDFE038-DF46-4EB9-8248-E9ECBAB48EA0
CBEF1BBD-4248-4F73-B4FE-CF558DC25EF1
230E7D8E-BB30-4731-89A2-C19D14D05FBA
C1D40E52-8FBB-444F-8492-8233D22E1699
7C81847A-3EBC-48AA-9914-ABE4D8D

In [22]:
for r in recordings:
    print(r.recording_duration)

57.00000000000054
74.00000000000007
92.99999999999899
51.000000000000455
160.99999999999514
40.0000000000003
138.9999999999964
44.000000000000355
40.0000000000003
104.99999999999831
189.9999999999935
135.99999999999656
183.99999999999383
115.99999999999768
124.99999999999717
152.9999999999956
163.99999999999497
135.99999999999656
165.99999999999486
153.99999999999554
194.9999999999932
143.9999999999961
116.99999999999763
134.99999999999662
104.99999999999831


# Pickle the data

In [23]:
import pickle

ddir = os.getcwd() + "/pickles/db14/"

for r in recordings:
    filehandler = open(ddir + r._id + ".p", "wb")
    pickle.dump(r, filehandler)
    
for r in recordings:
    reader = open(ddir + r._id + ".p", "rb")
    r2 = pickle.load(reader)
    assert(r2.isEqual(r))

In [25]:
filehandler = open(ddir + "users.p", "wb")
pickle.dump(users, filehandler)

In [11]:
for u in users.values():
    print(u.name)
    print(u.height)
    print(u.weight)

Matthäus Gemmingen
180
77
Robert van Biljon
195
94
Harry Wixley
170
72
