In [43]:
from pathlib import Path
import json
import os
import pandas as pd

from tinydb import TinyDB

# Resolve the output folders relative to the directory the notebook runs in.
current_dir = Path.cwd()
results_dir = current_dir / 'results'
kv_data_dir = results_dir / 'kvdb'

# Create results/kvdb together with any missing parents; no-op if present.
kv_data_dir.mkdir(parents=True, exist_ok=True)


class DocumentDB(object):
    """TinyDB-backed store of joined patient measurement records.

    On construction the four JSON tables in ``kv_data_dir`` (``people.json``,
    ``sites.json``, ``visited.json`` and ``measurements.json``) are read with
    pandas, inner-joined into one row per matching measurement, and written
    as documents to the TinyDB file at ``db_path``.
    """

    def __init__(self, db_path, verbose=True):
        """Read the source tables, join them and populate the database.

        Args:
            db_path: Location of the TinyDB JSON file to (re)build. The file
                is written here and nowhere else.
            verbose: When true (the default), print a ``head()`` preview of
                each input table and of the joined result.
        """
        ## You can use the code from the previous example if you would like
        people_json = kv_data_dir.joinpath('people.json')
        visited_json = kv_data_dir.joinpath('visited.json')
        sites_json = kv_data_dir.joinpath('sites.json')
        measurements_json = kv_data_dir.joinpath('measurements.json')

        self._db_path = Path(db_path)
        self._db = None
        self._records = []

        # people / sites / visited are transposed after loading so that each
        # record becomes a row; measurements is used exactly as read.
        my_people = pd.read_json(people_json).T
        my_sites = pd.read_json(sites_json).T
        my_visited = pd.read_json(visited_json).T
        my_measurements = pd.read_json(measurements_json)

        patient_information = self._merge_tables(
            my_people, my_measurements, my_visited, my_sites
        )

        if verbose:
            # Sanity-check previews of every input table and of the join.
            print(my_people.head())
            print('\n')
            print(my_sites.head())
            print('\n')
            print(my_visited.head())
            print('\n')
            print(my_measurements.head())
            print('\npatient info\n')
            print(patient_information.head())

        self._records = self._frame_to_records(patient_information)
        self._load_db()

    @staticmethod
    def _merge_tables(people, measurements, visited, sites):
        """Inner-join the four tables into one frame, one row per measurement.

        The join keys are ``person_id`` (people -> measurements), ``visit_id``
        (-> visited) and ``site_id`` (-> sites). Rows without a match in
        every table are dropped because each join is an inner join.
        """
        return (
            people.merge(measurements, on='person_id', how='inner')
            .merge(visited, on='visit_id', how='inner')
            .merge(sites, on='site_id', how='inner')
        )

    @staticmethod
    def _frame_to_records(frame):
        """Convert a DataFrame into a list of plain-Python row dictionaries.

        Round-tripping through pandas' JSON writer turns numpy scalars into
        built-in ``int`` / ``float`` / ``str`` values, which is what TinyDB's
        JSON storage needs to be able to serialise the documents.
        """
        return json.loads(frame.to_json(orient='records'))

    def _load_db(self):
        """Open the TinyDB file at ``self._db_path`` and store the records.

        The default table is emptied first so that building the database
        twice on the same path does not duplicate documents.
        """
        self._db = TinyDB(self._db_path)
        self._db.truncate()
        self._db.insert_multiple(self._records)

In [44]:
# Start from a clean slate: delete any database file that already exists.
db_path = results_dir / 'patient-info.json'
if db_path.exists():
    db_path.unlink()

db = DocumentDB(db_path)

         person_id personal_name family_name
danforth  danforth         Frank    Danforth
dyer          dyer       William        Dyer
lake          lake      Anderson        Lake
pb              pb         Frank     Pabodie
roe            roe     Valentina     Roerich


      site_id latitude longitude
DR-1     DR-1   -49.85   -128.57
DR-3     DR-3   -47.15   -126.72
MSK-4   MSK-4   -48.87    -123.4


    visit_id site_id  visit_date
619      619    DR-1  1927-02-08
622      622    DR-1  1927-02-10
734      734    DR-3  1930-01-07
735      735    DR-3  1930-01-12
751      751    DR-3  1930-02-26


   visit_id person_id quantity  reading
0       619      dyer      rad     9.82
1       619      dyer      sal     0.13
2       622      dyer      rad     7.80
3       622      dyer      sal     0.09
4       734        pb      rad     8.41

patient info

   person_id personal_name family_name visit_id quantity  reading site_id  \
0       dyer       William        Dyer      619      rad     9