<a href="https://colab.research.google.com/github/cbrink585/dsc650/blob/master/kvdbcindybrinkmeyer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
import json
from pathlib import Path
import os

import pandas as pd
import s3fs


def read_cluster_csv(file_path, endpoint_url='https://storage.budsc.midwest-datascience.com'):
    """Read a CSV object from the S3-compatible store into a DataFrame.

    Args:
        file_path: Path of the CSV object; passed unchanged to ``s3.open``.
        endpoint_url: Base URL of the S3-compatible service to connect to.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    s3 = s3fs.S3FileSystem(
        anon=True,
        client_kwargs={
            'endpoint_url': endpoint_url
        }
    )
    # Parse inside a context manager so the remote handle is closed as soon
    # as the frame is built, instead of being left open until garbage
    # collection.
    with s3.open(file_path, mode='rb') as csv_file:
        return pd.read_csv(csv_file)

# New Section

In [None]:
# Reference on working with files in Google Colab: https://neptune.ai/blog/google-colab-dealing-with-files

In [2]:
# Use the %pip magic so the package is installed into the running kernel's
# environment rather than whatever `pip` the subshell happens to resolve.
%pip install s3fs

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting s3fs
  Downloading s3fs-2023.4.0-py3-none-any.whl (28 kB)
Collecting aiobotocore~=2.5.0 (from s3fs)
  Downloading aiobotocore-2.5.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.7/72.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from s3fs)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.29.77,>=1.29.76 (from aiobotocore~=2.5.0->s3fs)
  Downloading botocore-1.29.76-py3-none-any.whl (10.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m75.6 MB/s[0m eta [36m0:00:00[0m
Collecting aioitertools>=0.5.1 (from aiobotocore~=2.5.0->s3fs)
  Downloading aioitertools-0.1

In [29]:
# Output locations are resolved relative to the notebook's working directory.
current_dir = Path.cwd()
results_dir = current_dir / 'results'
kv_data_dir = results_dir / 'kvdb'

# Create results/kvdb, including missing parents; does nothing if it exists.
kv_data_dir.mkdir(exist_ok=True, parents=True)

In [30]:

# One JSON file per key-value store, all kept under kv_data_dir.
people_json = kv_data_dir / 'people.json'
visited_json = kv_data_dir / 'visited.json'
sites_json = kv_data_dir / 'sites.json'
measurements_json = kv_data_dir / 'measurements.json'

In [31]:
class KVDB(object):
    """A minimal key-value store held in memory and persisted as one JSON file.

    JSON objects only have string keys, so every key is normalised to a
    string when it is stored and when it is looked up.  That keeps lookups
    consistent before and after a save/reload round trip (``619`` and
    ``'619'`` address the same entry) and allows keys such as tuples, which
    the JSON encoder would otherwise reject, to be saved.
    """

    def __init__(self, db_path):
        """Bind the store to ``db_path`` and load its contents if the file exists."""
        self._db_path = Path(db_path)
        self._db = {}
        self._load_db()

    @staticmethod
    def _normalize_key(key):
        """Return the string form of ``key`` used for storage and lookup."""
        return key if isinstance(key, str) else str(key)

    def _load_db(self):
        """Replace the in-memory contents with the JSON file's, when it exists."""
        if self._db_path.exists():
            with open(self._db_path, encoding='utf-8') as f:
                self._db = json.load(f)

    def get_value(self, key):
        """Return the value stored under ``key``, or ``None`` when it is absent."""
        return self._db.get(self._normalize_key(key))

    def set_value(self, key, value):
        """Store ``value`` under ``key`` in memory; call ``save`` to persist it."""
        self._db[self._normalize_key(key)] = value

    def save(self):
        """Write the whole store to ``db_path`` as indented JSON.

        Raises:
            TypeError: if a stored value cannot be serialised to JSON.
        """
        # Serialise before opening the file: opening with 'w' truncates it, so
        # encoding straight into the handle would destroy the previous
        # contents whenever a value turns out not to be serialisable.
        payload = json.dumps(self._db, indent=2)
        with open(self._db_path, 'w', encoding='utf-8') as f:
            f.write(payload)

In [23]:
ls


[0m[01;34mresults[0m/  [01;34msample_data[0m/


In [27]:
pwd

'/content'

In [33]:
def _create_kvdb(db_path, csv_path, key_column):
    """Build one JSON key-value store from a CSV, keyed by ``key_column``.

    Args:
        db_path: Location of the JSON file handed to ``KVDB``.
        csv_path: Path handed to ``read_cluster_csv``.
        key_column: Column whose values become the keys of the store.

    NOTE(review): the recorded run of this notebook ends in a ValueError;
    confirm that the bare file names passed as ``csv_path`` are valid paths
    for ``read_cluster_csv``.
    """
    db = KVDB(db_path)
    df = read_cluster_csv(csv_path)
    for key, group_df in df.groupby(key_column):
        # JSON object keys must be strings, and pandas yields numpy scalars
        # for numeric columns, which the JSON encoder rejects as keys.
        # Converting here keeps all four stores keyed the same way.
        # NOTE(review): only the first row of each group is stored; any
        # further rows sharing the key are dropped -- confirm that is intended.
        db.set_value(str(key), group_df.to_dict(orient='records')[0])
    db.save()


def create_sites_kvdb():
    """Write ``sites_json`` from ``site.csv``, one record per ``site_id``."""
    _create_kvdb(sites_json, 'site.csv', 'site_id')


def create_people_kvdb():
    """Write ``people_json`` from ``person.csv``, one record per ``person_id``."""
    _create_kvdb(people_json, 'person.csv', 'person_id')


def create_visits_kvdb():
    """Write ``visited_json`` from ``visited.csv``, one record per ``visit_id``."""
    _create_kvdb(visited_json, 'visited.csv', 'visit_id')


def create_measurements_kvdb():
    """Write ``measurements_json`` from ``measurements.csv``, one record per ``visit_id``."""
    _create_kvdb(measurements_json, 'measurements.csv', 'visit_id')

In [34]:
# Build every JSON store, in the same order as the original calls.
for build_kvdb in (
    create_sites_kvdb,
    create_people_kvdb,
    create_visits_kvdb,
    create_measurements_kvdb,
):
    build_kvdb()

ValueError: ignored

In [9]:
# Store one visit record under a composite (visit_id, site_id) key and read
# it straight back; nothing is written to disk because save() is not called.
kvdb_path = 'visits.json'
kvdb = KVDB(kvdb_path)

key = (619, 'DR-1')
value = {
    'visit_id': 619,
    'site_id': 'DR-1',
    'visit_date': '1927-02-08',
}

kvdb.set_value(key, value)
retrieved_value = kvdb.get_value(key)