# Project 3 Exploratory Graphs

## Load in Python Packages

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

## Load in Data

In [2]:
# Per-participant demographics table (columns shown below: ID, Gender, HbA1c).
demographics_path = 'data/Demographics.csv'
demographics = pd.read_csv(demographics_path)

# Preview the first few rows.
demographics.head()

Unnamed: 0,ID,Gender,HbA1c
0,13,MALE,5.7
1,1,FEMALE,5.5
2,3,FEMALE,5.9
3,4,FEMALE,6.4
4,5,FEMALE,5.7


In [3]:
# Create a dictionary of Dexcom tables: each key is "id_{id_num}" (zero-padded to
# three digits) and each value is that participant's Dexcom DataFrame.
timestamp_col = 'Timestamp (YYYY-MM-DDThh:mm:ss)'

dexcoms = {}
for i in range(1, 17):
    if i == 3:
        # NOTE(review): ID 3 is skipped; presumably there is no usable file for it -- confirm.
        continue
    # Named `subject_id` rather than `id` so the builtin `id()` is not shadowed
    # for the rest of the notebook.
    subject_id = str(i).zfill(3)
    readings = (pd.read_csv(f'data/dexcom/Dexcom_{subject_id}.csv')
                # Drop the first 12 rows by position.
                # NOTE(review): presumably non-reading metadata rows -- confirm.
                .iloc[12:]
                .drop(columns='Index')
                .reset_index(drop=True))
    # Parse timestamps, then add a calendar-date column derived from them.
    readings[timestamp_col] = pd.to_datetime(readings[timestamp_col])
    dexcoms[f"id_{subject_id}"] = readings.assign(date=readings[timestamp_col].dt.date)

# View example df for id_001
dexcoms['id_001'].head()

Unnamed: 0,Timestamp (YYYY-MM-DDThh:mm:ss),Event Type,Event Subtype,Patient Info,Device Info,Source Device ID,Glucose Value (mg/dL),Insulin Value (u),Carb Value (grams),Duration (hh:mm:ss),Glucose Rate of Change (mg/dL/min),Transmitter Time (Long Integer),date
0,2020-02-13 17:23:32,EGV,,,,iPhone G6,61.0,,,,,11101.0,2020-02-13
1,2020-02-13 17:28:32,EGV,,,,iPhone G6,59.0,,,,,11401.0,2020-02-13
2,2020-02-13 17:33:32,EGV,,,,iPhone G6,58.0,,,,,11701.0,2020-02-13
3,2020-02-13 17:38:32,EGV,,,,iPhone G6,59.0,,,,,12001.0,2020-02-13
4,2020-02-13 17:43:31,EGV,,,,iPhone G6,63.0,,,,,12301.0,2020-02-13


In [4]:
# Create a dictionary of food-log tables: each key is "id_{id_num}" (zero-padded to
# three digits) and each value is that participant's food-log DataFrame.

# Uniform column names applied to every food log (replaces whatever header the CSV has).
food_log_columns = ["date", "time_of_day", "time_begin", "time_end",
                    "logged_food", "amount", "unit", "searched_food",
                    "calorie", "total_carb", "dietary_fiber", "sugar",
                    "protein", "total_fat"]

food_logs = {}
for i in range(1, 17):
    if i == 3:
        # NOTE(review): ID 3 is skipped; presumably there is no usable file for it -- confirm.
        continue
    # Named `subject_id` rather than `id` so the builtin `id()` is not shadowed
    # for the rest of the notebook.
    subject_id = str(i).zfill(3)
    food_log = pd.read_csv(f'data/food_log/Food_Log_{subject_id}.csv')
    food_log.columns = food_log_columns
    # time_of_day is inconsistently formatted, use time_begin when possible.
    # Entries that do not match HH:MM:SS are coerced to missing values instead of raising.
    food_log['time_of_day'] = pd.to_datetime(food_log['time_of_day'], errors='coerce', format='%H:%M:%S').dt.time
    food_log['date'] = pd.to_datetime(food_log['date'])
    food_log['time_begin'] = pd.to_datetime(food_log['time_begin'])
    food_logs[f"id_{subject_id}"] = food_log

# View example df for id_001
food_logs['id_001'].head()

Unnamed: 0,date,time_of_day,time_begin,time_end,logged_food,amount,unit,searched_food,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,2020-02-13,18:00:00,2020-02-13 18:00:00,,Berry Smoothie,20.0,fluid ounce,Strawberry Smoothie,456.0,85.0,1.7,83.0,16.0,3.3
1,2020-02-13,20:30:00,2020-02-13 20:30:00,,Chicken Leg,1.0,,chicken leg,475.0,0.0,0.0,0.0,62.0,23.0
2,2020-02-13,20:30:00,2020-02-13 20:30:00,,Asparagus,4.0,,Asparagus,13.0,2.5,1.2,0.8,1.4,0.1
3,2020-02-14,07:10:00,2020-02-14 07:10:00,,Natrel Lactose Free 2 Percent,8.0,fluid ounce,(Natrel) Lactose Free 2% Partly Skimmed Milk,120.0,9.0,,8.0,12.0,
4,2020-02-14,07:10:00,2020-02-14 07:10:00,,Standard Breakfast,0.75,cup,"(Kellogg's) Frosted Flakes, Cereal",110.0,26.0,,10.0,1.0,
