# `GlucoseMeter` Populator

## Part 1: Set-Up

In [None]:
# Imports
import uuid
from confluent_kafka.admin import AdminClient, NewTopic
from pyspark.sql.functions import col

# Get config
from config import user
from config import password

In [None]:
# Mount the ADLS Gen2 storage container on DBFS, authenticating with OAuth
# (service-principal client credentials).
storageAccount = "gen10datafund2205"
storageContainer = "group5container"
clientid = "2ca50102-5717-4373-b796-39d06568588d"
tenantId = "d46b54b2-a652-420b-aa5a-2ef7f8fc706e"
mount_point = "/mnt/jacklynn/glucmeter"

# SECURITY: the client secret must never be committed in notebook source.
# It is read from a Databricks secret scope instead. The scope/key names below
# are placeholders: create them (or adjust the names) before running, and
# rotate the secret that was previously hard-coded in this cell.
clientSecret = dbutils.secrets.get(scope="group5-secrets", key="adls-client-secret")

configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": clientid,
    "fs.azure.account.oauth2.client.secret": clientSecret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenantId}/oauth2/token",
    "fs.azure.createRemoteFileSystemDuringInitialization": "true",
}

# Unmount only when the mount point is actually present. Checking explicitly
# (rather than swallowing every exception) lets real failures surface.
if any(m.mountPoint == mount_point for m in dbutils.fs.mounts()):
    dbutils.fs.unmount(mount_point)

# Mount the storage container at mount_point
dbutils.fs.mount(
    source=f"abfss://{storageContainer}@{storageAccount}.dfs.core.windows.net/",
    mount_point=mount_point,
    extra_configs=configs,
)

In [None]:
%fs 
ls /mnt/jacklynn/glucmeter

path,name,size,modificationTime
dbfs:/mnt/jacklynn/glucmeter/CGM_Data.csv,CGM_Data.csv,35978185,1659468631000
dbfs:/mnt/jacklynn/glucmeter/Diabetes Prevalence in the US by State and Demographic.csv,Diabetes Prevalence in the US by State and Demographic.csv,180068,1659497875000
dbfs:/mnt/jacklynn/glucmeter/ExerciseData_2013_150min.csv,ExerciseData_2013_150min.csv,2107,1659554693000
dbfs:/mnt/jacklynn/glucmeter/Food Insecurity.csv,Food Insecurity.csv,6779,1659533925000
dbfs:/mnt/jacklynn/glucmeter/U.S. NHANES Survey Data.csv,U.S. NHANES Survey Data.csv,724469,1659554890000
dbfs:/mnt/jacklynn/glucmeter/chinese-diabetes-clean.csv,chinese-diabetes-clean.csv,33367142,1659541878000


## Part 2: Populate `GlucoseMeter` Table

In [None]:
# Load the CGM readings CSV from the mounted container.
# The first row is used as the header and column types are inferred.
df = (
    spark.read
    .option("header", "True")
    .option("inferSchema", "True")
    .option("delimiter", ",")
    .csv("/mnt/jacklynn/glucmeter/CGM_Data.csv")
)

display(df)

_c0,RecID,PtID,DeviceDtTm,Glucose
0,196879,220,2000-04-28T18:20:14.010+0000,129
1,196880,220,2000-04-28T18:25:14.010+0000,132
2,196881,220,2000-04-28T18:30:14.010+0000,126
3,196882,220,2000-04-28T18:35:14.010+0000,120
4,196883,220,2000-04-28T18:40:14.010+0000,118
5,196884,220,2000-04-28T18:45:14.010+0000,115
6,196885,220,2000-04-28T18:50:14.010+0000,107
7,196886,220,2000-04-28T18:55:14.010+0000,107
8,196887,220,2000-04-28T19:00:14.010+0000,106
9,196888,220,2000-04-28T19:05:14.010+0000,105


In [None]:
# Align the column names with the database table, then discard '_c0',
# which is not needed.
df = (
    df.withColumnRenamed("RecID", "recID")
    .withColumnRenamed("PtID", "ptID")
    .withColumnRenamed("DeviceDtTm", "time")
    .withColumnRenamed("Glucose", "glucose_lvl")
    .drop(col("_c0"))
)

In [None]:
# Save the DataFrame to the SQL Server table over JDBC.
database = "group5database"
table = "dbo.GlucoseMeter"
server = "gen10-data-fundamentals-22-05-sql-server.database.windows.net"
port = "1433"

# Build the URL from the variables above so that changing `port` takes effect
# (the port used to be hard-coded in the URL, leaving `port` unused).
jdbc_url = f"jdbc:sqlserver://{server}:{port};databaseName={database};"

# `user` and `password` come from the local `config` module.
# mode("overwrite") replaces whatever the target table currently holds.
(
    df.write.format("jdbc")
    .option("url", jdbc_url)
    .mode("overwrite")
    .option("dbtable", table)
    .option("user", user)
    .option("password", password)
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .save()
)