## Part 0 - Setup Environment and Common Functions


University of San Diego - MS Applied AI

AAI-540 Team 5

December 04, 2024

In [None]:
# Environment Setup Notebook for Type I Diabetes Management Project
# This notebook sets up the environment for running the project in local VS Code, Google Colab, or AWS SageMaker.
# For each section, use the appropriate code based on where you are executing the notebook.

# ---------------------------------------------
# Section 1: Importing Libraries
# ---------------------------------------------

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
import random
import json
import datetime
import joblib
from time import gmtime, strftime


In [None]:
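# The following AWS imports need the packages listed in Section 2 (boto3, sagemaker, awswrangler, pyathena).
# On Google Colab, either install them first or comment these imports out.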
import boto3
import sagemaker
import awswrangler as wr
from pyathena import connect
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session


In [None]:
# ---------------------------------------------
# Section 2: Installing Necessary Libraries
# ---------------------------------------------

# Uncomment for Local Environment
# (%pip installs into the environment of the running kernel, which `!pip` does not guarantee)
# %pip install numpy pandas tensorflow matplotlib seaborn boto3 sagemaker awswrangler pyathena

# Uncomment for Google Colab
# %pip install numpy pandas tensorflow matplotlib seaborn boto3 sagemaker awswrangler pyathena
# from google.colab import drive
# drive.mount('/content/drive')

# Uncomment for AWS SageMaker
# !conda install numpy pandas matplotlib seaborn -y
# Note: TensorFlow is pre-installed in the SageMaker environment

In [None]:
# ---------------------------------------------
# Section 3: Setting Up AWS Credentials
# ---------------------------------------------

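# Uncomment for Local Environment
# Preferred: run `aws configure` in a terminal (or export the variables in your shell) so that
# secrets are never saved in the notebook. If you set them here, replace the placeholders only
# for the current session and never commit the real values.
# import boto3
# import os
# os.environ['AWS_ACCESS_KEY_ID'] = "<Your_AWS_Access_Key_ID>"
# os.environ['AWS_SECRET_ACCESS_KEY'] = "<Your_AWS_Secret_Access_Key>"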

# Uncomment for Google Colab
# import boto3
# from google.colab import auth
# auth.authenticate_user()
# !aws configure

# Uncomment for AWS SageMaker
# SageMaker has built-in IAM roles; no manual credential setup is needed

In [None]:
# ---------------------------------------------
# Section 4: Defining Global Variables
# ---------------------------------------------

# Data Paths
# Uncomment exactly one DATA_PATH line below for your environment.
# Section 7 (Data Access Verification) needs DATA_PATH to be defined.

# Local Environment
# DATA_PATH = "./data/"

# Google Colab
# DATA_PATH = "/content/drive/MyDrive/diabetes_project/data/"

# AWS SageMaker
# DATA_PATH = "s3://your-bucket-name/diabetes_project/data/"


In [None]:
# ---------------------------------------------
# Section 5: Verifying TensorFlow Installation
# ---------------------------------------------
# Print the version string of the TensorFlow module imported as `tf` in Section 1.
# This cell raises NameError if the Section 1 import cell has not been run.
print("TensorFlow version:", tf.__version__)


In [None]:
# ---------------------------------------------
# Section 6: Setting Up AWS SageMaker, Feature Store, S3 Buckets, and Athena
# ---------------------------------------------

# Uncomment for AWS SageMaker
# Initialize SageMaker session
# sess = sagemaker.Session()
# bucket = sess.default_bucket()
# role = sagemaker.get_execution_role()
# region = boto3.Session().region_name
# account_id = boto3.client("sts").get_caller_identity().get("Account")
#
# sm = boto3.Session().client(service_name="sagemaker", region_name=region)
# s3_client = boto3.client("s3", region_name=region)
#
# boto_session = boto3.Session(region_name=region)
# featurestore_runtime = boto_session.client(
#     service_name="sagemaker-featurestore-runtime", region_name=region
# )
#
# # Initialize SageMaker feature store session
# feature_store_session = Session(
#     boto_session=boto_session,
#     sagemaker_client=sm,
#     sagemaker_featurestore_runtime_client=featurestore_runtime,
# )
#
# # Default S3 bucket for offline feature store
# default_s3_bucket_name = feature_store_session.default_bucket()
# prefix = "sagemaker-featurestore-diabetes-management"
#
# # Default feature group name
# feature_group_name = "diabetes-feature-group-offline"
#
# # Define feature group
# feature_group = FeatureGroup(
#     name=feature_group_name, sagemaker_session=feature_store_session
# )
#
# # Set S3 paths for data lake
# s3_datalake_path_csv = f"s3://{bucket}/diabetes-management/csv"
# s3_datalake_path_parquet = f"s3://{bucket}/diabetes-management/parquet"
# s3_staging_dir = f"s3://{bucket}/athena/staging"
#
# # Set Athena parameters
# database_name = "diabetes_management_db"
#
# # Connect to the Athena staging directory
# conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)


In [None]:
# ---------------------------------------------
# Section 7: Data Access Verification
# ---------------------------------------------

# Verify access to the data path
def list_data_files(data_path):
    """Return the names of the data files found under ``data_path``.

    Parameters
    ----------
    data_path : str or os.PathLike
        Either a local / mounted directory (local VS Code, Google Colab) or an
        ``s3://<bucket>/<prefix>`` URI (AWS SageMaker).

    Returns
    -------
    list of str
        For a local directory, the sorted entry names from ``os.listdir``.
        For an S3 URI, the keys of the objects whose key starts with the prefix.

    Raises
    ------
    OSError
        If a local directory is missing or unreadable (raised by ``os.listdir``).
        Errors raised by boto3 for the S3 case propagate unchanged.
    """
    s3_scheme = "s3://"
    path_text = str(data_path)
    if path_text.startswith(s3_scheme):
        # Imported here so the local/Colab branch still works when the AWS
        # imports at the top of the notebook have been commented out.
        import boto3

        # "s3://bucket/some/prefix/" -> ("bucket", "some/prefix/")
        bucket_name, _, key_prefix = path_text[len(s3_scheme):].partition("/")
        # Use a local name rather than rebinding `bucket`, which Section 6
        # uses for the default bucket *name* (a string).
        s3_bucket = boto3.resource("s3").Bucket(bucket_name)
        return [obj.key for obj in s3_bucket.objects.filter(Prefix=key_prefix)]
    # Sorted so that the printed listing is stable between runs.
    return sorted(os.listdir(data_path))


# DATA_PATH is chosen in Section 4. Look it up defensively so that a missing
# definition produces an actionable message instead of a swallowed NameError.
configured_data_path = globals().get("DATA_PATH")
if configured_data_path is None:
    print(
        "Error accessing data path:",
        "DATA_PATH is not defined; uncomment the line for your environment in Section 4.",
    )
else:
    try:
        files = list_data_files(configured_data_path)
        print("Data files:", files)
    except Exception as e:
        # Best-effort check: report the problem and let the notebook continue.
        print("Error accessing data path:", e)


In [None]:
# ---------------------------------------------
# Section 8: Setting Up GPU/TPU for Training (Colab Only)
# ---------------------------------------------

# Uncomment for Google Colab
# import tensorflow as tf
# if tf.test.gpu_device_name():
#     print('Default GPU Device:', tf.test.gpu_device_name())
# else:
#     print("No GPU detected. In Colab, select Runtime > Change runtime type and choose a GPU accelerator.")

# ---------------------------------------------
# Section 9: Summary and Next Steps
# ---------------------------------------------

# Final status message for the setup notebook.
# NOTE(review): this is printed unconditionally, even when the Section 7 data
# access check reported an error, so read the earlier cell outputs before proceeding.
print("Environment setup complete. Proceed to the next notebook for data loading and exploration.")
