# Wildfire Risk - Ingest - S3 Setup
__Team 3 - Dave Friesen, John Chen, and Kyle Dalope__<br>
__ADS-508-02-SP23__<br><br>
__GitHub link: https://github.com/davefriesen/wildfire-risk__

In [20]:
# Notebook metadata: authorship, contact addresses, release date, license and version.
__authors__ = [
    'Dave Friesen',
    'John Chen',
    'Kyle Dalope',
]
__contact__ = [
    'dfriesen@sandiego.edu',
    'johnchen@sandiego.edu',
    'kdalope@sandiego.edu',
]
__date__ = '2023-03-20'
__license__ = 'MIT'
__version__ = '1.0.1'

# Setup Basics

In [21]:
# Import basic libraries
import boto3
import sagemaker

# Import data access libraries
import pandas as pd
from profiler import profile, profile_cat

In [22]:
# Establish session fundamentals
sess = sagemaker.Session()             # SageMaker session wrapper
bucket = sess.default_bucket()         # name of the session's default S3 bucket
role = sagemaker.get_execution_role()  # IAM role this notebook executes under
region = boto3.Session().region_name   # AWS region configured for this session

# The AWS account ID comes from STS. Previously `account_id` was (mis)assigned a
# SageMaker client object, so it never held an account ID at all.
account_id = boto3.client('sts').get_caller_identity()['Account']

# The SageMaker client itself is still created, under a name that says what it is.
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

# Setup S3

In [23]:
# Set public path and store as SageMaker variable
s3_public_path = 's3://wildfire-risk/'
%store s3_public_path

# Set private path ("my bucket")
s3_private_path = 's3://{}/wildfire-risk/csv/'.format(bucket)
%store s3_private_path
print(s3_private_path)

Stored 's3_public_path' (str)
Stored 's3_private_path' (str)
s3://sagemaker-us-east-1-857283526476/wildfire-risk/csv/


In [24]:
# Get contents of Public S3 bucket (file source)
!aws s3 ls {s3_public_path}

                           PRE athena/
2023-03-18 23:31:25   10821874 fires.csv
2023-03-18 23:31:21   38317925 weather.csv


In [25]:
# Now copy public bucket-based data to private (local) bucket
!aws s3 cp --recursive $s3_public_path/ $s3_private_path/ --exclude '*' --include 'fires.csv'
!aws s3 cp --recursive $s3_public_path/ $s3_private_path/ --exclude '*' --include 'weather.csv'

In [26]:
# List contents of private S3 bucket to confirm copy
!aws s3 ls {s3_private_path}

# Load and Validate Data (*traditional Pandas - as check*)

In [27]:
# Read the copied CSVs back from the *private* S3 prefix into pandas, then run the
# project's profile() helper on each frame as a sanity check of the ingest
fires_df = pd.read_csv(f'{s3_private_path}fires.csv')
weather_df = pd.read_csv(f'{s3_private_path}weather.csv')
profile(fires_df)
profile(weather_df)

FileNotFoundError: sagemaker-us-east-1-857283526476/wildfire-risk/csv/fires.csv

# Store Variables and Close Session

In [28]:
# List the variables already persisted with `%store <name>` for subsequent notebooks.
# A bare `%store` only displays the stored names and values; it does not store anything new.
%store

Stored variables and their in-db values:
s3_private_path             -> 's3://sagemaker-us-east-1-857283526476/wildfire-ri
s3_public_path              -> 's3://wildfire-risk/'


In [29]:
%%html

<!-- Shows a notice, then the script below programmatically clicks a hidden button
     whose data-commandlinker-command is "kernelmenu:shutdown"; the intent is to shut
     the kernel down as soon as this cell's output is rendered.
     NOTE(review): presumably relies on a JupyterLab / SageMaker Studio command linker
     reacting to that attribute; confirm. Any script error is caught and ignored, so
     in front ends without that support this cell is expected to do nothing. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
try {
    els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}    
</script>

In [None]:
%%javascript

// Second shutdown path: saves a checkpoint and then deletes the notebook's session
// through the global `Jupyter` object.
// NOTE(review): presumably targets front ends that expose `Jupyter` (e.g. the classic
// Notebook UI), complementing the %%html cell; confirm which environments need it.
try {
    // Checkpoint first so the notebook state is saved before the session goes away.
    Jupyter.notebook.save_checkpoint();
    // NOTE(review): deleting the session is presumably what stops the kernel; confirm.
    Jupyter.notebook.session.delete();
}
catch(err) {
    // Deliberately ignored (best effort): e.g. `Jupyter` is undefined in this front end.
}