In [None]:
import logging
import urllib.request
import re
import os
import boto3
import json
import pprint
from pathlib import Path
from datetime import datetime

import pandas as pd
import numpy as np
import s3fs

import data_utils
import aws_utils

# You may want to do it this way if you are debugging / editing the support files:
# modules imported via %aimport will be reloaded before you execute any cell
# %load_ext autoreload
# %autoreload 1
# %aimport data_utils
# %aimport aws_utils

## First we'll establish some basic logging

In [None]:
# Configure the notebook's 'main' logger so that every record goes both to the
# cell output (console) and to a dated log file under logs/.
logger = logging.getLogger('main')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would otherwise stack a second pair of handlers on it and every
# record would be emitted twice. Detach and close whatever an earlier run left
# behind (closing also releases the previous log file handle).
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# One log file per calendar day, e.g. logs/chi-town-lambda_01-31-2020.log
s_tdy = datetime.today().strftime('%m-%d-%Y')
main_log = Path(f'logs/chi-town-lambda_{s_tdy}.log')
main_log.parent.mkdir(parents=True, exist_ok=True)

# create console handler and file handler, both at debug level
ch = logging.StreamHandler()
# mode='w' truncates an existing file, so each run starts from an empty log
ch_main_log = logging.FileHandler(main_log, mode='w')
ch.setLevel(logging.DEBUG)
ch_main_log.setLevel(logging.DEBUG)

# create formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s')

# add formatter to both handlers
ch.setFormatter(formatter)
ch_main_log.setFormatter(formatter)

logger.addHandler(ch)
logger.addHandler(ch_main_log)

## Let's make sure our Lambda function exists

<font color=red>If you named your function something other than scrubChiTownFile, update the cell below</font>

In [None]:
# boto3 client for the AWS Lambda service, used by the cells below to look up
# and invoke the function.
lambda_client = boto3.client('lambda')

# Name of the Lambda function this notebook drives.
LAMBDA_FUNCTION = 'scrubChiTownFile'

In [None]:
# You should see the details of your function if all is well
my_lambda_fn = lambda_client.get_function(FunctionName=LAMBDA_FUNCTION)
if 'Configuration' in my_lambda_fn:
    pprint.pprint(my_lambda_fn['Configuration'])

## Let's fire off our data to our lambda function

<font color=red>Make sure to change the name of the S3_BUCKET to your bucket</font>

In [None]:
# File-system style handle onto S3.
s3 = s3fs.S3FileSystem()

# Matches names ending in '<digits>.csv' and captures the digits.
# Written as a raw string: in a plain literal '\d' and '\.' are invalid escape
# sequences, which Python reports as a DeprecationWarning / SyntaxWarning.
FILE_PATTERN = re.compile(r'.*?(\d+)\.csv')
S3_BUCKET = 'chi-town-scrub-data'

# Collect the files to process from the bucket (empty prefix argument).
# NOTE(review): the next cell iterates this result as 2-item entries whose
# second item is a '<bucket>/<key>' string - confirm against aws_utils.
s3_files = aws_utils.get_s3_files_to_process(s3, FILE_PATTERN, S3_BUCKET, '')
s3_files

In [None]:
# Fire one asynchronous Lambda invocation per file found in S3, stopping at the
# first invocation that is not accepted.
for _, s3_file in s3_files:

    # The path is treated as '<bucket>/<key>': everything before the first '/'
    # is the bucket, everything after it (nested prefixes included) is the key.
    s3_bucket, _sep, s3_key = s3_file.partition('/')

    payload = {'s3_bucket': s3_bucket, 's3_key': s3_key}
    logger.info(f'Sending payload {payload}')

    # InvocationType='Event' requests an asynchronous invocation.
    response = lambda_client.invoke(FunctionName=LAMBDA_FUNCTION,
                                    InvocationType='Event',
                                    Payload=json.dumps(payload))

    # An accepted asynchronous invocation is acknowledged with status 202;
    # anything else means the request was not queued, so stop sending.
    if response['StatusCode'] != 202:
        logger.error('Something"s gone horribly wrong')
        break

## Head over to CloudWatch from the AWS Console to see our Lambdas in action

On the left of the CloudWatch dashboard, click Logs, then find the log group for our Lambda function.

## After that, look in your S3 bucket to confirm our files have been processed

<font size=24 color=blue>Et Voila! Now we're supercomputing!</font>