# Guess batching and SQS kickoff

This notebook kicks off the scrape by creating all the SQS messages containing the guess ranges, which then trigger the Lambda function calls.

In [2]:
import requests
import time

# Smoke-test the Lambda function through its HTTP trigger.
endpoint = "https://cwuo75bqj2.execute-api.us-east-2.amazonaws.com/default/guessInmate"

# A single small guess range for one region.
params = {
        'inmateMin': 100201,
        'inmateMax': 100300,
        'regionNum': 1000
    }

# requests does not time out by default; bound the wait so an unresponsive
# endpoint cannot hang the kernel indefinitely.
response = requests.get(endpoint, params=params, timeout=60)


In [3]:
import random
import string
import json

def get_random_alphanumeric_string(stringLength=8):
    """Return a random string made of ASCII letters and digits.

    stringLength: number of characters to generate (default 8).

    Each character is drawn independently with one random.choice call.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

Now, we'll generate the SQS messages. Each entry contains an `Id`, which the SQS batch API (called through `boto3`) requires for every entry in a batch request, and a `MessageBody`, which tells the Lambda function which range of inmate registry numbers to validate.

In [4]:
# Width of each inmate-number window, and the step between consecutive
# inmateMin values.
batchSize = 100

# One send_message_batch entry per (regionNum, inmateNum window) pair, with the
# region as the outer loop. Each entry gets a random 12-character Id and a
# JSON-encoded body describing the window.
# NOTE(review): inmateMax equals the next window's inmateMin, so consecutive
# messages share a boundary number - confirm how the Lambda treats inmateMax.
batches = [
    dict(
        Id=get_random_alphanumeric_string(12),
        MessageBody=json.dumps(dict(
            inmateMin=inmateNum,
            inmateMax=inmateNum + batchSize,
            regionNum=regionNum,
        )),
    )
    for regionNum in range(1000, 2000)
    for inmateNum in range(100000, 200000, batchSize)
]

In [6]:
# Take the first generated entry to use as a single test message.
test_batch = batches[0]

In [7]:
test_batch

{'Id': 'M197W7sYaKo3',
 'MessageBody': '{"inmateMin": 100000, "inmateMax": 100100, "regionNum": 1000}'}

In [9]:
import boto3

# Pin the client to the region the queue lives in (the us-east-2 host in
# queueUrl below) rather than relying on the locally configured default region.
sqs = boto3.client('sqs', region_name='us-east-2')

# Queue that receives the guess-range messages.
queueUrl = "https://us-east-2.queue.amazonaws.com/712505641269/inmateBatching"

This next block sends a single test batch to SQS to confirm that the queue accepts our messages; the loop after it sends off the remaining batches.

In [10]:
# Send only the test entry; the response (displayed below) lists it under
# 'Successful' when SQS accepted it.
sqs.send_message_batch(QueueUrl=queueUrl, Entries=[test_batch])

{'Successful': [{'Id': 'QfJjBjODOuZS',
   'MessageId': '9c9197aa-9e4a-45c6-b582-1276d0f7f78d',
   'MD5OfMessageBody': 'd2079e25138cd69a2cf8390ddaf05e53'}],
 'ResponseMetadata': {'RequestId': '78c02750-327d-5c5f-9c84-7020e0ba9ec0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '78c02750-327d-5c5f-9c84-7020e0ba9ec0',
   'date': 'Sat, 27 Jun 2020 04:52:23 GMT',
   'content-type': 'text/xml',
   'content-length': '478'},
  'RetryAttempts': 0}}

In [11]:
# SQS accepts at most 10 entries per SendMessageBatch request.
sqsMaxBatchEntries = 10

# Entries that SQS reported as failed. A batch call can succeed at the HTTP
# level and still reject individual entries, so every response is inspected
# instead of being discarded.
failedEntries = []

# Start at index 1: batches[0] is the entry sent on its own as test_batch.
for i in range(1, len(batches), sqsMaxBatchEntries):
    sendResult = sqs.send_message_batch(
        QueueUrl=queueUrl,
        Entries=batches[i:i + sqsMaxBatchEntries],
    )
    # 'Failed' is only present when at least one entry was rejected.
    failedEntries.extend(sendResult.get('Failed', []))

if failedEntries:
    print(len(failedEntries), "entries were rejected by SQS; inspect failedEntries and resend them")