# Request Caffe Spot Instance

We now have a good idea of how much it will cost to run a GPU-based spot instance, so we start one from an AMI which includes a pre-configured Caffe install.

If you prefer not to use the AMI, getting Caffe to run on EC2 can be a challenge and requires recompiling the kernel to support additional GPU drivers.

## References

* [EC2 AMI with Caffe and Torch](http://blog.titocosta.com/post/110345699197/public-ec2-ami-with-torch-and-caffe-deep-learning)
* [Making Ubuntu EC2 Instance run Caffe (**see above**)](https://github.com/BVLC/caffe/wiki/Ubuntu-14.04-ec2-instance)
* [Use Ansible as a Library](http://oriolrius.cat/blog/2015/01/21/using-ansible-like-library-programming-in-python/)
* [Synchronize Ansible](http://docs.ansible.com/ansible/synchronize_module.html)

In [1]:
# Use a named logger instead of scattering print statements through the notebook.
import logging
logger = logging.getLogger("Request Caffe Spot Instance")
# Let DEBUG and above through this logger so the polling progress messages are not filtered out.
logger.setLevel(logging.DEBUG)

In [2]:
# Request a Spot Instance, this doesn't guarantee we will get one though.
import boto.ec2
import time

# NOTE: the AMI used in "EC2 AMI with Caffe and Torch" is only available in the us-east region.
# I made copies of that AMI and placed one copy in each public region; this dict maps each
# region name to the ID of the AMI copy that lives there.
REGION_AMIS = {
    "ap-northeast-1": "ami-b270eab2",
    "ap-southeast-1": "ami-e26372b0",
    "ap-southeast-2": "ami-5fc88065",
    "eu-central-1": "ami-b6666bab",
    "eu-west-1": "ami-f1526486",
    "sa-east-1": "ami-81f86e9c",
    "us-east-1": "ami-35622e50",
    "us-west-1": "ami-918b4ad5",
    "us-west-2": "ami-686a8e5b",
}


# TODO, make sure your Security Group allows SSH from your IP so you can connect! Also make sure this is available in
# all regions.
# TODO, check your IAM Security policy
# {
#     "Version": "2012-10-17",
#     "Statement": [
#         {
#           "Sid": "stmt999",
#           "Effect": "Allow",
#           "Action": [
#               "ec2:DescribeInstances",
#               "ec2:DescribeSpotInstanceRequests",
#               "ec2:RequestSpotInstances",
#               "ec2:DescribeSpotPriceHistory"
#            ],
#            "Resource": [
#                "*"
#            ]
#        }
#    ]
# }
def request_spot_instance(aws_region, price):
    """
    Connect to EC2 and submit a request for a single one-time g2.2xlarge Spot Instance.

    The call returns as soon as the request has been submitted; that does not mean an
    instance is available yet.

    Parameters
    ----------
    aws_region : str
        An AWS region where your caffe instance will run. Must be a key of REGION_AMIS.
    price : float
        The maximum price in USD which you will pay for this instance.

    Returns
    -------
    Whatever ``request_spot_instances`` returns; callers in this notebook treat it as a
    list of spot requests, each with an ``id``.

    Raises
    ------
    Exception
        If no AMI has been copied to ``aws_region``.

    Note
    ----
    THIS WILL COST YOU MONEY! BE CAREFUL.
    """
    # Membership test directly on the dict: no need to build a key list first.
    if aws_region not in REGION_AMIS:
        raise Exception("No AMI copied to that region: %s" % (aws_region,))

    ec2_conn = boto.ec2.connect_to_region(
        region_name=aws_region,
        profile_name="cedarstreet")

    ami_id = REGION_AMIS[aws_region]

    return ec2_conn.request_spot_instances(
        price=price,  # In USD (I think)
        image_id=ami_id,  # See "EC2 AMI with Caffe and Torch"
        count=1,
        type='one-time',  # This can also be persistent which will resubmit your price if you're outbid
        instance_type="g2.2xlarge",
        key_name="erik",  # A private key which Ansible can access as well
        dry_run=False)  # Try with a DryRun first


def watch_spot_instance_states(aws_region, spot_request_ids):
    """
    Poll EC2 for the given spot requests and yield each one once it has an instance.

    Polling stops as soon as no request is still waiting for an instance. Requests that
    are cancelled, or that ended up closed/failed without ever getting an instance, are
    logged as errors and dropped instead of being polled forever.

    Parameters
    ----------
    aws_region : str
        AWS region where your Spot Request was created.
    spot_request_ids : iterable of str
        SpotRequestIds to watch, these are required.

    Yields
    ------
    spot_request : SpotRequest
        A fulfilled SpotRequest, i.e. one which has an instance_id. Each request is
        yielded at most once.

    Notes
    -----
    This will sleep for 60 seconds between polls while any request is still waiting.
    """
    ec2_conn = boto.ec2.connect_to_region(
        region_name=aws_region,
        profile_name="cedarstreet")

    # The ids are sent to EC2 on every poll, so materialise them: a one-shot iterator
    # (e.g. the result of map() on Python 3) would be empty after the first poll.
    pending_ids = list(spot_request_ids)

    while True:
        spot_requests = ec2_conn.get_all_spot_instance_requests(request_ids=pending_ids)
        still_waiting = []

        for spot_request in spot_requests:
            if spot_request.state == "cancelled":
                # Skip only this request; the others in the batch are still checked.
                logger.error("Spot Request was cancelled: %s", spot_request.id)
                continue

            if spot_request.instance_id is not None:
                yield spot_request
                continue

            if spot_request.state in ("closed", "failed"):
                # Terminal states: an instance will never show up for this request.
                logger.error(
                    "Spot Request %s ended in state %s without an instance.",
                    spot_request.id,
                    spot_request.state)
                continue

            logger.debug(
                "Instance is not ready for spot request %s with state of %s.",
                spot_request.id,
                spot_request.state)
            still_waiting.append(spot_request.id)

        if not still_waiting:
            break

        # Only re-poll the requests that are still waiting, so fulfilled ones are not
        # yielded again and dropped ones are not logged again.
        pending_ids = still_waiting

        # Wait a minute before checking again.
        logger.debug("Waiting to try again.")
        time.sleep(60.0)

In [4]:
def get_instance_public_ip(aws_region, instance_id):
    """
    Look up a single EC2 instance by ID in the given region.

    Despite the name, this hands back the whole instance object; callers read the
    address from its ``ip_address`` attribute.

    Parameters
    ----------
    aws_region : str
        AWS region where this instance is located.
    instance_id : str
        Instance ID which is being requested.

    Returns
    -------
    EC2Instance
        The first instance of the first reservation returned for ``instance_id``.
    """
    connection = boto.ec2.connect_to_region(
        region_name=aws_region,
        profile_name="cedarstreet")

    # Only one ID is requested, so take the first reservation and its first instance.
    first_reservation = connection.get_all_reservations(instance_ids=[instance_id])[0]
    return first_reservation.instances[0]

In [3]:
# Watch for an instance to start in the us-east region with a max price of $0.15 USD.
# Once an instance is available, play a sound to let me know if I'm in another tab.
from IPython.display import Audio

aws_region = "us-east-1"

spot_instances = request_spot_instance(aws_region, 0.15)
if len(spot_instances) > 1:
    raise Exception("Too many spot instances started!")

logger.debug("Started %s Spot Instances", spot_instances)

# Pass a real list of request ids: the watcher sends them to EC2 on every poll, so a
# one-shot iterator (what map() returns on Python 3) would only work for the first poll.
instance_id = None
for spot_request in watch_spot_instance_states(aws_region, [s.id for s in spot_instances]):
    logger.info("Spot instance is ready: %s", spot_request.instance_id)
    instance_id = spot_request.instance_id


# NOTE(review): this plays unconditionally, even if instance_id is still None because the
# request never got an instance. It is kept as the cell's last expression.
Audio(url="./docs/sounds/success.wav", autoplay=True)

DEBUG:Request Caffe Spot Instance:Started [SpotInstanceRequest:sir-021rfclm] Spot Instances
DEBUG:Request Caffe Spot Instance:Instance is not ready for spot request sir-021rfclm with state of open.
DEBUG:Request Caffe Spot Instance:Waiting to try again.
DEBUG:Request Caffe Spot Instance:Instance is not ready for spot request sir-021rfclm with state of open.
DEBUG:Request Caffe Spot Instance:Waiting to try again.
DEBUG:Request Caffe Spot Instance:Instance is not ready for spot request sir-021rfclm with state of open.
DEBUG:Request Caffe Spot Instance:Waiting to try again.


i-0c4296ae


In [15]:
# Fetch the instance behind the fulfilled spot request and display its ip_address,
# which is the address used to reach the machine in the next cell.
instance = get_instance_public_ip(aws_region, instance_id)
instance.ip_address

u'174.129.71.20'

In [17]:
# With the instance up, push the local helper scripts that are not baked into the AMI.
import ansible.inventory
import ansible.runner

# The inventory holds just the one freshly started instance.
hosts = [instance.ip_address]
inventory = ansible.inventory.Inventory(hosts)

# Run the synchronize module once against that host as the "ubuntu" user.
ansible_runner = ansible.runner.Runner(
    module_name='synchronize',
    module_args='src=scripts dest=./',
    timeout=5,
    inventory=inventory,
    remote_user="ubuntu",
)
out = ansible_runner.run()

# Bail out first if the host has no (or an empty) entry under "contacted".
host_result = out["contacted"].get(instance.ip_address)
if not host_result:
    raise Exception("No response information from instance :/")

logger.debug("Output from command: %s", "\n".join(host_result["stdout_lines"]))

DEBUG:Request Caffe Spot Instance:Output from command: .d..t.... scripts/
<f.st.... scripts/caffe_python.sh
<f.st.... scripts/convert_protomean.py
<f.st.... scripts/create_lmbd.sh
<f.st.... scripts/install_packages.sh
<f.st.... scripts/make_mean.sh
<f.st.... scripts/resume_training.sh
<f.st.... scripts/run_all.sh
<f.st.... scripts/train.sh
