# AWS SDK for Python (Boto3)

1. S3 basic operations
1. EMR Steps control

## S3 basic operations

Install boto3 with pip

In [None]:
%%sh
# Install boto3 with the pip executable at /emr/notebook-env/bin;
# --user places the package in the current user's site-packages.
/emr/notebook-env/bin/pip install --user boto3

Show boto3 version

In [None]:
import boto3
import json

# Bare last expression: the notebook displays the installed boto3 version.
boto3.__version__

Initialize the S3 client

In [None]:
# Create the low-level S3 client used by every S3 cell in this notebook.
s3 = boto3.client(service_name='s3')

List buckets

In [None]:
# list_buckets() reports each bucket's CreationDate as a datetime, which
# json.dumps cannot serialize on its own; default=str renders such values
# as text instead of raising TypeError.
resp = s3.list_buckets()

print(json.dumps(resp, indent=2, default=str))

In [None]:
buckets = s3.list_buckets()['Buckets']

# The loop variable is deliberately not named `bucket`: later cells use
# `bucket` for the target bucket name (a string), and re-running this cell
# would otherwise overwrite it with a dict.
for bucket_info in buckets:
    print(bucket_info['Name'])

Prepare a file for uploading

In [None]:
local_file_path = '/home/emr-notebook/hello.txt'

# Create (or truncate) the local sample file and fill it with a short greeting.
with open(local_file_path, mode='w+') as sample_file:
    sample_file.write('hello world')

upload_file and upload_fileobj

In [None]:
# Name of the S3 bucket used by the S3 cells below; fill it in before running them.
bucket = ''

In [None]:
key_1 = 'hello.txt'
key_2 = 'key_2/hello.txt'

# upload_file takes a local path and opens the file itself, so it does not
# need to run inside a `with open(...)` block. Both upload calls return None
# (failures surface as exceptions), so there is no response to dump as JSON.
s3.upload_file(local_file_path, bucket, key_1)
print(f'Uploaded {local_file_path} to s3://{bucket}/{key_1}')

# upload_fileobj reads from a file-like object, which must be opened in binary mode.
with open(local_file_path, 'rb') as f:
    s3.upload_fileobj(f, bucket, key_2)
print(f'Uploaded {local_file_path} to s3://{bucket}/{key_2}')

put_object

In [None]:
key_3 = 'key_3/world.txt'

# put_object sends an in-memory payload directly; Body is passed as bytes.
resp = s3.put_object(Bucket=bucket, Key=key_3, Body=b'hello world 123')
print(json.dumps(resp, indent=2))

List objects

In [None]:
# Each entry under 'Contents' carries a LastModified datetime, which
# json.dumps cannot serialize on its own; default=str renders it as text
# instead of raising TypeError.
resp = s3.list_objects_v2(
    Bucket=bucket,
    MaxKeys=5,  # cap the listing at five keys
)
print(json.dumps(resp, indent=2, default=str))

In [None]:
# Restrict the listing to keys that start with 'key_2'. Listed objects carry
# a LastModified datetime, so default=str is needed for json.dumps to
# serialize the response.
resp = s3.list_objects_v2(
    Bucket=bucket,
    Prefix='key_2',
)
print(json.dumps(resp, indent=2, default=str))

Download objects

In [None]:
# download_fileobj writes the object into an already-open binary file object.
with open('/home/emr-notebook/hello_1.txt', 'wb') as f:
    s3.download_fileobj(bucket, key_1, f)
print(f's3://{bucket}/{key_1} -> /home/emr-notebook/hello_1.txt')

# download_file takes a destination path and handles the file itself. Like
# download_fileobj it returns None (failures surface as exceptions), so there
# is no response to dump as JSON.
s3.download_file(bucket, key_2, '/home/emr-notebook/hello_2.txt')
print(f's3://{bucket}/{key_2} -> /home/emr-notebook/hello_2.txt')

In [None]:
resp = s3.get_object(
    Bucket=bucket,
    Key=key_3,
)

# 'Body' is a streaming object and 'LastModified' is a datetime; neither is
# JSON-serializable. Dump the metadata without the body and let default=str
# render any remaining non-JSON values as text.
metadata = {name: value for name, value in resp.items() if name != 'Body'}
print(json.dumps(metadata, indent=2, default=str))

# Read the payload from the stream and decode it as UTF-8 text.
body = resp['Body']
bytes_ = body.read()
print(bytes_.decode('utf-8'))

Delete objects

In [None]:
# Remove the three sample objects with a single batch request.
keys_to_delete = [key_1, key_2, key_3]
resp = s3.delete_objects(
    Bucket=bucket,
    Delete={'Objects': [{'Key': key} for key in keys_to_delete]},
)
print(json.dumps(resp, indent=2))

## EMR Steps control

In [None]:
# EMR client bound to the us-east-2 region; used by every EMR cell below.
emr = boto3.client(service_name='emr', region_name='us-east-2')

In [None]:
# ID of the EMR cluster targeted by the step calls below; fill in before running.
cluster_id = ''
# Bucket interpolated into the step arguments below (script location and final argument).
# NOTE(review): this reassigns the `bucket` set in the S3 section — confirm that is intended.
bucket = ''

In [None]:
# Command line handed to command-runner.jar: spark-submit, the job name,
# the script location in S3, then the script's positional arguments.
step_name = 'sample-spark'
spark_submit_args = [
    'spark-submit',
    '--name',
    step_name,
    f's3://{bucket}/sample.py',
    'default',
    'spark_on_emr_workshop',
    'curated',
    'tripdata',
    f's3://{bucket}',
]

# Single step definition; ActionOnFailure is set to 'CONTINUE'.
spark_step = {
    'Name': step_name,
    'ActionOnFailure': 'CONTINUE',
    'HadoopJarStep': {
        'Jar': 'command-runner.jar',
        'Args': spark_submit_args,
    },
}

resp = emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=[spark_step])
print(json.dumps(resp, indent=2))

In [None]:
# Step timelines in the response hold datetime values, which json.dumps
# cannot serialize on its own; default=str renders them as text instead of
# raising TypeError.
resp = emr.list_steps(
    ClusterId=cluster_id,
    # Optional filters (uncomment and fill in as needed):
    # StepStates=['PENDING', 'RUNNING'],  # any of PENDING, CANCEL_PENDING, RUNNING,
    #                                     # COMPLETED, CANCELLED, FAILED, INTERRUPTED
    # StepIds=['<step-id>'],
    # Marker='<pagination-marker>',
)
print(json.dumps(resp, indent=2, default=str))

In [None]:
# The step description holds datetime values (step timeline), which json.dumps
# cannot serialize on its own; default=str renders them as text.
resp = emr.describe_step(
    ClusterId=cluster_id,
    # 'string' is a placeholder: replace it with a real step ID, e.g. one of
    # the IDs reported by add_job_flow_steps or list_steps.
    StepId='string'
)
print(json.dumps(resp, indent=2, default=str))

In [None]:
# Request cancellation of the listed steps.
resp = emr.cancel_steps(
    ClusterId=cluster_id,
    StepIds=[
        # 'string' is a placeholder: replace it with the ID(s) of the step(s) to cancel.
        'string',
    ],
    # Must be exactly one of 'SEND_INTERRUPT' or 'TERMINATE_PROCESS'. The
    # original `'SEND_INTERRUPT'|'TERMINATE_PROCESS'` was API-doc notation and
    # raises TypeError, because `|` is not defined between two strings.
    StepCancellationOption='SEND_INTERRUPT',
)
print(json.dumps(resp, indent=2))