# Using AWS SDK for python the boto libraty

Boto is a python library that enables usage of AWS serivces programatically. 
It is one of many SDK that AWS offers - [AWS_SDK-s](https://aws.amazon.com/developer/tools/?nc1=f_dr)

In [4]:
import boto3
import pandas as pa

Keep your keys in a safe place. Load them into a variable. Never expose the keys in code. Regenerate your keys regularely and delete all unneeded ones. Here I am loading the keys from the csv file, which I downloaded on creation of a user in IAM.

In [271]:
# load keys from vault
AWS_KEY_ID = pa.read_csv('../../AWS_vault/credentials_boto_test_2.csv')['Access key ID'][0]
AWS_SECRET = pa.read_csv('../../AWS_vault/credentials_boto_test_2.csv')['Secret access key'][0]

# define your preferred region
my_region = 'eu-central-1'

In [3]:
# Generate the boto3 client for interacting with S3 and SNS
s3 = boto3.client('s3',  region_name= my_region,
                         aws_access_key_id=AWS_KEY_ID, 
                         aws_secret_access_key=AWS_SECRET)


## 1. Creating an S3 bucket, uploading objects, listing objects, deleting objects, deleting bucket

In [5]:
# List S3 buckets 
s3.list_buckets()

{'Buckets': [{'CreationDate': datetime.datetime(2020, 1, 29, 12, 49, 47, tzinfo=tzutc()),
   'Name': 'test-bokeh'}],
 'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
 'ResponseMetadata': {'HTTPHeaders': {'content-type': 'application/xml',
   'date': 'Mon, 03 Feb 2020 09:15:29 GMT',
   'server': 'AmazonS3',
   'transfer-encoding': 'chunked',
   'x-amz-id-2': 'Xu0sOSiLfjttQirepMMNLyjP4ouJuCtyuMn9rGbdu1CPhDMBSXSDQGfHE+6o5jqjNOupdJV3CUI=',
   'x-amz-request-id': '61361378548CF413'},
  'HTTPStatusCode': 200,
  'HostId': 'Xu0sOSiLfjttQirepMMNLyjP4ouJuCtyuMn9rGbdu1CPhDMBSXSDQGfHE+6o5jqjNOupdJV3CUI=',
  'RequestId': '61361378548CF413',
  'RetryAttempts': 0}}

In [48]:
# Create the buckets
response_staging = s3.create_bucket(Bucket='test-bokeh',
                                    CreateBucketConfiguration={'LocationConstraint': my_region})

# Print out the response
print(response_staging)

{'Location': 'http://test-bokeh.s3.amazonaws.com/', 'ResponseMetadata': {'HTTPHeaders': {'x-amz-id-2': 'QcpMcRJprshLUHc7d7leaJO/gZdlDvIlONNaklvkC30Xxy9DAtYi9jsszqr+UpF21Vg1YBwrHp8=', 'content-length': '0', 'x-amz-request-id': '851F18682F2A3C1D', 'server': 'AmazonS3', 'date': 'Wed, 29 Jan 2020 12:49:46 GMT', 'location': 'http://test-bokeh.s3.amazonaws.com/'}, 'RequestId': '851F18682F2A3C1D', 'HostId': 'QcpMcRJprshLUHc7d7leaJO/gZdlDvIlONNaklvkC30Xxy9DAtYi9jsszqr+UpF21Vg1YBwrHp8=', 'RetryAttempts': 0, 'HTTPStatusCode': 200}}


In [51]:
# Upload a file
s3.upload_file(Bucket='test-bokeh',
              # Set filename and key
               Filename='final_report.csv', 
               Key='final_report.csv')

In [29]:
# Get the list_buckets response
response = s3.list_buckets()

# Iterate over Buckets from .list_buckets() response
for bucket in response['Buckets']:
    print(bucket['Name'])


# Get object metadata
response = s3.list_objects(Bucket='test-bokeh', 
                           Prefix='final')
response

test-bokeh


{'Contents': [{'ETag': '"2143b2e98c7759312732afd7dadb8fda"',
   'Key': 'final_report.csv',
   'LastModified': datetime.datetime(2020, 1, 29, 12, 50, 42, tzinfo=tzutc()),
   'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
   'Size': 199,
   'StorageClass': 'STANDARD'},
  {'ETag': '"e8a346159ac66da8f9c25217543225f6"',
   'Key': 'final_report.html',
   'LastModified': datetime.datetime(2020, 1, 29, 13, 58, tzinfo=tzutc()),
   'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
   'Size': 1606,
   'StorageClass': 'STANDARD'},
  {'ETag': '"2143b2e98c7759312732afd7dadb8fda"',
   'Key': 'final_report_.csv',
   'LastModified': datetime.datetime(2020, 2, 3, 9, 36, 23, tzinfo=tzutc()),
   'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
   'Size': 199,
   'StorageClass': 'STANDARD'}],
 'EncodingType': 'url',
 'IsTruncated': False,
 'Marker': '',
 'MaxKeys': 1000,
 'Name': 'test-bokeh',
 'Pre

In [8]:
# Get files in the bucket using keys
response.get('Contents')[0].get('Key')

'final_report.csv'

In [10]:
# List all objects in the bucket'               
[obj['Key'] for obj in s3.list_objects(Bucket='test-bokeh')['Contents']]

['final_report.csv',
 'final_report.html',
 'final_report_.csv',
 'indes.html',
 'index.html',
 'slider-test.html',
 'st',
 'st-boxes.png',
 'statistics_vs_machine_learning.png',
 'test-range-tool.html']

In [11]:
# list out specified files 
[obj['Key'] for obj in s3.list_objects(Bucket='test-bokeh')['Contents'] if obj['Key'].endswith('csv')]    

['final_report.csv', 'final_report_.csv']

## 2. Security and permissions


In [30]:
# Upload a file specifying its ACL rule 

s3.upload_file(
    Bucket='test-bokeh',
    Filename='final_report.csv', 
    Key='final_report_.csv',
    ExtraArgs = {'ACL': 'public-read'})

# Iterate over the objects
for obj in response['Contents']:
    s3.put_object_acl(Bucket='test-bokeh', 
                      Key=obj['Key'], 
                      ACL='public-read')
    
    #print out clickable link to the files 
    print("https://{}.s3.amazonaws.com/{}".format( 'test-bokeh', obj['Key']))

https://test-bokeh.s3.amazonaws.com/final_report.csv
https://test-bokeh.s3.amazonaws.com/final_report.html
https://test-bokeh.s3.amazonaws.com/final_report_.csv


In [137]:
# Generate presigned_url for the uploaded object
share_url = s3.generate_presigned_url(
    # Specify allowable operations
    ClientMethod='get_object',
    # Set the expiration time
    ExpiresIn=3600,
    # Set bucket and shareable object's name
    Params={'Bucket': 'test-bokeh',
            'Key': 'final_report.csv'})

# Print out the presigned URL
print(share_url)

https://test-bokeh.s3.amazonaws.com/final_report.csv?X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20200130T170329Z&X-Amz-Credential=AKIAZLPDXHVMJ4IZU7GO%2F20200130%2Feu-central-1%2Fs3%2Faws4_request&X-Amz-Signature=51c49344afab9245a1f9236ce17a83290b9f952d9cfc1fe787753d5a629f72ab


## 3. Retrieveing files from S3 with get_object - Use cloud, but work locally 
In this case the csv files will be downloaded and stored in a dataframe

In [14]:
request_files = s3.list_objects(Bucket='test-bokeh')['Contents']
request_files

[{'ETag': '"2143b2e98c7759312732afd7dadb8fda"',
  'Key': 'final_report.csv',
  'LastModified': datetime.datetime(2020, 1, 29, 12, 50, 42, tzinfo=tzutc()),
  'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
  'Size': 199,
  'StorageClass': 'STANDARD'},
 {'ETag': '"e8a346159ac66da8f9c25217543225f6"',
  'Key': 'final_report.html',
  'LastModified': datetime.datetime(2020, 1, 29, 13, 58, tzinfo=tzutc()),
  'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
  'Size': 1606,
  'StorageClass': 'STANDARD'},
 {'ETag': '"2143b2e98c7759312732afd7dadb8fda"',
  'Key': 'final_report_.csv',
  'LastModified': datetime.datetime(2020, 2, 3, 9, 21, 3, tzinfo=tzutc()),
  'Owner': {'ID': 'd14ca27c07aac3d80654a5143be2aaa7c7e40b70f5c2c5564807e54b96cbe49d'},
  'Size': 199,
  'StorageClass': 'STANDARD'},
 {'ETag': '"f71a01ee9dbb21fa37dcacfd2986d981"',
  'Key': 'indes.html',
  'LastModified': datetime.datetime(2020, 1, 31, 17, 38, 54, tzinfo=t

In [15]:
s3.get_object(Bucket='test-bokeh', Key=request_files[0]['Key'])#['Body']

{'AcceptRanges': 'bytes',
 'Body': <botocore.response.StreamingBody at 0x7f6bb1b83828>,
 'ContentLength': 199,
 'ContentType': 'binary/octet-stream',
 'ETag': '"2143b2e98c7759312732afd7dadb8fda"',
 'LastModified': datetime.datetime(2020, 1, 29, 12, 50, 42, tzinfo=tzutc()),
 'Metadata': {},
 'ResponseMetadata': {'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '199',
   'content-type': 'binary/octet-stream',
   'date': 'Mon, 03 Feb 2020 09:23:38 GMT',
   'etag': '"2143b2e98c7759312732afd7dadb8fda"',
   'last-modified': 'Wed, 29 Jan 2020 12:50:42 GMT',
   'server': 'AmazonS3',
   'x-amz-id-2': 'BP7pDAyWGvpopSYZMIc3v4CZPirekhfhjRva9F00y4/N4Ri8gHqcNgU/R5+Vw0gIy69uupaDRGg=',
   'x-amz-request-id': 'AE187EFAC3A026DA'},
  'HTTPStatusCode': 200,
  'HostId': 'BP7pDAyWGvpopSYZMIc3v4CZPirekhfhjRva9F00y4/N4Ri8gHqcNgU/R5+Vw0gIy69uupaDRGg=',
  'RequestId': 'AE187EFAC3A026DA',
  'RetryAttempts': 0}}

In [16]:
# obtain a file from S3 and load it into a dataframe

df_list = [] 
# Load each object from s3
for file in request_files:
    s3_day_reqs = s3.get_object(Bucket='test-bokeh', 
                                Key=file['Key'])
    if file['Key'].endswith('csv'):
    # Read the DataFrame into pandas, append it to the list
        day_reqs = pa.read_csv(s3_day_reqs['Body'])
        df_list.append(day_reqs)

# Concatenate all the DataFrames in the list
all_reqs = pa.concat(df_list)

# Preview the DataFrame
all_reqs.head()

Unnamed: 0,Service_name,request_count
0,72 Hour Violation,8
1,Graffiti Removal,2
2,Missed Collection,12
3,Street Light Out,21
4,Pothole,33


## 4. Sharing file through a website - S3 serving static html web pages


### Generating links to S3 object

In [74]:
# Generate an html table from the df using pandas 
df.to_html('final_report.html', 
           columns=['Service_name', 'request_count'],
           border=0)

In [77]:
# Upload html file to S3
s3.upload_file(Filename='final_report.html', 
               Bucket='test-bokeh', Key='final_report.html',
               ExtraArgs = {
                 'ContentType':'text/html',
                 'ACL': 'public-read'})

# Print the S3 Public Object URL for the new file.
print("http://{}.s3.amazonaws.com/{}".format('test-bokeh', 'final_report.html'))

http://test-bokeh.s3.amazonaws.com/final_report.html


### Plotting in Bokeh and exporting to html

In [15]:
import bokeh

# to get all bokeh example datasets uncomment the following line:

#bokeh.sampledata.download() 

In [28]:

#example from https://docs.bokeh.org/en/latest/docs/gallery.html

from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

palette.reverse()

counties = {code: county for code, county in counties.items() if county["state"] == "tx"}

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

county_names = [county['name'] for county in counties.values()]
county_rates = [unemployment[county_id] for county_id in counties]
color_mapper = LogColorMapper(palette=palette)

data=dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    rate=county_rates,
)

TOOLS = "pan,wheel_zoom,reset,hover,save"

p = figure(
    title="Texas Unemployment, 2009", tools=TOOLS,
    x_axis_location=None, y_axis_location=None,
    tooltips=[
        ("Name", "@name"), ("Unemployment rate)", "@rate%"), ("(Long, Lat)", "($x, $y)")
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('x', 'y', source=data,
          fill_color={'field': 'rate', 'transform': color_mapper},
          fill_alpha=0.7, line_color="white", line_width=0.5)

show(p)

In [32]:
# export the plot to html
bokeh.io.saving.save(p,'test.html', data=data)

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


'/home/dalabsba/Documents/Projects/30_Udacity/tech-scholar-Bertelsmann/bertelsmann-dsml-group/Project_boto/test.html'

In [38]:

# example https://docs.bokeh.org/en/latest/docs/gallery/range_tool.html

import numpy as np
from bokeh.io import show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure
from bokeh.sampledata.stocks import AAPL

dates = np.array(AAPL['date'], dtype=np.datetime64)
source = ColumnDataSource(data=dict(date=dates, close=AAPL['adj_close']))

p = figure(plot_height=300, plot_width=800, tools="xpan", toolbar_location=None,
           x_axis_type="datetime", x_axis_location="above",
           background_fill_color="#efefef", x_range=(dates[1500], dates[2500]))

p.line('date', 'close', source=source)
p.yaxis.axis_label = 'Price'

select = figure(title="Drag the middle and edges of the selection box to change the range above",
                plot_height=130, plot_width=800, y_range=p.y_range,
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#efefef")

range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('date', 'close', source=source)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

#show(column(p, select))
pp=column(p, select)

# convert bokeh visualization to html
bokeh.io.saving.save(pp,'test-range-tool.html')

'/home/dalabsba/Documents/Projects/30_Udacity/tech-scholar-Bertelsmann/bertelsmann-dsml-group/Project_boto/test-range-tool.html'

In [79]:
# Upload bokeh html plots to S3
filename="test-range-tool.html"

s3.upload_file(Filename=filename, 
               Bucket='test-bokeh', Key='test-range-tool.html',
               ExtraArgs = {
                 'ContentType':'text/html',
                 'ACL': 'public-read'})

# Print the S3 Public Object URL for the new file.
print("http://{}.s3.amazonaws.com/{}".format('test-bokeh', filename))

http://test-bokeh.s3.amazonaws.com/test-range-tool.html


In [16]:
# example code from https://docs.bokeh.org/en/latest/docs/gallery.html

import numpy as np
from bokeh.layouts import row, column
from bokeh.models import CustomJS, Slider
from bokeh.plotting import figure, output_file, show, ColumnDataSource

x = np.linspace(0, 10, 500)
y = np.sin(x)

source = ColumnDataSource(data=dict(x=x, y=y))

plot = figure(y_range=(-10, 10), plot_width=400, plot_height=400)

plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

amp_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Amplitude")
freq_slider = Slider(start=0.1, end=10, value=1, step=.1, title="Frequency")
phase_slider = Slider(start=0, end=6.4, value=0, step=.1, title="Phase")
offset_slider = Slider(start=-5, end=5, value=0, step=.1, title="Offset")

callback = CustomJS(args=dict(source=source, amp=amp_slider, freq=freq_slider, phase=phase_slider, offset=offset_slider),
                    code="""
    const data = source.data;
    const A = amp.value;
    const k = freq.value;
    const phi = phase.value;
    const B = offset.value;
    const x = data['x']
    const y = data['y']
    for (var i = 0; i < x.length; i++) {
        y[i] = B + A*Math.sin(k*x[i]+phi);
    }
    source.change.emit();
""")

amp_slider.js_on_change('value', callback)
freq_slider.js_on_change('value', callback)
phase_slider.js_on_change('value', callback)
offset_slider.js_on_change('value', callback)

layout = row(
    plot,
    column(amp_slider, freq_slider, phase_slider, offset_slider),
)

#output_file("slider.html", title="slider.py example")

#show(layout)

bokeh.io.saving.save(layout,'slider-test.html')

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


'/home/dalabsba/Documents/Projects/30_Udacity/tech-scholar-Bertelsmann/bertelsmann-dsml-group-projects/Project-boto/slider-test.html'

In [17]:
# Upload bokeh html plots to S3
filename="slider-test.html"

s3.upload_file(Filename=filename, 
               Bucket='test-bokeh', Key=filename,
               ExtraArgs = {
                 'ContentType':'text/html',
                 'ACL': 'public-read'})

# Print the S3 Public Object URL for the new file.
print("http://{}.s3.amazonaws.com/{}".format('test-bokeh', filename))

http://test-bokeh.s3.amazonaws.com/slider-test.html


### Making an index page with links to all plots 

In [35]:
s3.delete_object(Bucket='test-bokeh', Key='index.html')

{'ResponseMetadata': {'HTTPHeaders': {'date': 'Mon, 03 Feb 2020 09:38:52 GMT',
   'server': 'AmazonS3',
   'x-amz-id-2': 'wUPEsk1+NKPNE+fUMS4EnWUR+aWOgtQEC8jUrCkRjJ10X8wM7aERcvfET61z+Ec96lMuLvSWrD8=',
   'x-amz-request-id': 'E8B80310A556675B'},
  'HTTPStatusCode': 204,
  'HostId': 'wUPEsk1+NKPNE+fUMS4EnWUR+aWOgtQEC8jUrCkRjJ10X8wM7aERcvfET61z+Ec96lMuLvSWrD8=',
  'RequestId': 'E8B80310A556675B',
  'RetryAttempts': 0}}

In [560]:
# why this: 
pa.set_option('display.max_colwidth', -1)
# because https://stackoverflow.com/questions/26277757/pandas-to-html-truncates-string-contents


base_url = "http://test-bokeh.s3.amazonaws.com/"

# List the bucket objects
objects_list = s3.list_objects(Bucket='test-bokeh')
objects_df=pa.DataFrame(columns=['Key','Link','Last modified'])

for obj in s3.list_objects(Bucket='test-bokeh')['Contents']:
    if obj['Key'].endswith('html'):
        # Convert the response contents to DataFrame
        objects_df = objects_df.append({'Link':base_url + obj['Key'],
                                        'Key':obj['Key'],
                                        'Last modified':obj['LastModified'].ctime()}, ignore_index=True)
# Preview the resulting DataFrame
objects_df.head()



Unnamed: 0,Key,Link,Last modified
0,final_report.html,http://test-bokeh.s3.amazonaws.com/final_report.html,Wed Jan 29 13:58:00 2020
1,index.html,http://test-bokeh.s3.amazonaws.com/index.html,Mon Feb 3 09:39:26 2020
2,slider-test.html,http://test-bokeh.s3.amazonaws.com/slider-test.html,Thu Jan 30 17:17:20 2020
3,test-range-tool.html,http://test-bokeh.s3.amazonaws.com/test-range-tool.html,Wed Jan 29 14:32:24 2020


In [38]:
# Write objects_df to an HTML file
objects_df.to_html('list_of_plots.html',render_links=True, columns=list(objects_df.columns))

# Overwrite index.html key by uploading the new file
s3.upload_file( Filename='list_of_plots.html', Key='index.html', 
                Bucket='test-bokeh',
                ExtraArgs = {'ContentType': 'text/html', 
                             'ACL': 'public-read'})

In [559]:
for obj in s3.list_objects(Bucket='test-bokeh')['Contents']:
    if obj['Key']=='index.html':
        # Create a column "Link" that contains Public Object URL
        print(base_url + obj['Key'])
        

http://test-bokeh.s3.amazonaws.com/index.html
