# Comando para criar um bucket na AWS

In [1]:
# Install the pinned boto3 release into the interpreter that runs this code.
# pip is invoked through sys.executable so the same cell works inside the
# notebook kernel and when the notebook is exported and run as a plain .py
# file (the IPython "!pip" shell escape is a syntax error outside IPython).
import subprocess
import sys

try:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "boto3==1.13.1", "--quiet"]
    )
except (subprocess.CalledProcessError, OSError) as install_error:
    # Best effort: boto3 may already be available in the environment.
    print("Could not install boto3 (continuing):", install_error)

In [2]:
import json
import logging
import os

import boto3
from boto3.exceptions import S3UploadFailedError

In [3]:
# Directory the notebook is executed from, and the sibling "data" directory
# (expressed relative to it) that holds the CSV files uploaded later.
dirpath = os.getcwd()
dataPath = "/".join((dirpath, "..", "data"))

## Preparando os serviços AWS S3

In [4]:
# High-level (resource) S3 interface; used further down to list the buckets.
s3 = boto3.resource(service_name='s3')

### Criação do bucket chamado <span style="color:red">tembici-fk</span>

In [5]:
# Low-level S3 client, reused by the tagging and upload cells below.
s3_client = boto3.client(service_name='s3')
my_bucket = "tembici-fk"
# NOTE(review): no CreateBucketConfiguration is passed, so this presumably
# relies on the default region being us-east-1 — confirm before running elsewhere.
s3_client.create_bucket(Bucket=my_bucket)

{'ResponseMetadata': {'RequestId': 'D441DCDE1790E7C7',
  'HostId': 'NTxnd+8LdBov53E46pKBZyHbBuNElKsAfmJEDuYetsZlJ70ni1hF/NcGFOp5lS3iHk91ouQ6New=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'NTxnd+8LdBov53E46pKBZyHbBuNElKsAfmJEDuYetsZlJ70ni1hF/NcGFOp5lS3iHk91ouQ6New=',
   'x-amz-request-id': 'D441DCDE1790E7C7',
   'date': 'Mon, 04 May 2020 02:00:47 GMT',
   'location': '/tembici-fk',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Location': '/tembici-fk'}

### Criando uma TAG chamada <span style="color:red">tembici-test-fk</span> no bucket criado.

In [6]:
# Single tag with an empty value; `my_tag` is read again by the Resource Group
# cell to build its tag filter.
my_tag = [dict(Key='tembici-test-fk', Value='')]
tag = dict(TagSet=my_tag)
s3_client.put_bucket_tagging(Tagging=tag, Bucket=my_bucket)

{'ResponseMetadata': {'RequestId': '801AD24A6B87A598',
  'HostId': '8JD46b9fs1YHGHbBKyp+Yfl08mOGoRdowGRyvaJLFRbjbIBeJvqWMilT8NcNmS6aHJ/MxJ8gDD0=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': '8JD46b9fs1YHGHbBKyp+Yfl08mOGoRdowGRyvaJLFRbjbIBeJvqWMilT8NcNmS6aHJ/MxJ8gDD0=',
   'x-amz-request-id': '801AD24A6B87A598',
   'date': 'Mon, 04 May 2020 02:00:47 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

### Exibindo os buckets existentes

In [7]:
# Gather the name of every bucket returned by the S3 resource, then print
# them one per line.
bucket_names = [bucket.name for bucket in s3.buckets.all()]
for bucket_name in bucket_names:
    print(bucket_name)

elasticbeanstalk-us-east-1-032594213725
estudo-aws-fabio
estudo-aws-fk
tembici-fk


### Upload de data files para o bucket

In [8]:
# Upload the raw CSV data files to the "data/" prefix of the bucket.
files = ['station.csv', 'weather.csv', 'trip.csv']

for file in files:
    file_name = os.path.join(dataPath, file)
    try:
        # upload_file returns None; a failed transfer surfaces as an exception.
        s3_client.upload_file(file_name, my_bucket, "data/" + file)
    except (S3UploadFailedError, s3_client.exceptions.ClientError) as e:
        # boto3 wraps service errors raised during the transfer in
        # S3UploadFailedError; ClientError is kept for any other S3 API error.
        logging.error(e)
    else:
        print("It was uploaded the file", "'" + file + "'", ".")

It was uploaded the file 'station.csv' .
It was uploaded the file 'weather.csv' .
It was uploaded the file 'trip.csv' .


### Upload Zeppelin file (model) to bucket 

In [9]:
# Upload the Zeppelin notebook export and its Python script to the "model/"
# prefix of the bucket; the .py file is the one the EMR step submits.
files = ['zeppelin-tembici-test-fk.json', 'zeppelin-tembici-test-fk.py']

for file in files:
    file_name = os.path.join(dirpath, file)
    try:
        # upload_file returns None; a failed transfer surfaces as an exception.
        s3_client.upload_file(file_name, my_bucket, "model/" + file)
    except (S3UploadFailedError, s3_client.exceptions.ClientError) as e:
        # boto3 wraps service errors raised during the transfer in
        # S3UploadFailedError; ClientError is kept for any other S3 API error.
        logging.error(e)
    else:
        print("It was uploaded the file", "'" + file + "'", ".")

It was uploaded the file 'zeppelin-tembici-test-fk.json' .
It was uploaded the file 'zeppelin-tembici-test-fk.py' .


## Configurando um Resource Group com o nome de <span style="color:red">rg-tembici-test-fk</span>

In [10]:
my_resource_group = "rg-tembici-test-fk"
RG_client = boto3.client('resource-groups')
# Backward-compatible alias: this cell used to build a second, identical
# client under this name; point it at the existing one instead.
client = RG_client

# Tag-based query built from the tag applied to the bucket, so the group and
# the tag cannot drift apart. "AWS::AllSupported" places no restriction on the
# resource type; use "AWS::S3::Bucket" to restrict the group to buckets.
query = {
    "ResourceTypeFilters": ["AWS::AllSupported"],
    "TagFilters": [{
        "Key": my_tag[0]["Key"],
        "Values": [my_tag[0]["Value"]],
    }],
}
# The Resource Groups API expects the query serialized as a JSON string.
resource_query = {
    'Type': 'TAG_FILTERS_1_0',
    'Query': json.dumps(query),
}
try:
    resp = RG_client.create_group(Name=my_resource_group, ResourceQuery=resource_query)
    print("Resource Group was created.")
except Exception as e:
    # Best effort: report the failure and let the notebook continue.
    print(e)


Resource Group was created.


### Criação de um cluster EMR chamado <span style="color:red">spark-tembici-test-fk</span>

Foi escolhida a máquina m4.large, pois tem um dos menores custos.

In [11]:
# EMR client; pass region_name='us-east-1' here to pin the region explicitly.
emr_client = boto3.client('emr')
my_emr_cluster = "spark-tembici-test-fk"

# Derive every S3 location from the bucket created earlier so a bucket rename
# cannot leave the cluster pointing at a stale path.
bucket_uri = "s3://" + my_bucket

# Launch a transient Spark cluster (1 master + 2 core m4.large nodes) that runs
# the uploaded script as its only step; with KeepJobFlowAliveWhenNoSteps=False
# the cluster is not kept alive once no steps remain.
# NOTE(review): despite its name, `cluster_id` holds the whole run_job_flow
# response; the cluster id is presumably under its 'JobFlowId' key — confirm.
cluster_id = emr_client.run_job_flow(
    Name=my_emr_cluster,
    ReleaseLabel='emr-5.29.0',
    LogUri=bucket_uri + '/log/',
    Applications=[
        {'Name': 'Spark'},
    ],
    Instances={
        'InstanceGroups': [
            {
                'Name': "Master",
                'Market': 'ON_DEMAND',
                'InstanceRole': 'MASTER',
                'InstanceType': 'm4.large',
                'InstanceCount': 1,
            },
            {
                'Name': "Slave",
                'Market': 'ON_DEMAND',
                'InstanceRole': 'CORE',
                'InstanceType': 'm4.large',
                'InstanceCount': 2,
            },
        ],
        'KeepJobFlowAliveWhenNoSteps': False,
        'TerminationProtected': False,
    },
    Steps=[
        {
            'Name': 'Spark application',
            # A failing step does not abort the job flow.
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    "spark-submit",
                    "--deploy-mode",
                    "cluster",
                    bucket_uri + "/model/zeppelin-tembici-test-fk.py",
                ],
            },
        },
    ],
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
    # Same tag as the bucket, so the tag-based Resource Group picks up the cluster.
    Tags=my_tag,
)
