In [1]:
# Error you might be getting:
# "EMR_DefaultRole is invalid" or "EMR_EC2_DefaultRole is invalid" error when creating an Amazon EMR cluster
# 
# Short Description
# These errors occur when the Amazon Elastic Compute Cloud (Amazon EC2) instance profile associated with the default roles isn't configured correctly. Delete the roles and the instance profile. Then, recreate the roles.
# 
# Let's try to translate this:
# https://aws.amazon.com/premiumsupport/knowledge-center/emr-default-role-invalid/
# into something we can do using boto3
# boto3 documentation: 
# https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

In [6]:
# Import
import os
import sys
import configparser
import boto3

In [5]:
# Setup

# Work from the project directory so the relative config paths below resolve
home = os.path.expanduser('~')
project_path = os.path.join(home,'git','udacity-data-engineering-nano-degree','udacity-4-data-lakes-with-spark')
os.chdir(project_path)

# Read the keys file. The `with` block closes the file handle as soon as it has
# been parsed (a bare open() passed to read_file would leave it open), and a
# missing file still raises FileNotFoundError instead of being silently ignored.
iac_keys = configparser.ConfigParser()
with open('./keys.cfg') as keys_file:
    iac_keys.read_file(keys_file)

# Load all the keys needed to connect to AWS services
KEY             = iac_keys.get('AWS','AWS_ACCESS_KEY_ID')
SECRET          = iac_keys.get('AWS','AWS_SECRET_ACCESS_KEY')

# Read the config file, again closing the handle once parsing is done
iac_config = configparser.ConfigParser()
with open('./dl.cfg') as config_file:
    iac_config.read_file(config_file)

# Region in which the AWS clients are created
DL_REGION       = iac_config.get("DL","DL_REGION")

# Connection settings shared by every AWS client created in this notebook
aws_client_kwargs = {
    'region_name': DL_REGION,
    'aws_access_key_id': KEY,
    'aws_secret_access_key': SECRET,
}

# Creating resources/clients for infrastructure: IAM
iam = boto3.client('iam', **aws_client_kwargs)

# Creating resources/clients for infrastructure: EMR
emr = boto3.client('emr', **aws_client_kwargs)

In [2]:
# 1.    Remove the EMR_EC2_DefaultRole role from the instance profile of the same name.
# CLI equivalent:
# > aws iam remove-role-from-instance-profile --instance-profile-name EMR_EC2_DefaultRole --role-name EMR_EC2_DefaultRole
remove_role_args = {
    'InstanceProfileName': 'EMR_EC2_DefaultRole',
    'RoleName': 'EMR_EC2_DefaultRole',
}
iam.remove_role_from_instance_profile(**remove_role_args)

In [3]:
# 2.    Delete the EMR_EC2_DefaultRole instance profile.
# CLI equivalent:
# > aws iam delete-instance-profile --instance-profile-name EMR_EC2_DefaultRole
iam.delete_instance_profile(InstanceProfileName='EMR_EC2_DefaultRole')

In [4]:
# 3.    Detach the managed IAM policy associated with EMR_EC2_DefaultRole.
# CLI equivalent:
# > aws iam detach-role-policy --role-name EMR_EC2_DefaultRole --policy-arn arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role
ec2_role_policy = {
    'RoleName': 'EMR_EC2_DefaultRole',
    'PolicyArn': 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role',
}
iam.detach_role_policy(**ec2_role_policy)

In [5]:
# 4.    Delete the EMR_EC2_DefaultRole role itself.
# CLI equivalent:
# > aws iam delete-role --role-name EMR_EC2_DefaultRole
iam.delete_role(RoleName='EMR_EC2_DefaultRole')

In [6]:
# 5.    Detach the managed IAM policy associated with EMR_DefaultRole.
# CLI equivalent:
# > aws iam detach-role-policy --role-name EMR_DefaultRole --policy-arn arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole
service_role_policy = {
    'RoleName': 'EMR_DefaultRole',
    'PolicyArn': 'arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole',
}
iam.detach_role_policy(**service_role_policy)

In [7]:
# 6.    Delete the EMR_DefaultRole role itself.
# CLI equivalent:
# > aws iam delete-role --role-name EMR_DefaultRole
iam.delete_role(RoleName='EMR_DefaultRole')

In [8]:
# 7.    Run create-default-roles to recreate the default roles:
# > aws emr create-default-roles

# Note: There does not appear to be an "easy" solution for this step. Either run 'aws emr create-default-roles' via the CLI tool (once per account per region), or build the default roles manually in IAM, following the AWS guidelines for Amazon EMR service roles.
# 

In [9]:
# 8.    Launch a new Amazon EMR cluster.