In [18]:
def initiate_cluster():
    """Launch the ``JE_Test_EMR`` cluster on AWS EMR and return its job flow id.

    The function reads the EMR ``Configurations`` payload from
    ``basic_config.json`` (resolved against the current working directory),
    takes the AWS access/secret keys from the local ``sconfig`` module, and
    submits a ``run_job_flow`` request in ``us-east-1``.  The cluster is
    requested as two spot instance fleets (one master, one core) and is kept
    alive when it has no steps.

    Progress is reported with ``print``.

    Returns:
        The ``JobFlowId`` value from the ``run_job_flow`` response.

    Raises:
        ImportError: If ``boto3`` or ``sconfig`` cannot be imported.
        OSError: If ``basic_config.json`` cannot be opened.
        json.JSONDecodeError: If ``basic_config.json`` is not valid JSON.

    Any error raised by boto3 while submitting the request propagates to the
    caller unchanged.
    """
    import json
    import sys

    import boto3

    # ``sconfig`` (credentials) lives outside the import path.  Only add the
    # directory once so repeated calls do not keep growing ``sys.path``.
    local_config_dir = "/Users/josepherlanger/github/je_scripts/github_scripts/scratch/local"
    if local_config_dir not in sys.path:
        sys.path.append(local_config_dir)
    import sconfig

    # Explicit encoding so the JSON is read the same way on every platform.
    with open('basic_config.json', encoding='utf-8') as conf:
        emr_config = json.load(conf)

    def spot_fleet(name, fleet_type, instance_type):
        """Build a single-instance, spot-only fleet definition."""
        return {
            'Name': name,
            'InstanceFleetType': fleet_type,
            'TargetOnDemandCapacity': 0,
            'TargetSpotCapacity': 1,
            'InstanceTypeConfigs': [
                {
                    'InstanceType': instance_type,
                    'BidPriceAsPercentageOfOnDemandPrice': 100.0,
                },
            ],
            'LaunchSpecifications': {
                'SpotSpecification': {
                    'TimeoutDurationMinutes': 60,
                    'TimeoutAction': 'TERMINATE_CLUSTER',
                },
            },
        }

    tags = [
        {'Key': 'Name', 'Value': 'JE-Test'},
        {'Key': 'cost-center', 'Value': 'JE'},
    ]

    applications = [
        {'Name': app_name}
        for app_name in ('Spark', 'Hadoop', 'Hive', 'JupyterHub')
    ]

    instances = {
        'InstanceFleets': [
            spot_fleet('Master', 'MASTER', 'm3.xlarge'),
            spot_fleet('Core', 'CORE', 'i2.xlarge'),
        ],
        'Ec2KeyName': 'airflow',
        'Ec2SubnetId': 'subnet-badaee95',
        'EmrManagedMasterSecurityGroup': 'sg-21078352',
        'EmrManagedSlaveSecurityGroup': 'sg-32048041',
        'KeepJobFlowAliveWhenNoSteps': True,
    }

    # (display name, S3 script path) for each bootstrap action, in run order.
    bootstrap_scripts = (
        ('LiveIntent shell installer',
         's3://us-config-mojn/emr/add-liveintent-shell'),
        ('Idle cluster reaper',
         's3://us-config-mojn/emr/idle_timeout.sh'),
        ('Prometheus export start',
         's3://us-config-mojn/emr/emr_bootstrap_node_exporter.sh'),
        ('Additional AWS Credential Profiles',
         's3://us-config-mojn/emr/add-profile-configuration'),
    )
    bootstrap_actions = [
        {'Name': action_name, 'ScriptBootstrapAction': {'Path': script_path}}
        for action_name, script_path in bootstrap_scripts
    ]

    connection = boto3.client(
        'emr',
        region_name='us-east-1',
        aws_access_key_id=sconfig.cph_access_key,
        aws_secret_access_key=sconfig.cph_secret_key,
    )
    # NOTE: creating the client does not send a request to AWS; the first
    # real API call is run_job_flow below, so this message only confirms the
    # client object was built.
    print('Connection to EMR successful')

    print('Initiating cluster...')
    cluster = connection.run_job_flow(
        Name='JE_Test_EMR',
        LogUri='s3n://debug-dwh-liveintent-com/emr-debug-logs/JE_Test_EMR/',
        ReleaseLabel='emr-5.20.0',
        VisibleToAllUsers=True,
        EbsRootVolumeSize=10,
        ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION',
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole',
        Tags=tags,
        Applications=applications,
        Instances=instances,
        BootstrapActions=bootstrap_actions,
        Configurations=emr_config,
    )

    job_flow_id = cluster['JobFlowId']
    print(f'Cluster initiation success: {job_flow_id}')
    # Hand the id back so callers can monitor or terminate the cluster.
    return job_flow_id
    
# Submit the cluster launch request as soon as this cell is executed.
initiate_cluster()

Connection to EMR successful
Initiating cluster...
Cluster initiation success: j-3P32JRR3JBIBW
