# Install Data Fabric on single node

## AWS deployment

Before you start, make sure your AWS profile is configured with the `aws configure` command.

We will create a key pair, a security group and an EC2 instance with these settings:

EC2:
 - Ubuntu 20.04 ami-0bd2099338bc55e6d
 - m5.4xlarge - 16 vCPU and 64GB memory
 - 120GB root disk (gp2) + 150GB data disk (gp2)

SG: Allow all traffic from my IP

In [1]:
# Install prerequisites and setup defaults

!python3 -m pip install --user boto3 requests paramiko > /dev/null

import getpass
import os

import boto3
from botocore.exceptions import ClientError

# All resource names are prefixed with the local user name.
# $USER is not set on every platform; without a fallback USERNAME would be
# None and the string concatenations below would raise TypeError.
USERNAME = os.environ.get('USER') or getpass.getuser()

# No explicit credentials/region here: boto3 falls back to its default
# configuration (see `aws configure` in the introduction).
ec2 = boto3.client('ec2')

KEY_PAIR_NAME = USERNAME + "-ez-keypair"
SECURITY_GROUP_NAME = USERNAME + "-ez-securitygroup"
INSTANCE_NAME = USERNAME + "-ez-node"
KEYFILE = "./ez_aws_ec2_key.pem"   # local copy of the private key
CLUSTER_NAME= "demo.df.io"
print("Done")

Done


In [2]:
# Helper functions

# Key Pair
def create_key_pair():
    '''
    Makes sure the EC2 key pair KEY_PAIR_NAME exists.

    When AWS reports the key pair as missing, a new one is created and its
    private key is written to KEYFILE with owner-read-only permissions.
    When the key pair already exists nothing is created; a warning is printed
    if KEYFILE is not present locally, because this function only receives the
    private key material at creation time.

    params: none

    raises:
        ClientError: for any AWS error other than InvalidKeyPair.NotFound
    '''
    try:
        ec2.describe_key_pairs(KeyNames=[KEY_PAIR_NAME])
    except ClientError as e:
        if e.response['Error']['Code'] != "InvalidKeyPair.NotFound":
            print(e)
            raise
    else:
        if not os.path.exists(KEYFILE):
            print("WARNING: key pair %s exists but %s is missing; "
                  "delete the key pair and re-run to get a new private key"
                  % (KEY_PAIR_NAME, KEYFILE))
        return

    print("Creating new keypair")
    key_pair = ec2.create_key_pair(KeyName=KEY_PAIR_NAME)

    # A key file left over from an earlier run is read-only (0400) and cannot
    # be reopened for writing, so remove it before creating the new one.
    try:
        os.remove(KEYFILE)
    except FileNotFoundError:
        pass

    # O_EXCL guarantees we create the file ourselves with mode 0400.
    fd = os.open(KEYFILE, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o400)
    with os.fdopen(fd, "w") as handle:
        handle.write(key_pair['KeyMaterial'])

def destroy_all():
    instanceid = find_instance()
    if instanceid is not None:
        instance = ec2.describe_instances(InstanceIds=[]).get(
            'Reservations', [{}])[0].get('Instances', '')[0]
        pip = instance['PublicIpAddress']
        pdns = instance['PublicDnsName']
        !ssh-keygen -R $pip > /dev/null
        !ssh-keygen -R $pdns > /dev/null
    delete_security_group(get_security_group())
    ec2.delete_key_pair(KeyName=KEY_PAIR_NAME)
    !rm -f $KEYFILE
    print("Done")

def find_instance():
    '''
    Looks for an instance attached to the security group SECURITY_GROUP_NAME.

    params: none

    return:
        InstanceId (str) of the first matching instance, or None if no
        instance lists that security group
    '''
    for reservation in ec2.describe_instances()['Reservations']:
        for candidate in reservation['Instances']:
            group_names = [group['GroupName'] for group in candidate['SecurityGroups']]
            if SECURITY_GROUP_NAME in group_names:
                return candidate['InstanceId']
    return None

def delete_instance(): # Will terminate all instances under the Security Group
    '''
    Terminates, one at a time, every instance that find_instance() reports,
    waiting for each termination to complete before looking for the next.

    params: none
    '''
    instance_id = find_instance()
    while instance_id:
        ec2.terminate_instances(InstanceIds=[instance_id])
        print("Shutting down existing instance %s ..." % instance_id)
        terminated_waiter = ec2.get_waiter("instance_terminated")
        terminated_waiter.wait(InstanceIds=[instance_id])
        print("Deleted Existing Instance %s" % instance_id)
        # Look again: more instances may be attached to the group
        instance_id = find_instance()

# Security Group
def get_security_group():
    '''
    Returns SecurityGroup ID if found else returns None
    Searches by GroupName = SECURITY_GROUP_NAME

    params: none

    return:
        SecurityGroupId (str): String representation of AWS Security Group in default VPC,
        or None when no group with that name exists

    raises:
        ClientError: for any AWS error other than InvalidGroup.NotFound
    '''
    try:
        response = ec2.describe_security_groups(GroupNames=[SECURITY_GROUP_NAME])
    except ClientError as e:
        # Only a genuine "no such group" answer means "not found"; other
        # failures (credentials, throttling, ...) must not be mistaken for it.
        if e.response['Error']['Code'] != "InvalidGroup.NotFound":
            print(e)
            raise
        print("Security Group %s not found" % SECURITY_GROUP_NAME)
        return None

    groups = response.get("SecurityGroups", [])
    if not groups:
        print("Security Group %s not found" % SECURITY_GROUP_NAME)
        return None

    SECURITY_GROUP_ID = groups[0]["GroupId"]
    print("Found SG", SECURITY_GROUP_ID)
    return SECURITY_GROUP_ID

def delete_security_group(sgid):
    '''
    Terminates the instances found by delete_instance() and then deletes the
    given Security Group. AWS errors are printed, not raised.

    params: SecurityGroupId (nothing happens when it is None)
    '''
    if sgid is None:
        return

    try:
        # Instances have to go first, then the group itself is removed
        delete_instance()
        ec2.delete_security_group(GroupId=sgid)
        print("Deleted Existing Security Group %s" % sgid)
    except ClientError as err:
        print("ERROR", err)
def create_security_group():
    '''
    (Re)creates the security group SECURITY_GROUP_NAME in the default VPC and
    allows all inbound traffic from this machine's public IP address only.

    An existing group with the same name is deleted first, together with the
    instances assigned to it (see delete_security_group).
    AWS errors while creating the group or its rule are printed, not raised.

    params: none

    raises:
        RuntimeError: if no default VPC is found
        requests.RequestException: if the public IP lookup fails
    '''
    import requests

    # Get default VPC. Without the filter describe_vpcs() lists every VPC and
    # the first entry is not necessarily the default one.
    response = ec2.describe_vpcs(
        Filters=[{'Name': 'is-default', 'Values': ['true']}])
    vpcs = response.get("Vpcs", [])
    if not vpcs:
        raise RuntimeError("No default VPC found; cannot create security group")
    vpc_id = vpcs[0]["VpcId"]

    sgid = get_security_group()
    if sgid is not None:
        delete_security_group(sgid)

    # Public IP of this machine as a /32 CIDR. The timeout avoids hanging
    # forever, the status check avoids using an error page as the address and
    # strip() drops any trailing newline.
    # NOTE(review): assumes the lookup returns an IPv4 address - confirm.
    reply = requests.get("https://ifconfig.me/ip", timeout=10)
    reply.raise_for_status()
    MYIP = reply.text.strip() + "/32"

    # Create SG with inbound rule - all traffic from my IP
    try:
        response = ec2.create_security_group(GroupName=SECURITY_GROUP_NAME,
                                                Description="Allow my IP",
                                                VpcId=vpc_id)
        SECURITY_GROUP_ID = response['GroupId']
        print("Created Security Group %s in %s." % (SECURITY_GROUP_ID, vpc_id))

        data = ec2.authorize_security_group_ingress(
            GroupId=SECURITY_GROUP_ID,
            IpPermissions=[
                {'IpProtocol': "-1",   # "-1" = all protocols
                'IpRanges': [{'CidrIp': MYIP}]}
            ])
        print("Ingress Successfully Set for IP: %s" % data.get("SecurityGroupRules", [{}])[0].get('CidrIpv4', ''))
    except ClientError as e:
        print(e)

# EC2 Instance
def create_instance(image_id="ami-0bd2099338bc55e6d", instance_type="m5.4xlarge"):
    '''
    Launches one EC2 instance in the security group SECURITY_GROUP_NAME with
    the key pair KEY_PAIR_NAME, tags it with INSTANCE_NAME and waits until its
    status checks pass.

    params:
        image_id (str): AMI to launch. AMI IDs differ per region, so the
            default only works where that image exists.
        instance_type (str): EC2 instance type (use e.g. "t2.nano" for a
            cheap dry run)

    return:
        InstanceId (str) of the new instance

    raises:
        RuntimeError: if the security group does not exist yet
    '''
    sgid = get_security_group()
    if sgid is None:
        # Fail with a clear message instead of sending [None] to AWS
        raise RuntimeError(
            "Security group %s not found; run create_security_group() first"
            % SECURITY_GROUP_NAME)

    response = ec2.run_instances(
        ImageId=image_id,
        MinCount=1,
        MaxCount=1,
        InstanceType=instance_type,
        KeyName=KEY_PAIR_NAME,
        SecurityGroupIds=[sgid],
        BlockDeviceMappings=[
            {
                # Root volume
                'DeviceName': "/dev/sda1",
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 120,
                    'VolumeType': "gp2"
                }
            },
            {
                # Additional volume.
                # NOTE(review): presumably this is the data disk the installer
                # stanza refers to as /dev/nvme1n1 - confirm on the node.
                'DeviceName': "/dev/xvda",
                'Ebs': {
                    'DeleteOnTermination': True,
                    'VolumeSize': 150,
                    'VolumeType': "gp2"
                }
            },
        ],
        TagSpecifications=[
            {
                'ResourceType': "instance",
                'Tags': [
                    {
                        'Key': "Name",
                        'Value': INSTANCE_NAME
                    },
                ]
            },
        ],
    )

    # MinCount == MaxCount == 1, so exactly one instance is returned
    instance_id = response['Instances'][0]['InstanceId']
    print("Waiting for instance %s to start..." % instance_id)
    ec2.get_waiter("instance_status_ok").wait(InstanceIds=[instance_id])
    return instance_id

print("Done")

Done


In [3]:
# Setup AWS env - default VPC, SG & keypair

# Creates the key pair when AWS does not know it yet; the private key is
# written to KEYFILE.
create_key_pair()
# NOTE: this recreates the security group. An existing group with the same
# name is deleted first, together with the instances assigned to it.
create_security_group()
print("Done")

Creating new keypair
Security Group erdincka-ez-securitygroup not found
Created Security Group sg-0c190aecbe0887e38 in vpc-0b2432f981555660c.
Ingress Successfully Set for IP: 90.243.56.76/32
Done


In [4]:
# Create VM
# Launch the node, then read back its description; `instance` is used by the
# following cells for the public/private addresses.
instanceid = create_instance()
description = ec2.describe_instances(InstanceIds=[instanceid])
instance = description.get('Reservations', [{}])[0].get('Instances', '')[0]

public_ip = instance['PublicIpAddress']
public_dns = instance['PublicDnsName']
print("Public IP: %s" % public_ip)
print("Public DNS: %s" % public_dns)

Found SG sg-0c190aecbe0887e38
Waiting for instance i-0a85b13ddc65edeab to start...
Public IP: 18.133.184.71
Public DNS: ec2-18-133-184-71.eu-west-2.compute.amazonaws.com


In [5]:
# Installer stanza (YAML). It is copied to the node as mapr.stanza and passed
# to mapr-installer-cli by the install cell below.
# The two {} placeholders are filled, in order, with CLUSTER_NAME and the
# private DNS name of the instance.
# NOTE(review): every password in here is the literal "mapr" - acceptable only
# for a throw-away demo cluster.
STANZA = """
environment:
  mapr_core_version: 7.0.0
config:
  admin_id: mapr
  cluster_name: {}
  db_admin_password_set: true
  db_admin_password: mapr
  db_admin_user: root
  debug_set: false
  elasticsearch_path: /opt/mapr/es_db
  enable_encryption_at_rest: true
  enable_min_metrics_collection: true
  enable_nfs: true
  hosts:
    - {}
  license_type: M7
  log_admin_password: mapr
  mep_version: 8.1.0
  metrics_ui_admin_password: mapr
  nfs_type: "NFSv4"
  security: true
  ssh_id: ubuntu
  ssh_key_file: /home/ubuntu/private_key
  disks:
    - /dev/nvme1n1
  disk_format: true
  disk_stripe: 1
  services:
    template-05-converged:
    mapr-hivemetastore:
      database:
        create: true
        name: hive
        user: hive
        password: mapr
    mapr-hue-livy:
          enabled: true
    mapr-grafana:
      enabled: true
    mapr-opentsdb:
      enabled: true
    mapr-collectd:
    mapr-fluentd:
    mapr-kibana:
      enabled: true
    mapr-elasticsearch:
      enabled: true
    mapr-data-access-gateway:
    mapr-mastgateway:
""".format(CLUSTER_NAME, instance['PrivateDnsName'])


In [7]:
# Install and Configure Data Fabric
from paramiko import SSHClient, AutoAddPolicy, RSAKey
client = SSHClient()
# The instance is brand new, so its host key cannot be known yet: accept it
# automatically instead of failing on the unknown host.
client.set_missing_host_key_policy(AutoAddPolicy())
# Keep the private key text in memory; it is written to the node further down
# (the stanza's ssh_key_file points at /home/ubuntu/private_key).
with open(KEYFILE, 'r') as file:
    PRIVATE_KEY = file.read()

def run_commands(commands, stop_on_error=False):
    '''
    Runs shell commands on the instance over SSH, one after another, and
    echoes their output (stdout first, then stderr).

    Every command is started with its own exec_command() call, so shell state
    (`cd`, variables, `exit`) does not carry over from one command to the next.
    Connection and SSH errors are printed, not raised.

    params:
        commands: iterable of shell command strings
        stop_on_error (bool): when True, stop at the first command that exits
            with a non-zero status instead of continuing with the next one
    '''
    try:
        client.connect(instance['PublicIpAddress'], port=22,
                    username="ubuntu", key_filename=KEYFILE)

        for number, command in enumerate(commands, start=1):
            stdin, stdout, stderr = client.exec_command(command)
            try:
                for line in iter(stdout.readline, ""):
                    print(line, end="")
                # Show error output too, otherwise failures are invisible
                for line in iter(stderr.readline, ""):
                    print(line, end="")
                status = stdout.channel.recv_exit_status()
            finally:
                # Close the streams of every command, not only the last one
                stdin.close()
                stdout.close()
                stderr.close()

            if status != 0:
                # Report by position only: some commands embed the private key
                print("Command #%d exited with status %d" % (number, status))
                if stop_on_error:
                    break

    # Exception (not BaseException) so that Ctrl-C / kernel interrupt still works
    except Exception as e:
        print(e)
    finally:
        # Always release the connection, also after a failure
        client.close()

# Commands that prepare the node: OS updates, installer download/setup and
# copying the SSH key and the stanza that the installer needs.
prepcommands = [
    "sudo apt update; sudo apt upgrade -y; sudo chmod u+s /sbin/unix_chkpwd",
    "echo 'ready for installer'",
    "[ -f mapr-setup.sh ] || wget -O mapr-setup.sh https://package.mapr.hpe.com/releases/installer/mapr-setup.sh; chmod +x mapr-setup.sh",
    "echo 'installer downloaded'",
    "[ -f /opt/mapr/installer/bin/mapr-installer-cli ] || sudo /home/ubuntu/mapr-setup.sh -y",
    "echo 'installer ready'",
    "echo -e '" + PRIVATE_KEY + "' > private_key ; chmod 600 private_key",
    "echo 'key file copied'",
    "echo -e '" + STANZA + "' > mapr.stanza",
    "echo 'stanza copied'",
    "echo 'reboot if needed'",
    # Fixed shell syntax: the option is `-f` and the redirection is `2>&1`
    # (`- f` and `2 > &1` do not parse).
    # NOTE(review): this runs without sudo, so as the ubuntu user it presumably
    # does not actually reboot. If a reboot is really wanted, add sudo AND wait
    # for SSH to come back before running the install commands.
    "[ -f /var/run/reboot-required ] && /sbin/reboot -f > /dev/null 2>&1 &"
]

run_commands(prepcommands)

# Commands that run the installer with the stanza and log the mapr user in.
instcommands = [
    "echo 'starting installation'",
    "echo y | sudo /opt/mapr/installer/bin/mapr-installer-cli install -nv -t /home/ubuntu/mapr.stanza",
    "echo 'install complete'",
    # NOTE: each entry is executed as a separate remote command, so this
    # `exit 1` only ends its own shell; it does not skip the entries below.
    "[ -f /opt/mapr/bin/maprlogin ] || exit 1",
    "echo 'waiting for services'; sleep 30",
    # Fixed shell syntax: `id -u` (not `id - u`) and a space before `]`,
    # otherwise the test always fails and maprlogin runs every time.
    "[ -f /tmp/maprticket_$(id -u) ] || (echo mapr | maprlogin password -user mapr)",
    "echo 'mapr user is authenticated'",
]

run_commands(instcommands)

Get:1 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal InRelease [265 kB]
Hit:2 https://package.mapr.com/releases/installer/ubuntu binary InRelease
Get:3 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Get:4 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]
Get:5 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Get:6 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/main amd64 Packages [970 kB]
Get:7 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/main Translation-en [506 kB]
Get:8 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/main amd64 c-n-f Metadata [29.5 kB]
Get:9 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [22.0 kB]
Get:10 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/restricted Translation-en [6212 B]
Get:11 http://eu-west-2.ec2.archive.ubuntu.com/ubuntu focal/restricted amd64 c-n-f Metadata [392 B]
Get:12 http://eu-west-2.ec2.archive.ubu

In [64]:
# Access to the cluster
prvdns = instance['PrivateDnsName']
prvip = instance['PrivateIpAddress']
pubip = instance['PublicIpAddress']

sedptrn = "'/^%s/d'" % pubip
!sed $sedptrn /etc/hosts | sudo tee /etc/hosts > /dev/null
!echo $pubip $prvip $prvdns | sudo tee -a /etc/hosts > /dev/null

mcs = "https://" + prvdns + ":8443/"
installer = "https://" + prvdns + ":9443/"
print("Connect to Data Fabric Management Console:", mcs)
print("Discover Available Services or Incremental Install at:", installer)


https://ip-172-31-4-55.eu-west-2.compute.internal:8443/
Connect to Data Fabric Management Console: https://ip-172-31-4-55.eu-west-2.compute.internal:8443/
Discover Available Services or Incremental Install at: https://ip-172-31-4-55.eu-west-2.compute.internal:9443/


In [65]:
### Destroy when done

# WARNING: running this cell tears the environment down. destroy_all() deletes
# the security group together with its instances, the key pair and the local
# key file. Comment the call out if "Run All" should leave the cluster running.
destroy_all()

# Host 18.133.184.71 found: line 1
/Users/erdincka/.ssh/known_hosts updated.
Original contents retained as /Users/erdincka/.ssh/known_hosts.old
Host ec2-18-133-184-71.eu-west-2.compute.amazonaws.com not found in /Users/erdincka/.ssh/known_hosts
Found SG sg-0c190aecbe0887e38
Shutting down existing instance i-0a85b13ddc65edeab ...
Deleted Existing Instance i-0a85b13ddc65edeab
Deleted Existing Security Group sg-0c190aecbe0887e38
Done
