In [1]:
# !pip install paramiko
# !pip install scp

In [8]:
import os
import boto3
import subprocess
import numpy as np
import time
import datetime
import paramiko
import io
from scp import SCPClient, SCPException
import sys
from ast import literal_eval

In [9]:
# Raise NumPy's summarisation threshold to the maximum so printed/str() arrays
# are never abbreviated with "...".
# NOTE(review): presumably required because matrices are serialised with
# str(...) before being queued (see reformat_data) — confirm.
np.set_printoptions(threshold=sys.maxsize)

In [10]:
# Number of worker instances requested. The same value drives how many SSH
# clients are created, how many task/result queue pairs are created, and how
# many chunks the work is split into.
INSTANCE_SIZE = 6

In [11]:
# Low-level EC2 client used for key pairs, security groups and instance launch.
client = boto3.client('ec2', region_name='us-east-1')
# SQS service resource (not a low-level client). Pinned to the same region as
# the EC2 client so the queues do not depend on the ambient default region.
sqs = boto3.resource('sqs', region_name='us-east-1')

In [12]:
def get_default_security_group(client, key_name):
    """Collect one attribute from every security group named 'default'.

    :param client: EC2 client exposing ``describe_security_groups()``.
    :param key_name: Key to read from each matching group (e.g. 'GroupId').
    :return: List with the ``key_name`` value of each group whose
        ``GroupName`` is 'default', in the order the API returned them.
    """
    security_groups = client.describe_security_groups()['SecurityGroups']

    selected = []
    for group in security_groups:
        if group['GroupName'] == 'default':
            selected.append(group[key_name])

    return selected

def get_key_pairs(client, removeExisting=False):
    """Return the 'airscholar-key' EC2 key pair, creating it when absent.

    When the key pair has to be created, its private key material is written
    to ``labsuser.pem`` in the current working directory. The file is written
    through a context manager so the handle is closed even if the write fails.

    :param client: EC2 client exposing ``delete_key_pair``,
        ``describe_key_pairs`` and ``create_key_pair``.
    :param removeExisting: When true, delete the key pair first so a fresh one
        is created (and ``labsuser.pem`` is rewritten).
    :return: The matching entry from ``describe_key_pairs`` when the key
        already exists, otherwise the ``create_key_pair`` response.
    """
    key_name = 'airscholar-key'

    if removeExisting:
        client.delete_key_pair(KeyName=key_name)

    existing = [
        pair for pair in client.describe_key_pairs()['KeyPairs']
        if pair['KeyName'] == key_name
    ]
    if existing:
        # NOTE(review): an existing key pair is returned without touching
        # labsuser.pem; the local file is assumed to be present already.
        return existing[0]

    keypair = client.create_key_pair(KeyName=key_name)
    with open('labsuser.pem', 'w') as pem_file:
        pem_file.write(keypair['KeyMaterial'])

    return keypair

def launch_new_instance(client, keypair, count):
    """Launch ``count`` t2.micro instances and wait until they are running.

    :param client: EC2 client.
    :param keypair: Name of the EC2 key pair to attach to the instances.
    :param count: Exact number of instances to launch (used for both
        ``MinCount`` and ``MaxCount``).
    :return: Flat list of the launched instance ids; empty when ``count`` is
        less than 1.
    """
    if count < 1:
        # run_instances rejects a zero/negative count; nothing to launch.
        return []

    response = client.run_instances(
        ImageId='ami-05723c3b9cf4bf4ff',
        InstanceType='t2.micro',
        KeyName=keypair,
        MaxCount=count,
        MinCount=count,
        Monitoring={
            'Enabled': True
        },
        SecurityGroupIds=get_default_security_group(client, key_name='GroupId')
    )
    # The response is a dict; the launched instances live under 'Instances'.
    # Iterating the dict itself would yield its string keys.
    ec2_inst_ids = [instance['InstanceId'] for instance in response['Instances']]

    # The waiter expects a flat list of id strings, not a list wrapped in a list.
    waiter = client.get_waiter('instance_running')
    waiter.wait(InstanceIds=ec2_inst_ids)
    return ec2_inst_ids

def prepare_instances(client, keypair, count):
    """Ensure at least ``count`` running instances exist and return their ids.

    Only instances whose state is 'running' count towards the target; stopped
    or terminated instances are ignored so they cannot mask a shortfall. Any
    missing instances are launched with ``launch_new_instance``.

    :param client: EC2 client used to launch instances.
    :param keypair: Name of the EC2 key pair for newly launched instances.
    :param count: Desired number of running instances.
    :return: Tuple ``(ec2, ec2_inst_ids)`` of the EC2 service resource and a
        flat list of instance id strings (already-running ids first).
    """
    ec2 = boto3.resource('ec2')

    ec2_inst_ids = [
        instance.id
        for instance in ec2.instances.all()
        if instance.state['Name'] == 'running'
    ]

    missing = count - len(ec2_inst_ids)
    if missing > 0:
        # extend (not append) so the result stays a flat list of id strings.
        ec2_inst_ids.extend(launch_new_instance(client, keypair, missing))

    return ec2, ec2_inst_ids

def configure_ssh():
    """Build one unconnected paramiko SSH client per worker.

    :return: List of ``INSTANCE_SIZE`` ``paramiko.SSHClient`` objects, each
        configured with ``AutoAddPolicy`` for unknown host keys.
    """
    def _new_client():
        connection = paramiko.SSHClient()
        connection.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        return connection

    return [_new_client() for _ in range(0, INSTANCE_SIZE)]

def ssh_connect_with_retry(ssh, ip_address, retries):
    """Connect ``ssh`` to ``ip_address`` as 'ec2-user', retrying on failure.

    The private key is read once from ``labsuser.pem``. Attempts continue
    while the running retry counter is at most 3, sleeping 5 seconds between
    attempts.

    :param ssh: ``paramiko.SSHClient`` to connect.
    :param ip_address: Host to connect to.
    :param retries: Number of attempts already made (pass 0 for a fresh start).
    :return: True once a connection succeeds, False when the retry budget is
        exhausted. A boolean is always returned, including after retries.
    """
    if retries > 3:
        return False

    # Load the key once, closing the file promptly.
    with open('labsuser.pem', 'r') as key_file:
        privkey = paramiko.RSAKey.from_private_key(key_file)

    interval = 5
    while retries <= 3:
        retries += 1
        try:
            print('SSH into the instance: {}'.format(ip_address))
            ssh.connect(hostname=ip_address,
                        username='ec2-user', pkey=privkey)
            return True
        except Exception as e:
            print(e)
            if retries > 3:
                # Budget exhausted: do not sleep or announce another retry.
                break
            time.sleep(interval)
            print('Retrying SSH connection to {}'.format(ip_address))

    return False

def ssh_disconnect(ssh):
    """Close the given SSH connection; do nothing for a falsy value."""
    if not ssh:
        return
    ssh.close()

def get_public_address(ec2, instance_id):
    """Wait for an instance to be running and return its public IP address.

    :param ec2: EC2 service resource.
    :param instance_id: Id of the instance to look up.
    :return: ``public_ip_address`` of the first instance returned when
        filtering by ``instance_id``.
    """
    ec2.Instance(id=instance_id).wait_until_running()
    # Query again after the wait and read the address from that result.
    matches = list(ec2.instances.filter(InstanceIds=[instance_id]))
    return matches[0].public_ip_address

def get_queue(sqs, queue_name):
    """Create the task and result queues for every worker.

    For each index in ``range(INSTANCE_SIZE)`` this requests a queue named
    ``{queue_name}{idx}`` followed by one named ``result-queue-{idx}``, all
    with the same attributes. Nothing is returned.

    :param sqs: SQS service resource exposing ``create_queue``.
    :param queue_name: Prefix for the per-worker task queue names.
    """
    attributes = {
        'DelaySeconds': '0',
        'MessageRetentionPeriod': '86400',
        "ReceiveMessageWaitTimeSeconds": "0"
    }

    for idx in range(INSTANCE_SIZE):
        # Task queue first, then the matching result queue.
        for name in (f"{queue_name}{idx}", f'result-queue-{idx}'):
            sqs.create_queue(QueueName=name, Attributes=attributes)

def send_message_to_queue(sqs, queue_name, message):
    """Send a batch of entries to the named SQS queue.

    :param sqs: SQS service resource exposing ``get_queue_by_name``.
    :param queue_name: Name of the destination queue.
    :param message: Batch entries, passed through as ``Entries``.
    :return: Whatever the queue's ``send_messages`` call returns.
    """
    target = sqs.get_queue_by_name(QueueName=queue_name)
    return target.send_messages(Entries=message)

def install_required_packages(ssh):
    """Start installing pip, numpy and boto3 on the remote host.

    :param ssh: Connected SSH client exposing ``exec_command``.
    :return: Tuple ``(stdout, stderr)`` of the remote command's streams; the
        stdin stream is discarded.
    """
    command = "sudo yum install pip -y && sudo pip install numpy boto3"
    _stdin, remote_out, remote_err = ssh.exec_command(command)
    return remote_out, remote_err

def get_messages_from_queue(instance_size, queue, message_size=10):
    """Receive up to ``message_size`` messages from a queue and delete them.

    :param instance_size: Not used by this function.
    :param queue: Name of the SQS queue to read from.
    :param message_size: Passed as ``MaxNumberOfMessages`` to the single
        receive call.
    :return: List of message bodies, in the order received. Each message is
        deleted right after its body is recorded.
    """
    sqs = boto3.resource('sqs')
    source = sqs.get_queue_by_name(QueueName=queue)
    received = source.receive_messages(
        MaxNumberOfMessages=message_size,
        MessageAttributeNames=['All'],
        WaitTimeSeconds=0,
    )

    bodies = []
    for message in received:
        bodies.append(message.body)
        message.delete()
    return bodies

def split_row(array, nrows, ncols):
    """
    Cut a 2D array into equally sized tiles.

    The result has shape (n, nrows, ncols) with n * nrows * ncols equal to
    array.size. Tiles are listed left-to-right, then top-to-bottom, and each
    tile keeps the layout it had inside ``array``.

    Both dimensions of ``array`` must be exact multiples of the tile size;
    otherwise an AssertionError is raised.
    """
    h, w = array.shape
    assert h % nrows == 0, f"{h} rows is not evenly divisible by {nrows}"
    assert w % ncols == 0, f"{w} cols is not evenly divisible by {ncols}"
    # Axes after the first reshape: (tile row, row in tile, tile col, col in tile).
    tiled = array.reshape(h // nrows, nrows, -1, ncols)
    # Bring the two tile axes to the front, then flatten them into one.
    regrouped = tiled.transpose(0, 2, 1, 3)
    return regrouped.reshape(-1, nrows, ncols)

def split_col(array, nrows, ncols):
    """Split a matrix into sub-matrices.

    ``array`` is cut column-wise into as many equal sections as it has rows,
    and every section is then cut row-wise into 5 equal parts.

    :param array: 2D array to split (the argument itself is used, not a
        notebook-level global).
    :param nrows: Currently unused.
    :param ncols: Currently unused.
    :return: List (one entry per column section) of lists of 5 sub-arrays.
    """
    # NOTE(review): the section counts (row count for columns, literal 5 for
    # rows) are kept from the original; nrows/ncols look like they were meant
    # to control them — confirm before relying on this helper.
    r, _ = array.shape
    return [np.vsplit(section, 5) for section in np.hsplit(array, r)]

def generate_array(nrows, ncols):
    """Create two random integer matrices of the same shape.

    :param nrows: Number of rows in each matrix.
    :param ncols: Number of columns in each matrix.
    :return: Tuple ``(arr, arr1)``: the first drawn with
        ``np.random.randint(10, ...)``, the second with
        ``np.random.randint(20, ...)``, in that order.
    """
    shape = (nrows, ncols)
    first = np.random.randint(10, size=shape)
    second = np.random.randint(20, size=shape)
    return first, second

def upload_file_to_s3(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then the base name of
        file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use the file's base name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except Exception as error:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit still propagate, and report the cause
        # instead of failing silently.
        print(f"Failed to upload {file_name} to {bucket}/{object_name}: {error}")
        return False
    return True

def bulk_upload(scp, filepaths: list[str], remote_path, host):
    """
    Copy several local files into one remote directory over SCP.

    Failures are reported on stdout and not re-raised.

    :param scp: SCP client exposing ``put``.
    :param List[str] filepaths: List of local files to be uploaded.
    :param remote_path: Destination directory on the remote host.
    :param host: Host label used only in the success message.
    """
    try:
        scp.put(filepaths, remote_path=remote_path, recursive=True)
        summary = f"Finished uploading {len(filepaths)} files to {remote_path} on {host}"
        print(summary)
    except SCPException as scp_error:
        print(f"SCPException during bulk upload: {scp_error}")
    except Exception as other_error:
        print(f"Unexpected exception during bulk upload: {other_error}")

def configure_aws_access_for_ssh(ssh, ip_address):
    """
    Copy the local AWS credentials file to the remote host.

    The local ``~/.aws/credentials`` file is read directly, so a missing file
    raises instead of pushing an error message as credentials. The remote
    directory creation and file write run as one shell command, so they
    cannot race each other, and the content is shell-quoted so quotes or
    other special characters in the file cannot break the command.

    :param ssh: connected SSH client exposing ``exec_command``
    :param ip_address: host label used only in the status message
    :return: None
    :raises OSError: if the local credentials file cannot be read
    """
    import shlex

    with open(os.path.expanduser('~/.aws/credentials'), 'r') as handle:
        credentials = handle.read()

    command = (
        'mkdir -p ~/.aws && '
        f'printf %s {shlex.quote(credentials)} > ~/.aws/credentials && '
        'chmod 600 ~/.aws/credentials'
    )
    _stdin, stdout, _stderr = ssh.exec_command(command)

    # Block until the remote command finishes so the file exists on return.
    exit_status = stdout.channel.recv_exit_status()
    if exit_status == 0:
        print(f'SSH AWS configuration done for {ip_address}')
    else:
        print(f'SSH AWS configuration failed for {ip_address} (exit status {exit_status})')

def matrix_dot_product(matrix_a, matrix_b):
    """Multiply two matrices with plain Python loops.

    Prints the elapsed wall-clock time before returning.

    :param matrix_a: Left operand, indexable as ``matrix_a[i][k]``.
    :param matrix_b: Right operand, indexable as ``matrix_b[k][j]``.
    :return: Product as a list of row lists.
    """
    began = datetime.datetime.now()
    product = []
    for left_row in matrix_a:
        out_row = []
        for j in range(len(matrix_b[0])):
            total = 0
            for k, right_row in enumerate(matrix_b):
                total += left_row[k] * right_row[j]
            out_row.append(total)
        product.append(out_row)
    print('Computation time', datetime.datetime.now() - began)

    return product

def matrix_add(matrix_1, matrix_2):
    """Add two matrices element by element with plain Python loops.

    Prints the elapsed wall-clock time before returning.

    :param matrix_1: First operand; its dimensions drive the iteration.
    :param matrix_2: Second operand, indexed at the same positions.
    :return: Element-wise sum as a list of row lists.
    """
    began = datetime.datetime.now()
    summed = []
    for row_idx, left_row in enumerate(matrix_1):
        right_row = matrix_2[row_idx]
        summed.append([
            left_value + right_row[col_idx]
            for col_idx, left_value in enumerate(left_row)
        ])
    print('Computation time', datetime.datetime.now() - began)
    return summed

In [13]:
# Prefix of the per-worker task queues; get_queue appends the worker index
# (queue0, queue1, ...). The send cell further down builds the same names.
QUEUE_NAME = 'queue'

In [14]:
# Create the task queue and result queue for every worker index.
get_queue(sqs, QUEUE_NAME)

In [15]:
# Build the SSH clients, make sure the key pair and instances exist, then
# resolve each instance's public address.
sshs = configure_ssh()
keypair = get_key_pairs(client, False)
ec2, instances = prepare_instances(client, keypair['KeyName'], INSTANCE_SIZE)
ip_addresses = [get_public_address(ec2, instance) for instance in instances]

# Pair the i-th SSH client with the i-th address and connect.
for idx, ssh in enumerate(sshs):
    ip_address = ip_addresses[idx]
    ssh_connect_with_retry(ssh, ip_address, 0)

SSH into the instance: 54.221.82.103
SSH into the instance: 50.17.68.227
SSH into the instance: 54.205.169.35
SSH into the instance: 54.159.35.23
SSH into the instance: 3.90.164.133
SSH into the instance: 54.226.83.253


In [16]:
# ssh_disconnect(ssh)
# Install the worker dependencies on every host and echo the remote output
# (stdout first, then stderr).
for idx, ssh in enumerate(sshs):
    ip_address = ip_addresses[idx]
    stdout, stderr = install_required_packages(ssh)
    for stream in (stdout, stderr):
        print(stream.read().decode('utf-8'))
    # configure_aws_access_for_ssh(ssh, ip_address)

Updating Subscription Management repositories.
Unable to read consumer identity

This system is not registered with an entitlement server. You can use subscription-manager to register.

Red Hat Enterprise Linux 9 for x86_64 - AppStre  24 kB/s | 4.5 kB     00:00    
Red Hat Enterprise Linux 9 for x86_64 - BaseOS   66 kB/s | 4.1 kB     00:00    
Red Hat Enterprise Linux 9 Client Configuration  28 kB/s | 2.0 kB     00:00    
Package python3-pip-21.2.3-6.el9.noarch is already installed.
Dependencies resolved.
Nothing to do.
Complete!


Updating Subscription Management repositories.
Unable to read consumer identity

This system is not registered with an entitlement server. You can use subscription-manager to register.

Red Hat Enterprise Linux 9 for x86_64 - AppStre  25 kB/s | 4.5 kB     00:00    
Red Hat Enterprise Linux 9 for x86_64 - BaseOS   67 kB/s | 4.1 kB     00:00    
Red Hat Enterprise Linux 9 Client Configuration  34 kB/s | 2.0 kB     00:00    
Package python3-pip-21.2.3-6.el9.noa

In [52]:
from files.file_helper import fetch_local_files

# List the worker files once; the listing is the same for every host.
worker_files = list(fetch_local_files('./worker'))

for idx, ssh in enumerate(sshs):
    ip_address = ip_addresses[idx]
    scp = SCPClient(ssh.get_transport())
    try:
        bulk_upload(scp, worker_files, '~', ip_address)
    finally:
        # Release the SCP channel even if the upload raised.
        scp.close()

Finished uploading 3 files to ~ on 54.221.82.103
Finished uploading 3 files to ~ on 50.17.68.227
Finished uploading 3 files to ~ on 54.205.169.35
Finished uploading 3 files to ~ on 54.159.35.23
Finished uploading 3 files to ~ on 3.90.164.133
Finished uploading 3 files to ~ on 54.226.83.253


In [68]:
def start_worker(ssh, worker_id):
    """Run ``worker.py`` on the remote host and print its output.

    Reads the remote stdout to the end, then stderr, so the call does not
    return until both streams are exhausted.

    :param ssh: Connected SSH client exposing ``exec_command``.
    :param worker_id: Value passed to the worker as its single argument.
    """
    _stdin, remote_out, remote_err = ssh.exec_command(f'python ./worker.py {worker_id}')
    for stream in (remote_out, remote_err):
        print(stream.read().decode('utf-8'))

In [69]:
# Start worker i on host i. start_worker reads the remote output to the end,
# so each worker finishes before the next one is started.
for idx, ssh in enumerate(sshs):
    ip_address = ip_addresses[idx]
    start_worker(ssh, idx)
    # stdin, stdout, stderr = ssh.exec_command(f'python ./worker.py {idx}')

Worker 0 started
Queue url: https://sqs.us-east-1.amazonaws.com/469282757936/queue0

Traceback (most recent call last):
  File "/home/ec2-user/./worker.py", line 33, in <module>
    matrix_b = literal_eval(matrix_b)
  File "/usr/lib64/python3.9/ast.py", line 62, in literal_eval
    node_or_string = parse(node_or_string, mode='eval')
  File "/usr/lib64/python3.9/ast.py", line 50, in parse
    return compile(source, filename, mode, flags,
  File "<unknown>", line 1
    [[12,10,13,16,14,15,15,5,19,5,0,14,0,0,16,2,11,3,13,17,7,14,14,12,10,14,5,16,6,9,17,2,1,17,14,17,2,6,3,3,10,18,14,8,3,12,0,10,,1,12],[13,2,13,7,10,9,11,14,13,11,3,15,0,14,5,13,9,16,16,17,19,10,15,14,17,13,10,7,3,18,4,0,10,18,10,8,12,9,10,5,10,15,15,2,17,10,9,16,19,7],[18,4,19,15,5,5,16,4,6,0,16,17,15,3,12,7,5,1,12,7,10,13,8,11,,6,11,10,10,14,18,11,18,7,13,11,12,13,7,16,8,6,6,3,19,12,8,17,6,,1,0],[17,1,0,8,13,4,10,4,10,6,6,18,10,3,1,1,6,12,12,0,10,8,11,15,12,13,1,7,4,4,13,4,2,19,12,1,6,16,19,13,3,13,7,11,10,7,10,6,,2,3],[7,

### ADDITION

In [64]:
# Side length of the square operand matrices generated in the next cell.
ARRAY_SIZE = 1000

In [65]:
# Two random ARRAY_SIZE x ARRAY_SIZE integer matrices used as the operands.
arr, arr1 = generate_array(ARRAY_SIZE,ARRAY_SIZE)

## MANUAL COMPUTATION

In [57]:
# Local baseline: element-wise sum computed in pure Python. matrix_add prints
# its own elapsed time.
addition = matrix_add(arr, arr1)
# addition

Computation time 0:00:00.169016


In [66]:
# Side length of each square tile. split_row asserts that it divides both
# matrix dimensions evenly; the commented-out formula would give 166 for the
# current constants, which does not divide 1000.
INT_ARRAY_SIZE = 50 #int(ARRAY_SIZE/INSTANCE_SIZE)
# Tile both operands identically so tile i of s_arr pairs with tile i of s_arr1.
s_arr = split_row(arr, INT_ARRAY_SIZE, INT_ARRAY_SIZE)
s_arr1 = split_row(arr1, INT_ARRAY_SIZE, INT_ARRAY_SIZE)

In [67]:
def reformat_data(data):
    """Return ``str(data)`` with every newline character removed."""
    text = str(data)
    return ''.join(text.split('\n'))

# Start the wall-clock timer for the distributed run; the collection cell
# reports the elapsed time against it.
start_time = datetime.datetime.now()

# Tile indices, dealt out in contiguous chunks: chunk k goes to worker k.
data = np.arange(0, len(s_arr))
for worker_id, chunk in enumerate(np.array_split(data, INSTANCE_SIZE)):
    # Iterate the chunk directly so an empty chunk (fewer tiles than workers)
    # is skipped instead of failing in min()/max(). tolist() yields plain
    # Python ints, keeping the serialised tuple free of NumPy scalar reprs.
    for idx in chunk.tolist():
        body = str((idx, (reformat_data(s_arr[idx]), reformat_data(s_arr1[idx]))))
        send_message_to_queue(
            sqs,
            f'{QUEUE_NAME}{worker_id}',
            [{"Id": f"{idx+1}", "MessageBody": body}],
        )


In [39]:
# Accumulates the decoded result tiles in tile-index order.
final_res = []
def format_data(data):
    """Turn a whitespace-separated array string into a comma-separated one.

    Any run of whitespace between values becomes a single comma, and
    whitespace directly inside a bracket is dropped, so padding of any width
    yields a string ``literal_eval`` can parse.

    :param data: Text such as ``'[[ 1 10] [12  3]]'``.
    :return: Text such as ``'[[1,10],[12,3]]'``.
    """
    import re

    text = data.strip()
    text = re.sub(r'\[\s+', '[', text)   # padding after an opening bracket
    text = re.sub(r'\s+\]', ']', text)   # padding before a closing bracket
    return re.sub(r'\s+', ',', text)     # every remaining gap separates values

# Short polling (WaitTimeSeconds=0) can return nothing even though results are
# pending or still being computed, so keep polling each result queue until
# every expected tile has arrived or the queue has been empty this many times
# in a row.
MAX_EMPTY_POLLS = 30

for worker_id, chunk in enumerate(np.array_split(data, INSTANCE_SIZE)):
    compute_res = []
    empty_polls = 0
    while len(compute_res) < len(chunk) and empty_polls < MAX_EMPTY_POLLS:
        bodies = get_messages_from_queue(INSTANCE_SIZE, f'result-queue-{worker_id}', 1)
        if bodies:
            # Each body is the text of an (index, result-string) tuple.
            compute_res.extend(literal_eval(body) for body in bodies)
            empty_polls = 0
        else:
            empty_polls += 1
            time.sleep(1)

    if len(compute_res) < len(chunk):
        print(f'Warning: received {len(compute_res)} of {len(chunk)} results from result-queue-{worker_id}')

    # Order the tiles by their index before decoding the payloads.
    compute_res.sort()
    final_res.extend(literal_eval(format_data(res)) for (index, res) in compute_res)


print('Computation time', datetime.datetime.now() - start_time)
final_res


Computation time 0:06:28.474720


[[[296, 502, 422, 347, 364],
  [258, 408, 400, 300, 240],
  [165, 338, 259, 206, 230],
  [126, 332, 228, 199, 222],
  [240, 481, 432, 332, 275]],
 [[73, 162, 204, 235, 261],
  [41, 103, 158, 195, 148],
  [89, 232, 252, 253, 278],
  [53, 132, 180, 236, 177],
  [89, 201, 158, 234, 190]],
 [[221, 194, 161, 196, 114],
  [196, 134, 76, 254, 96],
  [212, 166, 122, 214, 74],
  [187, 152, 78, 180, 95],
  [237, 216, 173, 214, 82]],
 [[134, 152, 223, 187, 102],
  [199, 335, 390, 187, 212],
  [293, 388, 386, 294, 309],
  [201, 230, 201, 210, 212],
  [223, 222, 240, 193, 183]],
 [[133, 165, 281, 295, 220],
  [282, 237, 432, 384, 330],
  [85, 126, 190, 156, 128],
  [179, 171, 351, 317, 266],
  [171, 83, 282, 154, 197]],
 [[237, 420, 261, 471, 342],
  [208, 304, 139, 319, 243],
  [177, 333, 234, 356, 306],
  [158, 482, 336, 486, 309],
  [206, 275, 145, 313, 252]],
 [[259, 106, 199, 200, 307],
  [212, 155, 193, 125, 237],
  [271, 126, 242, 63, 221],
  [399, 131, 320, 159, 367],
  [282, 93, 240, 58, 2

In [19]:
#create instance
#configure instance
#create matrix
#split matrix
#send matrix to the queue
#read matrix from the queue on the instance created
#compute matrix
#send result to base


In [71]:
# Stage everything in the working directory, commit, and push to origin/main.
# NOTE(review): get_key_pairs writes the private key to labsuser.pem in the
# working directory; `git add .` will stage it unless it is git-ignored —
# confirm before pushing.
!git add .
!git commit -am "Updated implementation"
!git push --set-upstream origin main

[main 225dd4d] Fixed result gathering from the queue
 3 files changed, 470 insertions(+), 141 deletions(-)
 create mode 100644 .ipynb_checkpoints/untitled-checkpoint.py
 create mode 100644 untitled.py
Enumerating objects: 8, done.
Counting objects: 100% (8/8), done.
Delta compression using up to 8 threads
Compressing objects: 100% (5/5), done.
Writing objects: 100% (5/5), 39.42 KiB | 3.94 MiB/s, done.
Total 5 (delta 2), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (2/2), completed with 2 local objects.[K
To https://github.com/airscholar/MLCloudComputing-python.git
 + 8be741a...225dd4d main -> main (forced update)
branch 'main' set up to track 'origin/main'.
