In [1]:
# !pip install paramiko
# !pip install scp

In [1]:
import os
import boto3
import subprocess
import numpy as np
import time
import datetime
import paramiko
import io
from scp import SCPClient, SCPException
import sys
from ast import literal_eval

In [2]:
np.set_printoptions(threshold=sys.maxsize)

In [129]:
INSTANCE_SIZE = 6

In [130]:
client = boto3.client('ec2', region_name='us-east-1')
# Create SQS client
sqs = boto3.resource('sqs')

In [316]:
def get_default_security_group(client, key_name):
    #extract key_name attribute from the security groups returned
    response = [group[key_name] for group in client.describe_security_groups()['SecurityGroups'] if group['GroupName'] == 'default']

    return response

def get_key_pairs(client, removeExisting=False):
    if removeExisting:
        client.delete_key_pair(KeyName='airscholar-key')

    keypairs = client.describe_key_pairs()['KeyPairs']
    keypair = list(filter(lambda x: x['KeyName'] == 'airscholar-key', keypairs))

    if not keypair:
        keypair = client.create_key_pair(KeyName='airscholar-key')
        f = io.StringIO(keypair['KeyMaterial'])
        data = f.read()
        file = open('labsuser.pem', 'w')
        file.write(data)
        file.close()
    else:
        keypair = keypair[0]

    return keypair

def launch_new_instance(client, keypair, count):
    response = client.run_instances(
        ImageId='ami-05723c3b9cf4bf4ff',
        InstanceType='t2.micro',
        KeyName=keypair,
        MaxCount=count,
        MinCount=count,
        Monitoring={
            'Enabled': True
        },
        SecurityGroupIds= get_default_security_group(client, key_name='GroupId')
    )
    ec2_inst_ids = [res["InstanceId"] for res in response]
    waiter = client.get_waiter('instance_running')
    waiter.wait(InstanceIds=[ec2_inst_ids])
    return ec2_inst_ids

def prepare_instances(client, keypair, count):
    ec2 = boto3.resource('ec2')
    ec2_inst_ids = []

    deployed_count = 0
    for instance in ec2.instances.all():
        deployed_count += 1
        if instance.state['Name'] == 'running':
            ec2_inst_ids.append(instance.id)

    if deployed_count < count:
        ec2_inst_ids.append(launch_new_instance(client, keypair, (count - deployed_count)))

    if not ec2_inst_ids:
        ec2_inst_ids.append(launch_new_instance(client, keypair, count))

    return ec2, ec2_inst_ids

def configure_ssh():
    sshs = []
    for count in range(0, INSTANCE_SIZE):
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        sshs.append(ssh)
    return sshs

def ssh_connect_with_retry(ssh, ip_address, retries):
    if retries > 3:
        return False
    f = open('labsuser.pem', 'r')
    privkey = paramiko.RSAKey.from_private_key(f)
    # print(privkey)
    interval = 5
    try:
        retries += 1
        print('SSH into the instance: {}'.format(ip_address))
        ssh.connect(hostname=ip_address,
                    username='ec2-user', pkey=privkey)
        return True
    except Exception as e:
        print(e)
        time.sleep(interval)
        print('Retrying SSH connection to {}'.format(ip_address))
        ssh_connect_with_retry(ssh, ip_address, retries)

def ssh_disconnect(ssh):
        """Close ssh connection."""
        if ssh:
            ssh.close()

def get_public_address(ec2, instance_id):
    # ec2 = boto3.resource('ec2', region_name='us-east-1')
    instance = ec2.Instance(id=instance_id)
    instance.wait_until_running()
    current_instance = list(ec2.instances.filter(InstanceIds=[instance_id]))
    ip_address = current_instance[0].public_ip_address
    return ip_address

def get_queue(sqs, queue_name):
    # Get the queue. This returns an SQS.Queue instance
    # There is no queue, create a new SQS queue
    attributes = {
        'DelaySeconds': '0',
        'MessageRetentionPeriod': '86400',
        "ReceiveMessageWaitTimeSeconds": "0"
    }

    for idx in range(INSTANCE_SIZE):
        sqs.create_queue(
            QueueName=f"{queue_name}{idx}",
            Attributes=attributes
        )

        sqs.create_queue(
            QueueName=f'result-queue-{idx}',
            Attributes=attributes
        )

def send_message_to_queue(sqs, queue_name, message):
    queue = sqs.get_queue_by_name(QueueName=queue_name)

    # Send message to SQS queue
    response = queue.send_messages(
        Entries=message
    )
    return response

def install_required_packages(ssh):
    stdin, stdout, stderr = ssh.exec_command("sudo yum install pip -y && sudo pip install numpy boto3 AST")
    return stdout, stderr

def get_messages_from_queue(instance_size):
    messages = []

    for idx in range(0, instance_size):
        queue_name = f'result-queue-{idx}'
        print(queue_name)
        sqs = boto3.resource('sqs')
        queue = sqs.get_queue_by_name(QueueName=queue_name)

        for message in queue.receive_messages():
            messages.append(message.body)

    return messages

def split_row(array, nrows, ncols):
    """Split a matrix into sub-matrices."""

    r, h = array.shape
    return (array.reshape(h//nrows, nrows, -1, ncols)
                 .swapaxes(1, 2)
                 .reshape(-1, nrows, ncols))

def split_col(array, nrows, ncols):
    """Split a matrix into sub-matrices."""
    r, h = array.shape
    return [np.vsplit(i, 5) for i in np.hsplit(arr1, r)]

def generate_array(nrows, ncols):
    arr = np.random.randint(10, size=(nrows, ncols))
    # # print('arr 1:\n', arr)
    # arr = split_row(arr, 1, split_size)
    arr1 = np.random.randint(10, size=(nrows, ncols))
    # print('arr 2:\n', arr1)
    # arr1 = split_col(arr1, 1, split_size)

    return arr, arr1

def upload_file_to_s3(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True

def bulk_upload(scp, filepaths: list[str], remote_path, host):
        """
        Upload multiple files to a remote directory.

        :param List[str] filepaths: List of local files to be uploaded.
        """
        try:
            scp.put(
                filepaths,
                remote_path=remote_path,
                recursive=True
            )
            print(f"Finished uploading {len(filepaths)} files to {remote_path} on {host}")
        except SCPException as e:
            print(f"SCPException during bulk upload: {e}")
        except Exception as e:
            print(f"Unexpected exception during bulk upload: {e}")

def configure_aws_access_for_ssh(ssh, ip_address):
    """
    This function extracts the AWS configuration you have locally and push to the server
    :param ssh:ssh object
    :return:
    """
    output = subprocess.getoutput("cat ~/.aws/credentials")
    ssh.exec_command(f'mkdir ~/.aws && touch ~/.aws/credentials')
    ssh.exec_command(f"echo '{output}' > ~/.aws/credentials")
    print(f'SSH AWS configuration done for {ip_address}')

def matrix_dot_product(matrix_a, matrix_b):
    start_time = datetime.datetime.now()
    result = []
    for i in range(len(matrix_a)):
        row = []
        for j in range(len(matrix_b[0])):
            sum = 0
            for k in range(len(matrix_b)):
                sum += matrix_a[i][k] * matrix_b[k][j]
            row.append(sum)
        result.append(row)
    print('Computation time', datetime.datetime.now() - start_time)

    return result

def matrix_add(matrix_1, matrix_2):
    start_time = datetime.datetime.now()
    result = []
    for idx_row in range(0, len(matrix_1)):
        row = matrix_1[idx_row]
        row1 = matrix_2[idx_row]
        cols = []
        for idx_col in range(0, len(row)):
            cols.append(row[idx_col] + row1[idx_col])
        result.append(cols)
    print('Computation time', datetime.datetime.now() - start_time)
    return result

In [262]:
QUEUE_NAME = 'queue'

In [263]:
get_queue(sqs, QUEUE_NAME)

In [269]:
sshs = configure_ssh() 
keypair = get_key_pairs(client, False)
ec2, instances = prepare_instances(client, keypair['KeyName'], INSTANCE_SIZE)
ip_addresses = [get_public_address(ec2, instance) for instance in instances]

for idx in range(0, len(sshs)):
    ssh = sshs[idx]
    ip_address = ip_addresses[idx]
    ssh_connect_with_retry(ssh, ip_address, 0)

SSH into the instance: 44.211.190.176
SSH into the instance: 54.90.71.247
SSH into the instance: 44.204.43.180
SSH into the instance: 3.84.117.248
SSH into the instance: 54.157.61.34
SSH into the instance: 18.206.158.101


In [265]:
# ssh_disconnect(ssh)
for idx in range(len(sshs)):
    ssh = sshs[idx]
    ip_address = ip_addresses[idx]
    stdout, stderr = install_required_packages(ssh)
    print(stdout.read().decode('utf-8'))
    print(stderr.read().decode('utf-8'))
    configure_aws_access_for_ssh(ssh, ip_address)

Updating Subscription Management repositories.
Unable to read consumer identity

This system is not registered with an entitlement server. You can use subscription-manager to register.

Red Hat Enterprise Linux 9 for x86_64 - AppStre  23 kB/s | 4.5 kB     00:00    
Red Hat Enterprise Linux 9 for x86_64 - BaseOS   68 kB/s | 4.1 kB     00:00    
Red Hat Enterprise Linux 9 Client Configuration  31 kB/s | 2.0 kB     00:00    
Package python3-pip-21.2.3-6.el9.noarch is already installed.
Dependencies resolved.
Nothing to do.
Complete!
Collecting numpy
  Using cached numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Collecting boto3
  Using cached boto3-1.26.28-py3-none-any.whl (132 kB)
Collecting AST
  Using cached AST-0.0.2.tar.gz (19 kB)
  Using cached AST-0.0.1.tar.gz (19 kB)

    ERROR: Command errored out with exit status 1:
     command: /usr/bin/python3 -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-vnhgqo51/ast_95bf36d

In [274]:
from files.filesss import fetch_local_files
for idx in range(len(sshs)):
    ssh = sshs[idx]
    ip_address = ip_addresses[idx]
    scp = SCPClient(ssh.get_transport())
    bulk_upload(scp, fetch_local_files('./worker'), '~', ip_address)

test
Finished uploading 3 files to ~ on 44.211.190.176
test
Finished uploading 3 files to ~ on 54.90.71.247
test
Finished uploading 3 files to ~ on 44.204.43.180
test
Finished uploading 3 files to ~ on 3.84.117.248
test
Finished uploading 3 files to ~ on 54.157.61.34
test
Finished uploading 3 files to ~ on 18.206.158.101


## TODO: START THE WORKER ON ALL THE INSTANCES

In [266]:
stdin, stdout, stderr = sshs[0].exec_command('ls')
print(stdout.read().decode('utf-8'))

helper.py
queue_helper.py
worker.py



In [280]:
def start_worker(ssh, worker_id):
    stdin, stdout, stderr = ssh.exec_command(f'python ./worker.py {worker_id}')
    # result = stdout.read().decode('utf-8')
    # return result

In [276]:
stdin, stdout, stderr = sshs[1].exec_command(f'python ./worker.py 1')
# print(stdout.read().decode('utf-8'))
# print(stderr.read().decode('utf-8'))

In [282]:
for idx in range(len(sshs)):
    ssh = sshs[idx]
    ip_address = ip_addresses[idx]
    start_worker(ssh, idx)
# sqs = boto3.resource('sqs')
# sqs
# response = send_message_to_queue(sqs, QUEUE_NAME, "Hi there! This is my queue message4!", 1, 0)
# response = send_message_to_queue(sqs, QUEUE_NAME, 'X', 1, 0)

### ADDITION

In [148]:
int(1000/6)

166

In [287]:
ARRAY_SIZE = 1000

In [288]:
arr, arr1 = generate_array(ARRAY_SIZE,ARRAY_SIZE)

In [289]:
INT_ARRAY_SIZE = 200 #int(ARRAY_SIZE/INSTANCE_SIZE)
s_arr = split_row(arr, INT_ARRAY_SIZE, INT_ARRAY_SIZE)
s_arr1 = split_row(arr1, INT_ARRAY_SIZE, INT_ARRAY_SIZE)

In [68]:
# for row in range(0, INT_ARRAY_SIZE):
#     print(s_arr[row])
#     break

In [18]:
# iter = len(s_arr)/100

In [55]:

# b = str(b).replace('\n', ',').replace(' ', '')

In [57]:
# for y in range(0, 20)
# for x in range(0, iter):
# a = arr[0]
# b = arr[1]
# a = str(a).replace(' ', ',').replace('\n', '')
# b = str(b).replace('\n', ',').replace(' ', '')
    
    # stdin, stdout, stderr = ssh.exec_command(f'python dot_prod.py {a} {b}')
    # result = stdout.read().decode("utf-8").replace('\n', '')
    # literal_eval(result)[0][0]
    # print(stderr.read().decode("utf-8"))

25

In [290]:
def reformat_data(data):
    return str(data).replace('\n', '')
# a = s_arr[0]
# b = s_arr[1]
# a = str(a).replace(' ', ',').replace('\n', '')
# b = str(b).replace(' ', ',').replace('\n', '')
ROW_LENGTH = int(len(s_arr)/INSTANCE_SIZE)
messages = []
for index in range(24):
    messages.append([{"Id": f"{idx+1}", "MessageBody": str((idx, (reformat_data(s_arr[idx]), reformat_data(s_arr1[idx])))) }
                for idx in range(1 * index , 1 * (index+1))])
    # if index <4:
    #     print(messages)
# messages1 = [{"Id": f"{idx}", "MessageBody": str((reformat_data(s_arr[idx]), reformat_data(s_arr1[idx]))) } for idx in range(5, 10)]
# messages2 = [{"Id": f"{idx}", "MessageBody": str((reformat_data(s_arr[idx]), reformat_data(s_arr1[idx]))) } for idx in range(10, 15)]
# messages3 = [{"Id": f"{idx}", "MessageBody": str((reformat_data(s_arr[idx]), reformat_data(s_arr1[idx]))) } for idx in range(15, 20)]
# messages4 = [{"Id": f"{idx}", "MessageBody": str((reformat_data(s_arr[idx]), reformat_data(s_arr1[idx]))) } for idx in range(20, 25)]

In [212]:
len(messages)

24

In [213]:
messages[3]

[{'Id': '4',
  'MessageBody': "(3, ('[[6 1 6 8 7 4 0 3 9 0 3 4 0 2 2 5 4 4 8 4 4 5 6 3 7 5 1 7 0 4 5 9 9 1 0 6  3 7 0 6 7 7 5 4 2 1 4 3 7 0 7 7 3 4 4 3 4 5 8 4 4 7 0 0 4 9 5 9 0 7 1 6  7 6 0 3 7 7 4 0 5 0 2 2 2 7 6 8 0 2 3 7 9 4 7 9 8 1 5 0 2 5 5 7 8 0 2 4  5 8 2 0 6 2 4 3 5 3 6 4 5 3 4 6 3 5 8 8 1 8 8 0 2 8 9 7 4 2 0 7 0 4 4 1  9 6 4 9 9 0 6 6 2 2 9 3 4 4 5 9 9 0 1 4 9 8 6 8 1 5 8 1 7 2 7 6 3 2 2 9  1 4 0 6 9 5 4 8 9 2 1 0 5 2 0 1 0 1 6 3] [9 0 0 9 8 3 5 4 1 8 3 2 0 7 0 2 2 9 1 4 6 6 2 3 4 7 4 4 0 5 1 3 3 8 8 7  0 1 5 9 0 7 8 5 7 8 6 3 3 7 6 0 2 9 0 0 1 1 5 7 8 2 8 5 4 3 7 3 8 2 9 0  0 7 1 5 4 8 7 9 4 5 2 3 7 3 2 6 3 5 3 9 7 5 7 9 6 6 0 2 9 4 1 0 9 6 6 8  0 6 7 2 6 4 4 7 1 3 6 8 5 3 6 1 5 7 8 2 6 1 8 1 2 1 1 6 0 1 0 6 5 1 7 2  1 0 5 6 3 9 4 2 5 7 3 3 8 6 7 9 2 8 5 4 3 8 8 1 3 0 8 5 8 6 9 1 1 7 3 5  1 0 8 7 9 6 1 3 0 5 1 3 0 6 4 8 3 6 7 0] [1 5 7 0 7 6 0 0 5 3 3 2 8 0 5 8 0 6 9 8 9 0 2 3 1 3 5 9 8 2 7 3 9 3 3 5  5 2 4 8 6 4 0 3 8 5 9 9 7 0 8 3 4 6 3 5 3 3 1 8 4 1 5 2 7 5 6 1 3 3 8 8  3

In [291]:
[send_message_to_queue(sqs, 'queue0', messages[idx]) for idx in range(0, 4)]
[send_message_to_queue(sqs, 'queue1', messages[idx]) for idx in range(4, 8)]
[send_message_to_queue(sqs, 'queue2', messages[idx]) for idx in range(8, 12)]
[send_message_to_queue(sqs, 'queue3', messages[idx]) for idx in range(12, 16)]
[send_message_to_queue(sqs, 'queue4', messages[idx]) for idx in range(16, 20)]
[send_message_to_queue(sqs, 'queue5', messages[idx]) for idx in range(20, 24)]

    # print(f'queue{int((idx * INSTANCE_SIZE) % INSTANCE_SIZE)}')
        # send_message_to_queue(sqs, f'queue{int(idx/2)}', messages[idx])

[{'Successful': [{'Id': '21',
    'MessageId': 'e8f61493-df44-433d-bf28-1cec1d21f98c',
    'MD5OfMessageBody': '9883dff93b795e3097e330f5365f9af5'}],
  'ResponseMetadata': {'RequestId': '190d5549-2e9b-535c-9b2e-095bc3209967',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amzn-requestid': '190d5549-2e9b-535c-9b2e-095bc3209967',
    'date': 'Tue, 13 Dec 2022 21:15:39 GMT',
    'content-type': 'text/xml',
    'content-length': '468'},
   'RetryAttempts': 0}},
 {'Successful': [{'Id': '22',
    'MessageId': '31afcc05-bc87-4d66-aeaf-317360ff6177',
    'MD5OfMessageBody': '7186bcbc8f78c14eedcc3cc25345c2d0'}],
  'ResponseMetadata': {'RequestId': '460b9aac-41e1-51b5-82bc-c6beac5aeb8e',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amzn-requestid': '460b9aac-41e1-51b5-82bc-c6beac5aeb8e',
    'date': 'Tue, 13 Dec 2022 21:15:39 GMT',
    'content-type': 'text/xml',
    'content-length': '468'},
   'RetryAttempts': 0}},
 {'Successful': [{'Id': '23',
    'MessageId': 'db3af912-8066-4925-8499-0b9a

In [317]:
compute_res = get_messages_from_queue(INSTANCE_SIZE)

result-queue-0
result-queue-1
result-queue-2
result-queue-3
result-queue-4
result-queue-5


In [322]:
compute_res = [literal_eval(compute) for compute in compute_res]

In [332]:
len(compute_res)

6

In [204]:
# # sqs = boto3.resource('sqs')
# queue = sqs.(QueueName='airscholar-queue')
# for message in sqs.receive_message(
#             MaxNumberOfMessages=10):
#         # process message body
#         body = json.loads(message.body)
#         print(body)


In [143]:
# message = response['Messages']
# receipt_handle = message['ReceiptHandle']

# # Delete received message from queue
# sqs.delete_message(
#     QueueUrl=queue_url,
#     ReceiptHandle=receipt_handle
# )


In [196]:
# print(json.dumps(message, indent=4))

In [45]:
# # ec2_inst_id
# bucketName = 'airscholar-mlbd-bucket'

In [146]:
# s3 = boto3.client('s3')

In [None]:
# response = s3.get_object(Bucket=bucketName,
#                          Key='data.json')
# print("Done, response body:")
# print(response['Body'].read())

In [None]:
#create instance
#configure instance
#create matrix
#split matrix
#send matrix to the queue
#read matrix from the queue on the instance created
#compute matrix
#send result to base


In [59]:
!git add .
!git commit -am "Update infrastructure implementation"
!git push --set-upstream origin main

[main d2098e2] Update infrastructure implementation
 1 file changed, 37 insertions(+), 3 deletions(-)
Enumerating objects: 35, done.
Counting objects: 100% (35/35), done.
Delta compression using up to 8 threads
Compressing objects: 100% (28/28), done.
Writing objects: 100% (31/31), 12.27 KiB | 6.14 MiB/s, done.
Total 31 (delta 5), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (5/5), completed with 1 local object.[K
To https://github.com/airscholar/MLCloudComputing-python.git
   3576c57..d2098e2  main -> main
branch 'main' set up to track 'origin/main'.
